From 8aa5585ff4974b8f7ed71d684af48432b2bc6929 Mon Sep 17 00:00:00 2001 From: Ariadne Conill Date: Mon, 28 Jun 2021 08:25:59 -0600 Subject: cpio: add support for --ignore-devno like GNU cpio The --ignore-devno option is used to set device numbers to (0, 0). This can be useful in verifying whether a CPIO archive is reproducible. function old new delta cpio_o 922 961 +39 .rodata 78407 78422 +15 bbconfig_config_bz2 6161 6167 +6 packed_usage 25770 25764 -6 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/1 up/down: 60/-6) Total: 54 bytes Signed-off-by: Ariadne Conill Signed-off-by: Denys Vlasenko --- archival/cpio.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/archival/cpio.c b/archival/cpio.c index f525419b8..daf6cffc9 100644 --- a/archival/cpio.c +++ b/archival/cpio.c @@ -38,6 +38,13 @@ //config: depends on FEATURE_CPIO_O //config: help //config: Passthrough mode. Rarely used. +//config: +//config:config FEATURE_CPIO_IGNORE_DEVNO +//config: bool "Support --ignore-devno like GNU cpio" +//config: default y +//config: depends on FEATURE_CPIO_O && LONG_OPTS +//config: help +//config: Optionally ignore device numbers when creating archives. 
//applet:IF_CPIO(APPLET(cpio, BB_DIR_BIN, BB_SUID_DROP)) @@ -75,6 +82,9 @@ //usage: "\n -R USER[:GRP] Set owner of created files" //usage: "\n -L Dereference symlinks" //usage: "\n -0 NUL terminated input" +//usage: IF_FEATURE_CPIO_IGNORE_DEVNO( +//usage: "\n --ignore-devno" +//usage: ) /* GNU cpio 2.9 --help (abridged): @@ -162,11 +172,13 @@ enum { IF_FEATURE_CPIO_P(OPTBIT_PASSTHROUGH,) IF_LONG_OPTS( OPTBIT_QUIET ,) IF_LONG_OPTS( OPTBIT_2STDOUT ,) + IF_FEATURE_CPIO_IGNORE_DEVNO(OPTBIT_IGNORE_DEVNO,) OPT_CREATE = IF_FEATURE_CPIO_O((1 << OPTBIT_CREATE )) + 0, OPT_FORMAT = IF_FEATURE_CPIO_O((1 << OPTBIT_FORMAT )) + 0, OPT_PASSTHROUGH = IF_FEATURE_CPIO_P((1 << OPTBIT_PASSTHROUGH)) + 0, OPT_QUIET = IF_LONG_OPTS( (1 << OPTBIT_QUIET )) + 0, OPT_2STDOUT = IF_LONG_OPTS( (1 << OPTBIT_2STDOUT )) + 0, + OPT_IGNORE_DEVNO = IF_FEATURE_CPIO_IGNORE_DEVNO((1 << OPTBIT_IGNORE_DEVNO)) + 0, }; #define OPTION_STR "it0uvdmLF:R:" @@ -304,6 +316,11 @@ static NOINLINE int cpio_o(void) } } +#if ENABLE_FEATURE_CPIO_IGNORE_DEVNO + if (option_mask32 & OPT_IGNORE_DEVNO) + st.st_dev = st.st_rdev = 0; +#endif + bytes += printf("070701" "%08X%08X%08X%08X%08X%08X%08X" "%08X%08X%08X%08X" /* GNU cpio uses uppercase hex */ @@ -379,6 +396,9 @@ int cpio_main(int argc UNUSED_PARAM, char **argv) "null\0" No_argument "0" "quiet\0" No_argument "\xff" "to-stdout\0" No_argument "\xfe" +#if ENABLE_FEATURE_CPIO_IGNORE_DEVNO + "ignore-devno\0" No_argument "\xfd" +#endif ; #endif -- cgit v1.2.3-55-g6feb From 836b79211df3aeaba1b8b65c6db5ee6193172cc0 Mon Sep 17 00:00:00 2001 From: Ariadne Conill Date: Mon, 28 Jun 2021 08:31:23 -0600 Subject: cpio: add support for --renumber-inodes like GNU cpio The --renumber-inodes option renumbers the inodes starting from 1, so that the sequence of inodes is always stable. This helps with reproducibility. 
function old new delta cpio_o 961 1045 +84 .rodata 78422 78440 +18 bbconfig_config_bz2 6168 6164 -4 packed_usage 25764 25756 -8 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/2 up/down: 102/-12) Total: 90 bytes Signed-off-by: Ariadne Conill Signed-off-by: Denys Vlasenko --- archival/cpio.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/archival/cpio.c b/archival/cpio.c index daf6cffc9..7149782d7 100644 --- a/archival/cpio.c +++ b/archival/cpio.c @@ -45,6 +45,13 @@ //config: depends on FEATURE_CPIO_O && LONG_OPTS //config: help //config: Optionally ignore device numbers when creating archives. +//config: +//config:config FEATURE_CPIO_RENUMBER_INODES +//config: bool "Support --renumber-inodes like GNU cpio" +//config: default y +//config: depends on FEATURE_CPIO_O && LONG_OPTS +//config: help +//config: Optionally renumber inodes when creating archives. //applet:IF_CPIO(APPLET(cpio, BB_DIR_BIN, BB_SUID_DROP)) @@ -85,6 +92,9 @@ //usage: IF_FEATURE_CPIO_IGNORE_DEVNO( //usage: "\n --ignore-devno" //usage: ) +//usage: IF_FEATURE_CPIO_RENUMBER_INODES( +//usage: "\n --renumber-inodes" +//usage: ) /* GNU cpio 2.9 --help (abridged): @@ -173,18 +183,21 @@ enum { IF_LONG_OPTS( OPTBIT_QUIET ,) IF_LONG_OPTS( OPTBIT_2STDOUT ,) IF_FEATURE_CPIO_IGNORE_DEVNO(OPTBIT_IGNORE_DEVNO,) + IF_FEATURE_CPIO_RENUMBER_INODES(OPTBIT_RENUMBER_INODES,) OPT_CREATE = IF_FEATURE_CPIO_O((1 << OPTBIT_CREATE )) + 0, OPT_FORMAT = IF_FEATURE_CPIO_O((1 << OPTBIT_FORMAT )) + 0, OPT_PASSTHROUGH = IF_FEATURE_CPIO_P((1 << OPTBIT_PASSTHROUGH)) + 0, OPT_QUIET = IF_LONG_OPTS( (1 << OPTBIT_QUIET )) + 0, OPT_2STDOUT = IF_LONG_OPTS( (1 << OPTBIT_2STDOUT )) + 0, OPT_IGNORE_DEVNO = IF_FEATURE_CPIO_IGNORE_DEVNO((1 << OPTBIT_IGNORE_DEVNO)) + 0, + OPT_RENUMBER_INODES = IF_FEATURE_CPIO_RENUMBER_INODES((1 << OPTBIT_RENUMBER_INODES)) + 0, }; #define OPTION_STR "it0uvdmLF:R:" struct globals { struct bb_uidgid_t owner_ugid; + 
ino_t next_inode; } FIX_ALIASING; #define G (*(struct globals*)bb_common_bufsiz1) void BUG_cpio_globals_too_big(void); @@ -218,6 +231,9 @@ static NOINLINE int cpio_o(void) struct inodes_s *next; struct name_s *names; struct stat st; +#if ENABLE_FEATURE_CPIO_RENUMBER_INODES + ino_t mapped_inode; +#endif }; struct inodes_s *links = NULL; @@ -272,6 +288,10 @@ static NOINLINE int cpio_o(void) l = xzalloc(sizeof(*l)); l->st = st; l->next = links; +#if ENABLE_FEATURE_CPIO_RENUMBER_INODES + if (option_mask32 & OPT_RENUMBER_INODES) + l->mapped_inode = ++G.next_inode; +#endif links = l; break; } @@ -290,6 +310,11 @@ static NOINLINE int cpio_o(void) free(line); continue; } +#if ENABLE_FEATURE_CPIO_RENUMBER_INODES + else if (option_mask32 & OPT_RENUMBER_INODES) { + st.st_ino = ++G.next_inode; + } +#endif } else { /* line == NULL: EOF */ next_link: if (links) { @@ -297,6 +322,10 @@ static NOINLINE int cpio_o(void) st = links->st; name = links->names->name; links->names = links->names->next; +#if ENABLE_FEATURE_CPIO_RENUMBER_INODES + if (links->mapped_inode) + st.st_ino = links->mapped_inode; +#endif /* GNU cpio is reported to emit file data * only for the last instance. Mimic that. 
*/ if (links->names == NULL) @@ -398,6 +427,9 @@ int cpio_main(int argc UNUSED_PARAM, char **argv) "to-stdout\0" No_argument "\xfe" #if ENABLE_FEATURE_CPIO_IGNORE_DEVNO "ignore-devno\0" No_argument "\xfd" +#endif +#if ENABLE_FEATURE_CPIO_RENUMBER_INODES + "renumber-inodes\0" No_argument "\xfc" #endif ; #endif -- cgit v1.2.3-55-g6feb From 15f7d618ea7f8c3a0277c98309268b709e20d77c Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 9 Nov 2021 13:51:22 +0100 Subject: which: add -a to help text function old new delta packed_usage 34075 34079 +4 Signed-off-by: Denys Vlasenko --- debianutils/which.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/debianutils/which.c b/debianutils/which.c index b9f1b92fd..23692dc6f 100644 --- a/debianutils/which.c +++ b/debianutils/which.c @@ -17,9 +17,10 @@ //kbuild:lib-$(CONFIG_WHICH) += which.o //usage:#define which_trivial_usage -//usage: "COMMAND..." +//usage: "[-a] COMMAND..." //usage:#define which_full_usage "\n\n" -//usage: "Locate COMMAND" +//usage: "Locate COMMAND\n" +//usage: "\n -a Show all matches" //usage: //usage:#define which_example_usage //usage: "$ which login\n" -- cgit v1.2.3-55-g6feb From 4bc9da10718df7ed9e992b1ddd2e80d53d894177 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 11:28:11 +0100 Subject: tls: P256: 64-bit optimizations function old new delta sp_256_proj_point_dbl_8 421 428 +7 sp_256_point_from_bin2x32 78 84 +6 sp_256_cmp_8 38 42 +4 sp_256_to_bin_8 28 31 +3 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 4/0 up/down: 20/0) Total: 20 bytes Signed-off-by: Denys Vlasenko --- include/platform.h | 2 + networking/tls_sp_c32.c | 114 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 101 insertions(+), 15 deletions(-) diff --git a/include/platform.h b/include/platform.h index 9e1fb047d..ad27bb31a 100644 --- a/include/platform.h +++ b/include/platform.h @@ -239,6 +239,7 @@ typedef uint64_t 
bb__aliased_uint64_t FIX_ALIASING; # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp)) # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p)) # define move_from_unaligned32(v, u32p) ((v) = *(bb__aliased_uint32_t*)(u32p)) +# define move_from_unaligned64(v, u64p) ((v) = *(bb__aliased_uint64_t*)(u64p)) # define move_to_unaligned16(u16p, v) (*(bb__aliased_uint16_t*)(u16p) = (v)) # define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v)) # define move_to_unaligned64(u64p, v) (*(bb__aliased_uint64_t*)(u64p) = (v)) @@ -250,6 +251,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING; # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long))) # define move_from_unaligned16(v, u16p) (memcpy(&(v), (u16p), 2)) # define move_from_unaligned32(v, u32p) (memcpy(&(v), (u32p), 4)) +# define move_from_unaligned64(v, u64p) (memcpy(&(v), (u64p), 8)) # define move_to_unaligned16(u16p, v) do { \ uint16_t __t = (v); \ memcpy((u16p), &__t, 2); \ diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 4d4ecdd74..d09f7e881 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -29,6 +29,20 @@ static void dump_hex(const char *fmt, const void *vp, int len) typedef uint32_t sp_digit; typedef int32_t signed_sp_digit; +/* 64-bit optimizations: + * if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff, + * then loads and stores can be done in 64-bit chunks. + * + * A narrower case is when arch is also little-endian (such as x86_64), + * then "LSW first", uint32[8] and uint64[4] representations are equivalent, + * and arithmetic can be done in 64 bits too. + */ +#if defined(__GNUC__) && defined(__x86_64__) +# define UNALIGNED_LE_64BIT 1 +#else +# define UNALIGNED_LE_64BIT 0 +#endif + /* The code below is taken from parts of * wolfssl-3.15.3/wolfcrypt/src/sp_c32.c * and heavily modified. @@ -58,6 +72,22 @@ static const sp_digit p256_mod[8] = { * r A single precision integer. 
* a Byte array. */ +#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff +static void sp_256_to_bin_8(const sp_digit* rr, uint8_t* a) +{ + int i; + const uint64_t* r = (void*)rr; + + sp_256_norm_8(rr); + + r += 4; + for (i = 0; i < 4; i++) { + r--; + move_to_unaligned64(a, SWAP_BE64(*r)); + a += 8; + } +} +#else static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a) { int i; @@ -71,6 +101,7 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a) a += 4; } } +#endif /* Read big endian unsigned byte array into r. * @@ -78,6 +109,21 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a) * a Byte array. * n Number of bytes in array to read. */ +#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff +static void sp_256_from_bin_8(sp_digit* rr, const uint8_t* a) +{ + int i; + uint64_t* r = (void*)rr; + + r += 4; + for (i = 0; i < 4; i++) { + uint64_t v; + move_from_unaligned64(v, a); + *--r = SWAP_BE64(v); + a += 8; + } +} +#else static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a) { int i; @@ -90,6 +136,7 @@ static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a) a += 4; } } +#endif #if SP_DEBUG static void dump_256(const char *fmt, const sp_digit* r) @@ -125,6 +172,20 @@ static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ +#if UNALIGNED_LE_64BIT +static signed_sp_digit sp_256_cmp_8(const sp_digit* aa, const sp_digit* bb) +{ + const uint64_t* a = (void*)aa; + const uint64_t* b = (void*)bb; + int i; + for (i = 3; i >= 0; i--) { + if (a[i] == b[i]) + continue; + return (a[i] > b[i]) * 2 - 1; + } + return 0; +} +#else static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b) { int i; @@ -140,6 +201,7 @@ static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b) } return 0; } +#endif /* Compare two numbers to determine if they are equal. 
* @@ -196,8 +258,6 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) ); return reg; #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) - /* x86_64 has no alignment restrictions, and is little-endian, - * so 64-bit and 32-bit representations are identical */ uint64_t reg; asm volatile ( "\n movq (%0), %3" @@ -294,8 +354,6 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) ); return reg; #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) - /* x86_64 has no alignment restrictions, and is little-endian, - * so 64-bit and 32-bit representations are identical */ uint64_t reg; asm volatile ( "\n movq (%0), %3" @@ -440,8 +498,6 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) r[15] = accl; memcpy(r, rr, sizeof(rr)); #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) - /* x86_64 has no alignment restrictions, and is little-endian, - * so 64-bit and 32-bit representations are identical */ const uint64_t* aa = (const void*)a; const uint64_t* bb = (const void*)b; uint64_t rr[8]; @@ -551,17 +607,32 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) } /* Shift number right one bit. Bottom bit is lost. */ -static void sp_256_rshift1_8(sp_digit* r, sp_digit* a, sp_digit carry) +#if UNALIGNED_LE_64BIT +static void sp_256_rshift1_8(sp_digit* rr, uint64_t carry) +{ + uint64_t *r = (void*)rr; + int i; + + carry = (((uint64_t)!!carry) << 63); + for (i = 3; i >= 0; i--) { + uint64_t c = r[i] << 63; + r[i] = (r[i] >> 1) | carry; + carry = c; + } +} +#else +static void sp_256_rshift1_8(sp_digit* r, sp_digit carry) { int i; - carry = (!!carry << 31); + carry = (((sp_digit)!!carry) << 31); for (i = 7; i >= 0; i--) { - sp_digit c = a[i] << 31; - r[i] = (a[i] >> 1) | carry; + sp_digit c = r[i] << 31; + r[i] = (r[i] >> 1) | carry; carry = c; } } +#endif /* Divide the number by 2 mod the modulus (prime). 
(r = a / 2 % m) */ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) @@ -570,7 +641,7 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) if (a[0] & 1) carry = sp_256_add_8(r, a, m); sp_256_norm_8(r); - sp_256_rshift1_8(r, r, carry); + sp_256_rshift1_8(r, carry); } /* Add two Montgomery form numbers (r = a + b % m) */ @@ -634,15 +705,28 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* } /* Shift the result in the high 256 bits down to the bottom. */ -static void sp_256_mont_shift_8(sp_digit* r, const sp_digit* a) +#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff +static void sp_256_mont_shift_8(sp_digit* rr) +{ + uint64_t *r = (void*)rr; + int i; + + for (i = 0; i < 4; i++) { + r[i] = r[i+4]; + r[i+4] = 0; + } +} +#else +static void sp_256_mont_shift_8(sp_digit* r) { int i; for (i = 0; i < 8; i++) { - r[i] = a[i+8]; + r[i] = r[i+8]; r[i+8] = 0; } } +#endif /* Mul a by scalar b and add into r. 
(r += a * b) */ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) @@ -800,7 +884,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ goto inc_next_word0; } } - sp_256_mont_shift_8(a, a); + sp_256_mont_shift_8(a); if (word16th != 0) sp_256_sub_8_p256_mod(a); sp_256_norm_8(a); @@ -820,7 +904,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ goto inc_next_word; } } - sp_256_mont_shift_8(a, a); + sp_256_mont_shift_8(a); if (word16th != 0) sp_256_sub_8_p256_mod(a); sp_256_norm_8(a); -- cgit v1.2.3-55-g6feb From 446d136109633c12d748d63e2034db238f77ef97 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 12:03:43 +0100 Subject: tls: tweak debug printout Signed-off-by: Denys Vlasenko --- networking/tls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/networking/tls.c b/networking/tls.c index 675ef4b3a..415952f16 100644 --- a/networking/tls.c +++ b/networking/tls.c @@ -1883,10 +1883,12 @@ static void process_server_key(tls_state_t *tls, int len) keybuf += 4; switch (t32) { case _0x03001d20: //curve_x25519 + dbg("got x25519 eccPubKey\n"); tls->flags |= GOT_EC_CURVE_X25519; memcpy(tls->hsd->ecc_pub_key32, keybuf, 32); break; case _0x03001741: //curve_secp256r1 (aka P256) + dbg("got P256 eccPubKey\n"); /* P256 point can be transmitted odd- or even-compressed * (first byte is 3 or 2) or uncompressed (4). 
*/ @@ -1899,7 +1901,6 @@ static void process_server_key(tls_state_t *tls, int len) } tls->flags |= GOT_EC_KEY; - dbg("got eccPubKey\n"); } static void send_empty_client_cert(tls_state_t *tls) -- cgit v1.2.3-55-g6feb From 26c85225229b0a439bcc66c8ee786d16f23be9ed Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 15:00:14 +0100 Subject: tls: P256: do not open-code copying of struct variables function old new delta sp_256_ecc_mulmod_8 536 534 -2 Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index d09f7e881..29dd04293 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -1361,13 +1361,13 @@ static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit* dump_512("t[1].y %s\n", t[1].y); dump_512("t[1].z %s\n", t[1].z); dbg("t[2] = t[%d]\n", y); - memcpy(&t[2], &t[y], sizeof(sp_point)); + t[2] = t[y]; /* struct copy */ dbg("t[2] *= 2\n"); sp_256_proj_point_dbl_8(&t[2], &t[2]); dump_512("t[2].x %s\n", t[2].x); dump_512("t[2].y %s\n", t[2].y); dump_512("t[2].z %s\n", t[2].z); - memcpy(&t[y], &t[2], sizeof(sp_point)); + t[y] = t[2]; /* struct copy */ n <<= 1; c--; -- cgit v1.2.3-55-g6feb From bbda85c74b7a53d8b2bb46f3b44d8f0932a6e95d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 15:06:57 +0100 Subject: tls: P256: remove constant-time trick in sp_256_proj_point_add_8 function old new delta sp_256_proj_point_add_8 576 544 -32 Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 79 +++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 29dd04293..3b0473036 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -1269,52 +1269,47 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* && (sp_256_cmp_equal_8(p->y, 
q->y) || sp_256_cmp_equal_8(p->y, t1)) ) { sp_256_proj_point_dbl_8(r, p); + return; } - else { - sp_point tp; - sp_point *v; - - v = r; - if (p->infinity | q->infinity) { - memset(&tp, 0, sizeof(tp)); - v = &tp; - } - *r = p->infinity ? *q : *p; /* struct copy */ - /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t1, t1, v->x /*, p256_mod, p256_mp_mod*/); - /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, v->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t4, t2, v->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); - /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(t3, t3, v->y /*, p256_mod, p256_mp_mod*/); - /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); - /* H = U2 - U1 */ - sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/); - /* R = S2 - S1 */ - sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/); - /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/); - /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(v->x, t4 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_sub_8(v->x, v->x, t5 /*, p256_mod*/); - sp_256_mont_dbl_8(t1, v->y /*, p256_mod*/); - sp_256_mont_sub_8(v->x, v->x, t1 /*, p256_mod*/); - /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ - sp_256_mont_sub_8(v->y, v->y, v->x /*, p256_mod*/); - sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_sub_8(v->y, v->y, t5 /*, p256_mod*/); + if (p->infinity || q->infinity) { + *r = p->infinity ? 
*q : *p; /* struct copy */ + return; } + + /* U1 = X1*Z2^2 */ + sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/); + /* U2 = X2*Z1^2 */ + sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); + /* S1 = Y1*Z2^3 */ + sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/); + /* S2 = Y2*Z1^3 */ + sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); + /* H = U2 - U1 */ + sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/); + /* R = S2 - S1 */ + sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/); + /* Z3 = H*Z1*Z2 */ + sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/); + /* X3 = R^2 - H^3 - 2*U1*H^2 */ + sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/); + sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/); + sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/); + /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ + sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/); + sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/); } /* Multiply the point by the scalar and return the result. 
-- cgit v1.2.3-55-g6feb From 4415f7bc06f1ee382bcbaabd86c3d7aca0b46d93 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 15:47:26 +0100 Subject: tls: P256: explain which functions use double-wide arrays, no code changes function old new delta sp_512to256_mont_reduce_8 - 243 +243 sp_256to512z_mont_mul_8 - 150 +150 sp_256to512z_mont_sqr_8 - 7 +7 sp_256_mont_sqr_8 7 - -7 sp_256_mont_mul_8 150 - -150 sp_256_mont_reduce_8 243 - -243 ------------------------------------------------------------------------------ (add/remove: 3/3 grow/shrink: 0/0 up/down: 400/-400) Total: 0 bytes Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 211 +++++++++++++----------------------------------- 1 file changed, 58 insertions(+), 153 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 3b0473036..74ded2cda 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -455,8 +455,10 @@ static void sp_256_sub_8_p256_mod(sp_digit* r) } #endif -/* Multiply a and b into r. (r = a * b) */ -static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) +/* Multiply a and b into r. (r = a * b) + * r should be [16] array (512 bits). + */ +static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__) sp_digit rr[15]; /* in case r coincides with a or b */ @@ -704,9 +706,11 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* } } -/* Shift the result in the high 256 bits down to the bottom. */ +/* Shift the result in the high 256 bits down to the bottom. + * High half is cleared to zeros. 
+ */ #if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff -static void sp_256_mont_shift_8(sp_digit* rr) +static void sp_512to256_mont_shift_8(sp_digit* rr) { uint64_t *r = (void*)rr; int i; @@ -717,7 +721,7 @@ static void sp_256_mont_shift_8(sp_digit* rr) } } #else -static void sp_256_mont_shift_8(sp_digit* r) +static void sp_512to256_mont_shift_8(sp_digit* r) { int i; @@ -728,7 +732,10 @@ static void sp_256_mont_shift_8(sp_digit* r) } #endif -/* Mul a by scalar b and add into r. (r += a * b) */ +/* Mul a by scalar b and add into r. (r += a * b) + * a = p256_mod + * b = r[0] + */ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) { // const sp_digit* a = p256_mod; @@ -857,11 +864,11 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) /* Reduce the number back to 256 bits using Montgomery reduction. * - * a A single precision number to reduce in place. + * a Double-wide number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/) +static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/) { // const sp_digit* m = p256_mod; sp_digit mp = p256_mp_mod; @@ -884,7 +891,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ goto inc_next_word0; } } - sp_256_mont_shift_8(a); + sp_512to256_mont_shift_8(a); if (word16th != 0) sp_256_sub_8_p256_mod(a); sp_256_norm_8(a); @@ -892,7 +899,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ else { /* Same code for explicit mp == 1 (which is always the case for P256) */ sp_digit word16th = 0; for (i = 0; i < 8; i++) { - /*mu = a[i];*/ +// mu = a[i]; if (sp_256_mul_add_8(a+i /*, m, mu*/)) { int j = i + 8; inc_next_word: @@ -904,148 +911,46 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ goto inc_next_word; } } - sp_256_mont_shift_8(a); + sp_512to256_mont_shift_8(a); if (word16th != 0) sp_256_sub_8_p256_mod(a); sp_256_norm_8(a); } } -#if 0 -//TODO: arm32 asm (also adapt for x86?) 
-static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, sp_digit mp) -{ - sp_digit ca = 0; - - asm volatile ( - # i = 0 - mov r12, #0 - ldr r10, [%[a], #0] - ldr r14, [%[a], #4] -1: - # mu = a[i] * mp - mul r8, %[mp], r10 - # a[i+0] += m[0] * mu - ldr r7, [%[m], #0] - ldr r9, [%[a], #0] - umull r6, r7, r8, r7 - adds r10, r10, r6 - adc r5, r7, #0 - # a[i+1] += m[1] * mu - ldr r7, [%[m], #4] - ldr r9, [%[a], #4] - umull r6, r7, r8, r7 - adds r10, r14, r6 - adc r4, r7, #0 - adds r10, r10, r5 - adc r4, r4, #0 - # a[i+2] += m[2] * mu - ldr r7, [%[m], #8] - ldr r14, [%[a], #8] - umull r6, r7, r8, r7 - adds r14, r14, r6 - adc r5, r7, #0 - adds r14, r14, r4 - adc r5, r5, #0 - # a[i+3] += m[3] * mu - ldr r7, [%[m], #12] - ldr r9, [%[a], #12] - umull r6, r7, r8, r7 - adds r9, r9, r6 - adc r4, r7, #0 - adds r9, r9, r5 - str r9, [%[a], #12] - adc r4, r4, #0 - # a[i+4] += m[4] * mu - ldr r7, [%[m], #16] - ldr r9, [%[a], #16] - umull r6, r7, r8, r7 - adds r9, r9, r6 - adc r5, r7, #0 - adds r9, r9, r4 - str r9, [%[a], #16] - adc r5, r5, #0 - # a[i+5] += m[5] * mu - ldr r7, [%[m], #20] - ldr r9, [%[a], #20] - umull r6, r7, r8, r7 - adds r9, r9, r6 - adc r4, r7, #0 - adds r9, r9, r5 - str r9, [%[a], #20] - adc r4, r4, #0 - # a[i+6] += m[6] * mu - ldr r7, [%[m], #24] - ldr r9, [%[a], #24] - umull r6, r7, r8, r7 - adds r9, r9, r6 - adc r5, r7, #0 - adds r9, r9, r4 - str r9, [%[a], #24] - adc r5, r5, #0 - # a[i+7] += m[7] * mu - ldr r7, [%[m], #28] - ldr r9, [%[a], #28] - umull r6, r7, r8, r7 - adds r5, r5, r6 - adcs r7, r7, %[ca] - mov %[ca], #0 - adc %[ca], %[ca], %[ca] - adds r9, r9, r5 - str r9, [%[a], #28] - ldr r9, [%[a], #32] - adcs r9, r9, r7 - str r9, [%[a], #32] - adc %[ca], %[ca], #0 - # i += 1 - add %[a], %[a], #4 - add r12, r12, #4 - cmp r12, #32 - blt 1b - - str r10, [%[a], #0] - str r14, [%[a], #4] - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" - ); - - memcpy(a, a + 8, 32); - if 
(ca) - a -= m; -} -#endif /* Multiply two Montogmery form numbers mod the modulus (prime). * (r = a * b mod m) * * r Result of multiplication. + * Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad). * a First number to multiply in Montogmery form. * b Second number to multiply in Montogmery form. * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b +static void sp_256to512z_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b /*, const sp_digit* m, sp_digit mp*/) { //const sp_digit* m = p256_mod; //sp_digit mp = p256_mp_mod; - sp_256_mul_8(r, a, b); - sp_256_mont_reduce_8(r /*, m, mp*/); + sp_256to512_mul_8(r, a, b); + sp_512to256_mont_reduce_8(r /*, m, mp*/); } /* Square the Montgomery form number. (r = a * a mod m) * * r Result of squaring. + * Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad). * a Number to square in Montogmery form. * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a +static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m, sp_digit mp*/) { //const sp_digit* m = p256_mod; //sp_digit mp = p256_mp_mod; - sp_256_mont_mul_8(r, a, a /*, m, mp*/); + sp_256to512z_mont_mul_8(r, a, a /*, m, mp*/); } /* Invert the number, in Montgomery form, modulo the modulus (prime) of the @@ -1068,15 +973,15 @@ static const uint32_t p256_mod_2[8] = { #endif static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a) { - sp_digit t[2*8]; //can be just [8]? 
+ sp_digit t[2*8]; int i; memcpy(t, a, sizeof(sp_digit) * 8); for (i = 254; i >= 0; i--) { - sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/); /*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/ if (i >= 224 || i == 192 || (i <= 95 && i != 1)) - sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/); } memcpy(r, t, sizeof(sp_digit) * 8); } @@ -1152,22 +1057,22 @@ static void sp_256_map_8(sp_point* r, sp_point* p) sp_256_mont_inv_8(t1, p->z); - sp_256_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/); /* x /= z^2 */ - sp_256_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/); memset(r->x + 8, 0, sizeof(r->x) / 2); - sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); + sp_512to256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); /* Reduce x to less than modulus */ if (sp_256_cmp_8(r->x, p256_mod) >= 0) sp_256_sub_8_p256_mod(r->x); sp_256_norm_8(r->x); /* y /= z^3 */ - sp_256_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/); memset(r->y + 8, 0, sizeof(r->y) / 2); - sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); + sp_512to256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); /* Reduce y to less than modulus */ if (sp_256_cmp_8(r->y, p256_mod) >= 0) sp_256_sub_8_p256_mod(r->y); @@ -1202,9 +1107,9 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) } /* T1 = Z * Z */ - sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/); /* Z = Y * Z */ - sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/); + 
sp_256to512z_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/); /* Z = 2Z */ sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/); /* T2 = X - T1 */ @@ -1212,21 +1117,21 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) /* T1 = X + T1 */ sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/); /* T2 = T1 * T2 */ - sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/); /* T1 = 3T2 */ sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/); /* Y = 2Y */ sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/); /* Y = Y * Y */ - sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/); /* T2 = Y * Y */ - sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/); /* T2 = T2/2 */ sp_256_div2_8(t2, t2, p256_mod); /* Y = Y * X */ - sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/); /* X = T1 * T1 */ - sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/); /* X = X - Y */ sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/); /* X = X - Y */ @@ -1234,7 +1139,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) /* Y = Y - X */ sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/); /* Y = Y * T1 */ - sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/); /* Y = Y - T2 */ sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/); dump_512("y2 %s\n", r->y); @@ -1279,36 +1184,36 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* } /* U1 = X1*Z2^2 */ - sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/); + 
sp_256to512z_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/); /* U2 = X2*Z1^2 */ - sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); /* S1 = Y1*Z2^3 */ - sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/); /* S2 = Y2*Z1^3 */ - sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); /* H = U2 - U1 */ sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/); /* Z3 = H*Z1*Z2 */ - sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/); /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/); sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/); sp_256_mont_sub_8(r->x, r->x, t1 /*, 
p256_mod*/); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/); - sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/); - sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/); + sp_256to512z_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/); } -- cgit v1.2.3-55-g6feb From 8cbb70365f653397c8c2b9370214d5aed36ec9fa Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 15:50:40 +0100 Subject: tls: P256: remove redundant zeroing in sp_256_map_8 Previous change made it obvious that we zero out already-zeroed high bits function old new delta sp_256_ecc_mulmod_8 534 494 -40 Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 74ded2cda..baed62f41 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -1062,7 +1062,6 @@ static void sp_256_map_8(sp_point* r, sp_point* p) /* x /= z^2 */ sp_256to512z_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/); - memset(r->x + 8, 0, sizeof(r->x) / 2); sp_512to256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); /* Reduce x to less than modulus */ if (sp_256_cmp_8(r->x, p256_mod) >= 0) @@ -1071,7 +1070,6 @@ static void sp_256_map_8(sp_point* r, sp_point* p) /* y /= z^3 */ sp_256to512z_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/); - memset(r->y + 8, 0, sizeof(r->y) / 2); sp_512to256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); /* Reduce y to less than modulus */ if (sp_256_cmp_8(r->y, p256_mod) >= 0) -- cgit v1.2.3-55-g6feb From dcfd8d3d1013ba989fa511f44bb0553a88c1ef10 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 16:07:42 +0100 Subject: tls: P256: fix sp_256_div2_8 - it wouldn't use a[] if low bit is 0 It worked by chance because the only caller passed both parameters as two pointers to 
the same array. My fault (I made this error when converting from 26-bit code). Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index baed62f41..b3f7888f5 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -636,12 +636,14 @@ static void sp_256_rshift1_8(sp_digit* r, sp_digit carry) } #endif -/* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) */ -static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) +/* Divide the number by 2 mod the modulus (prime). (r = (r / 2) % m) */ +static void sp_256_div2_8(sp_digit* r /*, const sp_digit* m*/) { + const sp_digit* m = p256_mod; + int carry = 0; - if (a[0] & 1) - carry = sp_256_add_8(r, a, m); + if (r[0] & 1) + carry = sp_256_add_8(r, r, m); sp_256_norm_8(r); sp_256_rshift1_8(r, carry); } @@ -1125,7 +1127,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) /* T2 = Y * Y */ sp_256to512z_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/); /* T2 = T2/2 */ - sp_256_div2_8(t2, t2, p256_mod); + sp_256_div2_8(t2 /*, p256_mod*/); /* Y = Y * X */ sp_256to512z_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/); /* X = T1 * T1 */ -- cgit v1.2.3-55-g6feb From 9c671fe3dd2e46a28c02d266130f56a1a6296791 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 18:42:27 +0100 Subject: tls: P256: do not open-code copying of struct variables Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index b3f7888f5..3291b553c 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -865,6 +865,8 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) } /* Reduce the number back to 256 bits using Montgomery reduction. 
+ * Note: the result is NOT guaranteed to be less than p256_mod! + * (it is only guaranteed to fit into 256 bits). * * a Double-wide number to reduce in place. * m The single precision number representing the modulus. @@ -1276,7 +1278,7 @@ static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit* if (map) sp_256_map_8(r, &t[0]); else - memcpy(r, &t[0], sizeof(sp_point)); + *r = t[0]; /* struct copy */ memset(t, 0, sizeof(t)); //paranoia } -- cgit v1.2.3-55-g6feb From f92ae1dc4bc00e352e683b826609efa5e1e22708 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 19:15:43 +0100 Subject: tls: P256: change logic so that we don't need double-wide vectors everywhere Change sp_256to512z_mont_{mul,sqr}_8 to not require/zero upper 256 bits. There is only one place where we actually used that (and that's why there used to be zeroing memset of top half!). Fix up that place. As a bonus, 256x256->512 multiply no longer needs to care for "r overlaps a or b" case. This shrinks sp_point structure as well, not just temporaries. 
function old new delta sp_256to512z_mont_mul_8 150 - -150 sp_256_mont_mul_8 - 147 +147 sp_256to512z_mont_sqr_8 7 - -7 sp_256_mont_sqr_8 - 7 +7 sp_256_ecc_mulmod_8 494 543 +49 sp_512to256_mont_reduce_8 243 249 +6 sp_256_point_from_bin2x32 73 70 -3 sp_256_proj_point_dbl_8 353 345 -8 sp_256_proj_point_add_8 544 499 -45 ------------------------------------------------------------------------------ (add/remove: 2/2 grow/shrink: 2/3 up/down: 209/-213) Total: -4 bytes Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 178 ++++++++++++++++++++---------------------------- 1 file changed, 72 insertions(+), 106 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 3291b553c..3452b08b9 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -49,9 +49,9 @@ typedef int32_t signed_sp_digit; */ typedef struct sp_point { - sp_digit x[2 * 8]; - sp_digit y[2 * 8]; - sp_digit z[2 * 8]; + sp_digit x[8]; + sp_digit y[8]; + sp_digit z[8]; int infinity; } sp_point; @@ -456,12 +456,11 @@ static void sp_256_sub_8_p256_mod(sp_digit* r) #endif /* Multiply a and b into r. (r = a * b) - * r should be [16] array (512 bits). + * r should be [16] array (512 bits), and must not coincide with a or b. 
*/ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__) - sp_digit rr[15]; /* in case r coincides with a or b */ int k; uint32_t accl; uint32_t acch; @@ -493,16 +492,15 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) j--; i++; } while (i != 8 && i <= k); - rr[k] = accl; + r[k] = accl; accl = acch; acch = acc_hi; } r[15] = accl; - memcpy(r, rr, sizeof(rr)); #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) const uint64_t* aa = (const void*)a; const uint64_t* bb = (const void*)b; - uint64_t rr[8]; + const uint64_t* rr = (const void*)r; int k; uint64_t accl; uint64_t acch; @@ -539,11 +537,8 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) acch = acc_hi; } rr[7] = accl; - memcpy(r, rr, sizeof(rr)); #elif 0 //TODO: arm assembly (untested) - sp_digit tmp[16]; - asm volatile ( "\n mov r5, #0" "\n mov r6, #0" @@ -575,12 +570,10 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) "\n cmp r5, #56" "\n ble 1b" "\n str r6, [%[r], r5]" - : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) + : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" ); - memcpy(r, tmp, sizeof(tmp)); #else - sp_digit rr[15]; /* in case r coincides with a or b */ int i, j, k; uint64_t acc; @@ -600,11 +593,10 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) j--; i++; } while (i != 8 && i <= k); - rr[k] = acc; + r[k] = acc; acc = (acc >> 32) | ((uint64_t)acc_hi << 32); } r[15] = acc; - memcpy(r, rr, sizeof(rr)); #endif } @@ -709,30 +701,11 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* } /* Shift the result in the high 256 bits down to the bottom. - * High half is cleared to zeros. 
*/ -#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff -static void sp_512to256_mont_shift_8(sp_digit* rr) +static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a) { - uint64_t *r = (void*)rr; - int i; - - for (i = 0; i < 4; i++) { - r[i] = r[i+4]; - r[i+4] = 0; - } + memcpy(r, a + 8, sizeof(*r) * 8); } -#else -static void sp_512to256_mont_shift_8(sp_digit* r) -{ - int i; - - for (i = 0; i < 8; i++) { - r[i] = r[i+8]; - r[i+8] = 0; - } -} -#endif /* Mul a by scalar b and add into r. (r += a * b) * a = p256_mod @@ -868,11 +841,12 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) * Note: the result is NOT guaranteed to be less than p256_mod! * (it is only guaranteed to fit into 256 bits). * - * a Double-wide number to reduce in place. + * r Result. + * a Double-wide number to reduce. Clobbered. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/) +static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/) { // const sp_digit* m = p256_mod; sp_digit mp = p256_mp_mod; @@ -895,10 +869,10 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit goto inc_next_word0; } } - sp_512to256_mont_shift_8(a); + sp_512to256_mont_shift_8(r, a); if (word16th != 0) - sp_256_sub_8_p256_mod(a); - sp_256_norm_8(a); + sp_256_sub_8_p256_mod(r); + sp_256_norm_8(r); } else { /* Same code for explicit mp == 1 (which is always the case for P256) */ sp_digit word16th = 0; @@ -915,10 +889,10 @@ static void sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit goto inc_next_word; } } - sp_512to256_mont_shift_8(a); + sp_512to256_mont_shift_8(r, a); if (word16th != 0) - sp_256_sub_8_p256_mod(a); - sp_256_norm_8(a); + sp_256_sub_8_p256_mod(r); + sp_256_norm_8(r); } } @@ -926,35 +900,34 @@ static void 
sp_512to256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit * (r = a * b mod m) * * r Result of multiplication. - * Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad). * a First number to multiply in Montogmery form. * b Second number to multiply in Montogmery form. * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_256to512z_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b +static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b /*, const sp_digit* m, sp_digit mp*/) { //const sp_digit* m = p256_mod; //sp_digit mp = p256_mp_mod; - sp_256to512_mul_8(r, a, b); - sp_512to256_mont_reduce_8(r /*, m, mp*/); + sp_digit t[2 * 8]; + sp_256to512_mul_8(t, a, b); + sp_512to256_mont_reduce_8(r, t /*, m, mp*/); } /* Square the Montgomery form number. (r = a * a mod m) * * r Result of squaring. - * Should be [16] array (512 bits), but high half is cleared to zeros (used as scratch pad). * a Number to square in Montogmery form. * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a +static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m, sp_digit mp*/) { //const sp_digit* m = p256_mod; //sp_digit mp = p256_mp_mod; - sp_256to512z_mont_mul_8(r, a, a /*, m, mp*/); + sp_256_mont_mul_8(r, a, a /*, m, mp*/); } /* Invert the number, in Montgomery form, modulo the modulus (prime) of the @@ -964,11 +937,8 @@ static void sp_256to512z_mont_sqr_8(sp_digit* r, const sp_digit* a * a Number to invert. */ #if 0 -/* Mod-2 for the P256 curve. 
*/ -static const uint32_t p256_mod_2[8] = { - 0xfffffffd,0xffffffff,0xffffffff,0x00000000, - 0x00000000,0x00000000,0x00000001,0xffffffff, -}; +//p256_mod - 2: +//ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2 //Bit pattern: //2 2 2 2 2 2 2 1...1 //5 5 4 3 2 1 0 9...0 9...1 @@ -977,15 +947,15 @@ static const uint32_t p256_mod_2[8] = { #endif static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a) { - sp_digit t[2*8]; + sp_digit t[8]; int i; memcpy(t, a, sizeof(sp_digit) * 8); for (i = 254; i >= 0; i--) { - sp_256to512z_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/); /*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/ if (i >= 224 || i == 192 || (i <= 95 && i != 1)) - sp_256to512z_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/); } memcpy(r, t, sizeof(sp_digit) * 8); } @@ -1056,25 +1026,28 @@ static void sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a) */ static void sp_256_map_8(sp_point* r, sp_point* p) { - sp_digit t1[2*8]; - sp_digit t2[2*8]; + sp_digit t1[8]; + sp_digit t2[8]; + sp_digit rr[2 * 8]; sp_256_mont_inv_8(t1, p->z); - sp_256to512z_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t2, t1 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/); /* x /= z^2 */ - sp_256to512z_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/); - sp_512to256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(rr, p->x, t2 /*, p256_mod, p256_mp_mod*/); + memset(rr + 8, 0, sizeof(rr) / 2); + sp_512to256_mont_reduce_8(r->x, rr /*, p256_mod, p256_mp_mod*/); /* Reduce x to less than modulus */ if (sp_256_cmp_8(r->x, p256_mod) >= 0) sp_256_sub_8_p256_mod(r->x); sp_256_norm_8(r->x); /* y /= z^3 */ - sp_256to512z_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/); - 
sp_512to256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(rr, p->y, t1 /*, p256_mod, p256_mp_mod*/); + memset(rr + 8, 0, sizeof(rr) / 2); + sp_512to256_mont_reduce_8(r->y, rr /*, p256_mod, p256_mp_mod*/); /* Reduce y to less than modulus */ if (sp_256_cmp_8(r->y, p256_mod) >= 0) sp_256_sub_8_p256_mod(r->y); @@ -1091,8 +1064,8 @@ static void sp_256_map_8(sp_point* r, sp_point* p) */ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) { - sp_digit t1[2*8]; - sp_digit t2[2*8]; + sp_digit t1[8]; + sp_digit t2[8]; /* Put point to double into result */ if (r != p) @@ -1101,17 +1074,10 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) if (r->infinity) return; - if (SP_DEBUG) { - /* unused part of t2, may result in spurios - * differences in debug output. Clear it. - */ - memset(t2, 0, sizeof(t2)); - } - /* T1 = Z * Z */ - sp_256to512z_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/); /* Z = Y * Z */ - sp_256to512z_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/); /* Z = 2Z */ sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/); /* T2 = X - T1 */ @@ -1119,21 +1085,21 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) /* T1 = X + T1 */ sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/); /* T2 = T1 * T2 */ - sp_256to512z_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/); /* T1 = 3T2 */ sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/); /* Y = 2Y */ sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/); /* Y = Y * Y */ - sp_256to512z_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/); /* T2 = Y * Y */ - sp_256to512z_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/); /* T2 = T2/2 */ sp_256_div2_8(t2 /*, p256_mod*/); /* Y = Y * X */ - 
sp_256to512z_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/); /* X = T1 * T1 */ - sp_256to512z_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/); /* X = X - Y */ sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/); /* X = X - Y */ @@ -1141,7 +1107,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) /* Y = Y - X */ sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/); /* Y = Y * T1 */ - sp_256to512z_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/); /* Y = Y - T2 */ sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/); dump_512("y2 %s\n", r->y); @@ -1155,11 +1121,11 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) */ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q) { - sp_digit t1[2*8]; - sp_digit t2[2*8]; - sp_digit t3[2*8]; - sp_digit t4[2*8]; - sp_digit t5[2*8]; + sp_digit t1[8]; + sp_digit t2[8]; + sp_digit t3[8]; + sp_digit t4[8]; + sp_digit t5[8]; /* Ensure only the first point is the same as the result. 
*/ if (q == r) { @@ -1186,36 +1152,36 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* } /* U1 = X1*Z2^2 */ - sp_256to512z_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/); /* U2 = X2*Z1^2 */ - sp_256to512z_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); /* S1 = Y1*Z2^3 */ - sp_256to512z_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/); /* S2 = Y2*Z1^3 */ - sp_256to512z_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); /* H = U2 - U1 */ sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/); /* Z3 = H*Z1*Z2 */ - sp_256to512z_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/); /* X3 = R^2 - H^3 - 2*U1*H^2 */ - sp_256to512z_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(r->x, t4 /*, 
p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/); sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/); sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/); /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/); - sp_256to512z_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/); - sp_256to512z_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/); } -- cgit v1.2.3-55-g6feb From 0b13ab66f43fc1a9437361cfcd33b485422eb0ae Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 27 Nov 2021 19:36:23 +0100 Subject: tls: P256: trivial x86-64 fix Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 3452b08b9..4c8f08d4e 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -500,7 +500,7 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) const uint64_t* aa = (const void*)a; const uint64_t* bb = (const void*)b; - const uint64_t* rr = (const void*)r; + uint64_t* rr = (void*)r; int k; uint64_t accl; uint64_t acch; -- cgit v1.2.3-55-g6feb From 1b93c7c4ecc47318905b6e6f801732b7dd31e0ee Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 Nov 2021 02:56:02 +0100 Subject: tls: P256: pad struct sp_point to 64 bits (on 64-bit arches) function old new delta curve_P256_compute_pubkey_and_premaster 198 190 -8 Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git 
a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 4c8f08d4e..37e1cfa1c 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -49,14 +49,19 @@ typedef int32_t signed_sp_digit; */ typedef struct sp_point { - sp_digit x[8]; + sp_digit x[8] +#if ULONG_MAX > 0xffffffff + /* Make sp_point[] arrays to not be 64-bit misaligned */ + ALIGNED(8) +#endif + ; sp_digit y[8]; sp_digit z[8]; int infinity; } sp_point; /* The modulus (prime) of the curve P256. */ -static const sp_digit p256_mod[8] = { +static const sp_digit p256_mod[8] ALIGNED(8) = { 0xffffffff,0xffffffff,0xffffffff,0x00000000, 0x00000000,0x00000000,0x00000001,0xffffffff, }; @@ -903,7 +908,7 @@ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit * a First number to multiply in Montogmery form. * b Second number to multiply in Montogmery form. * m Modulus (prime). - * mp Montogmery mulitplier. + * mp Montogmery multiplier. */ static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b /*, const sp_digit* m, sp_digit mp*/) @@ -920,7 +925,7 @@ static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b * r Result of squaring. * a Number to square in Montogmery form. * m Modulus (prime). - * mp Montogmery mulitplier. + * mp Montogmery multiplier. */ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m, sp_digit mp*/) @@ -1145,7 +1150,6 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* return; } - if (p->infinity || q->infinity) { *r = p->infinity ? 
*q : *p; /* struct copy */ return; -- cgit v1.2.3-55-g6feb From bfefa6ab6cf30507009cca7182c7302900fb5534 Mon Sep 17 00:00:00 2001 From: Bernhard Reutner-Fischer Date: Sun, 28 Nov 2021 10:53:22 +0100 Subject: libarchive: remove duplicate forward declaration Signed-off-by: Bernhard Reutner-Fischer --- include/bb_archive.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/bb_archive.h b/include/bb_archive.h index dc5e55f0a..e0ef8fc4e 100644 --- a/include/bb_archive.h +++ b/include/bb_archive.h @@ -195,7 +195,6 @@ char get_header_ar(archive_handle_t *archive_handle) FAST_FUNC; char get_header_cpio(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar_gz(archive_handle_t *archive_handle) FAST_FUNC; -char get_header_tar_xz(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar_bz2(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar_lzma(archive_handle_t *archive_handle) FAST_FUNC; char get_header_tar_xz(archive_handle_t *archive_handle) FAST_FUNC; -- cgit v1.2.3-55-g6feb From cfb615781df5c7439fe0060a85e6b6a56d10dc7f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 Nov 2021 11:10:00 +0100 Subject: tls: P256: simplify sp_256_mont_inv_8 (no need for a temporary) function old new delta sp_256_ecc_mulmod_8 543 517 -26 Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 37e1cfa1c..9bd5c6832 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -938,7 +938,7 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a /* Invert the number, in Montgomery form, modulo the modulus (prime) of the * P256 curve. (r = 1 / a mod m) * - * r Inverse result. + * r Inverse result. Must not coincide with a. * a Number to invert. 
*/ #if 0 @@ -952,17 +952,15 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a #endif static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a) { - sp_digit t[8]; int i; - memcpy(t, a, sizeof(sp_digit) * 8); + memcpy(r, a, sizeof(sp_digit) * 8); for (i = 254; i >= 0; i--) { - sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/); + sp_256_mont_sqr_8(r, r /*, p256_mod, p256_mp_mod*/); /*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/ if (i >= 224 || i == 192 || (i <= 95 && i != 1)) - sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_8(r, r, a /*, p256_mod, p256_mp_mod*/); } - memcpy(r, t, sizeof(sp_digit) * 8); } /* Multiply a number by Montogmery normalizer mod modulus (prime). -- cgit v1.2.3-55-g6feb From 00b5051cd25ef7e42ac62637ba16b70d3ac1014a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 Nov 2021 12:21:23 +0100 Subject: libbb: code shrink in des encryption, in setup_salt() function old new delta pw_encrypt 978 971 -7 .rodata 108208 108192 -16 des_crypt 1211 1181 -30 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-53) Total: -53 bytes Signed-off-by: Denys Vlasenko --- libbb/pw_encrypt_des.c | 29 ++++++++++++++--------------- testsuite/cryptpw.tests | 14 ++++++++++++++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/libbb/pw_encrypt_des.c b/libbb/pw_encrypt_des.c index dcd3521e2..fe8237cfe 100644 --- a/libbb/pw_encrypt_des.c +++ b/libbb/pw_encrypt_des.c @@ -363,7 +363,7 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) old_rawkey0 = old_rawkey1 = 0; old_salt = 0; #endif - saltbits = 0; + //saltbits = 0; /* not needed: we call setup_salt() before do_des() */ bits28 = bits32 + 4; bits24 = bits28 + 4; @@ -481,12 +481,11 @@ des_init(struct des_ctx *ctx, const struct const_des_ctx *cctx) return ctx; } - +/* Accepts 24-bit salt at max */ static void setup_salt(struct des_ctx *ctx, uint32_t salt) { - uint32_t 
obit, saltbit; - int i; + uint32_t invbits; #if USE_REPETITIVE_SPEEDUP if (salt == old_salt) @@ -494,15 +493,15 @@ setup_salt(struct des_ctx *ctx, uint32_t salt) old_salt = salt; #endif - saltbits = 0; - saltbit = 1; - obit = 0x800000; - for (i = 0; i < 24; i++) { - if (salt & saltbit) - saltbits |= obit; - saltbit <<= 1; - obit >>= 1; - } + invbits = 0; + + salt |= (1 << 24); + do { + invbits = (invbits << 1) + (salt & 1); + salt >>= 1; + } while (salt != 1); + + saltbits = invbits; } static void @@ -736,14 +735,14 @@ des_crypt(struct des_ctx *ctx, char output[DES_OUT_BUFSIZE], des_setkey(ctx, (char *)keybuf); /* - * salt_str - 2 bytes of salt + * salt_str - 2 chars of salt (converted to 12 bits) * key - up to 8 characters */ output[0] = salt_str[0]; output[1] = salt_str[1]; salt = (ascii_to_bin(salt_str[1]) << 6) | ascii_to_bin(salt_str[0]); - setup_salt(ctx, salt); + setup_salt(ctx, salt); /* set ctx->saltbits for do_des() */ /* Do it. */ do_des(ctx, /*0, 0,*/ &r0, &r1, 25 /* count */); diff --git a/testsuite/cryptpw.tests b/testsuite/cryptpw.tests index 8ec476c9f..0dd91fe15 100755 --- a/testsuite/cryptpw.tests +++ b/testsuite/cryptpw.tests @@ -7,6 +7,20 @@ # testing "description" "command" "result" "infile" "stdin" +#optional USE_BB_CRYPT +testing "cryptpw des 12" \ + "cryptpw -m des QWErty '123456789012345678901234567890'" \ + '12MnB3PqfVbMA\n' "" "" + +testing "cryptpw des 55" \ + "cryptpw -m des QWErty 55" \ + '55tgFLtkT1Y72\n' "" "" + +testing "cryptpw des zz" \ + "cryptpw -m des QWErty zz" \ + 'zzIZaaXWOkxVk\n' "" "" +#SKIP= + optional USE_BB_CRYPT_SHA testing "cryptpw sha256" \ "cryptpw -m sha256 QWErty '123456789012345678901234567890'" \ -- cgit v1.2.3-55-g6feb From 832626227ea3798403159080532f763a37273a91 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 Nov 2021 12:55:20 +0100 Subject: tls: P256: add comment on logic in sp_512to256_mont_reduce_8, no code changes Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 33 
+++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 9bd5c6832..eb6cc2431 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -850,6 +850,20 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) * a Double-wide number to reduce. Clobbered. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. + * + * Montgomery reduction on multiprecision integers: + * Montgomery reduction requires products modulo R. + * When R is a power of B [in our case R=2^128, B=2^32], there is a variant + * of Montgomery reduction which requires products only of machine word sized + * integers. T is stored as an little-endian word array a[0..n]. The algorithm + * reduces it one word at a time. First an appropriate multiple of modulus + * is added to make T divisible by B. [In our case, it is p256_mp_mod * a[0].] + * Then a multiple of modulus is added to make T divisible by B^2. + * [In our case, it is (p256_mp_mod * a[1]) << 32.] + * And so on. Eventually T is divisible by R, and after division by R + * the algorithm is in the same place as the usual Montgomery reduction was. + * + * TODO: Can conditionally use 64-bit (if bit-little-endian arch) logic? */ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/) { @@ -941,15 +955,6 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a * r Inverse result. Must not coincide with a. * a Number to invert. 
*/ -#if 0 -//p256_mod - 2: -//ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2 -//Bit pattern: -//2 2 2 2 2 2 2 1...1 -//5 5 4 3 2 1 0 9...0 9...1 -//543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210 -//111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101 -#endif static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a) { int i; @@ -957,7 +962,15 @@ static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a) memcpy(r, a, sizeof(sp_digit) * 8); for (i = 254; i >= 0; i--) { sp_256_mont_sqr_8(r, r /*, p256_mod, p256_mp_mod*/); - /*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/ +/* p256_mod - 2: + * ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2 + * Bit pattern: + * 2 2 2 2 2 2 2 1...1 + * 5 5 4 3 2 1 0 9...0 9...1 + * 543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210 + * 111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101 + */ + /*if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))*/ if (i >= 224 || i == 192 || (i <= 95 && i != 1)) sp_256_mont_mul_8(r, r, a /*, p256_mod, p256_mp_mod*/); } -- cgit v1.2.3-55-g6feb From 90b0d3304455ad432c49f38e0419ac7820a625f7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 Nov 2021 15:38:51 +0100 Subject: tls: P256: add 64-bit montgomery reduce (disabled), small optimization in 32-bit code function old new delta sp_512to256_mont_reduce_8 191 185 -6 Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 177 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 159 insertions(+), 18 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index eb6cc2431..b1c410037 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -705,36 +705,174 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* } } -/* Shift the result 
in the high 256 bits down to the bottom. - */ +/* Shift the result in the high 256 bits down to the bottom. */ static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a) { memcpy(r, a + 8, sizeof(*r) * 8); } +// Disabled for now. Seems to work, but ugly and 40 bytes larger on x86-64. +#if 0 //UNALIGNED_LE_64BIT +/* 64-bit little-endian optimized version. + * See generic 32-bit version below for explanation. + * The benefit of this version is: even though r[3] calculation is atrocious, + * we call sp_256_mul_add_4() four times, not 8. + */ +static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/) +{ + uint64_t b = r[0]; + +# if 0 + const uint64_t* a = (const void*)p256_mod; +//a[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff + uint128_t t; + int i; + t = 0; + for (i = 0; i < 4; i++) { + uint32_t t_hi; + uint128_t m = ((uint128_t)b * a[i]) + r[i]; + t += m; + t_hi = (t < m); + r[i] = (uint64_t)t; + t = (t >> 64) | ((uint128_t)t_hi << 64); + } + r[4] += (uint64_t)t; + return (r[4] < (uint64_t)t); /* 1 if addition overflowed */ +# else + // Unroll, then optimize the above loop: + //uint32_t t_hi; + //uint128_t m; + uint64_t t64, t64u; + + //m = ((uint128_t)b * a[0]) + r[0]; + // Since b is r[0] and a[0] is ffffffffffffffff, the above optimizes to: + // m = r[0] * ffffffffffffffff + r[0] = (r[0] << 64 - r[0]) + r[0] = r[0] << 64; + //t += m; + // t = r[0] << 64 = b << 64; + //t_hi = (t < m); + // t_hi = 0; + //r[0] = (uint64_t)t; +// r[0] = 0; +//the store can be eliminated since caller won't look at lower 256 bits of the result + //t = (t >> 64) | ((uint128_t)t_hi << 64); + // t = b; + + //m = ((uint128_t)b * a[1]) + r[1]; + // Since a[1] is 00000000ffffffff, the above optimizes to: + // m = b * ffffffff + r[1] = (b * 100000000 - b) + r[1] = (b << 32) - b + r[1]; + //t += m; + // t = b + (b << 32) - b + r[1] = (b << 32) + r[1]; + //t_hi = (t < m); + // t_hi = 0; + //r[1] = (uint64_t)t; + r[1] += (b << 32); + //t = (t 
>> 64) | ((uint128_t)t_hi << 64); + t64 = (r[1] < (b << 32)); + t64 += (b >> 32); + + //m = ((uint128_t)b * a[2]) + r[2]; + // Since a[2] is 0000000000000000, the above optimizes to: + // m = b * 0 + r[2] = r[2]; + //t += m; + // t = t64 + r[2]; + //t_hi = (t < m); + // t_hi = 0; + //r[2] = (uint64_t)t; + r[2] += t64; + //t = (t >> 64) | ((uint128_t)t_hi << 64); + t64 = (r[2] < t64); + + //m = ((uint128_t)b * a[3]) + r[3]; + // Since a[3] is ffffffff00000001, the above optimizes to: + // m = b * ffffffff00000001 + r[3]; + // m = b + b*ffffffff00000000 + r[3] + // m = b + (b*ffffffff << 32) + r[3] + // m = b + (((b<<32) - b) << 32) + r[3] + //t += m; + // t = t64 + (uint128_t)b + ((((uint128_t)b << 32) - b) << 32) + r[3]; + t64 += b; + t64u = (t64 < b); + t64 += r[3]; + t64u += (t64 < r[3]); + { + uint64_t lo,hi; + //lo = (((b << 32) - b) << 32 + //hi = (((uint128_t)b << 32) - b) >> 32 + //but without uint128_t: + hi = (b << 32) - b; /* form lower 32 bits of "hi" part 1 */ + b = (b >> 32) - (/*borrowed above?*/(b << 32) < b); /* upper 32 bits of "hi" are in b */ + lo = hi << 32; /* (use "hi" value to calculate "lo",... */ + t64 += lo; /* ...consume... */ + t64u += (t64 < lo); /* ..."lo") */ + hi >>= 32; /* form lower 32 bits of "hi" part 2 */ + hi |= (b << 32); /* combine lower and upper */ + t64u += hi; /* consume "hi" */ + } + //t_hi = (t < m); + // t_hi = 0; + //r[3] = (uint64_t)t; + r[3] = t64; + //t = (t >> 64) | ((uint128_t)t_hi << 64); + // t = t64u; + + r[4] += t64u; + return (r[4] < t64u); /* 1 if addition overflowed */ +# endif +} + +static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* aa/*, const sp_digit* m, sp_digit mp*/) +{ +// const sp_digit* m = p256_mod; + int i; + uint64_t *a = (void*)aa; + + sp_digit carry = 0; + for (i = 0; i < 4; i++) { +// mu = a[i]; + if (sp_256_mul_add_4(a+i /*, m, mu*/)) { + int j = i + 4; + inc_next_word: + if (++j > 7) { /* a[8] array has no more words? 
*/ + carry++; + continue; + } + if (++a[j] == 0) /* did this overflow too? */ + goto inc_next_word; + } + } + sp_512to256_mont_shift_8(r, aa); + if (carry != 0) + sp_256_sub_8_p256_mod(r); + sp_256_norm_8(r); +} + +#else /* Generic 32-bit version */ + /* Mul a by scalar b and add into r. (r += a * b) * a = p256_mod * b = r[0] */ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) { -// const sp_digit* a = p256_mod; -//a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff sp_digit b = r[0]; - uint64_t t; -// t = 0; -// for (i = 0; i < 8; i++) { -// uint32_t t_hi; -// uint64_t m = ((uint64_t)b * a[i]) + r[i]; -// t += m; -// t_hi = (t < m); -// r[i] = (sp_digit)t; -// t = (t >> 32) | ((uint64_t)t_hi << 32); -// } -// r[8] += (sp_digit)t; - +# if 0 + const sp_digit* a = p256_mod; +//a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff + int i; + t = 0; + for (i = 0; i < 8; i++) { + uint32_t t_hi; + uint64_t m = ((uint64_t)b * a[i]) + r[i]; + t += m; + t_hi = (t < m); + r[i] = (sp_digit)t; + t = (t >> 32) | ((uint64_t)t_hi << 32); + } + r[8] += (sp_digit)t; + return (r[8] < (sp_digit)t); /* 1 if addition overflowed */ +# else // Unroll, then optimize the above loop: //uint32_t t_hi; uint64_t m; @@ -748,7 +886,8 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) //t_hi = (t < m); // t_hi = 0; //r[0] = (sp_digit)t; - r[0] = 0; +// r[0] = 0; +//the store can be eliminated since caller won't look at lower 256 bits of the result //t = (t >> 32) | ((uint64_t)t_hi << 32); // t = b; @@ -840,6 +979,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) r[8] += (sp_digit)t; return (r[8] < (sp_digit)t); /* 1 if addition overflowed */ +# endif } /* Reduce the number back to 256 bits using Montgomery reduction. 
@@ -861,7 +1001,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) * Then a multiple of modulus is added to make T divisible by B^2. * [In our case, it is (p256_mp_mod * a[1]) << 32.] * And so on. Eventually T is divisible by R, and after division by R - * the algorithm is in the same place as the usual Montgomery reduction was. + * the algorithm is in the same place as the usual Montgomery reduction. * * TODO: Can conditionally use 64-bit (if bit-little-endian arch) logic? */ @@ -914,6 +1054,7 @@ static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit sp_256_norm_8(r); } } +#endif /* Multiply two Montogmery form numbers mod the modulus (prime). * (r = a * b mod m) -- cgit v1.2.3-55-g6feb From 8514b4166d7a9d7720006d852ae67f43baed8ef1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 28 Nov 2021 21:40:23 +0100 Subject: tls: P256: enable 64-bit version of montgomery reduction After more testing, (1) I'm more sure it is indeed correct, and (2) it is a significant speedup - we do a lot of those multiplications. function old new delta sp_512to256_mont_reduce_8 191 223 +32 Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index b1c410037..cb166e413 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -711,12 +711,13 @@ static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a) memcpy(r, a + 8, sizeof(*r) * 8); } -// Disabled for now. Seems to work, but ugly and 40 bytes larger on x86-64. -#if 0 //UNALIGNED_LE_64BIT +#if UNALIGNED_LE_64BIT /* 64-bit little-endian optimized version. * See generic 32-bit version below for explanation. * The benefit of this version is: even though r[3] calculation is atrocious, * we call sp_256_mul_add_4() four times, not 8. 
+ * Measured run time improvement of curve_P256_compute_pubkey_and_premaster() + * call on x86-64: from ~1500us to ~900us. Code size +32 bytes. */ static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/) { @@ -794,18 +795,18 @@ static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/) t64u = (t64 < b); t64 += r[3]; t64u += (t64 < r[3]); - { - uint64_t lo,hi; + { // add ((((uint128_t)b << 32) - b) << 32): + uint64_t lo, hi; //lo = (((b << 32) - b) << 32 //hi = (((uint128_t)b << 32) - b) >> 32 //but without uint128_t: - hi = (b << 32) - b; /* form lower 32 bits of "hi" part 1 */ + hi = (b << 32) - b; /* make lower 32 bits of "hi", part 1 */ b = (b >> 32) - (/*borrowed above?*/(b << 32) < b); /* upper 32 bits of "hi" are in b */ lo = hi << 32; /* (use "hi" value to calculate "lo",... */ t64 += lo; /* ...consume... */ t64u += (t64 < lo); /* ..."lo") */ - hi >>= 32; /* form lower 32 bits of "hi" part 2 */ - hi |= (b << 32); /* combine lower and upper */ + hi >>= 32; /* make lower 32 bits of "hi", part 2 */ + hi |= (b << 32); /* combine lower and upper 32 bits */ t64u += hi; /* consume "hi" */ } //t_hi = (t < m); -- cgit v1.2.3-55-g6feb From b240733ae7423cb8f542a624eef0cfa3037d05bc Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 1 Dec 2021 15:09:44 +0100 Subject: tls: x25519: code shrink by factoring out common code function old new delta fe_reduce - 37 +37 lm_add 67 43 -24 fe_mul_c 62 38 -24 fe_mul__distinct 138 112 -26 curve25519 800 767 -33 lm_sub 98 64 -34 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/5 up/down: 37/-141) Total: -104 bytes Signed-off-by: Denys Vlasenko --- networking/tls_fe.c | 68 +++++++++++++++++------------------------------------ 1 file changed, 21 insertions(+), 47 deletions(-) diff --git a/networking/tls_fe.c b/networking/tls_fe.c index 3a0a6776f..e5580fbcf 100644 --- a/networking/tls_fe.c +++ b/networking/tls_fe.c @@ -187,7 +187,7 @@ 
static void fprime_mul(byte *r, const byte *a, const byte *b, #if 0 //UNUSED static void fe_load(byte *x, word32 c) { - word32 i; + int i; for (i = 0; i < sizeof(c); i++) { x[i] = c; @@ -199,21 +199,29 @@ static void fe_load(byte *x, word32 c) } #endif -static void fe_normalize(byte *x) +static void fe_reduce(byte *x, word32 c) { - byte minusp[F25519_SIZE]; - unsigned c; int i; /* Reduce using 2^255 = 19 mod p */ - c = (x[31] >> 7) * 19; - x[31] &= 127; + x[31] = c & 127; + c = (c >> 7) * 19; for (i = 0; i < F25519_SIZE; i++) { c += x[i]; x[i] = (byte)c; c >>= 8; } +} + +static void fe_normalize(byte *x) +{ + byte minusp[F25519_SIZE]; + unsigned c; + int i; + + /* Reduce using 2^255 = 19 mod p */ + fe_reduce(x, x[31]); /* The number is now less than 2^255 + 18, and therefore less than * 2p. Try subtracting p, and conditionally load the subtracted @@ -247,14 +255,7 @@ static void lm_add(byte* r, const byte* a, const byte* b) } /* Reduce with 2^255 = 19 mod p */ - r[31] &= 127; - c = (c >> 7) * 19; - - for (i = 0; i < F25519_SIZE; i++) { - c += r[i]; - r[i] = (byte)c; - c >>= 8; - } + fe_reduce(r, c); } static void lm_sub(byte* r, const byte* a, const byte* b) @@ -264,21 +265,15 @@ static void lm_sub(byte* r, const byte* a, const byte* b) /* Calculate a + 2p - b, to avoid underflow */ c = 218; - for (i = 0; i + 1 < F25519_SIZE; i++) { + for (i = 0; i < F25519_SIZE - 1; i++) { c += 65280 + ((word32)a[i]) - ((word32)b[i]); r[i] = c; c >>= 8; } c += ((word32)a[31]) - ((word32)b[31]); - r[31] = c & 127; - c = (c >> 7) * 19; - for (i = 0; i < F25519_SIZE; i++) { - c += r[i]; - r[i] = c; - c >>= 8; - } + fe_reduce(r, c); } #if 0 //UNUSED @@ -289,21 +284,15 @@ static void lm_neg(byte* r, const byte* a) /* Calculate 2p - a, to avoid underflow */ c = 218; - for (i = 0; i + 1 < F25519_SIZE; i++) { + for (i = 0; i < F25519_SIZE - 1; i++) { c += 65280 - ((word32)a[i]); r[i] = c; c >>= 8; } c -= ((word32)a[31]); - r[31] = c & 127; - c = (c >> 7) * 19; - for (i = 0; i < 
F25519_SIZE; i++) { - c += r[i]; - r[i] = c; - c >>= 8; - } + fe_reduce(r, c); } #endif @@ -326,14 +315,7 @@ static void fe_mul__distinct(byte *r, const byte *a, const byte *b) r[i] = c; } - r[31] &= 127; - c = (c >> 7) * 19; - - for (i = 0; i < F25519_SIZE; i++) { - c += r[i]; - r[i] = c; - c >>= 8; - } + fe_reduce(r, c); } #if 0 //UNUSED @@ -357,15 +339,7 @@ static void fe_mul_c(byte *r, const byte *a, word32 b) r[i] = c; } - r[31] &= 127; - c >>= 7; - c *= 19; - - for (i = 0; i < F25519_SIZE; i++) { - c += r[i]; - r[i] = c; - c >>= 8; - } + fe_reduce(r, c); } static void fe_inv__distinct(byte *r, const byte *x) -- cgit v1.2.3-55-g6feb From 27df6aeef2d0d4b726a8b3b1ce1b1cafbbce3431 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sat, 11 Dec 2021 23:27:40 +0100 Subject: tls: P256: factor out "multiply then reduce" operation function old new delta sp_256_mont_mul_and_reduce_8 - 44 +44 sp_256_ecc_mulmod_8 517 442 -75 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/1 up/down: 44/-75) Total: -31 bytes Signed-off-by: Denys Vlasenko --- networking/tls_sp_c32.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index cb166e413..292dda24e 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c @@ -1091,6 +1091,17 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a sp_256_mont_mul_8(r, a, a /*, m, mp*/); } +static NOINLINE void sp_256_mont_mul_and_reduce_8(sp_digit* r, + const sp_digit* a, const sp_digit* b + /*, const sp_digit* m, sp_digit mp*/) +{ + sp_digit rr[2 * 8]; + + sp_256_mont_mul_8(rr, a, b /*, p256_mod, p256_mp_mod*/); + memset(rr + 8, 0, sizeof(rr) / 2); + sp_512to256_mont_reduce_8(r, rr /*, p256_mod, p256_mp_mod*/); +} + /* Invert the number, in Montgomery form, modulo the modulus (prime) of the * P256 curve. 
(r = 1 / a mod m) * @@ -1186,7 +1197,6 @@ static void sp_256_map_8(sp_point* r, sp_point* p) { sp_digit t1[8]; sp_digit t2[8]; - sp_digit rr[2 * 8]; sp_256_mont_inv_8(t1, p->z); @@ -1194,18 +1204,14 @@ static void sp_256_map_8(sp_point* r, sp_point* p) sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/); /* x /= z^2 */ - sp_256_mont_mul_8(rr, p->x, t2 /*, p256_mod, p256_mp_mod*/); - memset(rr + 8, 0, sizeof(rr) / 2); - sp_512to256_mont_reduce_8(r->x, rr /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_and_reduce_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/); /* Reduce x to less than modulus */ if (sp_256_cmp_8(r->x, p256_mod) >= 0) sp_256_sub_8_p256_mod(r->x); sp_256_norm_8(r->x); /* y /= z^3 */ - sp_256_mont_mul_8(rr, p->y, t1 /*, p256_mod, p256_mp_mod*/); - memset(rr + 8, 0, sizeof(rr) / 2); - sp_512to256_mont_reduce_8(r->y, rr /*, p256_mod, p256_mp_mod*/); + sp_256_mont_mul_and_reduce_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/); /* Reduce y to less than modulus */ if (sp_256_cmp_8(r->y, p256_mod) >= 0) sp_256_sub_8_p256_mod(r->y); -- cgit v1.2.3-55-g6feb From c7b90dc4d10ccc4f95940f42676ff907cee73272 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 12 Dec 2021 00:34:15 +0100 Subject: uudecode: special-case "/dev/stdout", closes 14241 function old new delta uudecode_main 295 322 +27 Signed-off-by: Denys Vlasenko --- coreutils/uudecode.c | 11 ++++++++++- docs/posix_conformance.txt | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/coreutils/uudecode.c b/coreutils/uudecode.c index a607977e9..e90902f52 100644 --- a/coreutils/uudecode.c +++ b/coreutils/uudecode.c @@ -155,7 +155,16 @@ int uudecode_main(int argc UNUSED_PARAM, char **argv) break; } dst_stream = stdout; - if (NOT_LONE_DASH(outname)) { + if (NOT_LONE_DASH(outname) +/* https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uudecode.html + * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uuencode.html + * The above says that output file name specified in 
input file + * or overridden by -o OUTFILE can be special "/dev/stdout" string. + * This usually works "implicitly": many systems have /dev/stdout. + * If ENABLE_DESKTOP, support that explicitly: + */ + && (!ENABLE_DESKTOP || strcmp(outname, "/dev/stdout") != 0) + ) { dst_stream = xfopen_for_write(outname); fchmod(fileno(dst_stream), mode & (S_IRWXU | S_IRWXG | S_IRWXO)); } diff --git a/docs/posix_conformance.txt b/docs/posix_conformance.txt index f6e8858cc..5e107d74d 100644 --- a/docs/posix_conformance.txt +++ b/docs/posix_conformance.txt @@ -690,7 +690,7 @@ uniq Busybox specific options: uudecode POSIX options option | exists | compliant | remarks - -o outfile | no | no | + -o outfile | yes | no | uudecode Busybox specific options: None uuencode POSIX options -- cgit v1.2.3-55-g6feb From b9fba185c570b52fccffa2b9ae39ba32a0860daf Mon Sep 17 00:00:00 2001 From: Ildar Shaimordanov Date: Sun, 12 Dec 2021 03:19:13 +0100 Subject: wget: allow end-users to customize Content-Type for --post-data and --post-file More explanation in this PR: https://github.com/rmyorston/busybox-w32/pull/233 The real use-case: wget https://api.github.com/markdown/raw --header "Content-Type: text/plain" function old new delta wget_main 2560 2581 +21 wget_user_headers 62 76 +14 .rodata 104196 104197 +1 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/0 up/down: 36/0) Total: 36 bytes Signed-off-by: Ildar Shaimordanov Signed-off-by: Denys Vlasenko --- networking/wget.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/networking/wget.c b/networking/wget.c index 91ef99eab..9ec0e67b9 100644 --- a/networking/wget.c +++ b/networking/wget.c @@ -211,29 +211,33 @@ enum { HDR_HOST = (1<<0), HDR_USER_AGENT = (1<<1), HDR_RANGE = (1<<2), - HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION, - HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION, + HDR_CONTENT_TYPE = (1<<3), + 
HDR_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION, + HDR_PROXY_AUTH = (1<<5) * ENABLE_FEATURE_WGET_AUTHENTICATION, }; static const char wget_user_headers[] ALIGN1 = "Host:\0" "User-Agent:\0" "Range:\0" + "Content-Type:\0" # if ENABLE_FEATURE_WGET_AUTHENTICATION "Authorization:\0" "Proxy-Authorization:\0" # endif ; -# define USR_HEADER_HOST (G.user_headers & HDR_HOST) -# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT) -# define USR_HEADER_RANGE (G.user_headers & HDR_RANGE) -# define USR_HEADER_AUTH (G.user_headers & HDR_AUTH) -# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH) +# define USR_HEADER_HOST (G.user_headers & HDR_HOST) +# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT) +# define USR_HEADER_RANGE (G.user_headers & HDR_RANGE) +# define USR_HEADER_CONTENT_TYPE (G.user_headers & HDR_CONTENT_TYPE) +# define USR_HEADER_AUTH (G.user_headers & HDR_AUTH) +# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH) #else /* No long options, no user-headers :( */ -# define USR_HEADER_HOST 0 -# define USR_HEADER_USER_AGENT 0 -# define USR_HEADER_RANGE 0 -# define USR_HEADER_AUTH 0 -# define USR_HEADER_PROXY_AUTH 0 +# define USR_HEADER_HOST 0 +# define USR_HEADER_USER_AGENT 0 +# define USR_HEADER_RANGE 0 +# define USR_HEADER_CONTENT_TYPE 0 +# define USR_HEADER_AUTH 0 +# define USR_HEADER_PROXY_AUTH 0 #endif /* Globals */ @@ -1261,8 +1265,13 @@ static void download_one_url(const char *url) } if (G.post_data) { + /* If user did not override it... 
*/ + if (!USR_HEADER_CONTENT_TYPE) { + SENDFMT(sfp, + "Content-Type: application/x-www-form-urlencoded\r\n" + ); + } SENDFMT(sfp, - "Content-Type: application/x-www-form-urlencoded\r\n" "Content-Length: %u\r\n" "\r\n" "%s", -- cgit v1.2.3-55-g6feb From 9b678807198611308cfd8b10427f9e08c62f7bec Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 8 Nov 2021 17:36:43 +0100 Subject: Makefile.flags: use all cflags for crypt and rt checks To check if libcrypt and librt are available, we check if we can compile and link a simple test program. These checks do not match the actual linking if CONFIG_STATIC is enabled. For CONFIG_STATIC, CFLAGS_busybox is set to -static. The checks don't use CFLAGS_busybox and detect a shared libcrypt or librt. If we link busybox later and we have no static libcrypt or librt, linking will fail. Update the libcrypt and librt checks to use CFLAGS_busybox. Signed-off-by: Martin Kaiser Signed-off-by: Denys Vlasenko --- Makefile.flags | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.flags b/Makefile.flags index 667481983..c34356230 100644 --- a/Makefile.flags +++ b/Makefile.flags @@ -151,8 +151,8 @@ LDLIBS += m # gcc-4.2.1 fails if we try to feed C source on stdin: # echo 'int main(void){return 0;}' | $(CC) $(CFLAGS) -lcrypt -o /dev/null -xc - # fall back to using a temp file: -CRYPT_AVAILABLE := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) -lcrypt -o /dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c) -RT_AVAILABLE := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) -lrt -o /dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c) +CRYPT_AVAILABLE := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) $(CFLAGS_busybox) -lcrypt -o /dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c) +RT_AVAILABLE := $(shell echo 'int main(void){return 0;}' >bb_libtest.c; $(CC) $(CFLAGS) $(CFLAGS_busybox) -lrt -o 
/dev/null bb_libtest.c >/dev/null 2>&1 && echo "y"; rm bb_libtest.c) ifeq ($(CRYPT_AVAILABLE),y) LDLIBS += crypt endif -- cgit v1.2.3-55-g6feb From e67b80f4739c4075b51b0a575701b73928fe0bf1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 12 Dec 2021 17:13:54 +0100 Subject: udhcpc6: fix udhcp_find_option to actually find DHCP6 options udhcp_insert_new_option treats code for IPv6 as follows: new->data[D6_OPT_CODE] = code >> 8; new->data[D6_OPT_CODE + 1] = code & 0xff; udhcp_find_option tests the code as follows: while (opt_list && opt_list->data[OPT_CODE] < code) ... if (opt_list && opt_list->data[OPT_CODE] == code) So yes, OPT_CODE and D6_OPT_CODE are both 0, but the D6_OPT_CLIENTID = 1 value means that the 1 is in the second byte, and udhcp_find_option is only looking at the first byte, so send_d6_release can never find the created option. function old new delta udhcp_find_option 28 53 +25 attach_option 276 284 +8 udhcpc6_main 2602 2607 +5 perform_d6_release 262 267 +5 udhcpd_main 1518 1520 +2 udhcpc_main 2542 2544 +2 add_serverid_and_clientid_options 46 48 +2 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 7/0 up/down: 49/0) Total: 49 bytes Signed-off-by: Denys Vlasenko --- networking/udhcp/common.c | 29 ++++++++++++++++++++++------- networking/udhcp/common.h | 6 +++++- networking/udhcp/d6_dhcpc.c | 5 +++-- networking/udhcp/dhcpc.c | 6 +++--- networking/udhcp/dhcpd.c | 2 +- 5 files changed, 34 insertions(+), 14 deletions(-) diff --git a/networking/udhcp/common.c b/networking/udhcp/common.c index 31e525cb0..8e9b93655 100644 --- a/networking/udhcp/common.c +++ b/networking/udhcp/common.c @@ -404,14 +404,29 @@ void FAST_FUNC udhcp_add_simple_option(struct dhcp_packet *packet, uint8_t code, #endif /* Find option 'code' in opt_list */ -struct option_set* FAST_FUNC udhcp_find_option(struct option_set *opt_list, uint8_t code) +struct option_set* FAST_FUNC udhcp_find_option(struct option_set
*opt_list, uint8_t code, bool dhcpv6) { - while (opt_list && opt_list->data[OPT_CODE] < code) - opt_list = opt_list->next; + IF_NOT_UDHCPC6(bool dhcpv6 = 0;) + uint8_t cur_code; - if (opt_list && opt_list->data[OPT_CODE] == code) - return opt_list; - return NULL; + for (;;) { + if (!opt_list) + return opt_list; /* NULL */ + if (!dhcpv6) { + cur_code = opt_list->data[OPT_CODE]; + } else { +//FIXME: add support for code > 0xff + if (opt_list->data[D6_OPT_CODE] != 0) + return NULL; + cur_code = opt_list->data[D6_OPT_CODE + 1]; + } + if (cur_code >= code) { + if (cur_code == code) + return opt_list; + return NULL; + } + opt_list = opt_list->next; + } } /* Parse string to IP in network order */ @@ -499,7 +514,7 @@ static NOINLINE void attach_option( } #endif - existing = udhcp_find_option(*opt_list, optflag->code); + existing = udhcp_find_option(*opt_list, optflag->code, dhcpv6); if (!existing) { /* make a new option */ uint8_t *p = udhcp_insert_new_option(opt_list, optflag->code, length, dhcpv6); diff --git a/networking/udhcp/common.h b/networking/udhcp/common.h index e374771cb..5882238e3 100644 --- a/networking/udhcp/common.h +++ b/networking/udhcp/common.h @@ -245,7 +245,11 @@ void udhcp_add_simple_option(struct dhcp_packet *packet, uint8_t code, uint32_t char *dname_dec(const uint8_t *cstr, int clen, const char *pre) FAST_FUNC; uint8_t *dname_enc(/*const uint8_t *cstr, int clen,*/ const char *src, int *retlen) FAST_FUNC; #endif -struct option_set *udhcp_find_option(struct option_set *opt_list, uint8_t code) FAST_FUNC; +#if !ENABLE_UDHCPC6 +#define udhcp_find_option(opt_list, code, dhcpv6) \ + udhcp_find_option(opt_list, code) +#endif +struct option_set *udhcp_find_option(struct option_set *opt_list, uint8_t code, bool dhcpv6) FAST_FUNC; // RFC 2131 Table 5: Fields and options used by DHCP clients // diff --git a/networking/udhcp/d6_dhcpc.c b/networking/udhcp/d6_dhcpc.c index 8d11a7539..9d2a8f5d3 100644 --- a/networking/udhcp/d6_dhcpc.c +++ 
b/networking/udhcp/d6_dhcpc.c @@ -888,7 +888,8 @@ int send_d6_release(struct in6_addr *server_ipv6, struct in6_addr *our_cur_ipv6) if (client6_data.ia_pd) opt_ptr = mempcpy(opt_ptr, client6_data.ia_pd, client6_data.ia_pd->len + 2+2); /* Client-id */ - ci = udhcp_find_option(client_data.options, D6_OPT_CLIENTID); +///vda + ci = udhcp_find_option(client_data.options, D6_OPT_CLIENTID, /*dhcpv6:*/ 1); if (ci) opt_ptr = mempcpy(opt_ptr, ci->data, D6_OPT_DATA + 2+2 + 6); @@ -1272,7 +1273,7 @@ int udhcpc6_main(int argc UNUSED_PARAM, char **argv) } clientid_mac_ptr = NULL; - if (!udhcp_find_option(client_data.options, D6_OPT_CLIENTID)) { + if (!udhcp_find_option(client_data.options, D6_OPT_CLIENTID, /*dhcpv6:*/ 1)) { /* not set, set the default client ID */ clientid_mac_ptr = udhcp_insert_new_option( &client_data.options, D6_OPT_CLIENTID, diff --git a/networking/udhcp/dhcpc.c b/networking/udhcp/dhcpc.c index 331f13a8c..c757fb37c 100644 --- a/networking/udhcp/dhcpc.c +++ b/networking/udhcp/dhcpc.c @@ -658,7 +658,7 @@ static void add_client_options(struct dhcp_packet *packet) // This will be needed if we remove -V VENDOR_STR in favor of // -x vendor:VENDOR_STR - //if (!udhcp_find_option(packet.options, DHCP_VENDOR)) + //if (!udhcp_find_option(packet.options, DHCP_VENDOR, /*dhcpv6:*/ 0)) // /* not set, set the default vendor ID */ // ...add (DHCP_VENDOR, "udhcp "BB_VER) opt... } @@ -676,7 +676,7 @@ static void add_serverid_and_clientid_options(struct dhcp_packet *packet, uint32 * If the client used a 'client identifier' when it obtained the lease, * it MUST use the same 'client identifier' in the DHCPRELEASE message. 
*/ - ci = udhcp_find_option(client_data.options, DHCP_CLIENT_ID); + ci = udhcp_find_option(client_data.options, DHCP_CLIENT_ID, /*dhcpv6:*/ 0); if (ci) udhcp_add_binary_option(packet, ci->data); } @@ -1328,7 +1328,7 @@ int udhcpc_main(int argc UNUSED_PARAM, char **argv) } clientid_mac_ptr = NULL; - if (!(opt & OPT_C) && !udhcp_find_option(client_data.options, DHCP_CLIENT_ID)) { + if (!(opt & OPT_C) && !udhcp_find_option(client_data.options, DHCP_CLIENT_ID, /*dhcpv6:*/ 0)) { /* not suppressed and not set, create default client ID */ clientid_mac_ptr = udhcp_insert_new_option( &client_data.options, DHCP_CLIENT_ID, diff --git a/networking/udhcp/dhcpd.c b/networking/udhcp/dhcpd.c index 0f5edb75c..66750e2e6 100644 --- a/networking/udhcp/dhcpd.c +++ b/networking/udhcp/dhcpd.c @@ -935,7 +935,7 @@ int udhcpd_main(int argc UNUSED_PARAM, char **argv) bb_simple_info_msg("started, v"BB_VER); - option = udhcp_find_option(server_data.options, DHCP_LEASE_TIME); + option = udhcp_find_option(server_data.options, DHCP_LEASE_TIME, /*dhcpv6:*/ 0); server_data.max_lease_sec = DEFAULT_LEASE_TIME; if (option) { move_from_unaligned32(server_data.max_lease_sec, option->data + OPT_DATA); -- cgit v1.2.3-55-g6feb From cb91a818c8f7730d8f3b30b5b4e75fd21496609f Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Fri, 26 Nov 2021 16:38:57 +0100 Subject: libarchive/get_header_ar.c: fix extraction of archives from binutils in deterministic mode GNU binutils in deterministic mode (ar rD or built with --enable-deterministic-archives) hard codes file mode to 0644 (NOT 0100644) since https://github.com/bminor/binutils-gdb/commit/36e4dce69dd2 This confuses busybox ar x (data_extract_all): touch a; ar rD a.ar a ar: creating a.ar busybox ar x a.ar ar: unrecognized file type hexdump -C a.ar 00000000 21 3c 61 72 63 68 3e 0a 61 2f 20 20 20 20 20 20 |!.a/ | 00000010 20 20 20 20 20 20 20 20 30 20 20 20 20 20 20 20 | 0 | 00000020 20 20 20 20 30 20 20 20 20 20 30 20 20 20 20 20 | 0 0 | 00000030 36 34 34 20 20 
20 20 20 30 20 20 20 20 20 20 20 |644 0 | 00000040 20 20 60 0a | `.| As a workaround, force the mode bits to S_IFREG, as nothing else makes sense for ar. function old new delta get_header_ar 539 542 +3 Signed-off-by: Peter Korsgaard Signed-off-by: Denys Vlasenko --- archival/libarchive/get_header_ar.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/archival/libarchive/get_header_ar.c b/archival/libarchive/get_header_ar.c index 3a19d6ff7..6bd897392 100644 --- a/archival/libarchive/get_header_ar.c +++ b/archival/libarchive/get_header_ar.c @@ -92,8 +92,12 @@ char FAST_FUNC get_header_ar(archive_handle_t *archive_handle) /* Only size is always present, the rest may be missing in * long filename pseudo file. Thus we decode the rest * after dealing with long filename pseudo file. + * + * GNU binutils in deterministic mode hard codes mode to 0644 (NOT + * 0100644). AR archives can only contain files, so force file + * mode. */ - typed->mode = read_num(ar.formatted.mode, 8, sizeof(ar.formatted.mode)); + typed->mode = read_num(ar.formatted.mode, 8, sizeof(ar.formatted.mode)) | S_IFREG; typed->gid = read_num(ar.formatted.gid, 10, sizeof(ar.formatted.gid)); typed->uid = read_num(ar.formatted.uid, 10, sizeof(ar.formatted.uid)); typed->mtime = read_num(ar.formatted.date, 10, sizeof(ar.formatted.date)); -- cgit v1.2.3-55-g6feb From 70683faf380681a11e16a85090162581aed55d73 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 17 Dec 2021 20:37:58 +0100 Subject: httpd: don't send Content-Length in error pages header function old new delta send_headers 701 713 +12 Signed-off-by: Denys Vlasenko --- networking/httpd.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/networking/httpd.c b/networking/httpd.c index 31c8489d3..4def1b6fc 100644 --- a/networking/httpd.c +++ b/networking/httpd.c @@ -1125,7 +1125,7 @@ static void send_headers(unsigned responseNum) "Connection: close\r\n", responseNum, 
responseString #if ENABLE_FEATURE_HTTPD_DATE - ,date_str + , date_str #endif ); } @@ -1222,17 +1222,29 @@ static void send_headers(unsigned responseNum) // (NB: standards do not define "Transfer-Length:" _header_, // transfer-length above is just a concept). +#if ENABLE_FEATURE_HTTPD_RANGES \ + || ENABLE_FEATURE_HTTPD_LAST_MODIFIED \ + || ENABLE_FEATURE_HTTPD_ETAG len += sprintf(iobuf + len, -#if ENABLE_FEATURE_HTTPD_RANGES +# if ENABLE_FEATURE_HTTPD_RANGES "Accept-Ranges: bytes\r\n" -#endif -#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED +# endif +# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED "Last-Modified: %s\r\n" -#endif -#if ENABLE_FEATURE_HTTPD_ETAG +# endif +# if ENABLE_FEATURE_HTTPD_ETAG "ETag: %s\r\n" +# endif +# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED + , date_str +# endif +# if ENABLE_FEATURE_HTTPD_ETAG + , G.etag +# endif #endif - + ); + if (!infoString) { + len += sprintf(iobuf + len, /* Because of 4.4 (5), we can forgo sending of "Content-Length" * since we close connection afterwards, but it helps clients * to e.g. estimate download times, show progress bars etc. @@ -1240,14 +1252,9 @@ static void send_headers(unsigned responseNum) * but de-facto standard is to send it (see comment below). 
*/ "Content-Length: %"OFF_FMT"u\r\n", -#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED - date_str, -#endif -#if ENABLE_FEATURE_HTTPD_ETAG - G.etag, -#endif file_size - ); + ); + } } /* This should be "Transfer-Encoding", not "Content-Encoding": -- cgit v1.2.3-55-g6feb From b720629dfec0e8e991e75b751dad215af2bc657f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 17 Dec 2021 21:01:15 +0100 Subject: httpd: do not send Last-Modified / ETag / Content-Length for error pages function old new delta send_headers 713 701 -12 send_headers_and_exit 20 34 +14 Signed-off-by: Denys Vlasenko --- networking/httpd.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/networking/httpd.c b/networking/httpd.c index 4def1b6fc..1ba1d1063 100644 --- a/networking/httpd.c +++ b/networking/httpd.c @@ -1222,29 +1222,17 @@ static void send_headers(unsigned responseNum) // (NB: standards do not define "Transfer-Length:" _header_, // transfer-length above is just a concept). -#if ENABLE_FEATURE_HTTPD_RANGES \ - || ENABLE_FEATURE_HTTPD_LAST_MODIFIED \ - || ENABLE_FEATURE_HTTPD_ETAG len += sprintf(iobuf + len, -# if ENABLE_FEATURE_HTTPD_RANGES +#if ENABLE_FEATURE_HTTPD_RANGES "Accept-Ranges: bytes\r\n" -# endif -# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED +#endif +#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED "Last-Modified: %s\r\n" -# endif -# if ENABLE_FEATURE_HTTPD_ETAG +#endif +#if ENABLE_FEATURE_HTTPD_ETAG "ETag: %s\r\n" -# endif -# if ENABLE_FEATURE_HTTPD_LAST_MODIFIED - , date_str -# endif -# if ENABLE_FEATURE_HTTPD_ETAG - , G.etag -# endif #endif - ); - if (!infoString) { - len += sprintf(iobuf + len, + /* Because of 4.4 (5), we can forgo sending of "Content-Length" * since we close connection afterwards, but it helps clients * to e.g. estimate download times, show progress bars etc. @@ -1252,9 +1240,14 @@ static void send_headers(unsigned responseNum) * but de-facto standard is to send it (see comment below). 
*/ "Content-Length: %"OFF_FMT"u\r\n", +#if ENABLE_FEATURE_HTTPD_LAST_MODIFIED + date_str, +#endif +#if ENABLE_FEATURE_HTTPD_ETAG + G.etag, +#endif file_size - ); - } + ); } /* This should be "Transfer-Encoding", not "Content-Encoding": @@ -1297,6 +1290,7 @@ static void send_headers_and_exit(int responseNum) NORETURN; static void send_headers_and_exit(int responseNum) { IF_FEATURE_HTTPD_GZIP(content_gzip = 0;) + file_size = -1; /* no Last-Modified:, ETag:, Content-Length: */ send_headers(responseNum); log_and_exit(); } -- cgit v1.2.3-55-g6feb From 7105e4afddbf47b494accce40e2a701b8833e6ce Mon Sep 17 00:00:00 2001 From: Ron Yorston Date: Thu, 16 Dec 2021 11:19:03 +0000 Subject: printf: allow 0 as a flag and allow multiple flags The '%' character in a format specification may be followed by one or more flags from the list "+- #0". BusyBox printf didn't support the '0' flag or allow multiple flags to be provided. As a result the formats '%0*d' and '%0 d' were considered to be invalid. The lack of support for '0' was pointed out by Andrew Snyder on the musl mailing list: https://www.openwall.com/lists/musl/2021/12/14/2 function old new delta printf_main 860 891 +31 .rodata 99281 99282 +1 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 32/0) Total: 32 bytes Signed-off-by: Ron Yorston Signed-off-by: Denys Vlasenko --- coreutils/printf.c | 2 +- testsuite/printf.tests | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/coreutils/printf.c b/coreutils/printf.c index dd94c8ade..2e672d15f 100644 --- a/coreutils/printf.c +++ b/coreutils/printf.c @@ -313,7 +313,7 @@ static char **print_formatted(char *f, char **argv, int *conv_err) } break; } - if (*f && strchr("-+ #", *f)) { + while (*f && strchr("-+ #0", *f)) { ++f; ++direc_length; } diff --git a/testsuite/printf.tests b/testsuite/printf.tests index 050edef71..728bbf4bf 100755 --- a/testsuite/printf.tests +++ 
b/testsuite/printf.tests @@ -143,4 +143,14 @@ testing "printf aborts on %r" \ "printf: %r: invalid format\n""1\n" \ "" "" +testing "printf treats leading 0 as flag" \ + "${bb}printf '%0*d\n' 2 1 2>&1; echo \$?" \ + "01\n""0\n" \ + "" "" + +testing "printf handles multiple flags" \ + "${bb}printf '%0 d\n' 2 2>&1; echo \$?" \ + " 2\n""0\n" \ + "" "" + exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From aaade69ce9faac6c05ab8b800fc9e9d4dee8ed54 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 28 Nov 2021 12:11:48 +0200 Subject: find: implement -samefile function old new delta parse_params 1461 1606 +145 func_samefile - 42 +42 packed_usage 34079 34102 +23 static.params 261 271 +10 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 3/0 up/down: 220/0) Total: 220 bytes Signed-off-by: Aaro Koskinen Signed-off-by: Denys Vlasenko --- findutils/find.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/findutils/find.c b/findutils/find.c index fdc5c152d..bb6ad31e5 100644 --- a/findutils/find.c +++ b/findutils/find.c @@ -155,6 +155,13 @@ //config: default y //config: depends on FIND //config: +//config:config FEATURE_FIND_SAMEFILE +//config: bool "Enable -samefile: reference file matching" +//config: default y +//config: depends on FIND +//config: help +//config: Support the 'find -samefile' option for searching by a reference file. 
+//config: //config:config FEATURE_FIND_EXEC //config: bool "Enable -exec: execute commands" //config: default y @@ -350,6 +357,9 @@ //usage: IF_FEATURE_FIND_INUM( //usage: "\n -inum N File has inode number N" //usage: ) +//usage: IF_FEATURE_FIND_SAMEFILE( +//usage: "\n -samefile FILE File is same as FILE" +//usage: ) //usage: IF_FEATURE_FIND_USER( //usage: "\n -user NAME/ID File is owned by given user" //usage: ) @@ -444,6 +454,7 @@ IF_FEATURE_FIND_MTIME( ACTS(mtime, unsigned char time_type; unsigned char mtime IF_FEATURE_FIND_MMIN( ACTS(mmin, unsigned char time_type; unsigned char mmin_char; unsigned mmin_mins;)) IF_FEATURE_FIND_NEWER( ACTS(newer, time_t newer_mtime;)) IF_FEATURE_FIND_INUM( ACTS(inum, ino_t inode_num;)) +IF_FEATURE_FIND_SAMEFILE(ACTS(samefile, ino_t inode_num; dev_t device;)) IF_FEATURE_FIND_USER( ACTS(user, uid_t uid;)) IF_FEATURE_FIND_SIZE( ACTS(size, char size_char; off_t size;)) IF_FEATURE_FIND_CONTEXT(ACTS(context, security_context_t context;)) @@ -731,6 +742,13 @@ ACTF(inum) return (statbuf->st_ino == ap->inode_num); } #endif +#if ENABLE_FEATURE_FIND_SAMEFILE +ACTF(samefile) +{ + return statbuf->st_ino == ap->inode_num && + statbuf->st_dev == ap->device; +} +#endif #if ENABLE_FEATURE_FIND_EXEC static int do_exec(action_exec *ap, const char *fileName) { @@ -1125,6 +1143,7 @@ static action*** parse_params(char **argv) IF_FEATURE_FIND_CMIN( PARM_cmin ,) IF_FEATURE_FIND_NEWER( PARM_newer ,) IF_FEATURE_FIND_INUM( PARM_inum ,) + IF_FEATURE_FIND_SAMEFILE(PARM_samefile ,) IF_FEATURE_FIND_USER( PARM_user ,) IF_FEATURE_FIND_GROUP( PARM_group ,) IF_FEATURE_FIND_SIZE( PARM_size ,) @@ -1173,6 +1192,7 @@ static action*** parse_params(char **argv) IF_FEATURE_FIND_CMIN( "-cmin\0" ) IF_FEATURE_FIND_NEWER( "-newer\0" ) IF_FEATURE_FIND_INUM( "-inum\0" ) + IF_FEATURE_FIND_SAMEFILE("-samefile\0") IF_FEATURE_FIND_USER( "-user\0" ) IF_FEATURE_FIND_GROUP( "-group\0" ) IF_FEATURE_FIND_SIZE( "-size\0" ) @@ -1511,6 +1531,21 @@ static action*** parse_params(char 
**argv) ap->inode_num = xatoul(arg1); } #endif +#if ENABLE_FEATURE_FIND_SAMEFILE + else if (parm == PARM_samefile) { + action_samefile *ap; + struct stat stbuf; + dbg("%d", __LINE__); + if (G.recurse_flags & (ACTION_FOLLOWLINKS | + ACTION_FOLLOWLINKS_L0)) + xstat(arg1, &stbuf); + else if (lstat(arg1, &stbuf)) + bb_perror_msg_and_die("can't stat '%s'", arg1); + ap = ALLOC_ACTION(samefile); + ap->inode_num = stbuf.st_ino; + ap->device = stbuf.st_dev; + } +#endif #if ENABLE_FEATURE_FIND_USER else if (parm == PARM_user) { action_user *ap; -- cgit v1.2.3-55-g6feb From 00d10cb6eb47e73bd88ab7e884562b555462815f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 17 Dec 2021 21:38:02 +0100 Subject: docs/embedded-scripts.txt: whitespace fix Signed-off-by: Denys Vlasenko --- docs/embedded-scripts.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/embedded-scripts.txt b/docs/embedded-scripts.txt index 7a273d698..f6f107d4e 100644 --- a/docs/embedded-scripts.txt +++ b/docs/embedded-scripts.txt @@ -55,19 +55,19 @@ Next we need the configuration data. This is very similar to the example code for the native applet: //config:config MU -//config: bool "MU" -//config: default y -//config: help -//config: Returns an indeterminate value. +//config: bool "MU" +//config: default y +//config: help +//config: Returns an indeterminate value. //applet:IF_MU(APPLET_SCRIPTED(mu, scripted, BB_DIR_USR_BIN, BB_SUID_DROP, mu)) //usage:#define mu_trivial_usage -//usage: "[-abcde] FILE..." +//usage: "[-abcde] FILE..." //usage:#define mu_full_usage -//usage: "Returns an indeterminate value\n" -//usage: "\n -a First function" -//usage: "\n -b Second function" +//usage: "Returns an indeterminate value\n" +//usage: "\n -a First function" +//usage: "\n -b Second function" The only difference is that the applet is specified as being of type APPLET_SCRIPTED. 
It would also be useful to include details of any -- cgit v1.2.3-55-g6feb From 579894bfd28ffb38f7dabc7862d4e7ebfade2865 Mon Sep 17 00:00:00 2001 From: Walter Lozano Date: Thu, 25 Nov 2021 13:11:32 -0300 Subject: cmp: add support for -n Add support for "-n" to cmp in order to compare at most n bytes. function old new delta cmp_main 552 589 +37 .rodata 104198 104203 +5 packed_usage 34102 34074 -28 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 42/-28) Total: 14 bytes Signed-off-by: Walter Lozano Signed-off-by: Denys Vlasenko --- editors/cmp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/editors/cmp.c b/editors/cmp.c index e106d814e..9eaff2b8e 100644 --- a/editors/cmp.c +++ b/editors/cmp.c @@ -18,12 +18,13 @@ //kbuild:lib-$(CONFIG_CMP) += cmp.o //usage:#define cmp_trivial_usage -//usage: "[-ls] FILE1 [FILE2" IF_DESKTOP(" [SKIP1 [SKIP2]]") "]" +//usage: "[-ls] [-n NUM] FILE1 [FILE2" IF_DESKTOP(" [SKIP1 [SKIP2]]") "]" //usage:#define cmp_full_usage "\n\n" //usage: "Compare FILE1 with FILE2 (or stdin)\n" //usage: "\n -l Write the byte numbers (decimal) and values (octal)" //usage: "\n for all differing bytes" //usage: "\n -s Quiet" +//usage: "\n -n NUM Compare at most NUM bytes" /* BB_AUDIT SUSv3 (virtually) compliant -- uses nicer GNU format for -l. */ /* http://www.opengroup.org/onlinepubs/007904975/utilities/cmp.html */ @@ -35,9 +36,10 @@ static const char fmt_differ[] ALIGN1 = "%s %s differ: char %"OFF_FMT"u, line %u // This fmt_l_opt uses gnu-isms.
SUSv3 would be "%.0s%.0s%"OFF_FMT"u %o %o\n" static const char fmt_l_opt[] ALIGN1 = "%.0s%.0s%"OFF_FMT"u %3o %3o\n"; -#define OPT_STR "sl" +#define OPT_STR "sln:" #define CMP_OPT_s (1<<0) #define CMP_OPT_l (1<<1) +#define CMP_OPT_n (1<<2) int cmp_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int cmp_main(int argc UNUSED_PARAM, char **argv) @@ -50,13 +52,15 @@ int cmp_main(int argc UNUSED_PARAM, char **argv) int c1, c2; unsigned opt; int retval = 0; + int max_count = -1; opt = getopt32(argv, "^" OPT_STR - "\0" "-1" + "\0" "-1:n+" IF_DESKTOP(":?4") IF_NOT_DESKTOP(":?2") - ":l--s:s--l" + ":l--s:s--l", + &max_count ); argv += optind; @@ -95,6 +99,8 @@ int cmp_main(int argc UNUSED_PARAM, char **argv) while (skip2) { getc(fp2); skip2--; } } do { + if (max_count >= 0 && --max_count < 0) + break; c1 = getc(fp1); c2 = getc(fp2); ++char_pos; -- cgit v1.2.3-55-g6feb From bfd8738154747d16f66ccfde3036dc21d39c7cec Mon Sep 17 00:00:00 2001 From: Sören Tempel Date: Sun, 21 Nov 2021 12:24:45 +0100 Subject: ed: add support for -p command-line option as mandated by POSIX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The POSIX.1-2008 specification of ed(1) mandates two command-line options: -p (for specifying a prompt string) and -s (to suppress writing of byte counts). This commit adds support for the former. Furthermore, it also changes the default prompt string to an empty string (instead of ": ") since this is also mandated by POSIX: -p string Use string as the prompt string when in command mode. By default, there shall be no prompt string. 
function old new delta ed_main 112 144 +32 packed_usage 34074 34097 +23 doCommands 1889 1887 -2 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/1 up/down: 55/-2) Total: 53 bytes Signed-off-by: Sören Tempel Signed-off-by: Denys Vlasenko --- editors/ed.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/editors/ed.c b/editors/ed.c index 14540e566..18faba5a4 100644 --- a/editors/ed.c +++ b/editors/ed.c @@ -18,7 +18,7 @@ //applet:IF_ED(APPLET(ed, BB_DIR_BIN, BB_SUID_DROP)) -//usage:#define ed_trivial_usage "[FILE]" +//usage:#define ed_trivial_usage "[-p PROMPT] [FILE]" //usage:#define ed_full_usage "" #include "libbb.h" @@ -48,6 +48,7 @@ struct globals { char *bufBase; char *bufPtr; char *fileName; + const char *prompt; LINE lines; smallint dirty; int marks[26]; @@ -57,6 +58,7 @@ struct globals { #define bufBase (G.bufBase ) #define bufPtr (G.bufPtr ) #define fileName (G.fileName ) +#define prompt (G.prompt ) #define curNum (G.curNum ) #define lastNum (G.lastNum ) #define bufUsed (G.bufUsed ) @@ -793,7 +795,7 @@ static void doCommands(void) * 0 on ctrl-C, * >0 length of input string, including terminating '\n' */ - len = read_line_input(NULL, ": ", buf, sizeof(buf)); + len = read_line_input(NULL, prompt, buf, sizeof(buf)); if (len <= 0) return; while (len && isspace(buf[--len])) @@ -1005,8 +1007,12 @@ int ed_main(int argc UNUSED_PARAM, char **argv) lines.next = &lines; lines.prev = &lines; - if (argv[1]) { - fileName = xstrdup(argv[1]); + prompt = ""; /* no prompt by default */ + getopt32(argv, "p:", &prompt); + argv += optind; + + if (argv[0]) { + fileName = xstrdup(argv[0]); if (!readLines(fileName, 1)) { return EXIT_SUCCESS; } -- cgit v1.2.3-55-g6feb From c1eac153e8b89cfc9d550991735c09bad1579201 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 17 Dec 2021 22:43:45 +0100 Subject: cmp: code shrink function old new delta .rodata 104203 104201 -2 Signed-off-by: Denys 
Vlasenko --- editors/cmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/editors/cmp.c b/editors/cmp.c index 9eaff2b8e..6d2b0c6c3 100644 --- a/editors/cmp.c +++ b/editors/cmp.c @@ -36,7 +36,7 @@ static const char fmt_differ[] ALIGN1 = "%s %s differ: char %"OFF_FMT"u, line %u // This fmt_l_opt uses gnu-isms. SUSv3 would be "%.0s%.0s%"OFF_FMT"u %o %o\n" static const char fmt_l_opt[] ALIGN1 = "%.0s%.0s%"OFF_FMT"u %3o %3o\n"; -#define OPT_STR "sln:" +#define OPT_STR "sln:+" #define CMP_OPT_s (1<<0) #define CMP_OPT_l (1<<1) #define CMP_OPT_n (1<<2) @@ -56,7 +56,7 @@ int cmp_main(int argc UNUSED_PARAM, char **argv) opt = getopt32(argv, "^" OPT_STR - "\0" "-1:n+" + "\0" "-1" IF_DESKTOP(":?4") IF_NOT_DESKTOP(":?2") ":l--s:s--l", -- cgit v1.2.3-55-g6feb From 7d49fedc86bec300d22f44f93ec95825320dd1c1 Mon Sep 17 00:00:00 2001 From: Matthew Slowe Date: Sat, 9 Oct 2021 12:26:40 +0100 Subject: timeout: add support for "timeout -k KILL_SECS" function old new delta timeout_main 307 373 +66 timeout_wait - 42 +42 .rodata 104201 104203 +2 packed_usage 34097 34096 -1 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 2/1 up/down: 110/-1) Total: 109 bytes Signed-off-by: Matthew Slowe Signed-off-by: Denys Vlasenko --- coreutils/timeout.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/coreutils/timeout.c b/coreutils/timeout.c index 8485e1e7d..06108f315 100644 --- a/coreutils/timeout.c +++ b/coreutils/timeout.c @@ -39,13 +39,29 @@ //kbuild:lib-$(CONFIG_TIMEOUT) += timeout.o //usage:#define timeout_trivial_usage -//usage: "[-s SIG] SECS PROG ARGS" +//usage: "[-s SIG] [-k KILL_SECS] SECS PROG ARGS" //usage:#define timeout_full_usage "\n\n" //usage: "Run PROG. Send SIG to it if it is not gone in SECS seconds.\n" //usage: "Default SIG: TERM." 
+//usage: "If it still exists in KILL_SECS seconds, send KILL.\n" #include "libbb.h" +static NOINLINE int timeout_wait(int timeout, pid_t pid) +{ + /* Just sleep(HUGE_NUM); kill(parent) may kill wrong process! */ + while (1) { + sleep1(); + if (--timeout <= 0) + break; + if (kill(pid, 0)) { + /* process is gone */ + return EXIT_SUCCESS; + } + } + return EXIT_FAILURE; +} + int timeout_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int timeout_main(int argc UNUSED_PARAM, char **argv) { @@ -53,23 +69,29 @@ int timeout_main(int argc UNUSED_PARAM, char **argv) int status; int parent = 0; int timeout; + int kill_timeout; pid_t pid; #if !BB_MMU char *sv1, *sv2; #endif const char *opt_s = "TERM"; + char *opt_k = NULL; /* -p option is not documented, it is needed to support NOMMU. */ /* -t SECONDS; -p PARENT_PID */ /* '+': stop at first non-option */ - getopt32(argv, "+s:" USE_FOR_NOMMU("p:+"), &opt_s, &parent); + getopt32(argv, "+s:k:" USE_FOR_NOMMU("p:+"), &opt_s, &opt_k, &parent); /*argv += optind; - no, wait for bb_daemonize_or_rexec! */ signo = get_signum(opt_s); if (signo < 0) bb_error_msg_and_die("unknown signal '%s'", opt_s); + kill_timeout = 0; + if (opt_k) + kill_timeout = parse_duration_str(opt_k); + if (!argv[optind]) bb_show_usage(); timeout = parse_duration_str(argv[optind++]); @@ -103,17 +125,16 @@ int timeout_main(int argc UNUSED_PARAM, char **argv) bb_daemonize_or_rexec(0, argv); /* Here we are grandchild. Sleep, then kill grandparent */ grandchild: - /* Just sleep(HUGE_NUM); kill(parent) may kill wrong process! 
*/ - while (1) { - sleep1(); - if (--timeout <= 0) - break; - if (kill(parent, 0)) { - /* process is gone */ + if (timeout_wait(timeout, parent) == EXIT_SUCCESS) + return EXIT_SUCCESS; + kill(parent, signo); + + if (kill_timeout > 0) { + if (timeout_wait(kill_timeout, parent) == EXIT_SUCCESS) return EXIT_SUCCESS; - } + kill(parent, SIGKILL); } - kill(parent, signo); + return EXIT_SUCCESS; } -- cgit v1.2.3-55-g6feb From f26eb796e228cbec754e9e24545f5b0a8a50aac1 Mon Sep 17 00:00:00 2001 From: Sören Tempel Date: Wed, 17 Nov 2021 15:08:53 +0100 Subject: ed: fix current line number for file passed via the command-line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POSIX.1-2008 mandates the following regarding the file command-line argument: If the file argument is given, ed shall simulate an e command on the file named by the pathname […] The specification for the e command mandates the following behaviour regarding the current line number in POSIX.1-2008: The current line number shall be set to the address of the last line of the buffer. However, without this commit, busybox ed will set the current line number to 1 if a file is given on the command-line and this file is not empty (lastNum != 0). This is incorrect and fixed in this commit by not modifying the current line number in ed_main(). As such, the current line number will be zero for empty files and otherwise be set to the address of the last line of the buffer. 
function old new delta ed_main 144 128 -16 Signed-off-by: Sören Tempel Signed-off-by: Denys Vlasenko --- editors/ed.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/editors/ed.c b/editors/ed.c index 18faba5a4..fed10c470 100644 --- a/editors/ed.c +++ b/editors/ed.c @@ -1016,8 +1016,6 @@ int ed_main(int argc UNUSED_PARAM, char **argv) if (!readLines(fileName, 1)) { return EXIT_SUCCESS; } - if (lastNum) - setCurNum(1); dirty = FALSE; } -- cgit v1.2.3-55-g6feb From a05a3d5932b5002d0513adfa817b931dcc1686c0 Mon Sep 17 00:00:00 2001 From: Sören Tempel Date: Wed, 17 Nov 2021 15:12:25 +0100 Subject: ed: align output of read command with POSIX.1-2008 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POSIX.1-2008 mandates the following regarding the read command: If the read is successful, and -s was not specified, the number of bytes read shall be written to standard output in the following format: "%d\n", This commit aligns the output of busybox ed with POSIX.1-2008 by removing the file name from the output for the read command. This slipped through in 4836a0708fd0aaeb82871a3762b40fcf4b61e812. 
function old new delta .rodata 104203 104196 -7 readLines 409 388 -21 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-28) Total: -28 bytes Signed-off-by: Sören Tempel Signed-off-by: Denys Vlasenko --- editors/ed.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/editors/ed.c b/editors/ed.c index fed10c470..dfe0f1a77 100644 --- a/editors/ed.c +++ b/editors/ed.c @@ -402,9 +402,6 @@ static int readLines(const char *file, int num) charCount = 0; cc = 0; - printf("\"%s\", ", file); - fflush_all(); - do { cp = memchr(bufPtr, '\n', bufUsed); -- cgit v1.2.3-55-g6feb From 4fe954c14851d2f913c41c581cbe49300b0984e4 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Tue, 21 Dec 2021 21:52:29 +0900 Subject: sed: do not ignore 'g' modifier when match starts with ^ It is perfectly valid to start a regex with ^ and have other patterns with \| that can match more than once, e.g. the following example should print ca, as illustrated with gnu sed: $ echo 'abca' | sed -e 's/^a\|b//g' ca busybox before patch: $ echo 'abca' | busybox sed -e 's/^a\|b//g' bca busybox after patch: $ echo 'abca' | ./busybox sed -e 's/^a\|b//g' ca regcomp handles ^ perfectly well as illustrated with the second 'a' that did not match in the example, we can leave the non-repeating to it if appropriate. The check had been added before using regcomp and was required at the time (f36635cec6da) but no longer makes sense now.
(tested with glibc and musl libc) function old new delta add_cmd 1189 1176 -13 Signed-off-by: Dominique Martinet Signed-off-by: Denys Vlasenko --- editors/sed.c | 3 +-- testsuite/sed.tests | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/editors/sed.c b/editors/sed.c index a6845a979..e8c82ac63 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -435,8 +435,7 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) switch (substr[idx]) { /* Replace all occurrences */ case 'g': - if (match[0] != '^') - sed_cmd->which_match = 0; + sed_cmd->which_match = 0; break; /* Print pattern space */ case 'p': diff --git a/testsuite/sed.tests b/testsuite/sed.tests index 67ff87e93..2b78c9b12 100755 --- a/testsuite/sed.tests +++ b/testsuite/sed.tests @@ -399,6 +399,12 @@ testing "sed uses previous regexp" \ "" \ "q\nw\ne\nr\n" +testing "sed ^ OR not^" \ + "sed -e 's/^a\|b//g'" \ + "ca\n" \ + "" \ + "abca\n" + # testing "description" "commands" "result" "infile" "stdin" exit $FAILCOUNT -- cgit v1.2.3-55-g6feb From 97c00ae13439ad8114ad7d2150c8dde464f04eb1 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 26 Dec 2021 14:29:37 +0100 Subject: httpd: fix compile failure if !FEATURE_HTTPD_RANGES Signed-off-by: Denys Vlasenko --- networking/httpd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/networking/httpd.c b/networking/httpd.c index 1ba1d1063..33045163f 100644 --- a/networking/httpd.c +++ b/networking/httpd.c @@ -1880,9 +1880,13 @@ static NOINLINE void send_file_and_exit(const char *url, int what) #if ENABLE_FEATURE_USE_SENDFILE { off_t offset; +# if ENABLE_FEATURE_HTTPD_RANGES if (range_start < 0) range_start = 0; offset = range_start; +# else + offset = 0; +# endif while (1) { /* sz is rounded down to 64k */ ssize_t sz = MAXINT(ssize_t) - 0xffff; -- cgit v1.2.3-55-g6feb From e512aeb0fb3c585948ae6517cfdf4a53cf99774d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 26 Dec 2021 17:55:58 +0100 Subject: Bump version to 1.35.0 
Signed-off-by: Denys Vlasenko --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1216c94a5..edaa3c148 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 1 PATCHLEVEL = 35 SUBLEVEL = 0 -EXTRAVERSION = .git +EXTRAVERSION = NAME = Unnamed # *DOCUMENTATION* -- cgit v1.2.3-55-g6feb From 44075929a8b9c1861d15564fa6ac4562abb724d7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 26 Dec 2021 18:40:55 +0100 Subject: Start 1.36.0 development cycle Signed-off-by: Denys Vlasenko --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index edaa3c148..b2ce46c7c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 1 -PATCHLEVEL = 35 +PATCHLEVEL = 36 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = .git NAME = Unnamed # *DOCUMENTATION* -- cgit v1.2.3-55-g6feb