diff options
author | Ron Yorston <rmy@pobox.com> | 2021-12-27 08:21:55 +0000 |
---|---|---|
committer | Ron Yorston <rmy@pobox.com> | 2021-12-27 08:21:55 +0000 |
commit | b15f68214da209b5b293039c09c00f490c0cc193 (patch) | |
tree | d644b5d9318b79cb1baa356cbb63318cc4872c05 /networking | |
parent | 1ee308c75f4720ee38be8e81ff8c9ed4c52670d4 (diff) | |
parent | 44075929a8b9c1861d15564fa6ac4562abb724d7 (diff) | |
download | busybox-w32-b15f68214da209b5b293039c09c00f490c0cc193.tar.gz busybox-w32-b15f68214da209b5b293039c09c00f490c0cc193.tar.bz2 busybox-w32-b15f68214da209b5b293039c09c00f490c0cc193.zip |
Merge busybox into merge
Fix merge conflict in coreutils/timeout.c.
Diffstat (limited to 'networking')
-rw-r--r-- | networking/httpd.c | 7 | ||||
-rw-r--r-- | networking/tls.c | 3 | ||||
-rw-r--r-- | networking/tls_fe.c | 68 | ||||
-rw-r--r-- | networking/tls_sp_c32.c | 627 | ||||
-rw-r--r-- | networking/udhcp/common.c | 29 | ||||
-rw-r--r-- | networking/udhcp/common.h | 6 | ||||
-rw-r--r-- | networking/udhcp/d6_dhcpc.c | 5 | ||||
-rw-r--r-- | networking/udhcp/dhcpc.c | 6 | ||||
-rw-r--r-- | networking/udhcp/dhcpd.c | 2 | ||||
-rw-r--r-- | networking/wget.c | 35 |
10 files changed, 456 insertions, 332 deletions
diff --git a/networking/httpd.c b/networking/httpd.c index 6cc189272..c9daa0638 100644 --- a/networking/httpd.c +++ b/networking/httpd.c | |||
@@ -1151,7 +1151,7 @@ static void send_headers(unsigned responseNum) | |||
1151 | "Connection: close\r\n", | 1151 | "Connection: close\r\n", |
1152 | responseNum, responseString | 1152 | responseNum, responseString |
1153 | #if ENABLE_FEATURE_HTTPD_DATE | 1153 | #if ENABLE_FEATURE_HTTPD_DATE |
1154 | ,date_str | 1154 | , date_str |
1155 | #endif | 1155 | #endif |
1156 | ); | 1156 | ); |
1157 | } | 1157 | } |
@@ -1316,6 +1316,7 @@ static void send_headers_and_exit(int responseNum) NORETURN; | |||
1316 | static void send_headers_and_exit(int responseNum) | 1316 | static void send_headers_and_exit(int responseNum) |
1317 | { | 1317 | { |
1318 | IF_FEATURE_HTTPD_GZIP(content_gzip = 0;) | 1318 | IF_FEATURE_HTTPD_GZIP(content_gzip = 0;) |
1319 | file_size = -1; /* no Last-Modified:, ETag:, Content-Length: */ | ||
1319 | send_headers(responseNum); | 1320 | send_headers(responseNum); |
1320 | log_and_exit(); | 1321 | log_and_exit(); |
1321 | } | 1322 | } |
@@ -1919,9 +1920,13 @@ static NOINLINE void send_file_and_exit(const char *url, int what) | |||
1919 | #if ENABLE_FEATURE_USE_SENDFILE | 1920 | #if ENABLE_FEATURE_USE_SENDFILE |
1920 | { | 1921 | { |
1921 | off_t offset; | 1922 | off_t offset; |
1923 | # if ENABLE_FEATURE_HTTPD_RANGES | ||
1922 | if (range_start < 0) | 1924 | if (range_start < 0) |
1923 | range_start = 0; | 1925 | range_start = 0; |
1924 | offset = range_start; | 1926 | offset = range_start; |
1927 | # else | ||
1928 | offset = 0; | ||
1929 | # endif | ||
1925 | while (1) { | 1930 | while (1) { |
1926 | /* sz is rounded down to 64k */ | 1931 | /* sz is rounded down to 64k */ |
1927 | ssize_t sz = MAXINT(ssize_t) - 0xffff; | 1932 | ssize_t sz = MAXINT(ssize_t) - 0xffff; |
diff --git a/networking/tls.c b/networking/tls.c index 36f83212b..5f40aec70 100644 --- a/networking/tls.c +++ b/networking/tls.c | |||
@@ -1883,10 +1883,12 @@ static void process_server_key(tls_state_t *tls, int len) | |||
1883 | keybuf += 4; | 1883 | keybuf += 4; |
1884 | switch (t32) { | 1884 | switch (t32) { |
1885 | case _0x03001d20: //curve_x25519 | 1885 | case _0x03001d20: //curve_x25519 |
1886 | dbg("got x25519 eccPubKey\n"); | ||
1886 | tls->flags |= GOT_EC_CURVE_X25519; | 1887 | tls->flags |= GOT_EC_CURVE_X25519; |
1887 | memcpy(tls->hsd->ecc_pub_key32, keybuf, 32); | 1888 | memcpy(tls->hsd->ecc_pub_key32, keybuf, 32); |
1888 | break; | 1889 | break; |
1889 | case _0x03001741: //curve_secp256r1 (aka P256) | 1890 | case _0x03001741: //curve_secp256r1 (aka P256) |
1891 | dbg("got P256 eccPubKey\n"); | ||
1890 | /* P256 point can be transmitted odd- or even-compressed | 1892 | /* P256 point can be transmitted odd- or even-compressed |
1891 | * (first byte is 3 or 2) or uncompressed (4). | 1893 | * (first byte is 3 or 2) or uncompressed (4). |
1892 | */ | 1894 | */ |
@@ -1899,7 +1901,6 @@ static void process_server_key(tls_state_t *tls, int len) | |||
1899 | } | 1901 | } |
1900 | 1902 | ||
1901 | tls->flags |= GOT_EC_KEY; | 1903 | tls->flags |= GOT_EC_KEY; |
1902 | dbg("got eccPubKey\n"); | ||
1903 | } | 1904 | } |
1904 | 1905 | ||
1905 | static void send_empty_client_cert(tls_state_t *tls) | 1906 | static void send_empty_client_cert(tls_state_t *tls) |
diff --git a/networking/tls_fe.c b/networking/tls_fe.c index 3a0a6776f..e5580fbcf 100644 --- a/networking/tls_fe.c +++ b/networking/tls_fe.c | |||
@@ -187,7 +187,7 @@ static void fprime_mul(byte *r, const byte *a, const byte *b, | |||
187 | #if 0 //UNUSED | 187 | #if 0 //UNUSED |
188 | static void fe_load(byte *x, word32 c) | 188 | static void fe_load(byte *x, word32 c) |
189 | { | 189 | { |
190 | word32 i; | 190 | int i; |
191 | 191 | ||
192 | for (i = 0; i < sizeof(c); i++) { | 192 | for (i = 0; i < sizeof(c); i++) { |
193 | x[i] = c; | 193 | x[i] = c; |
@@ -199,21 +199,29 @@ static void fe_load(byte *x, word32 c) | |||
199 | } | 199 | } |
200 | #endif | 200 | #endif |
201 | 201 | ||
202 | static void fe_normalize(byte *x) | 202 | static void fe_reduce(byte *x, word32 c) |
203 | { | 203 | { |
204 | byte minusp[F25519_SIZE]; | ||
205 | unsigned c; | ||
206 | int i; | 204 | int i; |
207 | 205 | ||
208 | /* Reduce using 2^255 = 19 mod p */ | 206 | /* Reduce using 2^255 = 19 mod p */ |
209 | c = (x[31] >> 7) * 19; | 207 | x[31] = c & 127; |
210 | x[31] &= 127; | 208 | c = (c >> 7) * 19; |
211 | 209 | ||
212 | for (i = 0; i < F25519_SIZE; i++) { | 210 | for (i = 0; i < F25519_SIZE; i++) { |
213 | c += x[i]; | 211 | c += x[i]; |
214 | x[i] = (byte)c; | 212 | x[i] = (byte)c; |
215 | c >>= 8; | 213 | c >>= 8; |
216 | } | 214 | } |
215 | } | ||
216 | |||
217 | static void fe_normalize(byte *x) | ||
218 | { | ||
219 | byte minusp[F25519_SIZE]; | ||
220 | unsigned c; | ||
221 | int i; | ||
222 | |||
223 | /* Reduce using 2^255 = 19 mod p */ | ||
224 | fe_reduce(x, x[31]); | ||
217 | 225 | ||
218 | /* The number is now less than 2^255 + 18, and therefore less than | 226 | /* The number is now less than 2^255 + 18, and therefore less than |
219 | * 2p. Try subtracting p, and conditionally load the subtracted | 227 | * 2p. Try subtracting p, and conditionally load the subtracted |
@@ -247,14 +255,7 @@ static void lm_add(byte* r, const byte* a, const byte* b) | |||
247 | } | 255 | } |
248 | 256 | ||
249 | /* Reduce with 2^255 = 19 mod p */ | 257 | /* Reduce with 2^255 = 19 mod p */ |
250 | r[31] &= 127; | 258 | fe_reduce(r, c); |
251 | c = (c >> 7) * 19; | ||
252 | |||
253 | for (i = 0; i < F25519_SIZE; i++) { | ||
254 | c += r[i]; | ||
255 | r[i] = (byte)c; | ||
256 | c >>= 8; | ||
257 | } | ||
258 | } | 259 | } |
259 | 260 | ||
260 | static void lm_sub(byte* r, const byte* a, const byte* b) | 261 | static void lm_sub(byte* r, const byte* a, const byte* b) |
@@ -264,21 +265,15 @@ static void lm_sub(byte* r, const byte* a, const byte* b) | |||
264 | 265 | ||
265 | /* Calculate a + 2p - b, to avoid underflow */ | 266 | /* Calculate a + 2p - b, to avoid underflow */ |
266 | c = 218; | 267 | c = 218; |
267 | for (i = 0; i + 1 < F25519_SIZE; i++) { | 268 | for (i = 0; i < F25519_SIZE - 1; i++) { |
268 | c += 65280 + ((word32)a[i]) - ((word32)b[i]); | 269 | c += 65280 + ((word32)a[i]) - ((word32)b[i]); |
269 | r[i] = c; | 270 | r[i] = c; |
270 | c >>= 8; | 271 | c >>= 8; |
271 | } | 272 | } |
272 | 273 | ||
273 | c += ((word32)a[31]) - ((word32)b[31]); | 274 | c += ((word32)a[31]) - ((word32)b[31]); |
274 | r[31] = c & 127; | ||
275 | c = (c >> 7) * 19; | ||
276 | 275 | ||
277 | for (i = 0; i < F25519_SIZE; i++) { | 276 | fe_reduce(r, c); |
278 | c += r[i]; | ||
279 | r[i] = c; | ||
280 | c >>= 8; | ||
281 | } | ||
282 | } | 277 | } |
283 | 278 | ||
284 | #if 0 //UNUSED | 279 | #if 0 //UNUSED |
@@ -289,21 +284,15 @@ static void lm_neg(byte* r, const byte* a) | |||
289 | 284 | ||
290 | /* Calculate 2p - a, to avoid underflow */ | 285 | /* Calculate 2p - a, to avoid underflow */ |
291 | c = 218; | 286 | c = 218; |
292 | for (i = 0; i + 1 < F25519_SIZE; i++) { | 287 | for (i = 0; i < F25519_SIZE - 1; i++) { |
293 | c += 65280 - ((word32)a[i]); | 288 | c += 65280 - ((word32)a[i]); |
294 | r[i] = c; | 289 | r[i] = c; |
295 | c >>= 8; | 290 | c >>= 8; |
296 | } | 291 | } |
297 | 292 | ||
298 | c -= ((word32)a[31]); | 293 | c -= ((word32)a[31]); |
299 | r[31] = c & 127; | ||
300 | c = (c >> 7) * 19; | ||
301 | 294 | ||
302 | for (i = 0; i < F25519_SIZE; i++) { | 295 | fe_reduce(r, c); |
303 | c += r[i]; | ||
304 | r[i] = c; | ||
305 | c >>= 8; | ||
306 | } | ||
307 | } | 296 | } |
308 | #endif | 297 | #endif |
309 | 298 | ||
@@ -326,14 +315,7 @@ static void fe_mul__distinct(byte *r, const byte *a, const byte *b) | |||
326 | r[i] = c; | 315 | r[i] = c; |
327 | } | 316 | } |
328 | 317 | ||
329 | r[31] &= 127; | 318 | fe_reduce(r, c); |
330 | c = (c >> 7) * 19; | ||
331 | |||
332 | for (i = 0; i < F25519_SIZE; i++) { | ||
333 | c += r[i]; | ||
334 | r[i] = c; | ||
335 | c >>= 8; | ||
336 | } | ||
337 | } | 319 | } |
338 | 320 | ||
339 | #if 0 //UNUSED | 321 | #if 0 //UNUSED |
@@ -357,15 +339,7 @@ static void fe_mul_c(byte *r, const byte *a, word32 b) | |||
357 | r[i] = c; | 339 | r[i] = c; |
358 | } | 340 | } |
359 | 341 | ||
360 | r[31] &= 127; | 342 | fe_reduce(r, c); |
361 | c >>= 7; | ||
362 | c *= 19; | ||
363 | |||
364 | for (i = 0; i < F25519_SIZE; i++) { | ||
365 | c += r[i]; | ||
366 | r[i] = c; | ||
367 | c >>= 8; | ||
368 | } | ||
369 | } | 343 | } |
370 | 344 | ||
371 | static void fe_inv__distinct(byte *r, const byte *x) | 345 | static void fe_inv__distinct(byte *r, const byte *x) |
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 4d4ecdd74..292dda24e 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c | |||
@@ -29,20 +29,39 @@ static void dump_hex(const char *fmt, const void *vp, int len) | |||
29 | typedef uint32_t sp_digit; | 29 | typedef uint32_t sp_digit; |
30 | typedef int32_t signed_sp_digit; | 30 | typedef int32_t signed_sp_digit; |
31 | 31 | ||
32 | /* 64-bit optimizations: | ||
33 | * if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff, | ||
34 | * then loads and stores can be done in 64-bit chunks. | ||
35 | * | ||
36 | * A narrower case is when arch is also little-endian (such as x86_64), | ||
37 | * then "LSW first", uint32[8] and uint64[4] representations are equivalent, | ||
38 | * and arithmetic can be done in 64 bits too. | ||
39 | */ | ||
40 | #if defined(__GNUC__) && defined(__x86_64__) | ||
41 | # define UNALIGNED_LE_64BIT 1 | ||
42 | #else | ||
43 | # define UNALIGNED_LE_64BIT 0 | ||
44 | #endif | ||
45 | |||
32 | /* The code below is taken from parts of | 46 | /* The code below is taken from parts of |
33 | * wolfssl-3.15.3/wolfcrypt/src/sp_c32.c | 47 | * wolfssl-3.15.3/wolfcrypt/src/sp_c32.c |
34 | * and heavily modified. | 48 | * and heavily modified. |
35 | */ | 49 | */ |
36 | 50 | ||
37 | typedef struct sp_point { | 51 | typedef struct sp_point { |
38 | sp_digit x[2 * 8]; | 52 | sp_digit x[8] |
39 | sp_digit y[2 * 8]; | 53 | #if ULONG_MAX > 0xffffffff |
40 | sp_digit z[2 * 8]; | 54 | /* Make sp_point[] arrays to not be 64-bit misaligned */ |
55 | ALIGNED(8) | ||
56 | #endif | ||
57 | ; | ||
58 | sp_digit y[8]; | ||
59 | sp_digit z[8]; | ||
41 | int infinity; | 60 | int infinity; |
42 | } sp_point; | 61 | } sp_point; |
43 | 62 | ||
44 | /* The modulus (prime) of the curve P256. */ | 63 | /* The modulus (prime) of the curve P256. */ |
45 | static const sp_digit p256_mod[8] = { | 64 | static const sp_digit p256_mod[8] ALIGNED(8) = { |
46 | 0xffffffff,0xffffffff,0xffffffff,0x00000000, | 65 | 0xffffffff,0xffffffff,0xffffffff,0x00000000, |
47 | 0x00000000,0x00000000,0x00000001,0xffffffff, | 66 | 0x00000000,0x00000000,0x00000001,0xffffffff, |
48 | }; | 67 | }; |
@@ -58,6 +77,22 @@ static const sp_digit p256_mod[8] = { | |||
58 | * r A single precision integer. | 77 | * r A single precision integer. |
59 | * a Byte array. | 78 | * a Byte array. |
60 | */ | 79 | */ |
80 | #if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff | ||
81 | static void sp_256_to_bin_8(const sp_digit* rr, uint8_t* a) | ||
82 | { | ||
83 | int i; | ||
84 | const uint64_t* r = (void*)rr; | ||
85 | |||
86 | sp_256_norm_8(rr); | ||
87 | |||
88 | r += 4; | ||
89 | for (i = 0; i < 4; i++) { | ||
90 | r--; | ||
91 | move_to_unaligned64(a, SWAP_BE64(*r)); | ||
92 | a += 8; | ||
93 | } | ||
94 | } | ||
95 | #else | ||
61 | static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a) | 96 | static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a) |
62 | { | 97 | { |
63 | int i; | 98 | int i; |
@@ -71,6 +106,7 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a) | |||
71 | a += 4; | 106 | a += 4; |
72 | } | 107 | } |
73 | } | 108 | } |
109 | #endif | ||
74 | 110 | ||
75 | /* Read big endian unsigned byte array into r. | 111 | /* Read big endian unsigned byte array into r. |
76 | * | 112 | * |
@@ -78,6 +114,21 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a) | |||
78 | * a Byte array. | 114 | * a Byte array. |
79 | * n Number of bytes in array to read. | 115 | * n Number of bytes in array to read. |
80 | */ | 116 | */ |
117 | #if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff | ||
118 | static void sp_256_from_bin_8(sp_digit* rr, const uint8_t* a) | ||
119 | { | ||
120 | int i; | ||
121 | uint64_t* r = (void*)rr; | ||
122 | |||
123 | r += 4; | ||
124 | for (i = 0; i < 4; i++) { | ||
125 | uint64_t v; | ||
126 | move_from_unaligned64(v, a); | ||
127 | *--r = SWAP_BE64(v); | ||
128 | a += 8; | ||
129 | } | ||
130 | } | ||
131 | #else | ||
81 | static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a) | 132 | static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a) |
82 | { | 133 | { |
83 | int i; | 134 | int i; |
@@ -90,6 +141,7 @@ static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a) | |||
90 | a += 4; | 141 | a += 4; |
91 | } | 142 | } |
92 | } | 143 | } |
144 | #endif | ||
93 | 145 | ||
94 | #if SP_DEBUG | 146 | #if SP_DEBUG |
95 | static void dump_256(const char *fmt, const sp_digit* r) | 147 | static void dump_256(const char *fmt, const sp_digit* r) |
@@ -125,6 +177,20 @@ static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32) | |||
125 | * return -ve, 0 or +ve if a is less than, equal to or greater than b | 177 | * return -ve, 0 or +ve if a is less than, equal to or greater than b |
126 | * respectively. | 178 | * respectively. |
127 | */ | 179 | */ |
180 | #if UNALIGNED_LE_64BIT | ||
181 | static signed_sp_digit sp_256_cmp_8(const sp_digit* aa, const sp_digit* bb) | ||
182 | { | ||
183 | const uint64_t* a = (void*)aa; | ||
184 | const uint64_t* b = (void*)bb; | ||
185 | int i; | ||
186 | for (i = 3; i >= 0; i--) { | ||
187 | if (a[i] == b[i]) | ||
188 | continue; | ||
189 | return (a[i] > b[i]) * 2 - 1; | ||
190 | } | ||
191 | return 0; | ||
192 | } | ||
193 | #else | ||
128 | static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b) | 194 | static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b) |
129 | { | 195 | { |
130 | int i; | 196 | int i; |
@@ -140,6 +206,7 @@ static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b) | |||
140 | } | 206 | } |
141 | return 0; | 207 | return 0; |
142 | } | 208 | } |
209 | #endif | ||
143 | 210 | ||
144 | /* Compare two numbers to determine if they are equal. | 211 | /* Compare two numbers to determine if they are equal. |
145 | * | 212 | * |
@@ -196,8 +263,6 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
196 | ); | 263 | ); |
197 | return reg; | 264 | return reg; |
198 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) | 265 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) |
199 | /* x86_64 has no alignment restrictions, and is little-endian, | ||
200 | * so 64-bit and 32-bit representations are identical */ | ||
201 | uint64_t reg; | 266 | uint64_t reg; |
202 | asm volatile ( | 267 | asm volatile ( |
203 | "\n movq (%0), %3" | 268 | "\n movq (%0), %3" |
@@ -294,8 +359,6 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
294 | ); | 359 | ); |
295 | return reg; | 360 | return reg; |
296 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) | 361 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) |
297 | /* x86_64 has no alignment restrictions, and is little-endian, | ||
298 | * so 64-bit and 32-bit representations are identical */ | ||
299 | uint64_t reg; | 362 | uint64_t reg; |
300 | asm volatile ( | 363 | asm volatile ( |
301 | "\n movq (%0), %3" | 364 | "\n movq (%0), %3" |
@@ -397,11 +460,12 @@ static void sp_256_sub_8_p256_mod(sp_digit* r) | |||
397 | } | 460 | } |
398 | #endif | 461 | #endif |
399 | 462 | ||
400 | /* Multiply a and b into r. (r = a * b) */ | 463 | /* Multiply a and b into r. (r = a * b) |
401 | static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | 464 | * r should be [16] array (512 bits), and must not coincide with a or b. |
465 | */ | ||
466 | static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | ||
402 | { | 467 | { |
403 | #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__) | 468 | #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__) |
404 | sp_digit rr[15]; /* in case r coincides with a or b */ | ||
405 | int k; | 469 | int k; |
406 | uint32_t accl; | 470 | uint32_t accl; |
407 | uint32_t acch; | 471 | uint32_t acch; |
@@ -433,18 +497,15 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
433 | j--; | 497 | j--; |
434 | i++; | 498 | i++; |
435 | } while (i != 8 && i <= k); | 499 | } while (i != 8 && i <= k); |
436 | rr[k] = accl; | 500 | r[k] = accl; |
437 | accl = acch; | 501 | accl = acch; |
438 | acch = acc_hi; | 502 | acch = acc_hi; |
439 | } | 503 | } |
440 | r[15] = accl; | 504 | r[15] = accl; |
441 | memcpy(r, rr, sizeof(rr)); | ||
442 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) | 505 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) |
443 | /* x86_64 has no alignment restrictions, and is little-endian, | ||
444 | * so 64-bit and 32-bit representations are identical */ | ||
445 | const uint64_t* aa = (const void*)a; | 506 | const uint64_t* aa = (const void*)a; |
446 | const uint64_t* bb = (const void*)b; | 507 | const uint64_t* bb = (const void*)b; |
447 | uint64_t rr[8]; | 508 | uint64_t* rr = (void*)r; |
448 | int k; | 509 | int k; |
449 | uint64_t accl; | 510 | uint64_t accl; |
450 | uint64_t acch; | 511 | uint64_t acch; |
@@ -481,11 +542,8 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
481 | acch = acc_hi; | 542 | acch = acc_hi; |
482 | } | 543 | } |
483 | rr[7] = accl; | 544 | rr[7] = accl; |
484 | memcpy(r, rr, sizeof(rr)); | ||
485 | #elif 0 | 545 | #elif 0 |
486 | //TODO: arm assembly (untested) | 546 | //TODO: arm assembly (untested) |
487 | sp_digit tmp[16]; | ||
488 | |||
489 | asm volatile ( | 547 | asm volatile ( |
490 | "\n mov r5, #0" | 548 | "\n mov r5, #0" |
491 | "\n mov r6, #0" | 549 | "\n mov r6, #0" |
@@ -517,12 +575,10 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
517 | "\n cmp r5, #56" | 575 | "\n cmp r5, #56" |
518 | "\n ble 1b" | 576 | "\n ble 1b" |
519 | "\n str r6, [%[r], r5]" | 577 | "\n str r6, [%[r], r5]" |
520 | : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) | 578 | : [r] "r" (r), [a] "r" (a), [b] "r" (b) |
521 | : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" | 579 | : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" |
522 | ); | 580 | ); |
523 | memcpy(r, tmp, sizeof(tmp)); | ||
524 | #else | 581 | #else |
525 | sp_digit rr[15]; /* in case r coincides with a or b */ | ||
526 | int i, j, k; | 582 | int i, j, k; |
527 | uint64_t acc; | 583 | uint64_t acc; |
528 | 584 | ||
@@ -542,35 +598,51 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
542 | j--; | 598 | j--; |
543 | i++; | 599 | i++; |
544 | } while (i != 8 && i <= k); | 600 | } while (i != 8 && i <= k); |
545 | rr[k] = acc; | 601 | r[k] = acc; |
546 | acc = (acc >> 32) | ((uint64_t)acc_hi << 32); | 602 | acc = (acc >> 32) | ((uint64_t)acc_hi << 32); |
547 | } | 603 | } |
548 | r[15] = acc; | 604 | r[15] = acc; |
549 | memcpy(r, rr, sizeof(rr)); | ||
550 | #endif | 605 | #endif |
551 | } | 606 | } |
552 | 607 | ||
553 | /* Shift number right one bit. Bottom bit is lost. */ | 608 | /* Shift number right one bit. Bottom bit is lost. */ |
554 | static void sp_256_rshift1_8(sp_digit* r, sp_digit* a, sp_digit carry) | 609 | #if UNALIGNED_LE_64BIT |
610 | static void sp_256_rshift1_8(sp_digit* rr, uint64_t carry) | ||
555 | { | 611 | { |
612 | uint64_t *r = (void*)rr; | ||
556 | int i; | 613 | int i; |
557 | 614 | ||
558 | carry = (!!carry << 31); | 615 | carry = (((uint64_t)!!carry) << 63); |
616 | for (i = 3; i >= 0; i--) { | ||
617 | uint64_t c = r[i] << 63; | ||
618 | r[i] = (r[i] >> 1) | carry; | ||
619 | carry = c; | ||
620 | } | ||
621 | } | ||
622 | #else | ||
623 | static void sp_256_rshift1_8(sp_digit* r, sp_digit carry) | ||
624 | { | ||
625 | int i; | ||
626 | |||
627 | carry = (((sp_digit)!!carry) << 31); | ||
559 | for (i = 7; i >= 0; i--) { | 628 | for (i = 7; i >= 0; i--) { |
560 | sp_digit c = a[i] << 31; | 629 | sp_digit c = r[i] << 31; |
561 | r[i] = (a[i] >> 1) | carry; | 630 | r[i] = (r[i] >> 1) | carry; |
562 | carry = c; | 631 | carry = c; |
563 | } | 632 | } |
564 | } | 633 | } |
634 | #endif | ||
565 | 635 | ||
566 | /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m) */ | 636 | /* Divide the number by 2 mod the modulus (prime). (r = (r / 2) % m) */ |
567 | static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) | 637 | static void sp_256_div2_8(sp_digit* r /*, const sp_digit* m*/) |
568 | { | 638 | { |
639 | const sp_digit* m = p256_mod; | ||
640 | |||
569 | int carry = 0; | 641 | int carry = 0; |
570 | if (a[0] & 1) | 642 | if (r[0] & 1) |
571 | carry = sp_256_add_8(r, a, m); | 643 | carry = sp_256_add_8(r, r, m); |
572 | sp_256_norm_8(r); | 644 | sp_256_norm_8(r); |
573 | sp_256_rshift1_8(r, r, carry); | 645 | sp_256_rshift1_8(r, carry); |
574 | } | 646 | } |
575 | 647 | ||
576 | /* Add two Montgomery form numbers (r = a + b % m) */ | 648 | /* Add two Montgomery form numbers (r = a + b % m) */ |
@@ -634,36 +706,174 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* | |||
634 | } | 706 | } |
635 | 707 | ||
636 | /* Shift the result in the high 256 bits down to the bottom. */ | 708 | /* Shift the result in the high 256 bits down to the bottom. */ |
637 | static void sp_256_mont_shift_8(sp_digit* r, const sp_digit* a) | 709 | static void sp_512to256_mont_shift_8(sp_digit* r, sp_digit* a) |
710 | { | ||
711 | memcpy(r, a + 8, sizeof(*r) * 8); | ||
712 | } | ||
713 | |||
714 | #if UNALIGNED_LE_64BIT | ||
715 | /* 64-bit little-endian optimized version. | ||
716 | * See generic 32-bit version below for explanation. | ||
717 | * The benefit of this version is: even though r[3] calculation is atrocious, | ||
718 | * we call sp_256_mul_add_4() four times, not 8. | ||
719 | * Measured run time improvement of curve_P256_compute_pubkey_and_premaster() | ||
720 | * call on x86-64: from ~1500us to ~900us. Code size +32 bytes. | ||
721 | */ | ||
722 | static int sp_256_mul_add_4(uint64_t *r /*, const uint64_t* a, uint64_t b*/) | ||
638 | { | 723 | { |
724 | uint64_t b = r[0]; | ||
725 | |||
726 | # if 0 | ||
727 | const uint64_t* a = (const void*)p256_mod; | ||
728 | //a[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff | ||
729 | uint128_t t; | ||
639 | int i; | 730 | int i; |
731 | t = 0; | ||
732 | for (i = 0; i < 4; i++) { | ||
733 | uint32_t t_hi; | ||
734 | uint128_t m = ((uint128_t)b * a[i]) + r[i]; | ||
735 | t += m; | ||
736 | t_hi = (t < m); | ||
737 | r[i] = (uint64_t)t; | ||
738 | t = (t >> 64) | ((uint128_t)t_hi << 64); | ||
739 | } | ||
740 | r[4] += (uint64_t)t; | ||
741 | return (r[4] < (uint64_t)t); /* 1 if addition overflowed */ | ||
742 | # else | ||
743 | // Unroll, then optimize the above loop: | ||
744 | //uint32_t t_hi; | ||
745 | //uint128_t m; | ||
746 | uint64_t t64, t64u; | ||
640 | 747 | ||
641 | for (i = 0; i < 8; i++) { | 748 | //m = ((uint128_t)b * a[0]) + r[0]; |
642 | r[i] = a[i+8]; | 749 | // Since b is r[0] and a[0] is ffffffffffffffff, the above optimizes to: |
643 | r[i+8] = 0; | 750 | // m = r[0] * ffffffffffffffff + r[0] = (r[0] << 64 - r[0]) + r[0] = r[0] << 64; |
751 | //t += m; | ||
752 | // t = r[0] << 64 = b << 64; | ||
753 | //t_hi = (t < m); | ||
754 | // t_hi = 0; | ||
755 | //r[0] = (uint64_t)t; | ||
756 | // r[0] = 0; | ||
757 | //the store can be eliminated since caller won't look at lower 256 bits of the result | ||
758 | //t = (t >> 64) | ((uint128_t)t_hi << 64); | ||
759 | // t = b; | ||
760 | |||
761 | //m = ((uint128_t)b * a[1]) + r[1]; | ||
762 | // Since a[1] is 00000000ffffffff, the above optimizes to: | ||
763 | // m = b * ffffffff + r[1] = (b * 100000000 - b) + r[1] = (b << 32) - b + r[1]; | ||
764 | //t += m; | ||
765 | // t = b + (b << 32) - b + r[1] = (b << 32) + r[1]; | ||
766 | //t_hi = (t < m); | ||
767 | // t_hi = 0; | ||
768 | //r[1] = (uint64_t)t; | ||
769 | r[1] += (b << 32); | ||
770 | //t = (t >> 64) | ((uint128_t)t_hi << 64); | ||
771 | t64 = (r[1] < (b << 32)); | ||
772 | t64 += (b >> 32); | ||
773 | |||
774 | //m = ((uint128_t)b * a[2]) + r[2]; | ||
775 | // Since a[2] is 0000000000000000, the above optimizes to: | ||
776 | // m = b * 0 + r[2] = r[2]; | ||
777 | //t += m; | ||
778 | // t = t64 + r[2]; | ||
779 | //t_hi = (t < m); | ||
780 | // t_hi = 0; | ||
781 | //r[2] = (uint64_t)t; | ||
782 | r[2] += t64; | ||
783 | //t = (t >> 64) | ((uint128_t)t_hi << 64); | ||
784 | t64 = (r[2] < t64); | ||
785 | |||
786 | //m = ((uint128_t)b * a[3]) + r[3]; | ||
787 | // Since a[3] is ffffffff00000001, the above optimizes to: | ||
788 | // m = b * ffffffff00000001 + r[3]; | ||
789 | // m = b + b*ffffffff00000000 + r[3] | ||
790 | // m = b + (b*ffffffff << 32) + r[3] | ||
791 | // m = b + (((b<<32) - b) << 32) + r[3] | ||
792 | //t += m; | ||
793 | // t = t64 + (uint128_t)b + ((((uint128_t)b << 32) - b) << 32) + r[3]; | ||
794 | t64 += b; | ||
795 | t64u = (t64 < b); | ||
796 | t64 += r[3]; | ||
797 | t64u += (t64 < r[3]); | ||
798 | { // add ((((uint128_t)b << 32) - b) << 32): | ||
799 | uint64_t lo, hi; | ||
800 | //lo = (((b << 32) - b) << 32 | ||
801 | //hi = (((uint128_t)b << 32) - b) >> 32 | ||
802 | //but without uint128_t: | ||
803 | hi = (b << 32) - b; /* make lower 32 bits of "hi", part 1 */ | ||
804 | b = (b >> 32) - (/*borrowed above?*/(b << 32) < b); /* upper 32 bits of "hi" are in b */ | ||
805 | lo = hi << 32; /* (use "hi" value to calculate "lo",... */ | ||
806 | t64 += lo; /* ...consume... */ | ||
807 | t64u += (t64 < lo); /* ..."lo") */ | ||
808 | hi >>= 32; /* make lower 32 bits of "hi", part 2 */ | ||
809 | hi |= (b << 32); /* combine lower and upper 32 bits */ | ||
810 | t64u += hi; /* consume "hi" */ | ||
811 | } | ||
812 | //t_hi = (t < m); | ||
813 | // t_hi = 0; | ||
814 | //r[3] = (uint64_t)t; | ||
815 | r[3] = t64; | ||
816 | //t = (t >> 64) | ((uint128_t)t_hi << 64); | ||
817 | // t = t64u; | ||
818 | |||
819 | r[4] += t64u; | ||
820 | return (r[4] < t64u); /* 1 if addition overflowed */ | ||
821 | # endif | ||
822 | } | ||
823 | |||
824 | static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* aa/*, const sp_digit* m, sp_digit mp*/) | ||
825 | { | ||
826 | // const sp_digit* m = p256_mod; | ||
827 | int i; | ||
828 | uint64_t *a = (void*)aa; | ||
829 | |||
830 | sp_digit carry = 0; | ||
831 | for (i = 0; i < 4; i++) { | ||
832 | // mu = a[i]; | ||
833 | if (sp_256_mul_add_4(a+i /*, m, mu*/)) { | ||
834 | int j = i + 4; | ||
835 | inc_next_word: | ||
836 | if (++j > 7) { /* a[8] array has no more words? */ | ||
837 | carry++; | ||
838 | continue; | ||
839 | } | ||
840 | if (++a[j] == 0) /* did this overflow too? */ | ||
841 | goto inc_next_word; | ||
842 | } | ||
644 | } | 843 | } |
844 | sp_512to256_mont_shift_8(r, aa); | ||
845 | if (carry != 0) | ||
846 | sp_256_sub_8_p256_mod(r); | ||
847 | sp_256_norm_8(r); | ||
645 | } | 848 | } |
646 | 849 | ||
647 | /* Mul a by scalar b and add into r. (r += a * b) */ | 850 | #else /* Generic 32-bit version */ |
851 | |||
852 | /* Mul a by scalar b and add into r. (r += a * b) | ||
853 | * a = p256_mod | ||
854 | * b = r[0] | ||
855 | */ | ||
648 | static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | 856 | static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) |
649 | { | 857 | { |
650 | // const sp_digit* a = p256_mod; | ||
651 | //a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff | ||
652 | sp_digit b = r[0]; | 858 | sp_digit b = r[0]; |
653 | |||
654 | uint64_t t; | 859 | uint64_t t; |
655 | 860 | ||
656 | // t = 0; | 861 | # if 0 |
657 | // for (i = 0; i < 8; i++) { | 862 | const sp_digit* a = p256_mod; |
658 | // uint32_t t_hi; | 863 | //a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff |
659 | // uint64_t m = ((uint64_t)b * a[i]) + r[i]; | 864 | int i; |
660 | // t += m; | 865 | t = 0; |
661 | // t_hi = (t < m); | 866 | for (i = 0; i < 8; i++) { |
662 | // r[i] = (sp_digit)t; | 867 | uint32_t t_hi; |
663 | // t = (t >> 32) | ((uint64_t)t_hi << 32); | 868 | uint64_t m = ((uint64_t)b * a[i]) + r[i]; |
664 | // } | 869 | t += m; |
665 | // r[8] += (sp_digit)t; | 870 | t_hi = (t < m); |
666 | 871 | r[i] = (sp_digit)t; | |
872 | t = (t >> 32) | ((uint64_t)t_hi << 32); | ||
873 | } | ||
874 | r[8] += (sp_digit)t; | ||
875 | return (r[8] < (sp_digit)t); /* 1 if addition overflowed */ | ||
876 | # else | ||
667 | // Unroll, then optimize the above loop: | 877 | // Unroll, then optimize the above loop: |
668 | //uint32_t t_hi; | 878 | //uint32_t t_hi; |
669 | uint64_t m; | 879 | uint64_t m; |
@@ -677,7 +887,8 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | |||
677 | //t_hi = (t < m); | 887 | //t_hi = (t < m); |
678 | // t_hi = 0; | 888 | // t_hi = 0; |
679 | //r[0] = (sp_digit)t; | 889 | //r[0] = (sp_digit)t; |
680 | r[0] = 0; | 890 | // r[0] = 0; |
891 | //the store can be eliminated since caller won't look at lower 256 bits of the result | ||
681 | //t = (t >> 32) | ((uint64_t)t_hi << 32); | 892 | //t = (t >> 32) | ((uint64_t)t_hi << 32); |
682 | // t = b; | 893 | // t = b; |
683 | 894 | ||
@@ -769,15 +980,33 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | |||
769 | 980 | ||
770 | r[8] += (sp_digit)t; | 981 | r[8] += (sp_digit)t; |
771 | return (r[8] < (sp_digit)t); /* 1 if addition overflowed */ | 982 | return (r[8] < (sp_digit)t); /* 1 if addition overflowed */ |
983 | # endif | ||
772 | } | 984 | } |
773 | 985 | ||
774 | /* Reduce the number back to 256 bits using Montgomery reduction. | 986 | /* Reduce the number back to 256 bits using Montgomery reduction. |
987 | * Note: the result is NOT guaranteed to be less than p256_mod! | ||
988 | * (it is only guaranteed to fit into 256 bits). | ||
775 | * | 989 | * |
776 | * a A single precision number to reduce in place. | 990 | * r Result. |
991 | * a Double-wide number to reduce. Clobbered. | ||
777 | * m The single precision number representing the modulus. | 992 | * m The single precision number representing the modulus. |
778 | * mp The digit representing the negative inverse of m mod 2^n. | 993 | * mp The digit representing the negative inverse of m mod 2^n. |
994 | * | ||
995 | * Montgomery reduction on multiprecision integers: | ||
996 | * Montgomery reduction requires products modulo R. | ||
997 | * When R is a power of B [in our case R=2^128, B=2^32], there is a variant | ||
998 | * of Montgomery reduction which requires products only of machine word sized | ||
999 | * integers. T is stored as an little-endian word array a[0..n]. The algorithm | ||
1000 | * reduces it one word at a time. First an appropriate multiple of modulus | ||
1001 | * is added to make T divisible by B. [In our case, it is p256_mp_mod * a[0].] | ||
1002 | * Then a multiple of modulus is added to make T divisible by B^2. | ||
1003 | * [In our case, it is (p256_mp_mod * a[1]) << 32.] | ||
1004 | * And so on. Eventually T is divisible by R, and after division by R | ||
1005 | * the algorithm is in the same place as the usual Montgomery reduction. | ||
1006 | * | ||
1007 | * TODO: Can conditionally use 64-bit (if bit-little-endian arch) logic? | ||
779 | */ | 1008 | */ |
780 | static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/) | 1009 | static void sp_512to256_mont_reduce_8(sp_digit* r, sp_digit* a/*, const sp_digit* m, sp_digit mp*/) |
781 | { | 1010 | { |
782 | // const sp_digit* m = p256_mod; | 1011 | // const sp_digit* m = p256_mod; |
783 | sp_digit mp = p256_mp_mod; | 1012 | sp_digit mp = p256_mp_mod; |
@@ -800,15 +1029,15 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ | |||
800 | goto inc_next_word0; | 1029 | goto inc_next_word0; |
801 | } | 1030 | } |
802 | } | 1031 | } |
803 | sp_256_mont_shift_8(a, a); | 1032 | sp_512to256_mont_shift_8(r, a); |
804 | if (word16th != 0) | 1033 | if (word16th != 0) |
805 | sp_256_sub_8_p256_mod(a); | 1034 | sp_256_sub_8_p256_mod(r); |
806 | sp_256_norm_8(a); | 1035 | sp_256_norm_8(r); |
807 | } | 1036 | } |
808 | else { /* Same code for explicit mp == 1 (which is always the case for P256) */ | 1037 | else { /* Same code for explicit mp == 1 (which is always the case for P256) */ |
809 | sp_digit word16th = 0; | 1038 | sp_digit word16th = 0; |
810 | for (i = 0; i < 8; i++) { | 1039 | for (i = 0; i < 8; i++) { |
811 | /*mu = a[i];*/ | 1040 | // mu = a[i]; |
812 | if (sp_256_mul_add_8(a+i /*, m, mu*/)) { | 1041 | if (sp_256_mul_add_8(a+i /*, m, mu*/)) { |
813 | int j = i + 8; | 1042 | int j = i + 8; |
814 | inc_next_word: | 1043 | inc_next_word: |
@@ -820,115 +1049,12 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ | |||
820 | goto inc_next_word; | 1049 | goto inc_next_word; |
821 | } | 1050 | } |
822 | } | 1051 | } |
823 | sp_256_mont_shift_8(a, a); | 1052 | sp_512to256_mont_shift_8(r, a); |
824 | if (word16th != 0) | 1053 | if (word16th != 0) |
825 | sp_256_sub_8_p256_mod(a); | 1054 | sp_256_sub_8_p256_mod(r); |
826 | sp_256_norm_8(a); | 1055 | sp_256_norm_8(r); |
827 | } | 1056 | } |
828 | } | 1057 | } |
829 | #if 0 | ||
830 | //TODO: arm32 asm (also adapt for x86?) | ||
831 | static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, sp_digit mp) | ||
832 | { | ||
833 | sp_digit ca = 0; | ||
834 | |||
835 | asm volatile ( | ||
836 | # i = 0 | ||
837 | mov r12, #0 | ||
838 | ldr r10, [%[a], #0] | ||
839 | ldr r14, [%[a], #4] | ||
840 | 1: | ||
841 | # mu = a[i] * mp | ||
842 | mul r8, %[mp], r10 | ||
843 | # a[i+0] += m[0] * mu | ||
844 | ldr r7, [%[m], #0] | ||
845 | ldr r9, [%[a], #0] | ||
846 | umull r6, r7, r8, r7 | ||
847 | adds r10, r10, r6 | ||
848 | adc r5, r7, #0 | ||
849 | # a[i+1] += m[1] * mu | ||
850 | ldr r7, [%[m], #4] | ||
851 | ldr r9, [%[a], #4] | ||
852 | umull r6, r7, r8, r7 | ||
853 | adds r10, r14, r6 | ||
854 | adc r4, r7, #0 | ||
855 | adds r10, r10, r5 | ||
856 | adc r4, r4, #0 | ||
857 | # a[i+2] += m[2] * mu | ||
858 | ldr r7, [%[m], #8] | ||
859 | ldr r14, [%[a], #8] | ||
860 | umull r6, r7, r8, r7 | ||
861 | adds r14, r14, r6 | ||
862 | adc r5, r7, #0 | ||
863 | adds r14, r14, r4 | ||
864 | adc r5, r5, #0 | ||
865 | # a[i+3] += m[3] * mu | ||
866 | ldr r7, [%[m], #12] | ||
867 | ldr r9, [%[a], #12] | ||
868 | umull r6, r7, r8, r7 | ||
869 | adds r9, r9, r6 | ||
870 | adc r4, r7, #0 | ||
871 | adds r9, r9, r5 | ||
872 | str r9, [%[a], #12] | ||
873 | adc r4, r4, #0 | ||
874 | # a[i+4] += m[4] * mu | ||
875 | ldr r7, [%[m], #16] | ||
876 | ldr r9, [%[a], #16] | ||
877 | umull r6, r7, r8, r7 | ||
878 | adds r9, r9, r6 | ||
879 | adc r5, r7, #0 | ||
880 | adds r9, r9, r4 | ||
881 | str r9, [%[a], #16] | ||
882 | adc r5, r5, #0 | ||
883 | # a[i+5] += m[5] * mu | ||
884 | ldr r7, [%[m], #20] | ||
885 | ldr r9, [%[a], #20] | ||
886 | umull r6, r7, r8, r7 | ||
887 | adds r9, r9, r6 | ||
888 | adc r4, r7, #0 | ||
889 | adds r9, r9, r5 | ||
890 | str r9, [%[a], #20] | ||
891 | adc r4, r4, #0 | ||
892 | # a[i+6] += m[6] * mu | ||
893 | ldr r7, [%[m], #24] | ||
894 | ldr r9, [%[a], #24] | ||
895 | umull r6, r7, r8, r7 | ||
896 | adds r9, r9, r6 | ||
897 | adc r5, r7, #0 | ||
898 | adds r9, r9, r4 | ||
899 | str r9, [%[a], #24] | ||
900 | adc r5, r5, #0 | ||
901 | # a[i+7] += m[7] * mu | ||
902 | ldr r7, [%[m], #28] | ||
903 | ldr r9, [%[a], #28] | ||
904 | umull r6, r7, r8, r7 | ||
905 | adds r5, r5, r6 | ||
906 | adcs r7, r7, %[ca] | ||
907 | mov %[ca], #0 | ||
908 | adc %[ca], %[ca], %[ca] | ||
909 | adds r9, r9, r5 | ||
910 | str r9, [%[a], #28] | ||
911 | ldr r9, [%[a], #32] | ||
912 | adcs r9, r9, r7 | ||
913 | str r9, [%[a], #32] | ||
914 | adc %[ca], %[ca], #0 | ||
915 | # i += 1 | ||
916 | add %[a], %[a], #4 | ||
917 | add r12, r12, #4 | ||
918 | cmp r12, #32 | ||
919 | blt 1b | ||
920 | |||
921 | str r10, [%[a], #0] | ||
922 | str r14, [%[a], #4] | ||
923 | : [ca] "+r" (ca), [a] "+r" (a) | ||
924 | : [m] "r" (m), [mp] "r" (mp) | ||
925 | : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" | ||
926 | ); | ||
927 | |||
928 | memcpy(a, a + 8, 32); | ||
929 | if (ca) | ||
930 | a -= m; | ||
931 | } | ||
932 | #endif | 1058 | #endif |
933 | 1059 | ||
934 | /* Multiply two Montogmery form numbers mod the modulus (prime). | 1060 | /* Multiply two Montogmery form numbers mod the modulus (prime). |
@@ -938,15 +1064,16 @@ static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, sp_digit mp) | |||
938 | * a First number to multiply in Montogmery form. | 1064 | * a First number to multiply in Montogmery form. |
939 | * b Second number to multiply in Montogmery form. | 1065 | * b Second number to multiply in Montogmery form. |
940 | * m Modulus (prime). | 1066 | * m Modulus (prime). |
941 | * mp Montogmery mulitplier. | 1067 | * mp Montogmery multiplier. |
942 | */ | 1068 | */ |
943 | static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b | 1069 | static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b |
944 | /*, const sp_digit* m, sp_digit mp*/) | 1070 | /*, const sp_digit* m, sp_digit mp*/) |
945 | { | 1071 | { |
946 | //const sp_digit* m = p256_mod; | 1072 | //const sp_digit* m = p256_mod; |
947 | //sp_digit mp = p256_mp_mod; | 1073 | //sp_digit mp = p256_mp_mod; |
948 | sp_256_mul_8(r, a, b); | 1074 | sp_digit t[2 * 8]; |
949 | sp_256_mont_reduce_8(r /*, m, mp*/); | 1075 | sp_256to512_mul_8(t, a, b); |
1076 | sp_512to256_mont_reduce_8(r, t /*, m, mp*/); | ||
950 | } | 1077 | } |
951 | 1078 | ||
952 | /* Square the Montgomery form number. (r = a * a mod m) | 1079 | /* Square the Montgomery form number. (r = a * a mod m) |
@@ -954,7 +1081,7 @@ static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b | |||
954 | * r Result of squaring. | 1081 | * r Result of squaring. |
955 | * a Number to square in Montogmery form. | 1082 | * a Number to square in Montogmery form. |
956 | * m Modulus (prime). | 1083 | * m Modulus (prime). |
957 | * mp Montogmery mulitplier. | 1084 | * mp Montogmery multiplier. |
958 | */ | 1085 | */ |
959 | static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a | 1086 | static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a |
960 | /*, const sp_digit* m, sp_digit mp*/) | 1087 | /*, const sp_digit* m, sp_digit mp*/) |
@@ -964,37 +1091,42 @@ static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a | |||
964 | sp_256_mont_mul_8(r, a, a /*, m, mp*/); | 1091 | sp_256_mont_mul_8(r, a, a /*, m, mp*/); |
965 | } | 1092 | } |
966 | 1093 | ||
1094 | static NOINLINE void sp_256_mont_mul_and_reduce_8(sp_digit* r, | ||
1095 | const sp_digit* a, const sp_digit* b | ||
1096 | /*, const sp_digit* m, sp_digit mp*/) | ||
1097 | { | ||
1098 | sp_digit rr[2 * 8]; | ||
1099 | |||
1100 | sp_256_mont_mul_8(rr, a, b /*, p256_mod, p256_mp_mod*/); | ||
1101 | memset(rr + 8, 0, sizeof(rr) / 2); | ||
1102 | sp_512to256_mont_reduce_8(r, rr /*, p256_mod, p256_mp_mod*/); | ||
1103 | } | ||
1104 | |||
967 | /* Invert the number, in Montgomery form, modulo the modulus (prime) of the | 1105 | /* Invert the number, in Montgomery form, modulo the modulus (prime) of the |
968 | * P256 curve. (r = 1 / a mod m) | 1106 | * P256 curve. (r = 1 / a mod m) |
969 | * | 1107 | * |
970 | * r Inverse result. | 1108 | * r Inverse result. Must not coincide with a. |
971 | * a Number to invert. | 1109 | * a Number to invert. |
972 | */ | 1110 | */ |
973 | #if 0 | ||
974 | /* Mod-2 for the P256 curve. */ | ||
975 | static const uint32_t p256_mod_2[8] = { | ||
976 | 0xfffffffd,0xffffffff,0xffffffff,0x00000000, | ||
977 | 0x00000000,0x00000000,0x00000001,0xffffffff, | ||
978 | }; | ||
979 | //Bit pattern: | ||
980 | //2 2 2 2 2 2 2 1...1 | ||
981 | //5 5 4 3 2 1 0 9...0 9...1 | ||
982 | //543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210 | ||
983 | //111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101 | ||
984 | #endif | ||
985 | static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a) | 1111 | static void sp_256_mont_inv_8(sp_digit* r, sp_digit* a) |
986 | { | 1112 | { |
987 | sp_digit t[2*8]; //can be just [8]? | ||
988 | int i; | 1113 | int i; |
989 | 1114 | ||
990 | memcpy(t, a, sizeof(sp_digit) * 8); | 1115 | memcpy(r, a, sizeof(sp_digit) * 8); |
991 | for (i = 254; i >= 0; i--) { | 1116 | for (i = 254; i >= 0; i--) { |
992 | sp_256_mont_sqr_8(t, t /*, p256_mod, p256_mp_mod*/); | 1117 | sp_256_mont_sqr_8(r, r /*, p256_mod, p256_mp_mod*/); |
993 | /*if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))*/ | 1118 | /* p256_mod - 2: |
1119 | * ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff - 2 | ||
1120 | * Bit pattern: | ||
1121 | * 2 2 2 2 2 2 2 1...1 | ||
1122 | * 5 5 4 3 2 1 0 9...0 9...1 | ||
1123 | * 543210987654321098765432109876543210987654321098765432109876543210...09876543210...09876543210 | ||
1124 | * 111111111111111111111111111111110000000000000000000000000000000100...00000111111...11111111101 | ||
1125 | */ | ||
1126 | /*if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))*/ | ||
994 | if (i >= 224 || i == 192 || (i <= 95 && i != 1)) | 1127 | if (i >= 224 || i == 192 || (i <= 95 && i != 1)) |
995 | sp_256_mont_mul_8(t, t, a /*, p256_mod, p256_mp_mod*/); | 1128 | sp_256_mont_mul_8(r, r, a /*, p256_mod, p256_mp_mod*/); |
996 | } | 1129 | } |
997 | memcpy(r, t, sizeof(sp_digit) * 8); | ||
998 | } | 1130 | } |
999 | 1131 | ||
1000 | /* Multiply a number by Montogmery normalizer mod modulus (prime). | 1132 | /* Multiply a number by Montogmery normalizer mod modulus (prime). |
@@ -1063,8 +1195,8 @@ static void sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a) | |||
1063 | */ | 1195 | */ |
1064 | static void sp_256_map_8(sp_point* r, sp_point* p) | 1196 | static void sp_256_map_8(sp_point* r, sp_point* p) |
1065 | { | 1197 | { |
1066 | sp_digit t1[2*8]; | 1198 | sp_digit t1[8]; |
1067 | sp_digit t2[2*8]; | 1199 | sp_digit t2[8]; |
1068 | 1200 | ||
1069 | sp_256_mont_inv_8(t1, p->z); | 1201 | sp_256_mont_inv_8(t1, p->z); |
1070 | 1202 | ||
@@ -1072,18 +1204,14 @@ static void sp_256_map_8(sp_point* r, sp_point* p) | |||
1072 | sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/); | 1204 | sp_256_mont_mul_8(t1, t2, t1 /*, p256_mod, p256_mp_mod*/); |
1073 | 1205 | ||
1074 | /* x /= z^2 */ | 1206 | /* x /= z^2 */ |
1075 | sp_256_mont_mul_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/); | 1207 | sp_256_mont_mul_and_reduce_8(r->x, p->x, t2 /*, p256_mod, p256_mp_mod*/); |
1076 | memset(r->x + 8, 0, sizeof(r->x) / 2); | ||
1077 | sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); | ||
1078 | /* Reduce x to less than modulus */ | 1208 | /* Reduce x to less than modulus */ |
1079 | if (sp_256_cmp_8(r->x, p256_mod) >= 0) | 1209 | if (sp_256_cmp_8(r->x, p256_mod) >= 0) |
1080 | sp_256_sub_8_p256_mod(r->x); | 1210 | sp_256_sub_8_p256_mod(r->x); |
1081 | sp_256_norm_8(r->x); | 1211 | sp_256_norm_8(r->x); |
1082 | 1212 | ||
1083 | /* y /= z^3 */ | 1213 | /* y /= z^3 */ |
1084 | sp_256_mont_mul_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/); | 1214 | sp_256_mont_mul_and_reduce_8(r->y, p->y, t1 /*, p256_mod, p256_mp_mod*/); |
1085 | memset(r->y + 8, 0, sizeof(r->y) / 2); | ||
1086 | sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); | ||
1087 | /* Reduce y to less than modulus */ | 1215 | /* Reduce y to less than modulus */ |
1088 | if (sp_256_cmp_8(r->y, p256_mod) >= 0) | 1216 | if (sp_256_cmp_8(r->y, p256_mod) >= 0) |
1089 | sp_256_sub_8_p256_mod(r->y); | 1217 | sp_256_sub_8_p256_mod(r->y); |
@@ -1100,8 +1228,8 @@ static void sp_256_map_8(sp_point* r, sp_point* p) | |||
1100 | */ | 1228 | */ |
1101 | static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) | 1229 | static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) |
1102 | { | 1230 | { |
1103 | sp_digit t1[2*8]; | 1231 | sp_digit t1[8]; |
1104 | sp_digit t2[2*8]; | 1232 | sp_digit t2[8]; |
1105 | 1233 | ||
1106 | /* Put point to double into result */ | 1234 | /* Put point to double into result */ |
1107 | if (r != p) | 1235 | if (r != p) |
@@ -1110,13 +1238,6 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) | |||
1110 | if (r->infinity) | 1238 | if (r->infinity) |
1111 | return; | 1239 | return; |
1112 | 1240 | ||
1113 | if (SP_DEBUG) { | ||
1114 | /* unused part of t2, may result in spurios | ||
1115 | * differences in debug output. Clear it. | ||
1116 | */ | ||
1117 | memset(t2, 0, sizeof(t2)); | ||
1118 | } | ||
1119 | |||
1120 | /* T1 = Z * Z */ | 1241 | /* T1 = Z * Z */ |
1121 | sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/); | 1242 | sp_256_mont_sqr_8(t1, r->z /*, p256_mod, p256_mp_mod*/); |
1122 | /* Z = Y * Z */ | 1243 | /* Z = Y * Z */ |
@@ -1138,7 +1259,7 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) | |||
1138 | /* T2 = Y * Y */ | 1259 | /* T2 = Y * Y */ |
1139 | sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/); | 1260 | sp_256_mont_sqr_8(t2, r->y /*, p256_mod, p256_mp_mod*/); |
1140 | /* T2 = T2/2 */ | 1261 | /* T2 = T2/2 */ |
1141 | sp_256_div2_8(t2, t2, p256_mod); | 1262 | sp_256_div2_8(t2 /*, p256_mod*/); |
1142 | /* Y = Y * X */ | 1263 | /* Y = Y * X */ |
1143 | sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/); | 1264 | sp_256_mont_mul_8(r->y, r->y, r->x /*, p256_mod, p256_mp_mod*/); |
1144 | /* X = T1 * T1 */ | 1265 | /* X = T1 * T1 */ |
@@ -1164,11 +1285,11 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) | |||
1164 | */ | 1285 | */ |
1165 | static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q) | 1286 | static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q) |
1166 | { | 1287 | { |
1167 | sp_digit t1[2*8]; | 1288 | sp_digit t1[8]; |
1168 | sp_digit t2[2*8]; | 1289 | sp_digit t2[8]; |
1169 | sp_digit t3[2*8]; | 1290 | sp_digit t3[8]; |
1170 | sp_digit t4[2*8]; | 1291 | sp_digit t4[8]; |
1171 | sp_digit t5[2*8]; | 1292 | sp_digit t5[8]; |
1172 | 1293 | ||
1173 | /* Ensure only the first point is the same as the result. */ | 1294 | /* Ensure only the first point is the same as the result. */ |
1174 | if (q == r) { | 1295 | if (q == r) { |
@@ -1185,52 +1306,46 @@ static NOINLINE void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* | |||
1185 | && (sp_256_cmp_equal_8(p->y, q->y) || sp_256_cmp_equal_8(p->y, t1)) | 1306 | && (sp_256_cmp_equal_8(p->y, q->y) || sp_256_cmp_equal_8(p->y, t1)) |
1186 | ) { | 1307 | ) { |
1187 | sp_256_proj_point_dbl_8(r, p); | 1308 | sp_256_proj_point_dbl_8(r, p); |
1309 | return; | ||
1188 | } | 1310 | } |
1189 | else { | ||
1190 | sp_point tp; | ||
1191 | sp_point *v; | ||
1192 | |||
1193 | v = r; | ||
1194 | if (p->infinity | q->infinity) { | ||
1195 | memset(&tp, 0, sizeof(tp)); | ||
1196 | v = &tp; | ||
1197 | } | ||
1198 | 1311 | ||
1312 | if (p->infinity || q->infinity) { | ||
1199 | *r = p->infinity ? *q : *p; /* struct copy */ | 1313 | *r = p->infinity ? *q : *p; /* struct copy */ |
1200 | 1314 | return; | |
1201 | /* U1 = X1*Z2^2 */ | ||
1202 | sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); | ||
1203 | sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); | ||
1204 | sp_256_mont_mul_8(t1, t1, v->x /*, p256_mod, p256_mp_mod*/); | ||
1205 | /* U2 = X2*Z1^2 */ | ||
1206 | sp_256_mont_sqr_8(t2, v->z /*, p256_mod, p256_mp_mod*/); | ||
1207 | sp_256_mont_mul_8(t4, t2, v->z /*, p256_mod, p256_mp_mod*/); | ||
1208 | sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); | ||
1209 | /* S1 = Y1*Z2^3 */ | ||
1210 | sp_256_mont_mul_8(t3, t3, v->y /*, p256_mod, p256_mp_mod*/); | ||
1211 | /* S2 = Y2*Z1^3 */ | ||
1212 | sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); | ||
1213 | /* H = U2 - U1 */ | ||
1214 | sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/); | ||
1215 | /* R = S2 - S1 */ | ||
1216 | sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/); | ||
1217 | /* Z3 = H*Z1*Z2 */ | ||
1218 | sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/); | ||
1219 | sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/); | ||
1220 | /* X3 = R^2 - H^3 - 2*U1*H^2 */ | ||
1221 | sp_256_mont_sqr_8(v->x, t4 /*, p256_mod, p256_mp_mod*/); | ||
1222 | sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); | ||
1223 | sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/); | ||
1224 | sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); | ||
1225 | sp_256_mont_sub_8(v->x, v->x, t5 /*, p256_mod*/); | ||
1226 | sp_256_mont_dbl_8(t1, v->y /*, p256_mod*/); | ||
1227 | sp_256_mont_sub_8(v->x, v->x, t1 /*, p256_mod*/); | ||
1228 | /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ | ||
1229 | sp_256_mont_sub_8(v->y, v->y, v->x /*, p256_mod*/); | ||
1230 | sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/); | ||
1231 | sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); | ||
1232 | sp_256_mont_sub_8(v->y, v->y, t5 /*, p256_mod*/); | ||
1233 | } | 1315 | } |
1316 | |||
1317 | /* U1 = X1*Z2^2 */ | ||
1318 | sp_256_mont_sqr_8(t1, q->z /*, p256_mod, p256_mp_mod*/); | ||
1319 | sp_256_mont_mul_8(t3, t1, q->z /*, p256_mod, p256_mp_mod*/); | ||
1320 | sp_256_mont_mul_8(t1, t1, r->x /*, p256_mod, p256_mp_mod*/); | ||
1321 | /* U2 = X2*Z1^2 */ | ||
1322 | sp_256_mont_sqr_8(t2, r->z /*, p256_mod, p256_mp_mod*/); | ||
1323 | sp_256_mont_mul_8(t4, t2, r->z /*, p256_mod, p256_mp_mod*/); | ||
1324 | sp_256_mont_mul_8(t2, t2, q->x /*, p256_mod, p256_mp_mod*/); | ||
1325 | /* S1 = Y1*Z2^3 */ | ||
1326 | sp_256_mont_mul_8(t3, t3, r->y /*, p256_mod, p256_mp_mod*/); | ||
1327 | /* S2 = Y2*Z1^3 */ | ||
1328 | sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); | ||
1329 | /* H = U2 - U1 */ | ||
1330 | sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/); | ||
1331 | /* R = S2 - S1 */ | ||
1332 | sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/); | ||
1333 | /* Z3 = H*Z1*Z2 */ | ||
1334 | sp_256_mont_mul_8(r->z, r->z, q->z /*, p256_mod, p256_mp_mod*/); | ||
1335 | sp_256_mont_mul_8(r->z, r->z, t2 /*, p256_mod, p256_mp_mod*/); | ||
1336 | /* X3 = R^2 - H^3 - 2*U1*H^2 */ | ||
1337 | sp_256_mont_sqr_8(r->x, t4 /*, p256_mod, p256_mp_mod*/); | ||
1338 | sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); | ||
1339 | sp_256_mont_mul_8(r->y, t1, t5 /*, p256_mod, p256_mp_mod*/); | ||
1340 | sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); | ||
1341 | sp_256_mont_sub_8(r->x, r->x, t5 /*, p256_mod*/); | ||
1342 | sp_256_mont_dbl_8(t1, r->y /*, p256_mod*/); | ||
1343 | sp_256_mont_sub_8(r->x, r->x, t1 /*, p256_mod*/); | ||
1344 | /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ | ||
1345 | sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/); | ||
1346 | sp_256_mont_mul_8(r->y, r->y, t4 /*, p256_mod, p256_mp_mod*/); | ||
1347 | sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); | ||
1348 | sp_256_mont_sub_8(r->y, r->y, t5 /*, p256_mod*/); | ||
1234 | } | 1349 | } |
1235 | 1350 | ||
1236 | /* Multiply the point by the scalar and return the result. | 1351 | /* Multiply the point by the scalar and return the result. |
@@ -1277,13 +1392,13 @@ static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit* | |||
1277 | dump_512("t[1].y %s\n", t[1].y); | 1392 | dump_512("t[1].y %s\n", t[1].y); |
1278 | dump_512("t[1].z %s\n", t[1].z); | 1393 | dump_512("t[1].z %s\n", t[1].z); |
1279 | dbg("t[2] = t[%d]\n", y); | 1394 | dbg("t[2] = t[%d]\n", y); |
1280 | memcpy(&t[2], &t[y], sizeof(sp_point)); | 1395 | t[2] = t[y]; /* struct copy */ |
1281 | dbg("t[2] *= 2\n"); | 1396 | dbg("t[2] *= 2\n"); |
1282 | sp_256_proj_point_dbl_8(&t[2], &t[2]); | 1397 | sp_256_proj_point_dbl_8(&t[2], &t[2]); |
1283 | dump_512("t[2].x %s\n", t[2].x); | 1398 | dump_512("t[2].x %s\n", t[2].x); |
1284 | dump_512("t[2].y %s\n", t[2].y); | 1399 | dump_512("t[2].y %s\n", t[2].y); |
1285 | dump_512("t[2].z %s\n", t[2].z); | 1400 | dump_512("t[2].z %s\n", t[2].z); |
1286 | memcpy(&t[y], &t[2], sizeof(sp_point)); | 1401 | t[y] = t[2]; /* struct copy */ |
1287 | 1402 | ||
1288 | n <<= 1; | 1403 | n <<= 1; |
1289 | c--; | 1404 | c--; |
@@ -1292,7 +1407,7 @@ static void sp_256_ecc_mulmod_8(sp_point* r, const sp_point* g, const sp_digit* | |||
1292 | if (map) | 1407 | if (map) |
1293 | sp_256_map_8(r, &t[0]); | 1408 | sp_256_map_8(r, &t[0]); |
1294 | else | 1409 | else |
1295 | memcpy(r, &t[0], sizeof(sp_point)); | 1410 | *r = t[0]; /* struct copy */ |
1296 | 1411 | ||
1297 | memset(t, 0, sizeof(t)); //paranoia | 1412 | memset(t, 0, sizeof(t)); //paranoia |
1298 | } | 1413 | } |
diff --git a/networking/udhcp/common.c b/networking/udhcp/common.c index 31e525cb0..8e9b93655 100644 --- a/networking/udhcp/common.c +++ b/networking/udhcp/common.c | |||
@@ -404,14 +404,29 @@ void FAST_FUNC udhcp_add_simple_option(struct dhcp_packet *packet, uint8_t code, | |||
404 | #endif | 404 | #endif |
405 | 405 | ||
406 | /* Find option 'code' in opt_list */ | 406 | /* Find option 'code' in opt_list */ |
407 | struct option_set* FAST_FUNC udhcp_find_option(struct option_set *opt_list, uint8_t code) | 407 | struct option_set* FAST_FUNC udhcp_find_option(struct option_set *opt_list, uint8_t code, bool dhcpv6) |
408 | { | 408 | { |
409 | while (opt_list && opt_list->data[OPT_CODE] < code) | 409 | IF_NOT_UDHCPC6(bool dhcpv6 = 0;) |
410 | opt_list = opt_list->next; | 410 | uint8_t cur_code; |
411 | 411 | ||
412 | if (opt_list && opt_list->data[OPT_CODE] == code) | 412 | for (;;) { |
413 | return opt_list; | 413 | if (!opt_list) |
414 | return NULL; | 414 | return opt_list; /* NULL */ |
415 | if (!dhcpv6) { | ||
416 | cur_code = opt_list->data[OPT_CODE]; | ||
417 | } else { | ||
418 | //FIXME: add support for code > 0xff | ||
419 | if (opt_list->data[D6_OPT_CODE] != 0) | ||
420 | return NULL; | ||
421 | cur_code = opt_list->data[D6_OPT_CODE + 1]; | ||
422 | } | ||
423 | if (cur_code >= code) { | ||
424 | if (cur_code == code) | ||
425 | return opt_list; | ||
426 | return NULL; | ||
427 | } | ||
428 | opt_list = opt_list->next; | ||
429 | } | ||
415 | } | 430 | } |
416 | 431 | ||
417 | /* Parse string to IP in network order */ | 432 | /* Parse string to IP in network order */ |
@@ -499,7 +514,7 @@ static NOINLINE void attach_option( | |||
499 | } | 514 | } |
500 | #endif | 515 | #endif |
501 | 516 | ||
502 | existing = udhcp_find_option(*opt_list, optflag->code); | 517 | existing = udhcp_find_option(*opt_list, optflag->code, dhcpv6); |
503 | if (!existing) { | 518 | if (!existing) { |
504 | /* make a new option */ | 519 | /* make a new option */ |
505 | uint8_t *p = udhcp_insert_new_option(opt_list, optflag->code, length, dhcpv6); | 520 | uint8_t *p = udhcp_insert_new_option(opt_list, optflag->code, length, dhcpv6); |
diff --git a/networking/udhcp/common.h b/networking/udhcp/common.h index e374771cb..5882238e3 100644 --- a/networking/udhcp/common.h +++ b/networking/udhcp/common.h | |||
@@ -245,7 +245,11 @@ void udhcp_add_simple_option(struct dhcp_packet *packet, uint8_t code, uint32_t | |||
245 | char *dname_dec(const uint8_t *cstr, int clen, const char *pre) FAST_FUNC; | 245 | char *dname_dec(const uint8_t *cstr, int clen, const char *pre) FAST_FUNC; |
246 | uint8_t *dname_enc(/*const uint8_t *cstr, int clen,*/ const char *src, int *retlen) FAST_FUNC; | 246 | uint8_t *dname_enc(/*const uint8_t *cstr, int clen,*/ const char *src, int *retlen) FAST_FUNC; |
247 | #endif | 247 | #endif |
248 | struct option_set *udhcp_find_option(struct option_set *opt_list, uint8_t code) FAST_FUNC; | 248 | #if !ENABLE_UDHCPC6 |
249 | #define udhcp_find_option(opt_list, code, dhcpv6) \ | ||
250 | udhcp_find_option(opt_list, code) | ||
251 | #endif | ||
252 | struct option_set *udhcp_find_option(struct option_set *opt_list, uint8_t code, bool dhcpv6) FAST_FUNC; | ||
249 | 253 | ||
250 | // RFC 2131 Table 5: Fields and options used by DHCP clients | 254 | // RFC 2131 Table 5: Fields and options used by DHCP clients |
251 | // | 255 | // |
diff --git a/networking/udhcp/d6_dhcpc.c b/networking/udhcp/d6_dhcpc.c index 8d11a7539..9d2a8f5d3 100644 --- a/networking/udhcp/d6_dhcpc.c +++ b/networking/udhcp/d6_dhcpc.c | |||
@@ -888,7 +888,8 @@ int send_d6_release(struct in6_addr *server_ipv6, struct in6_addr *our_cur_ipv6) | |||
888 | if (client6_data.ia_pd) | 888 | if (client6_data.ia_pd) |
889 | opt_ptr = mempcpy(opt_ptr, client6_data.ia_pd, client6_data.ia_pd->len + 2+2); | 889 | opt_ptr = mempcpy(opt_ptr, client6_data.ia_pd, client6_data.ia_pd->len + 2+2); |
890 | /* Client-id */ | 890 | /* Client-id */ |
891 | ci = udhcp_find_option(client_data.options, D6_OPT_CLIENTID); | 891 | ///vda |
892 | ci = udhcp_find_option(client_data.options, D6_OPT_CLIENTID, /*dhcpv6:*/ 1); | ||
892 | if (ci) | 893 | if (ci) |
893 | opt_ptr = mempcpy(opt_ptr, ci->data, D6_OPT_DATA + 2+2 + 6); | 894 | opt_ptr = mempcpy(opt_ptr, ci->data, D6_OPT_DATA + 2+2 + 6); |
894 | 895 | ||
@@ -1272,7 +1273,7 @@ int udhcpc6_main(int argc UNUSED_PARAM, char **argv) | |||
1272 | } | 1273 | } |
1273 | 1274 | ||
1274 | clientid_mac_ptr = NULL; | 1275 | clientid_mac_ptr = NULL; |
1275 | if (!udhcp_find_option(client_data.options, D6_OPT_CLIENTID)) { | 1276 | if (!udhcp_find_option(client_data.options, D6_OPT_CLIENTID, /*dhcpv6:*/ 1)) { |
1276 | /* not set, set the default client ID */ | 1277 | /* not set, set the default client ID */ |
1277 | clientid_mac_ptr = udhcp_insert_new_option( | 1278 | clientid_mac_ptr = udhcp_insert_new_option( |
1278 | &client_data.options, D6_OPT_CLIENTID, | 1279 | &client_data.options, D6_OPT_CLIENTID, |
diff --git a/networking/udhcp/dhcpc.c b/networking/udhcp/dhcpc.c index 331f13a8c..c757fb37c 100644 --- a/networking/udhcp/dhcpc.c +++ b/networking/udhcp/dhcpc.c | |||
@@ -658,7 +658,7 @@ static void add_client_options(struct dhcp_packet *packet) | |||
658 | 658 | ||
659 | // This will be needed if we remove -V VENDOR_STR in favor of | 659 | // This will be needed if we remove -V VENDOR_STR in favor of |
660 | // -x vendor:VENDOR_STR | 660 | // -x vendor:VENDOR_STR |
661 | //if (!udhcp_find_option(packet.options, DHCP_VENDOR)) | 661 | //if (!udhcp_find_option(packet.options, DHCP_VENDOR, /*dhcpv6:*/ 0)) |
662 | // /* not set, set the default vendor ID */ | 662 | // /* not set, set the default vendor ID */ |
663 | // ...add (DHCP_VENDOR, "udhcp "BB_VER) opt... | 663 | // ...add (DHCP_VENDOR, "udhcp "BB_VER) opt... |
664 | } | 664 | } |
@@ -676,7 +676,7 @@ static void add_serverid_and_clientid_options(struct dhcp_packet *packet, uint32 | |||
676 | * If the client used a 'client identifier' when it obtained the lease, | 676 | * If the client used a 'client identifier' when it obtained the lease, |
677 | * it MUST use the same 'client identifier' in the DHCPRELEASE message. | 677 | * it MUST use the same 'client identifier' in the DHCPRELEASE message. |
678 | */ | 678 | */ |
679 | ci = udhcp_find_option(client_data.options, DHCP_CLIENT_ID); | 679 | ci = udhcp_find_option(client_data.options, DHCP_CLIENT_ID, /*dhcpv6:*/ 0); |
680 | if (ci) | 680 | if (ci) |
681 | udhcp_add_binary_option(packet, ci->data); | 681 | udhcp_add_binary_option(packet, ci->data); |
682 | } | 682 | } |
@@ -1328,7 +1328,7 @@ int udhcpc_main(int argc UNUSED_PARAM, char **argv) | |||
1328 | } | 1328 | } |
1329 | 1329 | ||
1330 | clientid_mac_ptr = NULL; | 1330 | clientid_mac_ptr = NULL; |
1331 | if (!(opt & OPT_C) && !udhcp_find_option(client_data.options, DHCP_CLIENT_ID)) { | 1331 | if (!(opt & OPT_C) && !udhcp_find_option(client_data.options, DHCP_CLIENT_ID, /*dhcpv6:*/ 0)) { |
1332 | /* not suppressed and not set, create default client ID */ | 1332 | /* not suppressed and not set, create default client ID */ |
1333 | clientid_mac_ptr = udhcp_insert_new_option( | 1333 | clientid_mac_ptr = udhcp_insert_new_option( |
1334 | &client_data.options, DHCP_CLIENT_ID, | 1334 | &client_data.options, DHCP_CLIENT_ID, |
diff --git a/networking/udhcp/dhcpd.c b/networking/udhcp/dhcpd.c index 0f5edb75c..66750e2e6 100644 --- a/networking/udhcp/dhcpd.c +++ b/networking/udhcp/dhcpd.c | |||
@@ -935,7 +935,7 @@ int udhcpd_main(int argc UNUSED_PARAM, char **argv) | |||
935 | 935 | ||
936 | bb_simple_info_msg("started, v"BB_VER); | 936 | bb_simple_info_msg("started, v"BB_VER); |
937 | 937 | ||
938 | option = udhcp_find_option(server_data.options, DHCP_LEASE_TIME); | 938 | option = udhcp_find_option(server_data.options, DHCP_LEASE_TIME, /*dhcpv6:*/ 0); |
939 | server_data.max_lease_sec = DEFAULT_LEASE_TIME; | 939 | server_data.max_lease_sec = DEFAULT_LEASE_TIME; |
940 | if (option) { | 940 | if (option) { |
941 | move_from_unaligned32(server_data.max_lease_sec, option->data + OPT_DATA); | 941 | move_from_unaligned32(server_data.max_lease_sec, option->data + OPT_DATA); |
diff --git a/networking/wget.c b/networking/wget.c index 85a04eaba..5470502aa 100644 --- a/networking/wget.c +++ b/networking/wget.c | |||
@@ -211,29 +211,33 @@ enum { | |||
211 | HDR_HOST = (1<<0), | 211 | HDR_HOST = (1<<0), |
212 | HDR_USER_AGENT = (1<<1), | 212 | HDR_USER_AGENT = (1<<1), |
213 | HDR_RANGE = (1<<2), | 213 | HDR_RANGE = (1<<2), |
214 | HDR_AUTH = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION, | 214 | HDR_CONTENT_TYPE = (1<<3), |
215 | HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION, | 215 | HDR_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION, |
216 | HDR_PROXY_AUTH = (1<<5) * ENABLE_FEATURE_WGET_AUTHENTICATION, | ||
216 | }; | 217 | }; |
217 | static const char wget_user_headers[] ALIGN1 = | 218 | static const char wget_user_headers[] ALIGN1 = |
218 | "Host:\0" | 219 | "Host:\0" |
219 | "User-Agent:\0" | 220 | "User-Agent:\0" |
220 | "Range:\0" | 221 | "Range:\0" |
222 | "Content-Type:\0" | ||
221 | # if ENABLE_FEATURE_WGET_AUTHENTICATION | 223 | # if ENABLE_FEATURE_WGET_AUTHENTICATION |
222 | "Authorization:\0" | 224 | "Authorization:\0" |
223 | "Proxy-Authorization:\0" | 225 | "Proxy-Authorization:\0" |
224 | # endif | 226 | # endif |
225 | ; | 227 | ; |
226 | # define USR_HEADER_HOST (G.user_headers & HDR_HOST) | 228 | # define USR_HEADER_HOST (G.user_headers & HDR_HOST) |
227 | # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT) | 229 | # define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT) |
228 | # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE) | 230 | # define USR_HEADER_RANGE (G.user_headers & HDR_RANGE) |
229 | # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH) | 231 | # define USR_HEADER_CONTENT_TYPE (G.user_headers & HDR_CONTENT_TYPE) |
230 | # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH) | 232 | # define USR_HEADER_AUTH (G.user_headers & HDR_AUTH) |
233 | # define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH) | ||
231 | #else /* No long options, no user-headers :( */ | 234 | #else /* No long options, no user-headers :( */ |
232 | # define USR_HEADER_HOST 0 | 235 | # define USR_HEADER_HOST 0 |
233 | # define USR_HEADER_USER_AGENT 0 | 236 | # define USR_HEADER_USER_AGENT 0 |
234 | # define USR_HEADER_RANGE 0 | 237 | # define USR_HEADER_RANGE 0 |
235 | # define USR_HEADER_AUTH 0 | 238 | # define USR_HEADER_CONTENT_TYPE 0 |
236 | # define USR_HEADER_PROXY_AUTH 0 | 239 | # define USR_HEADER_AUTH 0 |
240 | # define USR_HEADER_PROXY_AUTH 0 | ||
237 | #endif | 241 | #endif |
238 | 242 | ||
239 | /* Globals */ | 243 | /* Globals */ |
@@ -1294,8 +1298,13 @@ static void download_one_url(const char *url) | |||
1294 | } | 1298 | } |
1295 | 1299 | ||
1296 | if (G.post_data) { | 1300 | if (G.post_data) { |
1301 | /* If user did not override it... */ | ||
1302 | if (!USR_HEADER_CONTENT_TYPE) { | ||
1303 | SENDFMT(sfp, | ||
1304 | "Content-Type: application/x-www-form-urlencoded\r\n" | ||
1305 | ); | ||
1306 | } | ||
1297 | SENDFMT(sfp, | 1307 | SENDFMT(sfp, |
1298 | "Content-Type: application/x-www-form-urlencoded\r\n" | ||
1299 | "Content-Length: %u\r\n" | 1308 | "Content-Length: %u\r\n" |
1300 | "\r\n" | 1309 | "\r\n" |
1301 | "%s", | 1310 | "%s", |