aboutsummaryrefslogtreecommitdiff
path: root/networking
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2021-10-05 23:19:18 +0200
committer: Denys Vlasenko <vda.linux@googlemail.com> 2021-10-05 23:19:18 +0200
commit: bbd723ebec33aa14746dde88b982b160977938b6 (patch)
tree: 41c91f86f4d96a67322234bf382e3795dfe06bec /networking
parent: 3b411ebbfc749f9f12b0eb739cb5ba3ec052197e (diff)
download: busybox-w32-bbd723ebec33aa14746dde88b982b160977938b6.tar.gz
busybox-w32-bbd723ebec33aa14746dde88b982b160977938b6.tar.bz2
busybox-w32-bbd723ebec33aa14746dde88b982b160977938b6.zip
tls: optimize sp_256_mul_8 in P256
function                old   new   delta
sp_256_mont_mul_8       151   150      -1

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'networking')
-rw-r--r-- networking/tls_sp_c32.c 84
1 files changed, 82 insertions, 2 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index b99951890..e1c4cdd54 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -294,6 +294,85 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
294/* Multiply a and b into r. (r = a * b) */ 294/* Multiply a and b into r. (r = a * b) */
295static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) 295static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
296{ 296{
297#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
298 sp_digit rr[15]; /* in case r coincides with a or b */
299 int k;
300 uint32_t accl;
301 uint32_t acch;
302
303 acch = accl = 0;
304 for (k = 0; k < 15; k++) {
305 int i, j;
306 uint32_t acc_hi;
307 i = k - 7;
308 if (i < 0)
309 i = 0;
310 j = k - i;
311 acc_hi = 0;
312 do {
313////////////////////////
314// uint64_t m = ((uint64_t)a[i]) * b[j];
315// acc_hi:acch:accl += m;
316 asm volatile (
317 // a[i] is already loaded in %%eax
318"\n mull %7"
319"\n addl %%eax, %0"
320"\n adcl %%edx, %1"
321"\n adcl $0, %2"
322 : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi)
323 : "0" (accl), "1" (acch), "2" (acc_hi), "a" (a[i]), "m" (b[j])
324 : "cc", "dx"
325 );
326////////////////////////
327 j--;
328 i++;
329 } while (i != 8 && i <= k);
330 rr[k] = accl;
331 accl = acch;
332 acch = acc_hi;
333 }
334 r[15] = accl;
335 memcpy(r, rr, sizeof(rr));
336#elif 0
337 //TODO: arm assembly (untested)
338 sp_digit tmp[16];
339
340 asm volatile (
341"\n mov r5, #0"
342"\n mov r6, #0"
343"\n mov r7, #0"
344"\n mov r8, #0"
345"\n 1:"
346"\n subs r3, r5, #28"
347"\n movcc r3, #0"
348"\n sub r4, r5, r3"
349"\n 2:"
350"\n ldr r14, [%[a], r3]"
351"\n ldr r12, [%[b], r4]"
352"\n umull r9, r10, r14, r12"
353"\n adds r6, r6, r9"
354"\n adcs r7, r7, r10"
355"\n adc r8, r8, #0"
356"\n add r3, r3, #4"
357"\n sub r4, r4, #4"
358"\n cmp r3, #32"
359"\n beq 3f"
360"\n cmp r3, r5"
361"\n ble 2b"
362"\n 3:"
363"\n str r6, [%[r], r5]"
364"\n mov r6, r7"
365"\n mov r7, r8"
366"\n mov r8, #0"
367"\n add r5, r5, #4"
368"\n cmp r5, #56"
369"\n ble 1b"
370"\n str r6, [%[r], r5]"
371 : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
372 : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
373 );
374 memcpy(r, tmp, sizeof(tmp));
375#else
297 sp_digit rr[15]; /* in case r coincides with a or b */ 376 sp_digit rr[15]; /* in case r coincides with a or b */
298 int i, j, k; 377 int i, j, k;
299 uint64_t acc; 378 uint64_t acc;
@@ -306,19 +385,20 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
306 i = 0; 385 i = 0;
307 j = k - i; 386 j = k - i;
308 acc_hi = 0; 387 acc_hi = 0;
309 while (i != 8 && i <= k) { 388 do {
310 uint64_t m = ((uint64_t)a[i]) * b[j]; 389 uint64_t m = ((uint64_t)a[i]) * b[j];
311 acc += m; 390 acc += m;
312 if (acc < m) 391 if (acc < m)
313 acc_hi++; 392 acc_hi++;
314 j--; 393 j--;
315 i++; 394 i++;
316 } 395 } while (i != 8 && i <= k);
317 rr[k] = acc; 396 rr[k] = acc;
318 acc = (acc >> 32) | ((uint64_t)acc_hi << 32); 397 acc = (acc >> 32) | ((uint64_t)acc_hi << 32);
319 } 398 }
320 r[15] = acc; 399 r[15] = acc;
321 memcpy(r, rr, sizeof(rr)); 400 memcpy(r, rr, sizeof(rr));
401#endif
322} 402}
323 403
324/* Shift number right one bit. Bottom bit is lost. */ 404/* Shift number right one bit. Bottom bit is lost. */