summary refs log tree commit diff
path: root/src/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/libcrypto/arch/i386/Makefile.inc 3
-rw-r--r-- src/lib/libcrypto/bn/asm/bn-586.pl 210
-rw-r--r-- src/lib/libcrypto/bn/bn_local.h 4
-rw-r--r-- src/lib/libcrypto/bn/bn_mul.c 172
4 files changed, 19 insertions, 370 deletions
diff --git a/src/lib/libcrypto/arch/i386/Makefile.inc b/src/lib/libcrypto/arch/i386/Makefile.inc
index 67c22262e6..6134dfdc15 100644
--- a/src/lib/libcrypto/arch/i386/Makefile.inc
+++ b/src/lib/libcrypto/arch/i386/Makefile.inc
@@ -1,4 +1,4 @@
1# $OpenBSD: Makefile.inc,v 1.7 2023/01/14 15:45:43 jsing Exp $ 1# $OpenBSD: Makefile.inc,v 1.8 2023/02/22 05:57:19 jsing Exp $
2 2
3# i386-specific libcrypto build rules 3# i386-specific libcrypto build rules
4 4
@@ -16,7 +16,6 @@ SRCS+= bf_cbc.c
16SSLASM+= bf bf-586 16SSLASM+= bf bf-586
17# bn 17# bn
18CFLAGS+= -DOPENSSL_IA32_SSE2 18CFLAGS+= -DOPENSSL_IA32_SSE2
19CFLAGS+= -DOPENSSL_BN_ASM_PART_WORDS
20SSLASM+= bn bn-586 19SSLASM+= bn bn-586
21SSLASM+= bn co-586 20SSLASM+= bn co-586
22CFLAGS+= -DOPENSSL_BN_ASM_MONT 21CFLAGS+= -DOPENSSL_BN_ASM_MONT
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index b502fe60ee..71b775af8d 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -17,7 +17,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
17&bn_div_words("bn_div_words"); 17&bn_div_words("bn_div_words");
18&bn_add_words("bn_add_words"); 18&bn_add_words("bn_add_words");
19&bn_sub_words("bn_sub_words"); 19&bn_sub_words("bn_sub_words");
20&bn_sub_part_words("bn_sub_part_words");
21 20
22&asm_finish(); 21&asm_finish();
23 22
@@ -566,212 +565,3 @@ sub bn_sub_words
566 565
567 &function_end($name); 566 &function_end($name);
568 } 567 }
569
570sub bn_sub_part_words
571 {
572 local($name)=@_;
573
574 &function_begin($name,"");
575
576 &comment("");
577 $a="esi";
578 $b="edi";
579 $c="eax";
580 $r="ebx";
581 $tmp1="ecx";
582 $tmp2="edx";
583 $num="ebp";
584
585 &mov($r,&wparam(0)); # get r
586 &mov($a,&wparam(1)); # get a
587 &mov($b,&wparam(2)); # get b
588 &mov($num,&wparam(3)); # get num
589 &xor($c,$c); # clear carry
590 &and($num,0xfffffff8); # num / 8
591
592 &jz(&label("aw_finish"));
593
594 &set_label("aw_loop",0);
595 for ($i=0; $i<8; $i++)
596 {
597 &comment("Round $i");
598
599 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
600 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
601 &sub($tmp1,$c);
602 &mov($c,0);
603 &adc($c,$c);
604 &sub($tmp1,$tmp2);
605 &adc($c,0);
606 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
607 }
608
609 &comment("");
610 &add($a,32);
611 &add($b,32);
612 &add($r,32);
613 &sub($num,8);
614 &jnz(&label("aw_loop"));
615
616 &set_label("aw_finish",0);
617 &mov($num,&wparam(3)); # get num
618 &and($num,7);
619 &jz(&label("aw_end"));
620
621 for ($i=0; $i<7; $i++)
622 {
623 &comment("Tail Round $i");
624 &mov($tmp1,&DWP(0,$a,"",0)); # *a
625 &mov($tmp2,&DWP(0,$b,"",0));# *b
626 &sub($tmp1,$c);
627 &mov($c,0);
628 &adc($c,$c);
629 &sub($tmp1,$tmp2);
630 &adc($c,0);
631 &mov(&DWP(0,$r,"",0),$tmp1); # *r
632 &add($a, 4);
633 &add($b, 4);
634 &add($r, 4);
635 &dec($num) if ($i != 6);
636 &jz(&label("aw_end")) if ($i != 6);
637 }
638 &set_label("aw_end",0);
639
640 &cmp(&wparam(4),0);
641 &je(&label("pw_end"));
642
643 &mov($num,&wparam(4)); # get dl
644 &cmp($num,0);
645 &je(&label("pw_end"));
646 &jge(&label("pw_pos"));
647
648 &comment("pw_neg");
649 &mov($tmp2,0);
650 &sub($tmp2,$num);
651 &mov($num,$tmp2);
652 &and($num,0xfffffff8); # num / 8
653 &jz(&label("pw_neg_finish"));
654
655 &set_label("pw_neg_loop",0);
656 for ($i=0; $i<8; $i++)
657 {
658 &comment("dl<0 Round $i");
659
660 &mov($tmp1,0);
661 &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
662 &sub($tmp1,$c);
663 &mov($c,0);
664 &adc($c,$c);
665 &sub($tmp1,$tmp2);
666 &adc($c,0);
667 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
668 }
669
670 &comment("");
671 &add($b,32);
672 &add($r,32);
673 &sub($num,8);
674 &jnz(&label("pw_neg_loop"));
675
676 &set_label("pw_neg_finish",0);
677 &mov($tmp2,&wparam(4)); # get dl
678 &mov($num,0);
679 &sub($num,$tmp2);
680 &and($num,7);
681 &jz(&label("pw_end"));
682
683 for ($i=0; $i<7; $i++)
684 {
685 &comment("dl<0 Tail Round $i");
686 &mov($tmp1,0);
687 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
688 &sub($tmp1,$c);
689 &mov($c,0);
690 &adc($c,$c);
691 &sub($tmp1,$tmp2);
692 &adc($c,0);
693 &dec($num) if ($i != 6);
694 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
695 &jz(&label("pw_end")) if ($i != 6);
696 }
697
698 &jmp(&label("pw_end"));
699
700 &set_label("pw_pos",0);
701
702 &and($num,0xfffffff8); # num / 8
703 &jz(&label("pw_pos_finish"));
704
705 &set_label("pw_pos_loop",0);
706
707 for ($i=0; $i<8; $i++)
708 {
709 &comment("dl>0 Round $i");
710
711 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
712 &sub($tmp1,$c);
713 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
714 &jnc(&label("pw_nc".$i));
715 }
716
717 &comment("");
718 &add($a,32);
719 &add($r,32);
720 &sub($num,8);
721 &jnz(&label("pw_pos_loop"));
722
723 &set_label("pw_pos_finish",0);
724 &mov($num,&wparam(4)); # get dl
725 &and($num,7);
726 &jz(&label("pw_end"));
727
728 for ($i=0; $i<7; $i++)
729 {
730 &comment("dl>0 Tail Round $i");
731 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
732 &sub($tmp1,$c);
733 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
734 &jnc(&label("pw_tail_nc".$i));
735 &dec($num) if ($i != 6);
736 &jz(&label("pw_end")) if ($i != 6);
737 }
738 &mov($c,1);
739 &jmp(&label("pw_end"));
740
741 &set_label("pw_nc_loop",0);
742 for ($i=0; $i<8; $i++)
743 {
744 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
745 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
746 &set_label("pw_nc".$i,0);
747 }
748
749 &comment("");
750 &add($a,32);
751 &add($r,32);
752 &sub($num,8);
753 &jnz(&label("pw_nc_loop"));
754
755 &mov($num,&wparam(4)); # get dl
756 &and($num,7);
757 &jz(&label("pw_nc_end"));
758
759 for ($i=0; $i<7; $i++)
760 {
761 &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
762 &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
763 &set_label("pw_tail_nc".$i,0);
764 &dec($num) if ($i != 6);
765 &jz(&label("pw_nc_end")) if ($i != 6);
766 }
767
768 &set_label("pw_nc_end",0);
769 &mov($c,0);
770
771 &set_label("pw_end",0);
772
773# &mov("eax",$c); # $c is "eax"
774
775 &function_end($name);
776 }
777
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
index 3e37238c5e..4576c36c91 100644
--- a/src/lib/libcrypto/bn/bn_local.h
+++ b/src/lib/libcrypto/bn/bn_local.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_local.h,v 1.16 2023/02/22 05:46:37 jsing Exp $ */ 1/* $OpenBSD: bn_local.h,v 1.17 2023/02/22 05:57:19 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -264,8 +264,6 @@ void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
264void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, 264void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
265 int n, int tna, int tnb, BN_ULONG *t); 265 int n, int tna, int tnb, BN_ULONG *t);
266void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t); 266void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
267BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
268 int cl, int dl);
269int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, 267int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
270 const BN_ULONG *np, const BN_ULONG *n0, int num); 268 const BN_ULONG *np, const BN_ULONG *n0, int num);
271 269
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index 1d56e57b76..5e270b988f 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_mul.c,v 1.33 2023/02/15 18:10:16 jsing Exp $ */ 1/* $OpenBSD: bn_mul.c,v 1.34 2023/02/22 05:57:19 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -269,144 +269,6 @@ bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
269} 269}
270#endif 270#endif
271 271
272#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
273/*
274 * Here follows a specialised variant of bn_sub_words(), which has the property of
275 * performing operations on arrays of different sizes. The sizes of those arrays
276 * are expressed through cl, which is the common length (basically,
277 * min(len(a),len(b))), and dl, which is the delta between the two lengths,
278 * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the
279 * operations that require a result array as parameter, it must have the length
280 * cl+abs(dl).
    *
    * The first cl words of r are produced by bn_sub_words(r, a, b, cl). The
    * remaining abs(dl) words are then filled in as follows:
    *   dl < 0: r[i] = 0 - b[i] - borrow (the missing words of a count as zero);
    *   dl > 0: r[i] = a[i] - borrow (the missing words of b count as zero).
    *
    * Returns the borrow left after the last word processed. cl must not be
    * negative (asserted).
281 */
282BN_ULONG
283bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
284 int dl)
285{
286 BN_ULONG c, t;
287
288 assert(cl >= 0);
    /* Common part; c is whatever bn_sub_words() reports as the borrow. */
289 c = bn_sub_words(r, a, b, cl);
290
291 if (dl == 0)
292 return c;
293
    /* Step all three arrays past the common words. */
294 r += cl;
295 a += cl;
296 b += cl;
297
298 if (dl < 0) {
    /*
     * b is longer: negate its extra words with borrow. Unrolled four
     * words per pass; dl counts up towards zero. Once a non-zero word
     * of b has been seen, the borrow stays set for the rest.
     */
299 for (;;) {
300 t = b[0];
301 r[0] = (0 - t - c) & BN_MASK2;
302 if (t != 0)
303 c = 1;
304 if (++dl >= 0)
305 break;
306
307 t = b[1];
308 r[1] = (0 - t - c) & BN_MASK2;
309 if (t != 0)
310 c = 1;
311 if (++dl >= 0)
312 break;
313
314 t = b[2];
315 r[2] = (0 - t - c) & BN_MASK2;
316 if (t != 0)
317 c = 1;
318 if (++dl >= 0)
319 break;
320
321 t = b[3];
322 r[3] = (0 - t - c) & BN_MASK2;
323 if (t != 0)
324 c = 1;
325 if (++dl >= 0)
326 break;
327
328 b += 4;
329 r += 4;
330 }
331 } else {
    /*
     * a is longer: propagate the borrow through its extra words.
     * save_dl records dl as of the last completed group of four.
     */
332 int save_dl = dl;
    /*
     * The borrow clears at the first non-zero word of a. The loop
     * condition is only re-tested per group of four, so the rest of
     * that group is still written here with c == 0, i.e. copied.
     */
333 while (c) {
334 t = a[0];
335 r[0] = (t - c) & BN_MASK2;
336 if (t != 0)
337 c = 0;
338 if (--dl <= 0)
339 break;
340
341 t = a[1];
342 r[1] = (t - c) & BN_MASK2;
343 if (t != 0)
344 c = 0;
345 if (--dl <= 0)
346 break;
347
348 t = a[2];
349 r[2] = (t - c) & BN_MASK2;
350 if (t != 0)
351 c = 0;
352 if (--dl <= 0)
353 break;
354
355 t = a[3];
356 r[3] = (t - c) & BN_MASK2;
357 if (t != 0)
358 c = 0;
359 if (--dl <= 0)
360 break;
361
362 save_dl = dl;
363 a += 4;
364 r += 4;
365 }
    /*
     * NOTE(review): this block looks unreachable. The loop above is
     * left either through its condition, in which case save_dl == dl,
     * or through a break, in which case dl <= 0. So dl > 0 and
     * save_dl > dl cannot both hold -- confirm before relying on it.
     * The case labels fall through on purpose; each break only
     * leaves the switch.
     */
366 if (dl > 0) {
367 if (save_dl > dl) {
368 switch (save_dl - dl) {
369 case 1:
370 r[1] = a[1];
371 if (--dl <= 0)
372 break;
373 case 2:
374 r[2] = a[2];
375 if (--dl <= 0)
376 break;
377 case 3:
378 r[3] = a[3];
379 if (--dl <= 0)
380 break;
381 }
382 a += 4;
383 r += 4;
384 }
385 }
    /* Borrow is gone: copy the remaining words of a, four per pass. */
386 if (dl > 0) {
387 for (;;) {
388 r[0] = a[0];
389 if (--dl <= 0)
390 break;
391 r[1] = a[1];
392 if (--dl <= 0)
393 break;
394 r[2] = a[2];
395 if (--dl <= 0)
396 break;
397 r[3] = a[3];
398 if (--dl <= 0)
399 break;
400
401 a += 4;
402 r += 4;
403 }
404 }
405 }
406 return c;
407}
408#endif
409
410void 272void
411bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) 273bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
412{ 274{
@@ -504,15 +366,15 @@ bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
504 zero = neg = 0; 366 zero = neg = 0;
505 switch (c1 * 3 + c2) { 367 switch (c1 * 3 + c2) {
506 case -4: 368 case -4:
507 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 369 bn_sub(t, n, &a[n], tna, a, n); /* - */
508 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 370 bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
509 break; 371 break;
510 case -3: 372 case -3:
511 zero = 1; 373 zero = 1;
512 break; 374 break;
513 case -2: 375 case -2:
514 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 376 bn_sub(t, n, &a[n], tna, a, n); /* - */
515 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 377 bn_sub(&t[n], n, &b[n], tnb, b, n); /* + */
516 neg = 1; 378 neg = 1;
517 break; 379 break;
518 case -1: 380 case -1:
@@ -521,16 +383,16 @@ bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
521 zero = 1; 383 zero = 1;
522 break; 384 break;
523 case 2: 385 case 2:
524 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 386 bn_sub(t, n, a, n, &a[n], tna); /* + */
525 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 387 bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
526 neg = 1; 388 neg = 1;
527 break; 389 break;
528 case 3: 390 case 3:
529 zero = 1; 391 zero = 1;
530 break; 392 break;
531 case 4: 393 case 4:
532 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 394 bn_sub(t, n, a, n, &a[n], tna);
533 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 395 bn_sub(&t[n], n, &b[n], tnb, b, n);
534 break; 396 break;
535 } 397 }
536 398
@@ -630,14 +492,14 @@ bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
630 neg = 0; 492 neg = 0;
631 switch (c1 * 3 + c2) { 493 switch (c1 * 3 + c2) {
632 case -4: 494 case -4:
633 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 495 bn_sub(t, n, &a[n], tna, a, n); /* - */
634 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 496 bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
635 break; 497 break;
636 case -3: 498 case -3:
637 /* break; */ 499 /* break; */
638 case -2: 500 case -2:
639 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 501 bn_sub(t, n, &a[n], tna, a, n); /* - */
640 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 502 bn_sub(&t[n], n, &b[n], tnb, b, n); /* + */
641 neg = 1; 503 neg = 1;
642 break; 504 break;
643 case -1: 505 case -1:
@@ -645,15 +507,15 @@ bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
645 case 1: 507 case 1:
646 /* break; */ 508 /* break; */
647 case 2: 509 case 2:
648 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 510 bn_sub(t, n, a, n, &a[n], tna); /* + */
649 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 511 bn_sub(&t[n], n, b, n, &b[n], tnb); /* - */
650 neg = 1; 512 neg = 1;
651 break; 513 break;
652 case 3: 514 case 3:
653 /* break; */ 515 /* break; */
654 case 4: 516 case 4:
655 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 517 bn_sub(t, n, a, n, &a[n], tna);
656 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 518 bn_sub(&t[n], n, &b[n], tnb, b, n);
657 break; 519 break;
658 } 520 }
659 /* The zero case isn't yet implemented here. The speedup 521 /* The zero case isn't yet implemented here. The speedup