Less S390.

ok deraadt@
author: jsing <> 2016-09-04 14:31:29 +0000
committer: jsing <> 2016-09-04 14:31:29 +0000
commit: e38c58272a121e2bc9a785ec4001bbc802d68f66 (patch)
tree: 492fd2a4355d8592de425463d194374bdc85aa0a /src/lib/libcrypto/bn
parent: a9cbed3be03a99c87e2b07b16b511e65a90bf800 (diff)
download: openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.tar.gz
openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.tar.bz2
openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.zip
3 files changed, 0 insertions, 1176 deletions
diff --git a/src/lib/libcrypto/bn/asm/s390x-gf2m.pl b/src/lib/libcrypto/bn/asm/s390x-gf2m.pl
deleted file mode 100644
index cd9f13eca2..0000000000
--- a/src/lib/libcrypto/bn/asm/s390x-gf2m.pl
+++ /dev/null
@@ -1,221 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# May 2011
-#
-# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
-# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
-# the time being... gcc 4.3 appeared to generate poor code, therefore
-# the effort. And indeed, the module delivers 55%-90%(*) improvement
-# on heaviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
-# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
-# This is for 64-bit build. In 32-bit "highgprs" case improvement is
-# even higher, for example on z990 it was measured 80%-150%. ECDSA
-# sign is modest 9%-12% faster. Keep in mind that these coefficients
-# are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is
-# burnt in it...
-#
-# (*)   gcc 4.1 was observed to deliver better results than gcc 4.3,
-#       so that improvement coefficients can vary from one specific
-#       setup to another.
-$flavour = shift;                      # first command-line argument selects the build flavour
-if ($flavour =~ /3[12]/) {             # flavour mentions 31 or 32: 4-byte size_t, plain st/stm/lm mnemonics
-        $SIZE_T=4;
-        $g="";
-} else {                               # anything else: 8-byte size_t, "g" suffix gives stg/stmg/lmg
-        $SIZE_T=8;
-        $g="g";
-}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}    # skip arguments until one looks like name.ext
-open STDOUT,">$output";                # everything printed later goes to that file
-$stdframe=16*$SIZE_T+4*8;              # stack offset at which _mul_1x1 keeps its 16-entry tab[]
-$rp="%r2";                             # result pointer used by the stores in bn_GF2m_mul_2x2
-$a1="%r3";                             # note: $a1 is reassigned a few lines below
-$a0="%r4";
-$b1="%r5";
-$b0="%r6";
-$ra="%r14";                            # return-address register (bras/br)
-$sp="%r15";                            # stack pointer
-@T=("%r0","%r1");                      # scratch pair, rotated between unrolled steps
-@i=("%r12","%r13");                    # table-index pair, rotated between unrolled steps
-($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11));       # _mul_1x1 work registers %r6..%r11 (overwrites $a1 above)
-($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8;    # result pair %r3/%r4, operand b in %r5; $a aliases $lo, $mask aliases $a8
-$code.=<<___;
-.text
-.type   _mul_1x1,\@function
-.align  16
-_mul_1x1:
-        lgr     $a1,$a
-        sllg    $a2,$a,1
-        sllg    $a4,$a,2
-        sllg    $a8,$a,3
-        srag    $lo,$a1,63                      # broadcast 63rd bit
-        nihh    $a1,0x1fff
-        srag    @i[0],$a2,63                    # broadcast 62nd bit
-        nihh    $a2,0x3fff
-        srag    @i[1],$a4,63                    # broadcast 61st bit
-        nihh    $a4,0x7fff
-        ngr     $lo,$b
-        ngr     @i[0],$b
-        ngr     @i[1],$b
-        lghi    @T[0],0
-        lgr     $a12,$a1
-        stg     @T[0],`$stdframe+0*8`($sp)      # tab[0]=0
-        xgr     $a12,$a2
-        stg     $a1,`$stdframe+1*8`($sp)        # tab[1]=a1
-         lgr    $a48,$a4
-        stg     $a2,`$stdframe+2*8`($sp)        # tab[2]=a2
-         xgr    $a48,$a8
-        stg     $a12,`$stdframe+3*8`($sp)       # tab[3]=a1^a2
-         xgr    $a1,$a4
-        stg     $a4,`$stdframe+4*8`($sp)        # tab[4]=a4
-        xgr     $a2,$a4
-        stg     $a1,`$stdframe+5*8`($sp)        # tab[5]=a1^a4
-        xgr     $a12,$a4
-        stg     $a2,`$stdframe+6*8`($sp)        # tab[6]=a2^a4
-         xgr    $a1,$a48
-        stg     $a12,`$stdframe+7*8`($sp)       # tab[7]=a1^a2^a4
-         xgr    $a2,$a48
-        stg     $a8,`$stdframe+8*8`($sp)        # tab[8]=a8
-        xgr     $a12,$a48
-        stg     $a1,`$stdframe+9*8`($sp)        # tab[9]=a1^a8
-         xgr    $a1,$a4
-        stg     $a2,`$stdframe+10*8`($sp)       # tab[10]=a2^a8
-         xgr    $a2,$a4
-        stg     $a12,`$stdframe+11*8`($sp)      # tab[11]=a1^a2^a8
-        xgr     $a12,$a4
-        stg     $a48,`$stdframe+12*8`($sp)      # tab[12]=a4^a8
-         srlg   $hi,$lo,1
-        stg     $a1,`$stdframe+13*8`($sp)       # tab[13]=a1^a4^a8
-         sllg   $lo,$lo,63
-        stg     $a2,`$stdframe+14*8`($sp)       # tab[14]=a2^a4^a8
-         srlg   @T[0],@i[0],2
-        stg     $a12,`$stdframe+15*8`($sp)      # tab[15]=a1^a2^a4^a8
-        lghi    $mask,`0xf<<3`
-        sllg    $a1,@i[0],62
-         sllg   @i[0],$b,3
-        srlg    @T[1],@i[1],3
-         ngr    @i[0],$mask
-        sllg    $a2,@i[1],61
-         srlg   @i[1],$b,4-3
-        xgr     $hi,@T[0]
-         ngr    @i[1],$mask
-        xgr     $lo,$a1
-        xgr     $hi,@T[1]
-        xgr     $lo,$a2
-        xg      $lo,$stdframe(@i[0],$sp)
-        srlg    @i[0],$b,8-3
-        ngr     @i[0],$mask
-___
-for($n=1;$n<14;$n++) {         # emit 13 unrolled steps ($n=1..13); $n is left at 14 and reused by the tail emitted after this loop
-$code.=<<___;                  # one step: fetch a table entry, shift it by 4*$n bits and xor both halves into $lo/$hi
-        lg      @T[1],$stdframe(@i[1],$sp)
-        srlg    @i[1],$b,`($n+2)*4`-3
-        sllg    @T[0],@T[1],`$n*4`
-        ngr     @i[1],$mask
-        srlg    @T[1],@T[1],`64-$n*4`
-        xgr     $lo,@T[0]
-        xgr     $hi,@T[1]
-___
-        push(@i,shift(@i)); push(@T,shift(@T));        # rotate both pairs so consecutive steps alternate registers
-}
-$code.=<<___;
-        lg      @T[1],$stdframe(@i[1],$sp)
-        sllg    @T[0],@T[1],`$n*4`
-        srlg    @T[1],@T[1],`64-$n*4`
-        xgr     $lo,@T[0]
-        xgr     $hi,@T[1]
-        lg      @T[0],$stdframe(@i[0],$sp)
-        sllg    @T[1],@T[0],`($n+1)*4`
-        srlg    @T[0],@T[0],`64-($n+1)*4`
-        xgr     $lo,@T[1]
-        xgr     $hi,@T[0]
-        br      $ra
-.size   _mul_1x1,.-_mul_1x1
-.globl  bn_GF2m_mul_2x2
-.type   bn_GF2m_mul_2x2,\@function
-.align  16
-bn_GF2m_mul_2x2:
-        stm${g} %r3,%r15,3*$SIZE_T($sp)
-        lghi    %r1,-$stdframe-128
-        la      %r0,0($sp)
-        la      $sp,0(%r1,$sp)                  # alloca
-        st${g}  %r0,0($sp)                      # back chain
-___
-if ($SIZE_T==8) {                      # 64-bit flavour: three 64x64 carry-less products combined Karatsuba-style
-my @r=map("%r$_",(6..9));              # receive the four words already stored at rp
-$code.=<<___;
-        bras    $ra,_mul_1x1                    # a1*b1
-        stmg    $lo,$hi,16($rp)                 # keep it in result words 2 and 3
-        lg      $a,`$stdframe+128+4*$SIZE_T`($sp)       # saved %r4
-        lg      $b,`$stdframe+128+6*$SIZE_T`($sp)       # saved %r6
-        bras    $ra,_mul_1x1                    # a0*b0
-        stmg    $lo,$hi,0($rp)                  # keep it in result words 0 and 1
-        lg      $a,`$stdframe+128+3*$SIZE_T`($sp)       # saved %r3
-        lg      $b,`$stdframe+128+5*$SIZE_T`($sp)       # saved %r5
-        xg      $a,`$stdframe+128+4*$SIZE_T`($sp)       # xor with saved %r4
-        xg      $b,`$stdframe+128+6*$SIZE_T`($sp)       # xor with saved %r6
-        bras    $ra,_mul_1x1                    # (a0+a1)*(b0+b1)
-        lmg     @r[0],@r[3],0($rp)              # reload both stored products
-        xgr     $lo,$hi                         # the xor chain below folds the three
-        xgr     $hi,@r[1]                       # products into the two middle words
-        xgr     $lo,@r[0]
-        xgr     $hi,@r[2]
-        xgr     $lo,@r[3]
-        xgr     $hi,@r[3]
-        xgr     $lo,$hi
-        stg     $hi,16($rp)                     # result word 2
-        stg     $lo,8($rp)                      # result word 1
-___
-} else {                               # 31/32-bit flavour: pack the halves and do a single 64x64 product
-$code.=<<___;
-        sllg    %r3,%r3,32                      # move the first half into the upper 32 bits
-        sllg    %r5,%r5,32
-        or      %r3,%r4                         # merge the second half into the lower 32 bits
-        or      %r5,%r6
-        bras    $ra,_mul_1x1
-        rllg    $lo,$lo,32                      # swap 32-bit halves before storing
-        rllg    $hi,$hi,32
-        stmg    $lo,$hi,0($rp)
-___
-}
-$code.=<<___;
-        lm${g}  %r6,%r15,`$stdframe+128+6*$SIZE_T`($sp)
-        br      $ra
-.size   bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
-.string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-___
-$code =~ s/\`([^\`]*)\`/eval($1)/gem;  # replace every backtick-quoted expression in the generated text with its value
-print $code;                           # write the finished assembly to STDOUT, reopened onto $output earlier
-close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/s390x-mont.pl b/src/lib/libcrypto/bn/asm/s390x-mont.pl
deleted file mode 100644
index 9fd64e81ee..0000000000
--- a/src/lib/libcrypto/bn/asm/s390x-mont.pl
+++ /dev/null
@@ -1,277 +0,0 @@
-#!/usr/bin/env perl
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-# April 2007.
-#
-# Performance improvement over vanilla C code varies from 85% to 45%
-# depending on key length and benchmark. Unfortunately in this context
-# these are not very impressive results [for code that utilizes "wide"
-# 64x64=128-bit multiplication, which is not commonly available to C
-# programmers], at least hand-coded bn_asm.c replacement is known to
-# provide 30-40% better results for longest keys. Well, on a second
-# thought it's not very surprising, because z-CPUs are single-issue
-# and _strictly_ in-order execution, while bn_mul_mont is more or less
-# dependent on CPU ability to pipe-line instructions and have several
-# of them "in-flight" at the same time. I mean while other methods,
-# for example Karatsuba, aim to minimize amount of multiplications at
-# the cost of other operations increase, bn_mul_mont aim to neatly
-# "overlap" multiplications and the other operations [and on most
-# platforms even minimize the amount of the other operations, in
-# particular references to memory]. But it's possible to improve this
-# module performance by implementing dedicated squaring code-path and
-# possibly by unrolling loops...
-# January 2009.
-#
-# Reschedule to minimize/avoid Address Generation Interlock hazard,
-# make inner loops counter-based.
-# November 2010.
-#
-# Adapt for -m31 build. If kernel supports what's called "highgprs"
-# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
-# instructions and achieve "64-bit" performance even in 31-bit legacy
-# application context. The feature is not specific to any particular
-# processor, as long as it's "z-CPU". Latter implies that the code
-# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
-# is achieved by swapping words after 64-bit loads, follow _dswap-s.
-# On z990 it was measured to perform 2.6-2.2 times better than
-# compiler-generated code, less for longer keys...
-$flavour = shift;                      # first command-line argument selects the build flavour
-if ($flavour =~ /3[12]/) {             # flavour mentions 31 or 32: 4-byte size_t, plain st/stm/lm/cl mnemonics
-        $SIZE_T=4;
-        $g="";
-} else {                               # anything else: 8-byte size_t, "g" suffix gives stg/stmg/lmg/clg
-        $SIZE_T=8;
-        $g="g";
-}
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}    # skip arguments until one looks like name.ext
-open STDOUT,">$output";                # everything printed later goes to that file
-$stdframe=16*$SIZE_T+4*8;              # stack offset of the tp[] scratch vector
-$mn0="%r0";                            # "tp[0]"*n0, the multiplier applied to np[] in each pass
-$num="%r1";                            # num, pulled from the caller's stack and scaled to bytes
-# int bn_mul_mont(
-$rp="%r2";              # BN_ULONG *rp,
-$ap="%r3";              # const BN_ULONG *ap,
-$bp="%r4";              # const BN_ULONG *bp,
-$np="%r5";              # const BN_ULONG *np,
-$n0="%r6";              # const BN_ULONG *n0,
-#$num="160(%r15)"       # int num);
-$bi="%r2";      # zaps rp
-$j="%r7";                              # byte index into ap/np/tp
-$ahi="%r8";                            # high word of the ap[j]*bp[i] product pair
-$alo="%r9";                            # low word of the ap[j]*bp[i] product pair
-$nhi="%r10";                           # high word of the np[j]*m1 product pair
-$nlo="%r11";                           # low word of the np[j]*m1 product pair
-$AHI="%r12";                           # carry word between ap[] products
-$NHI="%r13";                           # carry word between np[] products
-$count="%r14";                         # brct loop counter; %r14 is reloaded by the final lm/lmg
-$sp="%r15";                            # stack pointer
-$code.=<<___;
-.text
-.globl  bn_mul_mont
-.type   bn_mul_mont,\@function
-bn_mul_mont:
-        lgf     $num,`$stdframe+$SIZE_T-4`($sp) # pull $num
-        sla     $num,`log($SIZE_T)/log(2)`      # $num to enumerate bytes
-        la      $bp,0($num,$bp)
-        st${g}  %r2,2*$SIZE_T($sp)
-        cghi    $num,16         #
-        lghi    %r2,0           #
-        blr     %r14            # if($num<16) return 0;
-___
-$code.=<<___ if ($flavour =~ /3[12]/); # emitted for the 31/32-bit flavour only
-        tmll    $num,4
-        bnzr    %r14            # if ($num&1) return 0;
-___
-$code.=<<___ if ($flavour !~ /3[12]/); # emitted for every other flavour only
-        cghi    $num,96         #
-        bhr     %r14            # if($num>96) return 0;
-___
-$code.=<<___;
-        stm${g} %r3,%r15,3*$SIZE_T($sp)
-        lghi    $rp,-$stdframe-8        # leave room for carry bit
-        lcgr    $j,$num         # -$num
-        lgr     %r0,$sp
-        la      $rp,0($rp,$sp)
-        la      $sp,0($j,$rp)   # alloca
-        st${g}  %r0,0($sp)      # back chain
-        sra     $num,3          # restore $num
-        la      $bp,0($j,$bp)   # restore $bp
-        ahi     $num,-1         # adjust $num for inner loop
-        lg      $n0,0($n0)      # pull n0
-        _dswap  $n0
-        lg      $bi,0($bp)
-        _dswap  $bi
-        lg      $alo,0($ap)
-        _dswap  $alo
-        mlgr    $ahi,$bi        # ap[0]*bp[0]
-        lgr     $AHI,$ahi
-        lgr     $mn0,$alo       # "tp[0]"*n0
-        msgr    $mn0,$n0
-        lg      $nlo,0($np)     #
-        _dswap  $nlo
-        mlgr    $nhi,$mn0       # np[0]*m1
-        algr    $nlo,$alo       # +="tp[0]"
-        lghi    $NHI,0
-        alcgr   $NHI,$nhi
-        la      $j,8(%r0)       # j=1
-        lr      $count,$num
-.align  16
-.L1st:
-        lg      $alo,0($j,$ap)
-        _dswap  $alo
-        mlgr    $ahi,$bi        # ap[j]*bp[0]
-        algr    $alo,$AHI
-        lghi    $AHI,0
-        alcgr   $AHI,$ahi
-        lg      $nlo,0($j,$np)
-        _dswap  $nlo
-        mlgr    $nhi,$mn0       # np[j]*m1
-        algr    $nlo,$NHI
-        lghi    $NHI,0
-        alcgr   $nhi,$NHI       # +="tp[j]"
-        algr    $nlo,$alo
-        alcgr   $NHI,$nhi
-        stg     $nlo,$stdframe-8($j,$sp)        # tp[j-1]=
-        la      $j,8($j)        # j++
-        brct    $count,.L1st
-        algr    $NHI,$AHI
-        lghi    $AHI,0
-        alcgr   $AHI,$AHI       # upmost overflow bit
-        stg     $NHI,$stdframe-8($j,$sp)
-        stg     $AHI,$stdframe($j,$sp)
-        la      $bp,8($bp)      # bp++
-.Louter:
-        lg      $bi,0($bp)      # bp[i]
-        _dswap  $bi
-        lg      $alo,0($ap)
-        _dswap  $alo
-        mlgr    $ahi,$bi        # ap[0]*bp[i]
-        alg     $alo,$stdframe($sp)     # +=tp[0]
-        lghi    $AHI,0
-        alcgr   $AHI,$ahi
-        lgr     $mn0,$alo
-        msgr    $mn0,$n0        # tp[0]*n0
-        lg      $nlo,0($np)     # np[0]
-        _dswap  $nlo
-        mlgr    $nhi,$mn0       # np[0]*m1
-        algr    $nlo,$alo       # +="tp[0]"
-        lghi    $NHI,0
-        alcgr   $NHI,$nhi
-        la      $j,8(%r0)       # j=1
-        lr      $count,$num
-.align  16
-.Linner:
-        lg      $alo,0($j,$ap)
-        _dswap  $alo
-        mlgr    $ahi,$bi        # ap[j]*bp[i]
-        algr    $alo,$AHI
-        lghi    $AHI,0
-        alcgr   $ahi,$AHI
-        alg     $alo,$stdframe($j,$sp)# +=tp[j]
-        alcgr   $AHI,$ahi
-        lg      $nlo,0($j,$np)
-        _dswap  $nlo
-        mlgr    $nhi,$mn0       # np[j]*m1
-        algr    $nlo,$NHI
-        lghi    $NHI,0
-        alcgr   $nhi,$NHI
-        algr    $nlo,$alo       # +="tp[j]"
-        alcgr   $NHI,$nhi
-        stg     $nlo,$stdframe-8($j,$sp)        # tp[j-1]=
-        la      $j,8($j)        # j++
-        brct    $count,.Linner
-        algr    $NHI,$AHI
-        lghi    $AHI,0
-        alcgr   $AHI,$AHI
-        alg     $NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit
-        lghi    $ahi,0
-        alcgr   $AHI,$ahi       # new upmost overflow bit
-        stg     $NHI,$stdframe-8($j,$sp)
-        stg     $AHI,$stdframe($j,$sp)
-        la      $bp,8($bp)      # bp++
-        cl${g}  $bp,`$stdframe+8+4*$SIZE_T`($j,$sp)     # compare to &bp[num]
-        jne     .Louter
-        l${g}   $rp,`$stdframe+8+2*$SIZE_T`($j,$sp)     # reincarnate rp
-        la      $ap,$stdframe($sp)
-        ahi     $num,1          # restore $num, incidentally clears "borrow"
-        la      $j,0(%r0)
-        lr      $count,$num
-.Lsub:  lg      $alo,0($j,$ap)
-        lg      $nlo,0($j,$np)
-        _dswap  $nlo
-        slbgr   $alo,$nlo
-        stg     $alo,0($j,$rp)
-        la      $j,8($j)
-        brct    $count,.Lsub
-        lghi    $ahi,0
-        slbgr   $AHI,$ahi       # handle upmost carry
-        ngr     $ap,$AHI
-        lghi    $np,-1
-        xgr     $np,$AHI
-        ngr     $np,$rp
-        ogr     $ap,$np         # ap=borrow?tp:rp
-        la      $j,0(%r0)
-        lgr     $count,$num
-.Lcopy: lg      $alo,0($j,$ap)          # copy or in-place refresh
-        _dswap  $alo
-        stg     $j,$stdframe($j,$sp)    # zap tp
-        stg     $alo,0($j,$rp)
-        la      $j,8($j)
-        brct    $count,.Lcopy
-        la      %r1,`$stdframe+8+6*$SIZE_T`($j,$sp)
-        lm${g}  %r6,%r15,0(%r1)
-        lghi    %r2,1           # signal "processed"
-        br      %r14
-.size   bn_mul_mont,.-bn_mul_mont
-.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-___
-for my $line (split("\n",$code)) {             # post-process and print the generated text line by line
-        $line =~ s/\`([^\`]*)\`/eval $1/ge;    # replace each backtick-quoted expression with its value
-        $line =~ s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e;      # _dswap becomes a 32-bit rotate when SIZE_T is 4; presumably an empty replacement otherwise
-        print $line,"\n";
-}
-close STDOUT;
diff --git a/src/lib/libcrypto/bn/asm/s390x.S b/src/lib/libcrypto/bn/asm/s390x.S
deleted file mode 100755
index 43fcb79bc0..0000000000
--- a/src/lib/libcrypto/bn/asm/s390x.S
+++ /dev/null
@@ -1,678 +0,0 @@
-.ident "s390x.S, version 1.1"
-// ====================================================================
-// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-// project.
-//
-// Rights for redistribution and usage in source and binary forms are
-// granted according to the OpenSSL license. Warranty of any kind is
-// disclaimed.
-// ====================================================================
-.text
-#define zero    %r0
-// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
-.globl  bn_mul_add_words        // rp[i] += ap[i]*w for i<len (r2=rp, r3=ap, r4=len, r5=w)
-.type   bn_mul_add_words,@function      // returns the final carry word in %r2, or 0 when len<=0
-.align  4
-bn_mul_add_words:
-        lghi    zero,0          // zero = 0
-        la      %r1,0(%r2)      // put rp aside
-        lghi    %r2,0           // i=0;
-        ltgfr   %r4,%r4         // sign-extend len to 64 bits and test it
-        bler    %r14            // if (len<=0) return 0;
-        stmg    %r6,%r10,48(%r15)       // save %r6-%r10; reloaded at .Lend_madd
-        lghi    %r10,3          // mask for len%4
-        lghi    %r8,0           // carry = 0
-        nr      %r10,%r4        // len%4
-        sra     %r4,2           // cnt=len/4
-        jz      .Loop1_madd     // carry is incidentally cleared if branch taken
-        algr    zero,zero       // clear carry
-.Loop4_madd:
-        lg      %r7,0(%r2,%r3)  // ap[i]
-        mlgr    %r6,%r5         // *=w
-        alcgr   %r7,%r8         // +=carry
-        alcgr   %r6,zero        // fold the carry bit into the high word
-        alg     %r7,0(%r2,%r1)  // +=rp[i]
-        stg     %r7,0(%r2,%r1)  // rp[i]=
-        lg      %r9,8(%r2,%r3)  // ap[i+1]; product pair is %r8:%r9
-        mlgr    %r8,%r5
-        alcgr   %r9,%r6         // += previous high word and pending carry bit
-        alcgr   %r8,zero
-        alg     %r9,8(%r2,%r1)
-        stg     %r9,8(%r2,%r1)
-        lg      %r7,16(%r2,%r3) // ap[i+2]; product pair is %r6:%r7
-        mlgr    %r6,%r5
-        alcgr   %r7,%r8
-        alcgr   %r6,zero
-        alg     %r7,16(%r2,%r1)
-        stg     %r7,16(%r2,%r1)
-        lg      %r9,24(%r2,%r3) // ap[i+3]; product pair is %r8:%r9
-        mlgr    %r8,%r5
-        alcgr   %r9,%r6
-        alcgr   %r8,zero
-        alg     %r9,24(%r2,%r1)
-        stg     %r9,24(%r2,%r1)
-        la      %r2,32(%r2)     // i+=4
-        brct    %r4,.Loop4_madd
-        la      %r10,1(%r10)            // see if len%4 is zero ...
-        brct    %r10,.Loop1_madd        // without touching condition code:-)
-.Lend_madd:
-        alcgr   %r8,zero        // collect carry bit
-        lgr     %r2,%r8         // return value = carry word
-        lmg     %r6,%r10,48(%r15)       // restore %r6-%r10
-        br      %r14
-.Loop1_madd:
-        lg      %r7,0(%r2,%r3)  // ap[i]
-        mlgr    %r6,%r5         // *=w
-        alcgr   %r7,%r8         // +=carry
-        alcgr   %r6,zero        // fold the carry bit into the high word
-        alg     %r7,0(%r2,%r1)  // +=rp[i]
-        stg     %r7,0(%r2,%r1)  // rp[i]=
-        lgr     %r8,%r6         // high word is the carry into the next limb
-        la      %r2,8(%r2)      // i++
-        brct    %r10,.Loop1_madd
-        j       .Lend_madd
-.size   bn_mul_add_words,.-bn_mul_add_words
-// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
-.globl  bn_mul_words            // rp[i] = ap[i]*w + carry for i<len (r2=rp, r3=ap, r4=len, r5=w)
-.type   bn_mul_words,@function  // returns the final carry word in %r2, or 0 when len<=0
-.align  4
-bn_mul_words:
-        lghi    zero,0          // zero = 0
-        la      %r1,0(%r2)      // put rp aside
-        lghi    %r2,0           // i=0;
-        ltgfr   %r4,%r4         // sign-extend len to 64 bits and test it
-        bler    %r14            // if (len<=0) return 0;
-        stmg    %r6,%r10,48(%r15)       // save %r6-%r10; reloaded at .Lend_mul
-        lghi    %r10,3          // mask for len%4
-        lghi    %r8,0           // carry = 0
-        nr      %r10,%r4        // len%4
-        sra     %r4,2           // cnt=len/4
-        jz      .Loop1_mul      // carry is incidentally cleared if branch taken
-        algr    zero,zero       // clear carry
-.Loop4_mul:
-        lg      %r7,0(%r2,%r3)  // ap[i]
-        mlgr    %r6,%r5         // *=w
-        alcgr   %r7,%r8         // +=carry
-        stg     %r7,0(%r2,%r1)  // rp[i]=
-        lg      %r9,8(%r2,%r3)  // ap[i+1]; product pair is %r8:%r9
-        mlgr    %r8,%r5
-        alcgr   %r9,%r6         // += previous high word and pending carry bit
-        stg     %r9,8(%r2,%r1)
-        lg      %r7,16(%r2,%r3) // ap[i+2]; product pair is %r6:%r7
-        mlgr    %r6,%r5
-        alcgr   %r7,%r8
-        stg     %r7,16(%r2,%r1)
-        lg      %r9,24(%r2,%r3) // ap[i+3]; product pair is %r8:%r9
-        mlgr    %r8,%r5
-        alcgr   %r9,%r6
-        stg     %r9,24(%r2,%r1)
-        la      %r2,32(%r2)     // i+=4
-        brct    %r4,.Loop4_mul
-        la      %r10,1(%r10)            // see if len%4 is zero ...
-        brct    %r10,.Loop1_mul         // without touching condition code:-)
-.Lend_mul:
-        alcgr   %r8,zero        // collect carry bit
-        lgr     %r2,%r8         // return value = carry word
-        lmg     %r6,%r10,48(%r15)       // restore %r6-%r10
-        br      %r14
-.Loop1_mul:
-        lg      %r7,0(%r2,%r3)  // ap[i]
-        mlgr    %r6,%r5         // *=w
-        alcgr   %r7,%r8         // +=carry
-        stg     %r7,0(%r2,%r1)  // rp[i]=
-        lgr     %r8,%r6         // high word is the carry into the next limb
-        la      %r2,8(%r2)      // i++
-        brct    %r10,.Loop1_mul
-        j       .Lend_mul
-.size   bn_mul_words,.-bn_mul_words
-// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4)
-.globl  bn_sqr_words            // rp[2*i],rp[2*i+1] = low,high word of ap[i]^2 (r2=rp, r3=ap, r4=len)
-.type   bn_sqr_words,@function
-.align  4
-bn_sqr_words:
-        ltgfr   %r4,%r4         // sign-extend len to 64 bits and test it
-        bler    %r14            // nothing to do if len<=0
-        stmg    %r6,%r7,48(%r15)        // save the %r6:%r7 product pair
-        srag    %r1,%r4,2       // cnt=len/4
-        jz      .Loop1_sqr      // fewer than four limbs: %r4 still holds len
-.Loop4_sqr:
-        lg      %r7,0(%r3)      // ap[0]
-        mlgr    %r6,%r7         // %r6:%r7 = ap[0]^2
-        stg     %r7,0(%r2)      // low word
-        stg     %r6,8(%r2)      // high word
-        lg      %r7,8(%r3)      // same for ap[1]
-        mlgr    %r6,%r7
-        stg     %r7,16(%r2)
-        stg     %r6,24(%r2)
-        lg      %r7,16(%r3)     // same for ap[2]
-        mlgr    %r6,%r7
-        stg     %r7,32(%r2)
-        stg     %r6,40(%r2)
-        lg      %r7,24(%r3)     // same for ap[3]
-        mlgr    %r6,%r7
-        stg     %r7,48(%r2)
-        stg     %r6,56(%r2)
-        la      %r3,32(%r3)     // ap+=4
-        la      %r2,64(%r2)     // rp+=8
-        brct    %r1,.Loop4_sqr
-        lghi    %r1,3
-        nr      %r4,%r1         // cnt=len%4
-        jz      .Lend_sqr
-.Loop1_sqr:
-        lg      %r7,0(%r3)      // ap[0]
-        mlgr    %r6,%r7         // %r6:%r7 = ap[0]^2
-        stg     %r7,0(%r2)      // low word
-        stg     %r6,8(%r2)      // high word
-        la      %r3,8(%r3)      // ap++
-        la      %r2,16(%r2)     // rp+=2
-        brct    %r4,.Loop1_sqr
-.Lend_sqr:
-        lmg     %r6,%r7,48(%r15)        // restore %r6-%r7
-        br      %r14
-.size   bn_sqr_words,.-bn_sqr_words
-// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
-.globl  bn_div_words            // returns the quotient of the 128-bit value h:l divided by d (r2=h, r3=l, r4=d)
-.type   bn_div_words,@function
-.align  4
-bn_div_words:
-        dlgr    %r2,%r4         // divide %r2:%r3 by %r4: remainder -> %r2, quotient -> %r3
-        lgr     %r2,%r3         // return the quotient
-        br      %r14
-.size   bn_div_words,.-bn_div_words
-// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
-.globl  bn_add_words            // rp[i] = ap[i]+bp[i]+carry for i<len (r2=rp, r3=ap, r4=bp, r5=len)
-.type   bn_add_words,@function  // returns the final carry (0 or 1) in %r2, or 0 when len<=0
-.align  4
-bn_add_words:
-        la      %r1,0(%r2)      // put rp aside
-        lghi    %r2,0           // i=0
-        ltgfr   %r5,%r5         // sign-extend len to 64 bits and test it
-        bler    %r14            // if (len<=0) return 0;
-        stg     %r6,48(%r15)    // save %r6; reloaded at .Lexit_add
-        lghi    %r6,3           // mask for len%4
-        nr      %r6,%r5         // len%4
-        sra     %r5,2           // len/4, use sra because it sets condition code
-        jz      .Loop1_add      // carry is incidentally cleared if branch taken
-        algr    %r2,%r2         // clear carry
-.Loop4_add:
-        lg      %r0,0(%r2,%r3)  // ap[i]
-        alcg    %r0,0(%r2,%r4)  // += bp[i] + carry
-        stg     %r0,0(%r2,%r1)  // rp[i]=
-        lg      %r0,8(%r2,%r3)  // same for limb i+1
-        alcg    %r0,8(%r2,%r4)
-        stg     %r0,8(%r2,%r1)
-        lg      %r0,16(%r2,%r3) // same for limb i+2
-        alcg    %r0,16(%r2,%r4)
-        stg     %r0,16(%r2,%r1)
-        lg      %r0,24(%r2,%r3) // same for limb i+3
-        alcg    %r0,24(%r2,%r4)
-        stg     %r0,24(%r2,%r1)
-        la      %r2,32(%r2)     // i+=4
-        brct    %r5,.Loop4_add
-        la      %r6,1(%r6)      // see if len%4 is zero ...
-        brct    %r6,.Loop1_add  // without touching condition code:-)
-.Lexit_add:
-        lghi    %r2,0
-        alcgr   %r2,%r2         // %r2 = 0+0+carry, the return value
-        lg      %r6,48(%r15)    // restore %r6
-        br      %r14
-.Loop1_add:
-        lg      %r0,0(%r2,%r3)  // ap[i]
-        alcg    %r0,0(%r2,%r4)  // += bp[i] + carry
-        stg     %r0,0(%r2,%r1)  // rp[i]=
-        la      %r2,8(%r2)      // i++
-        brct    %r6,.Loop1_add
-        j       .Lexit_add
-.size   bn_add_words,.-bn_add_words
-// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
-.globl  bn_sub_words            // rp[i] = ap[i]-bp[i]-borrow for i<len (r2=rp, r3=ap, r4=bp, r5=len)
-.type   bn_sub_words,@function  // returns the final borrow (0 or 1) in %r2, or 0 when len<=0
-.align  4
-bn_sub_words:
-        la      %r1,0(%r2)      // put rp aside
-        lghi    %r2,0           // i=0
-        ltgfr   %r5,%r5         // sign-extend len to 64 bits and test it
-        bler    %r14            // if (len<=0) return 0;
-        stg     %r6,48(%r15)    // save %r6; reloaded at .Lexit_sub
-        lghi    %r6,3           // mask for len%4
-        nr      %r6,%r5         // len%4
-        sra     %r5,2           // len/4, use sra because it sets condition code
-        jnz     .Loop4_sub      // borrow is incidentally cleared if branch taken
-        slgr    %r2,%r2         // clear borrow
-.Loop1_sub:
-        lg      %r0,0(%r2,%r3)  // ap[i]
-        slbg    %r0,0(%r2,%r4)  // -= bp[i] + borrow
-        stg     %r0,0(%r2,%r1)  // rp[i]=
-        la      %r2,8(%r2)      // i++
-        brct    %r6,.Loop1_sub
-        j       .Lexit_sub
-.Loop4_sub:
-        lg      %r0,0(%r2,%r3)  // ap[i]
-        slbg    %r0,0(%r2,%r4)  // -= bp[i] + borrow
-        stg     %r0,0(%r2,%r1)  // rp[i]=
-        lg      %r0,8(%r2,%r3)  // same for limb i+1
-        slbg    %r0,8(%r2,%r4)
-        stg     %r0,8(%r2,%r1)
-        lg      %r0,16(%r2,%r3) // same for limb i+2
-        slbg    %r0,16(%r2,%r4)
-        stg     %r0,16(%r2,%r1)
-        lg      %r0,24(%r2,%r3) // same for limb i+3
-        slbg    %r0,24(%r2,%r4)
-        stg     %r0,24(%r2,%r1)
-        la      %r2,32(%r2)     // i+=4
-        brct    %r5,.Loop4_sub
-        la      %r6,1(%r6)      // see if len%4 is zero ...
-        brct    %r6,.Loop1_sub  // without touching condition code:-)
-.Lexit_sub:
-        lghi    %r2,0
-        slbgr   %r2,%r2         // %r2 = 0-0-borrow, i.e. 0 or -1
-        lcgr    %r2,%r2         // negate so the return value is 0 or 1
-        lg      %r6,48(%r15)    // restore %r6
-        br      %r14
-.size   bn_sub_words,.-bn_sub_words
-#define c1      %r1     /* c1,c2,c3: three-word column accumulator used by the comba routines */
-#define c2      %r5
-#define c3      %r8
-#define mul_add_c(ai,bi,c1,c2,c3)       /* (c3:c2:c1) += a[ai]*b[bi], a at %r3, b at %r4 */ \
-        lg      %r7,ai*8(%r3);          /* %r7 = a[ai] */ \
-        mlg     %r6,bi*8(%r4);          /* %r6:%r7 = %r7 * b[bi] */ \
-        algr    c1,%r7;                 /* add low word */ \
-        alcgr   c2,%r6;                 /* add high word plus carry */ \
-        alcgr   c3,zero                 /* propagate the carry into the top word */
-// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
-.globl  bn_mul_comba8           // r[0..15] = a[0..7]*b[0..7] (r2=r, r3=a, r4=b)
-.type   bn_mul_comba8,@function // each result word is one column of partial products summed in c1/c2/c3
-.align  4
-bn_mul_comba8:
-        stmg    %r6,%r8,48(%r15)        // save %r6-%r8 (product pair and c3)
-        lghi    c1,0
-        lghi    c2,0
-        lghi    c3,0
-        lghi    zero,0
-        mul_add_c(0,0,c1,c2,c3);
-        stg     c1,0*8(%r2)             // r[0]
-        lghi    c1,0                    // cleared word becomes the top of the next column
-        mul_add_c(0,1,c2,c3,c1);
-        mul_add_c(1,0,c2,c3,c1);
-        stg     c2,1*8(%r2)             // r[1]
-        lghi    c2,0
-        mul_add_c(2,0,c3,c1,c2);
-        mul_add_c(1,1,c3,c1,c2);
-        mul_add_c(0,2,c3,c1,c2);
-        stg     c3,2*8(%r2)             // r[2]
-        lghi    c3,0
-        mul_add_c(0,3,c1,c2,c3);
-        mul_add_c(1,2,c1,c2,c3);
-        mul_add_c(2,1,c1,c2,c3);
-        mul_add_c(3,0,c1,c2,c3);
-        stg     c1,3*8(%r2)             // r[3]
-        lghi    c1,0
-        mul_add_c(4,0,c2,c3,c1);
-        mul_add_c(3,1,c2,c3,c1);
-        mul_add_c(2,2,c2,c3,c1);
-        mul_add_c(1,3,c2,c3,c1);
-        mul_add_c(0,4,c2,c3,c1);
-        stg     c2,4*8(%r2)             // r[4]
-        lghi    c2,0
-        mul_add_c(0,5,c3,c1,c2);
-        mul_add_c(1,4,c3,c1,c2);
-        mul_add_c(2,3,c3,c1,c2);
-        mul_add_c(3,2,c3,c1,c2);
-        mul_add_c(4,1,c3,c1,c2);
-        mul_add_c(5,0,c3,c1,c2);
-        stg     c3,5*8(%r2)             // r[5]
-        lghi    c3,0
-        mul_add_c(6,0,c1,c2,c3);
-        mul_add_c(5,1,c1,c2,c3);
-        mul_add_c(4,2,c1,c2,c3);
-        mul_add_c(3,3,c1,c2,c3);
-        mul_add_c(2,4,c1,c2,c3);
-        mul_add_c(1,5,c1,c2,c3);
-        mul_add_c(0,6,c1,c2,c3);
-        stg     c1,6*8(%r2)             // r[6]
-        lghi    c1,0
-        mul_add_c(0,7,c2,c3,c1);
-        mul_add_c(1,6,c2,c3,c1);
-        mul_add_c(2,5,c2,c3,c1);
-        mul_add_c(3,4,c2,c3,c1);
-        mul_add_c(4,3,c2,c3,c1);
-        mul_add_c(5,2,c2,c3,c1);
-        mul_add_c(6,1,c2,c3,c1);
-        mul_add_c(7,0,c2,c3,c1);
-        stg     c2,7*8(%r2)             // r[7]
-        lghi    c2,0
-        mul_add_c(7,1,c3,c1,c2);
-        mul_add_c(6,2,c3,c1,c2);
-        mul_add_c(5,3,c3,c1,c2);
-        mul_add_c(4,4,c3,c1,c2);
-        mul_add_c(3,5,c3,c1,c2);
-        mul_add_c(2,6,c3,c1,c2);
-        mul_add_c(1,7,c3,c1,c2);
-        stg     c3,8*8(%r2)             // r[8]
-        lghi    c3,0
-        mul_add_c(2,7,c1,c2,c3);
-        mul_add_c(3,6,c1,c2,c3);
-        mul_add_c(4,5,c1,c2,c3);
-        mul_add_c(5,4,c1,c2,c3);
-        mul_add_c(6,3,c1,c2,c3);
-        mul_add_c(7,2,c1,c2,c3);
-        stg     c1,9*8(%r2)             // r[9]
-        lghi    c1,0
-        mul_add_c(7,3,c2,c3,c1);
-        mul_add_c(6,4,c2,c3,c1);
-        mul_add_c(5,5,c2,c3,c1);
-        mul_add_c(4,6,c2,c3,c1);
-        mul_add_c(3,7,c2,c3,c1);
-        stg     c2,10*8(%r2)            // r[10]
-        lghi    c2,0
-        mul_add_c(4,7,c3,c1,c2);
-        mul_add_c(5,6,c3,c1,c2);
-        mul_add_c(6,5,c3,c1,c2);
-        mul_add_c(7,4,c3,c1,c2);
-        stg     c3,11*8(%r2)            // r[11]
-        lghi    c3,0
-        mul_add_c(7,5,c1,c2,c3);
-        mul_add_c(6,6,c1,c2,c3);
-        mul_add_c(5,7,c1,c2,c3);
-        stg     c1,12*8(%r2)            // r[12]
-        lghi    c1,0
-        mul_add_c(6,7,c2,c3,c1);
-        mul_add_c(7,6,c2,c3,c1);
-        stg     c2,13*8(%r2)            // r[13]
-        lghi    c2,0
-        mul_add_c(7,7,c3,c1,c2);
-        stg     c3,14*8(%r2)            // r[14]
-        stg     c1,15*8(%r2)            // r[15]
-        lmg     %r6,%r8,48(%r15)        // restore %r6-%r8
-        br      %r14
-.size   bn_mul_comba8,.-bn_mul_comba8
-// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
-.globl  bn_mul_comba4           // r[0..7] = a[0..3]*b[0..3] (r2=r, r3=a, r4=b)
-.type   bn_mul_comba4,@function // each result word is one column of partial products summed in c1/c2/c3
-.align  4
-bn_mul_comba4:
-        stmg    %r6,%r8,48(%r15)        // save %r6-%r8 (product pair and c3)
-        lghi    c1,0
-        lghi    c2,0
-        lghi    c3,0
-        lghi    zero,0
-        mul_add_c(0,0,c1,c2,c3);
-        stg     c1,0*8(%r2)             // r[0]: written through the result pointer, never through a (%r3)
-        lghi    c1,0                    // cleared word becomes the top of the next column
-        mul_add_c(0,1,c2,c3,c1);
-        mul_add_c(1,0,c2,c3,c1);
-        stg     c2,1*8(%r2)             // r[1]
-        lghi    c2,0
-        mul_add_c(2,0,c3,c1,c2);
-        mul_add_c(1,1,c3,c1,c2);
-        mul_add_c(0,2,c3,c1,c2);
-        stg     c3,2*8(%r2)             // r[2]
-        lghi    c3,0
-        mul_add_c(0,3,c1,c2,c3);
-        mul_add_c(1,2,c1,c2,c3);
-        mul_add_c(2,1,c1,c2,c3);
-        mul_add_c(3,0,c1,c2,c3);
-        stg     c1,3*8(%r2)             // r[3]
-        lghi    c1,0
-        mul_add_c(3,1,c2,c3,c1);
-        mul_add_c(2,2,c2,c3,c1);
-        mul_add_c(1,3,c2,c3,c1);
-        stg     c2,4*8(%r2)             // r[4]
-        lghi    c2,0
-        mul_add_c(2,3,c3,c1,c2);
-        mul_add_c(3,2,c3,c1,c2);
-        stg     c3,5*8(%r2)             // r[5]
-        lghi    c3,0
-        mul_add_c(3,3,c1,c2,c3);
-        stg     c1,6*8(%r2)             // r[6]
-        stg     c2,7*8(%r2)             // r[7]
-        lmg     %r6,%r8,48(%r15)        // reload the callee-saved %r6-%r8 stored on entry
-        br      %r14
-.size   bn_mul_comba4,.-bn_mul_comba4
-#define sqr_add_c(ai,c1,c2,c3)          /* (c3:c2:c1) += a[ai]^2, a at %r3 */ \
-        lg      %r7,ai*8(%r3);          /* %r7 = a[ai] */ \
-        mlgr    %r6,%r7;                /* %r6:%r7 = a[ai]^2 */ \
-        algr    c1,%r7;                 /* add low word */ \
-        alcgr   c2,%r6;                 /* add high word plus carry */ \
-        alcgr   c3,zero                 /* propagate the carry into the top word */
-#define sqr_add_c2(ai,aj,c1,c2,c3)      /* (c3:c2:c1) += 2*a[ai]*a[aj], a at %r3 */ \
-        lg      %r7,ai*8(%r3);          /* %r7 = a[ai] */ \
-        mlg     %r6,aj*8(%r3);          /* %r6:%r7 = %r7 * a[aj] */ \
-        algr    c1,%r7;                 /* first accumulation of the product */ \
-        alcgr   c2,%r6;                 \
-        alcgr   c3,zero;                \
-        algr    c1,%r7;                 /* second accumulation doubles the term */ \
-        alcgr   c2,%r6;                 \
-        alcgr   c3,zero
-// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
-.globl  bn_sqr_comba8           // r[0..15] = a[0..7]^2 (r2=r, r3=a)
-.type   bn_sqr_comba8,@function // cross terms are added twice via sqr_add_c2, squares once via sqr_add_c
-.align  4
-bn_sqr_comba8:
-        stmg    %r6,%r8,48(%r15)        // save %r6-%r8 (product pair and c3)
-        lghi    c1,0
-        lghi    c2,0
-        lghi    c3,0
-        lghi    zero,0
-        sqr_add_c(0,c1,c2,c3);
-        stg     c1,0*8(%r2)             // r[0]
-        lghi    c1,0                    // cleared word becomes the top of the next column
-        sqr_add_c2(1,0,c2,c3,c1);
-        stg     c2,1*8(%r2)             // r[1]
-        lghi    c2,0
-        sqr_add_c(1,c3,c1,c2);
-        sqr_add_c2(2,0,c3,c1,c2);
-        stg     c3,2*8(%r2)             // r[2]
-        lghi    c3,0
-        sqr_add_c2(3,0,c1,c2,c3);
-        sqr_add_c2(2,1,c1,c2,c3);
-        stg     c1,3*8(%r2)             // r[3]
-        lghi    c1,0
-        sqr_add_c(2,c2,c3,c1);
-        sqr_add_c2(3,1,c2,c3,c1);
-        sqr_add_c2(4,0,c2,c3,c1);
-        stg     c2,4*8(%r2)             // r[4]
-        lghi    c2,0
-        sqr_add_c2(5,0,c3,c1,c2);
-        sqr_add_c2(4,1,c3,c1,c2);
-        sqr_add_c2(3,2,c3,c1,c2);
-        stg     c3,5*8(%r2)             // r[5]
-        lghi    c3,0
-        sqr_add_c(3,c1,c2,c3);
-        sqr_add_c2(4,2,c1,c2,c3);
-        sqr_add_c2(5,1,c1,c2,c3);
-        sqr_add_c2(6,0,c1,c2,c3);
-        stg     c1,6*8(%r2)             // r[6]
-        lghi    c1,0
-        sqr_add_c2(7,0,c2,c3,c1);
-        sqr_add_c2(6,1,c2,c3,c1);
-        sqr_add_c2(5,2,c2,c3,c1);
-        sqr_add_c2(4,3,c2,c3,c1);
-        stg     c2,7*8(%r2)             // r[7]
-        lghi    c2,0
-        sqr_add_c(4,c3,c1,c2);
-        sqr_add_c2(5,3,c3,c1,c2);
-        sqr_add_c2(6,2,c3,c1,c2);
-        sqr_add_c2(7,1,c3,c1,c2);
-        stg     c3,8*8(%r2)             // r[8]
-        lghi    c3,0
-        sqr_add_c2(7,2,c1,c2,c3);
-        sqr_add_c2(6,3,c1,c2,c3);
-        sqr_add_c2(5,4,c1,c2,c3);
-        stg     c1,9*8(%r2)             // r[9]
-        lghi    c1,0
-        sqr_add_c(5,c2,c3,c1);
-        sqr_add_c2(6,4,c2,c3,c1);
-        sqr_add_c2(7,3,c2,c3,c1);
-        stg     c2,10*8(%r2)            // r[10]
-        lghi    c2,0
-        sqr_add_c2(7,4,c3,c1,c2);
-        sqr_add_c2(6,5,c3,c1,c2);
-        stg     c3,11*8(%r2)            // r[11]
-        lghi    c3,0
-        sqr_add_c(6,c1,c2,c3);
-        sqr_add_c2(7,5,c1,c2,c3);
-        stg     c1,12*8(%r2)            // r[12]
-        lghi    c1,0
-        sqr_add_c2(7,6,c2,c3,c1);
-        stg     c2,13*8(%r2)            // r[13]
-        lghi    c2,0
-        sqr_add_c(7,c3,c1,c2);
-        stg     c3,14*8(%r2)            // r[14]
-        stg     c1,15*8(%r2)            // r[15]
-        lmg     %r6,%r8,48(%r15)        // restore %r6-%r8
-        br      %r14
-.size   bn_sqr_comba8,.-bn_sqr_comba8
-// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
-.globl bn_sqr_comba4            // r[0..7] = a[0..3]^2 (r2=r, r3=a)
-.type   bn_sqr_comba4,@function // cross terms are added twice via sqr_add_c2, squares once via sqr_add_c
-.align  4
-bn_sqr_comba4:
-        stmg    %r6,%r8,48(%r15)        // save %r6-%r8 (product pair and c3)
-        lghi    c1,0
-        lghi    c2,0
-        lghi    c3,0
-        lghi    zero,0
-        sqr_add_c(0,c1,c2,c3);
-        stg     c1,0*8(%r2)             // r[0]
-        lghi    c1,0                    // cleared word becomes the top of the next column
-        sqr_add_c2(1,0,c2,c3,c1);
-        stg     c2,1*8(%r2)             // r[1]
-        lghi    c2,0
-        sqr_add_c(1,c3,c1,c2);
-        sqr_add_c2(2,0,c3,c1,c2);
-        stg     c3,2*8(%r2)             // r[2]
-        lghi    c3,0
-        sqr_add_c2(3,0,c1,c2,c3);
-        sqr_add_c2(2,1,c1,c2,c3);
-        stg     c1,3*8(%r2)             // r[3]
-        lghi    c1,0
-        sqr_add_c(2,c2,c3,c1);
-        sqr_add_c2(3,1,c2,c3,c1);
-        stg     c2,4*8(%r2)             // r[4]
-        lghi    c2,0
-        sqr_add_c2(3,2,c3,c1,c2);
-        stg     c3,5*8(%r2)             // r[5]
-        lghi    c3,0
-        sqr_add_c(3,c1,c2,c3);
-        stg     c1,6*8(%r2)             // r[6]
-        stg     c2,7*8(%r2)             // r[7]
-        lmg     %r6,%r8,48(%r15)        // restore %r6-%r8
-        br      %r14
-.size   bn_sqr_comba4,.-bn_sqr_comba4
author	jsing <>	2016-09-04 14:31:29 +0000
committer	jsing <>	2016-09-04 14:31:29 +0000
commit	e38c58272a121e2bc9a785ec4001bbc802d68f66 (patch)
tree	492fd2a4355d8592de425463d194374bdc85aa0a /src/lib/libcrypto/bn
parent	a9cbed3be03a99c87e2b07b16b511e65a90bf800 (diff)
download	openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.tar.gz openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.tar.bz2 openbsd-e38c58272a121e2bc9a785ec4001bbc802d68f66.zip