diff options
Diffstat (limited to 'src/lib/libcrypto/sha')
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-armv4-large.pl | 76 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-sparcv9.pl | 1 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-sparcv9a.pl | 1 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha256-armv4.pl | 33 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha512-armv4.pl | 32 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha512-sparcv9.pl | 1 |
6 files changed, 75 insertions, 69 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha1-armv4-large.pl b/src/lib/libcrypto/sha/asm/sha1-armv4-large.pl index 88861af641..6e65fe3e01 100644 --- a/src/lib/libcrypto/sha/asm/sha1-armv4-large.pl +++ b/src/lib/libcrypto/sha/asm/sha1-armv4-large.pl | |||
| @@ -37,9 +37,18 @@ | |||
| 37 | # modes are limited. As result it takes more instructions to do | 37 | # modes are limited. As result it takes more instructions to do |
| 38 | # the same job in Thumb, therefore the code is never twice as | 38 | # the same job in Thumb, therefore the code is never twice as |
| 39 | # small and always slower. | 39 | # small and always slower. |
| 40 | # [***] which is also ~35% better than compiler generated code. | 40 | # [***] which is also ~35% better than compiler generated code. Dual- |
| 41 | # issue Cortex A8 core was measured to process input block in | ||
| 42 | # ~990 cycles. | ||
| 41 | 43 | ||
| 42 | $output=shift; | 44 | # August 2010. |
| 45 | # | ||
| 46 | # Rescheduling for dual-issue pipeline resulted in 13% improvement on | ||
| 47 | # Cortex A8 core and in absolute terms ~870 cycles per input block | ||
| 48 | # [or 13.6 cycles per byte]. | ||
| 49 | |||
| 50 | |||
| 51 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
| 43 | open STDOUT,">$output"; | 52 | open STDOUT,">$output"; |
| 44 | 53 | ||
| 45 | $ctx="r0"; | 54 | $ctx="r0"; |
| @@ -58,43 +67,22 @@ $t3="r12"; | |||
| 58 | $Xi="r14"; | 67 | $Xi="r14"; |
| 59 | @V=($a,$b,$c,$d,$e); | 68 | @V=($a,$b,$c,$d,$e); |
| 60 | 69 | ||
| 61 | # One can optimize this for aligned access on big-endian architecture, | ||
| 62 | # but code's endian neutrality makes it too pretty:-) | ||
| 63 | sub Xload { | ||
| 64 | my ($a,$b,$c,$d,$e)=@_; | ||
| 65 | $code.=<<___; | ||
| 66 | ldrb $t0,[$inp],#4 | ||
| 67 | ldrb $t1,[$inp,#-3] | ||
| 68 | ldrb $t2,[$inp,#-2] | ||
| 69 | ldrb $t3,[$inp,#-1] | ||
| 70 | add $e,$K,$e,ror#2 @ E+=K_00_19 | ||
| 71 | orr $t0,$t1,$t0,lsl#8 | ||
| 72 | add $e,$e,$a,ror#27 @ E+=ROR(A,27) | ||
| 73 | orr $t0,$t2,$t0,lsl#8 | ||
| 74 | eor $t1,$c,$d @ F_xx_xx | ||
| 75 | orr $t0,$t3,$t0,lsl#8 | ||
| 76 | add $e,$e,$t0 @ E+=X[i] | ||
| 77 | str $t0,[$Xi,#-4]! | ||
| 78 | ___ | ||
| 79 | } | ||
| 80 | sub Xupdate { | 70 | sub Xupdate { |
| 81 | my ($a,$b,$c,$d,$e,$flag)=@_; | 71 | my ($a,$b,$c,$d,$e,$opt1,$opt2)=@_; |
| 82 | $code.=<<___; | 72 | $code.=<<___; |
| 83 | ldr $t0,[$Xi,#15*4] | 73 | ldr $t0,[$Xi,#15*4] |
| 84 | ldr $t1,[$Xi,#13*4] | 74 | ldr $t1,[$Xi,#13*4] |
| 85 | ldr $t2,[$Xi,#7*4] | 75 | ldr $t2,[$Xi,#7*4] |
| 86 | ldr $t3,[$Xi,#2*4] | ||
| 87 | add $e,$K,$e,ror#2 @ E+=K_xx_xx | 76 | add $e,$K,$e,ror#2 @ E+=K_xx_xx |
| 77 | ldr $t3,[$Xi,#2*4] | ||
| 88 | eor $t0,$t0,$t1 | 78 | eor $t0,$t0,$t1 |
| 89 | eor $t0,$t0,$t2 | 79 | eor $t2,$t2,$t3 |
| 90 | eor $t0,$t0,$t3 | 80 | eor $t1,$c,$d @ F_xx_xx |
| 91 | add $e,$e,$a,ror#27 @ E+=ROR(A,27) | ||
| 92 | ___ | ||
| 93 | $code.=<<___ if (!defined($flag)); | ||
| 94 | eor $t1,$c,$d @ F_xx_xx, but not in 40_59 | ||
| 95 | ___ | ||
| 96 | $code.=<<___; | ||
| 97 | mov $t0,$t0,ror#31 | 81 | mov $t0,$t0,ror#31 |
| 82 | add $e,$e,$a,ror#27 @ E+=ROR(A,27) | ||
| 83 | eor $t0,$t0,$t2,ror#31 | ||
| 84 | $opt1 @ F_xx_xx | ||
| 85 | $opt2 @ F_xx_xx | ||
| 98 | add $e,$e,$t0 @ E+=X[i] | 86 | add $e,$e,$t0 @ E+=X[i] |
| 99 | str $t0,[$Xi,#-4]! | 87 | str $t0,[$Xi,#-4]! |
| 100 | ___ | 88 | ___ |
| @@ -102,19 +90,29 @@ ___ | |||
| 102 | 90 | ||
| 103 | sub BODY_00_15 { | 91 | sub BODY_00_15 { |
| 104 | my ($a,$b,$c,$d,$e)=@_; | 92 | my ($a,$b,$c,$d,$e)=@_; |
| 105 | &Xload(@_); | ||
| 106 | $code.=<<___; | 93 | $code.=<<___; |
| 94 | ldrb $t0,[$inp],#4 | ||
| 95 | ldrb $t1,[$inp,#-1] | ||
| 96 | ldrb $t2,[$inp,#-2] | ||
| 97 | add $e,$K,$e,ror#2 @ E+=K_00_19 | ||
| 98 | ldrb $t3,[$inp,#-3] | ||
| 99 | add $e,$e,$a,ror#27 @ E+=ROR(A,27) | ||
| 100 | orr $t0,$t1,$t0,lsl#24 | ||
| 101 | eor $t1,$c,$d @ F_xx_xx | ||
| 102 | orr $t0,$t0,$t2,lsl#8 | ||
| 103 | orr $t0,$t0,$t3,lsl#16 | ||
| 107 | and $t1,$b,$t1,ror#2 | 104 | and $t1,$b,$t1,ror#2 |
| 105 | add $e,$e,$t0 @ E+=X[i] | ||
| 108 | eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) | 106 | eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) |
| 107 | str $t0,[$Xi,#-4]! | ||
| 109 | add $e,$e,$t1 @ E+=F_00_19(B,C,D) | 108 | add $e,$e,$t1 @ E+=F_00_19(B,C,D) |
| 110 | ___ | 109 | ___ |
| 111 | } | 110 | } |
| 112 | 111 | ||
| 113 | sub BODY_16_19 { | 112 | sub BODY_16_19 { |
| 114 | my ($a,$b,$c,$d,$e)=@_; | 113 | my ($a,$b,$c,$d,$e)=@_; |
| 115 | &Xupdate(@_); | 114 | &Xupdate(@_,"and $t1,$b,$t1,ror#2"); |
| 116 | $code.=<<___; | 115 | $code.=<<___; |
| 117 | and $t1,$b,$t1,ror#2 | ||
| 118 | eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) | 116 | eor $t1,$t1,$d,ror#2 @ F_00_19(B,C,D) |
| 119 | add $e,$e,$t1 @ E+=F_00_19(B,C,D) | 117 | add $e,$e,$t1 @ E+=F_00_19(B,C,D) |
| 120 | ___ | 118 | ___ |
| @@ -122,22 +120,18 @@ ___ | |||
| 122 | 120 | ||
| 123 | sub BODY_20_39 { | 121 | sub BODY_20_39 { |
| 124 | my ($a,$b,$c,$d,$e)=@_; | 122 | my ($a,$b,$c,$d,$e)=@_; |
| 125 | &Xupdate(@_); | 123 | &Xupdate(@_,"eor $t1,$b,$t1,ror#2"); |
| 126 | $code.=<<___; | 124 | $code.=<<___; |
| 127 | eor $t1,$b,$t1,ror#2 @ F_20_39(B,C,D) | ||
| 128 | add $e,$e,$t1 @ E+=F_20_39(B,C,D) | 125 | add $e,$e,$t1 @ E+=F_20_39(B,C,D) |
| 129 | ___ | 126 | ___ |
| 130 | } | 127 | } |
| 131 | 128 | ||
| 132 | sub BODY_40_59 { | 129 | sub BODY_40_59 { |
| 133 | my ($a,$b,$c,$d,$e)=@_; | 130 | my ($a,$b,$c,$d,$e)=@_; |
| 134 | &Xupdate(@_,1); | 131 | &Xupdate(@_,"and $t1,$b,$t1,ror#2","and $t2,$c,$d"); |
| 135 | $code.=<<___; | 132 | $code.=<<___; |
| 136 | and $t1,$b,$c,ror#2 | ||
| 137 | orr $t2,$b,$c,ror#2 | ||
| 138 | and $t2,$t2,$d,ror#2 | ||
| 139 | orr $t1,$t1,$t2 @ F_40_59(B,C,D) | ||
| 140 | add $e,$e,$t1 @ E+=F_40_59(B,C,D) | 133 | add $e,$e,$t1 @ E+=F_40_59(B,C,D) |
| 134 | add $e,$e,$t2,ror#2 | ||
| 141 | ___ | 135 | ___ |
| 142 | } | 136 | } |
| 143 | 137 | ||
diff --git a/src/lib/libcrypto/sha/asm/sha1-sparcv9.pl b/src/lib/libcrypto/sha/asm/sha1-sparcv9.pl index 8306fc88cc..5c161cecd6 100644 --- a/src/lib/libcrypto/sha/asm/sha1-sparcv9.pl +++ b/src/lib/libcrypto/sha/asm/sha1-sparcv9.pl | |||
| @@ -276,6 +276,7 @@ $code.=<<___; | |||
| 276 | .type sha1_block_data_order,#function | 276 | .type sha1_block_data_order,#function |
| 277 | .size sha1_block_data_order,(.-sha1_block_data_order) | 277 | .size sha1_block_data_order,(.-sha1_block_data_order) |
| 278 | .asciz "SHA1 block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" | 278 | .asciz "SHA1 block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" |
| 279 | .align 4 | ||
| 279 | ___ | 280 | ___ |
| 280 | 281 | ||
| 281 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | 282 | $code =~ s/\`([^\`]*)\`/eval $1/gem; |
diff --git a/src/lib/libcrypto/sha/asm/sha1-sparcv9a.pl b/src/lib/libcrypto/sha/asm/sha1-sparcv9a.pl index 15eb854bad..85e8d68086 100644 --- a/src/lib/libcrypto/sha/asm/sha1-sparcv9a.pl +++ b/src/lib/libcrypto/sha/asm/sha1-sparcv9a.pl | |||
| @@ -539,6 +539,7 @@ $code.=<<___; | |||
| 539 | .type sha1_block_data_order,#function | 539 | .type sha1_block_data_order,#function |
| 540 | .size sha1_block_data_order,(.-sha1_block_data_order) | 540 | .size sha1_block_data_order,(.-sha1_block_data_order) |
| 541 | .asciz "SHA1 block transform for SPARCv9a, CRYPTOGAMS by <appro\@openssl.org>" | 541 | .asciz "SHA1 block transform for SPARCv9a, CRYPTOGAMS by <appro\@openssl.org>" |
| 542 | .align 4 | ||
| 542 | ___ | 543 | ___ |
| 543 | 544 | ||
| 544 | # Purpose of these subroutines is to explicitly encode VIS instructions, | 545 | # Purpose of these subroutines is to explicitly encode VIS instructions, |
diff --git a/src/lib/libcrypto/sha/asm/sha256-armv4.pl b/src/lib/libcrypto/sha/asm/sha256-armv4.pl index 48d846deec..492cb62bc0 100644 --- a/src/lib/libcrypto/sha/asm/sha256-armv4.pl +++ b/src/lib/libcrypto/sha/asm/sha256-armv4.pl | |||
| @@ -11,9 +11,14 @@ | |||
| 11 | 11 | ||
| 12 | # Performance is ~2x better than gcc 3.4 generated code and in "abso- | 12 | # Performance is ~2x better than gcc 3.4 generated code and in "abso- |
| 13 | # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per | 13 | # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per |
| 14 | # byte. | 14 | # byte [on single-issue Xscale PXA250 core]. |
| 15 | 15 | ||
| 16 | $output=shift; | 16 | # July 2010. |
| 17 | # | ||
| 18 | # Rescheduling for dual-issue pipeline resulted in 22% improvement on | ||
| 19 | # Cortex A8 core and ~20 cycles per processed byte. | ||
| 20 | |||
| 21 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
| 17 | open STDOUT,">$output"; | 22 | open STDOUT,">$output"; |
| 18 | 23 | ||
| 19 | $ctx="r0"; $t0="r0"; | 24 | $ctx="r0"; $t0="r0"; |
| @@ -52,27 +57,27 @@ $code.=<<___ if ($i<16); | |||
| 52 | ___ | 57 | ___ |
| 53 | $code.=<<___; | 58 | $code.=<<___; |
| 54 | ldr $t2,[$Ktbl],#4 @ *K256++ | 59 | ldr $t2,[$Ktbl],#4 @ *K256++ |
| 55 | str $T1,[sp,#`$i%16`*4] | ||
| 56 | mov $t0,$e,ror#$Sigma1[0] | 60 | mov $t0,$e,ror#$Sigma1[0] |
| 61 | str $T1,[sp,#`$i%16`*4] | ||
| 57 | eor $t0,$t0,$e,ror#$Sigma1[1] | 62 | eor $t0,$t0,$e,ror#$Sigma1[1] |
| 58 | eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) | ||
| 59 | add $T1,$T1,$t0 | ||
| 60 | eor $t1,$f,$g | 63 | eor $t1,$f,$g |
| 64 | eor $t0,$t0,$e,ror#$Sigma1[2] @ Sigma1(e) | ||
| 61 | and $t1,$t1,$e | 65 | and $t1,$t1,$e |
| 66 | add $T1,$T1,$t0 | ||
| 62 | eor $t1,$t1,$g @ Ch(e,f,g) | 67 | eor $t1,$t1,$g @ Ch(e,f,g) |
| 63 | add $T1,$T1,$t1 | ||
| 64 | add $T1,$T1,$h | 68 | add $T1,$T1,$h |
| 65 | add $T1,$T1,$t2 | ||
| 66 | mov $h,$a,ror#$Sigma0[0] | 69 | mov $h,$a,ror#$Sigma0[0] |
| 70 | add $T1,$T1,$t1 | ||
| 67 | eor $h,$h,$a,ror#$Sigma0[1] | 71 | eor $h,$h,$a,ror#$Sigma0[1] |
| 72 | add $T1,$T1,$t2 | ||
| 68 | eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) | 73 | eor $h,$h,$a,ror#$Sigma0[2] @ Sigma0(a) |
| 69 | orr $t0,$a,$b | 74 | orr $t0,$a,$b |
| 70 | and $t0,$t0,$c | ||
| 71 | and $t1,$a,$b | 75 | and $t1,$a,$b |
| 76 | and $t0,$t0,$c | ||
| 77 | add $h,$h,$T1 | ||
| 72 | orr $t0,$t0,$t1 @ Maj(a,b,c) | 78 | orr $t0,$t0,$t1 @ Maj(a,b,c) |
| 73 | add $h,$h,$t0 | ||
| 74 | add $d,$d,$T1 | 79 | add $d,$d,$T1 |
| 75 | add $h,$h,$T1 | 80 | add $h,$h,$t0 |
| 76 | ___ | 81 | ___ |
| 77 | } | 82 | } |
| 78 | 83 | ||
| @@ -80,19 +85,19 @@ sub BODY_16_XX { | |||
| 80 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; | 85 | my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; |
| 81 | 86 | ||
| 82 | $code.=<<___; | 87 | $code.=<<___; |
| 83 | ldr $t1,[sp,#`($i+1)%16`*4] @ $i | 88 | ldr $t1,[sp,#`($i+1)%16`*4] @ $i |
| 84 | ldr $t2,[sp,#`($i+14)%16`*4] | 89 | ldr $t2,[sp,#`($i+14)%16`*4] |
| 85 | ldr $T1,[sp,#`($i+0)%16`*4] | 90 | ldr $T1,[sp,#`($i+0)%16`*4] |
| 86 | ldr $inp,[sp,#`($i+9)%16`*4] | ||
| 87 | mov $t0,$t1,ror#$sigma0[0] | 91 | mov $t0,$t1,ror#$sigma0[0] |
| 92 | ldr $inp,[sp,#`($i+9)%16`*4] | ||
| 88 | eor $t0,$t0,$t1,ror#$sigma0[1] | 93 | eor $t0,$t0,$t1,ror#$sigma0[1] |
| 89 | eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) | 94 | eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1]) |
| 90 | mov $t1,$t2,ror#$sigma1[0] | 95 | mov $t1,$t2,ror#$sigma1[0] |
| 96 | add $T1,$T1,$t0 | ||
| 91 | eor $t1,$t1,$t2,ror#$sigma1[1] | 97 | eor $t1,$t1,$t2,ror#$sigma1[1] |
| 98 | add $T1,$T1,$inp | ||
| 92 | eor $t1,$t1,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) | 99 | eor $t1,$t1,$t2,lsr#$sigma1[2] @ sigma1(X[i+14]) |
| 93 | add $T1,$T1,$t0 | ||
| 94 | add $T1,$T1,$t1 | 100 | add $T1,$T1,$t1 |
| 95 | add $T1,$T1,$inp | ||
| 96 | ___ | 101 | ___ |
| 97 | &BODY_00_15(@_); | 102 | &BODY_00_15(@_); |
| 98 | } | 103 | } |
diff --git a/src/lib/libcrypto/sha/asm/sha512-armv4.pl b/src/lib/libcrypto/sha/asm/sha512-armv4.pl index 4fbb94a914..3a35861ac6 100644 --- a/src/lib/libcrypto/sha/asm/sha512-armv4.pl +++ b/src/lib/libcrypto/sha/asm/sha512-armv4.pl | |||
| @@ -10,7 +10,13 @@ | |||
| 10 | # SHA512 block procedure for ARMv4. September 2007. | 10 | # SHA512 block procedure for ARMv4. September 2007. |
| 11 | 11 | ||
| 12 | # This code is ~4.5 (four and a half) times faster than code generated | 12 | # This code is ~4.5 (four and a half) times faster than code generated |
| 13 | # by gcc 3.4 and it spends ~72 clock cycles per byte. | 13 | # by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue |
| 14 | # Xscale PXA250 core]. | ||
| 15 | # | ||
| 16 | # July 2010. | ||
| 17 | # | ||
| 18 | # Rescheduling for dual-issue pipeline resulted in 6% improvement on | ||
| 19 | # Cortex A8 core and ~40 cycles per processed byte. | ||
| 14 | 20 | ||
| 15 | # Byte order [in]dependence. ========================================= | 21 | # Byte order [in]dependence. ========================================= |
| 16 | # | 22 | # |
| @@ -22,7 +28,7 @@ $hi=0; | |||
| 22 | $lo=4; | 28 | $lo=4; |
| 23 | # ==================================================================== | 29 | # ==================================================================== |
| 24 | 30 | ||
| 25 | $output=shift; | 31 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} |
| 26 | open STDOUT,">$output"; | 32 | open STDOUT,">$output"; |
| 27 | 33 | ||
| 28 | $ctx="r0"; | 34 | $ctx="r0"; |
| @@ -73,33 +79,31 @@ $code.=<<___; | |||
| 73 | eor $t0,$t0,$Elo,lsl#23 | 79 | eor $t0,$t0,$Elo,lsl#23 |
| 74 | eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e) | 80 | eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e) |
| 75 | adds $Tlo,$Tlo,$t0 | 81 | adds $Tlo,$Tlo,$t0 |
| 76 | adc $Thi,$Thi,$t1 @ T += Sigma1(e) | ||
| 77 | adds $Tlo,$Tlo,$t2 | ||
| 78 | adc $Thi,$Thi,$t3 @ T += h | ||
| 79 | |||
| 80 | ldr $t0,[sp,#$Foff+0] @ f.lo | 82 | ldr $t0,[sp,#$Foff+0] @ f.lo |
| 83 | adc $Thi,$Thi,$t1 @ T += Sigma1(e) | ||
| 81 | ldr $t1,[sp,#$Foff+4] @ f.hi | 84 | ldr $t1,[sp,#$Foff+4] @ f.hi |
| 85 | adds $Tlo,$Tlo,$t2 | ||
| 82 | ldr $t2,[sp,#$Goff+0] @ g.lo | 86 | ldr $t2,[sp,#$Goff+0] @ g.lo |
| 87 | adc $Thi,$Thi,$t3 @ T += h | ||
| 83 | ldr $t3,[sp,#$Goff+4] @ g.hi | 88 | ldr $t3,[sp,#$Goff+4] @ g.hi |
| 84 | str $Elo,[sp,#$Eoff+0] | ||
| 85 | str $Ehi,[sp,#$Eoff+4] | ||
| 86 | str $Alo,[sp,#$Aoff+0] | ||
| 87 | str $Ahi,[sp,#$Aoff+4] | ||
| 88 | 89 | ||
| 89 | eor $t0,$t0,$t2 | 90 | eor $t0,$t0,$t2 |
| 91 | str $Elo,[sp,#$Eoff+0] | ||
| 90 | eor $t1,$t1,$t3 | 92 | eor $t1,$t1,$t3 |
| 93 | str $Ehi,[sp,#$Eoff+4] | ||
| 91 | and $t0,$t0,$Elo | 94 | and $t0,$t0,$Elo |
| 95 | str $Alo,[sp,#$Aoff+0] | ||
| 92 | and $t1,$t1,$Ehi | 96 | and $t1,$t1,$Ehi |
| 97 | str $Ahi,[sp,#$Aoff+4] | ||
| 93 | eor $t0,$t0,$t2 | 98 | eor $t0,$t0,$t2 |
| 94 | eor $t1,$t1,$t3 @ Ch(e,f,g) | ||
| 95 | |||
| 96 | ldr $t2,[$Ktbl,#4] @ K[i].lo | 99 | ldr $t2,[$Ktbl,#4] @ K[i].lo |
| 100 | eor $t1,$t1,$t3 @ Ch(e,f,g) | ||
| 97 | ldr $t3,[$Ktbl,#0] @ K[i].hi | 101 | ldr $t3,[$Ktbl,#0] @ K[i].hi |
| 98 | ldr $Elo,[sp,#$Doff+0] @ d.lo | ||
| 99 | ldr $Ehi,[sp,#$Doff+4] @ d.hi | ||
| 100 | 102 | ||
| 101 | adds $Tlo,$Tlo,$t0 | 103 | adds $Tlo,$Tlo,$t0 |
| 104 | ldr $Elo,[sp,#$Doff+0] @ d.lo | ||
| 102 | adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) | 105 | adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) |
| 106 | ldr $Ehi,[sp,#$Doff+4] @ d.hi | ||
| 103 | adds $Tlo,$Tlo,$t2 | 107 | adds $Tlo,$Tlo,$t2 |
| 104 | adc $Thi,$Thi,$t3 @ T += K[i] | 108 | adc $Thi,$Thi,$t3 @ T += K[i] |
| 105 | adds $Elo,$Elo,$Tlo | 109 | adds $Elo,$Elo,$Tlo |
diff --git a/src/lib/libcrypto/sha/asm/sha512-sparcv9.pl b/src/lib/libcrypto/sha/asm/sha512-sparcv9.pl index 54241aab50..ec5d78135e 100644 --- a/src/lib/libcrypto/sha/asm/sha512-sparcv9.pl +++ b/src/lib/libcrypto/sha/asm/sha512-sparcv9.pl | |||
| @@ -586,6 +586,7 @@ $code.=<<___; | |||
| 586 | .type sha${label}_block_data_order,#function | 586 | .type sha${label}_block_data_order,#function |
| 587 | .size sha${label}_block_data_order,(.-sha${label}_block_data_order) | 587 | .size sha${label}_block_data_order,(.-sha${label}_block_data_order) |
| 588 | .asciz "SHA${label} block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" | 588 | .asciz "SHA${label} block transform for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>" |
| 589 | .align 4 | ||
| 589 | ___ | 590 | ___ |
| 590 | 591 | ||
| 591 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | 592 | $code =~ s/\`([^\`]*)\`/eval $1/gem; |
