Diffstat (limited to '')
-rw-r--r--    src/lib/libcrypto/sha/asm/sha1-ia64.pl | 192
1 file changed, 95 insertions, 97 deletions
diff --git a/src/lib/libcrypto/sha/asm/sha1-ia64.pl b/src/lib/libcrypto/sha/asm/sha1-ia64.pl
index 51c4f47ecb..db28f0805a 100644
--- a/src/lib/libcrypto/sha/asm/sha1-ia64.pl
+++ b/src/lib/libcrypto/sha/asm/sha1-ia64.pl
@@ -15,7 +15,7 @@
 # is >50% better than HP C and >2x better than gcc.
 
 $code=<<___;
-.ident  \"sha1-ia64.s, version 1.2\"
+.ident  \"sha1-ia64.s, version 1.3\"
 .ident  \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
 .explicit
 
@@ -26,14 +26,10 @@ if ($^O eq "hpux") {
     $ADDP="addp4";
     for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
 } else { $ADDP="add"; }
-for (@ARGV) {  $big_endian=1 if (/\-DB_ENDIAN/);
-               $big_endian=0 if (/\-DL_ENDIAN/); }
-if (!defined($big_endian))
-    { $big_endian=(unpack('L',pack('N',1))==1); }
 
 #$human=1;
 if ($human) {  # useful for visual code auditing...
-    ($A,$B,$C,$D,$E,$T) = ("A","B","C","D","E","T");
+    ($A,$B,$C,$D,$E) = ("A","B","C","D","E");
     ($h0,$h1,$h2,$h3,$h4) = ("h0","h1","h2","h3","h4");
     ($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
         ( "K_00_19","K_20_39","K_40_59","K_60_79" );
@@ -41,47 +37,50 @@ if ($human) { # useful for visual code auditing...
          "X8", "X9","X10","X11","X12","X13","X14","X15" );
 }
 else {
-    ($A,$B,$C,$D,$E,$T) = ("loc0","loc1","loc2","loc3","loc4","loc5");
-    ($h0,$h1,$h2,$h3,$h4) = ("loc6","loc7","loc8","loc9","loc10");
+    ($A,$B,$C,$D,$E) = ("loc0","loc1","loc2","loc3","loc4");
+    ($h0,$h1,$h2,$h3,$h4) = ("loc5","loc6","loc7","loc8","loc9");
     ($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
-        ( "r14", "r15", "loc11", "loc12" );
+        ( "r14", "r15", "loc10", "loc11" );
     @X= ( "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
           "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" );
 }
 
 sub BODY_00_15 {
 local   *code=shift;
-local   ($i,$a,$b,$c,$d,$e,$f)=@_;
+my      ($i,$a,$b,$c,$d,$e)=@_;
+my      $j=$i+1;
+my      $Xn=@X[$j%16];
 
 $code.=<<___ if ($i==0);
-{ .mmi; ld1     $X[$i&0xf]=[inp],2          // MSB
+{ .mmi; ld1     $X[$i]=[inp],2              // MSB
         ld1     tmp2=[tmp3],2           };;
 { .mmi; ld1     tmp0=[inp],2
         ld1     tmp4=[tmp3],2               // LSB
-        dep     $X[$i&0xf]=$X[$i&0xf],tmp2,8,8  };;
+        dep     $X[$i]=$X[$i],tmp2,8,8  };;
___
 if ($i<15) {
         $code.=<<___;
-{ .mmi; ld1     $X[($i+1)&0xf]=[inp],2      // +1
+{ .mmi; ld1     $Xn=[inp],2                 // forward Xload
+        nop.m   0x0
         dep     tmp1=tmp0,tmp4,8,8      };;
-{ .mmi; ld1     tmp2=[tmp3],2               // +1
+{ .mmi; ld1     tmp2=[tmp3],2               // forward Xload
         and     tmp4=$c,$b
-        dep     $X[$i&0xf]=$X[$i&0xf],tmp1,16,16        } //;;
-{ .mmi; andcm   tmp1=$d,$b
-        add     tmp0=$e,$K_00_19
+        dep     $X[$i]=$X[$i],tmp1,16,16} //;;
+{ .mmi; add     $e=$e,$K_00_19              // e+=K_00_19
+        andcm   tmp1=$d,$b
         dep.z   tmp5=$a,5,27            };; // a<<5
-{ .mmi; or      tmp4=tmp4,tmp1              // F_00_19(b,c,d)=(b&c)|(~b&d)
-        add     $f=tmp0,$X[$i&0xf]          // f=xi+e+K_00_19
+{ .mmi; add     $e=$e,$X[$i]                // e+=Xload
+        or      tmp4=tmp4,tmp1              // F_00_19(b,c,d)=(b&c)|(~b&d)
         extr.u  tmp1=$a,27,5            };; // a>>27
-{ .mmi; ld1     tmp0=[inp],2                // +1
-        add     $f=$f,tmp4                  // f+=F_00_19(b,c,d)
+{ .mmi; ld1     tmp0=[inp],2                // forward Xload
+        add     $e=$e,tmp4                  // e+=F_00_19(b,c,d)
         shrp    $b=tmp6,tmp6,2  }           // b=ROTATE(b,30)
-{ .mmi; ld1     tmp4=[tmp3],2               // +1
+{ .mmi; ld1     tmp4=[tmp3],2               // forward Xload
         or      tmp5=tmp1,tmp5              // ROTATE(a,5)
         mux2    tmp6=$a,0x44            };; // see b in next iteration
-{ .mii; add     $f=$f,tmp5                  // f+=ROTATE(a,5)
-        dep     $X[($i+1)&0xf]=$X[($i+1)&0xf],tmp2,8,8  // +1
-        mux2    $X[$i&0xf]=$X[$i&0xf],0x44      } //;;
+{ .mii; add     $e=$e,tmp5                  // e+=ROTATE(a,5)
+        dep     $Xn=$Xn,tmp2,8,8            // forward Xload
+        mux2    $X[$i]=$X[$i],0x44      } //;;
 
___
        }
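In both halves of BODY_00_15 the input word is assembled from four ld1 byte loads and two dep (bit-deposit) merges, producing a big-endian 32-bit value regardless of host byte order. A rough Perl model of the three-deposit merge (the helper name is illustrative, not from the module):

    # models: dep X=X,tmp2,8,8 ; dep tmp1=tmp0,tmp4,8,8 ; dep X=X,tmp1,16,16
    sub be32_from_bytes {
        my ($b0, $b1, $b2, $b3) = @_;   # MSB first, as the ld1 pairs load them
        my $hi = ($b0 << 8) | $b1;      # first deposit: top half
        my $lo = ($b2 << 8) | $b3;      # second deposit: bottom half
        return ($hi << 16) | $lo;       # final deposit: full big-endian word
    }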
@@ -89,24 +88,24 @@ else {
 $code.=<<___;
 { .mii; and     tmp3=$c,$b
         dep     tmp1=tmp0,tmp4,8,8;;
-        dep     $X[$i&0xf]=$X[$i&0xf],tmp1,16,16        } //;;
-{ .mmi; andcm   tmp1=$d,$b
-        add     tmp0=$e,$K_00_19
+        dep     $X[$i]=$X[$i],tmp1,16,16} //;;
+{ .mmi; add     $e=$e,$K_00_19              // e+=K_00_19
+        andcm   tmp1=$d,$b
         dep.z   tmp5=$a,5,27            };; // a<<5
-{ .mmi; or      tmp4=tmp3,tmp1              // F_00_19(b,c,d)=(b&c)|(~b&d)
-        add     $f=tmp0,$X[$i&0xf]          // f=xi+e+K_00_19
+{ .mmi; add     $e=$e,$X[$i]                // e+=Xupdate
+        or      tmp4=tmp3,tmp1              // F_00_19(b,c,d)=(b&c)|(~b&d)
         extr.u  tmp1=$a,27,5    }           // a>>27
-{ .mmi; xor     tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]  // +1
-        xor     tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1
+{ .mmi; xor     $Xn=$Xn,$X[($j+2)%16]       // forward Xupdate
+        xor     tmp3=$X[($j+8)%16],$X[($j+13)%16]       // forward Xupdate
         nop.i   0       };;
-{ .mmi; add     $f=$f,tmp4                  // f+=F_00_19(b,c,d)
-        xor     tmp2=tmp2,tmp3              // +1
+{ .mmi; add     $e=$e,tmp4                  // e+=F_00_19(b,c,d)
+        xor     $Xn=$Xn,tmp3                // forward Xupdate
         shrp    $b=tmp6,tmp6,2  }           // b=ROTATE(b,30)
 { .mmi; or      tmp1=tmp1,tmp5              // ROTATE(a,5)
         mux2    tmp6=$a,0x44    };;         // see b in next iteration
-{ .mii; add     $f=$f,tmp1                  // f+=ROTATE(a,5)
-        shrp    $e=tmp2,tmp2,31             // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
-        mux2    $X[$i&0xf]=$X[$i&0xf],0x44      };;
+{ .mii; add     $e=$e,tmp1                  // e+=ROTATE(a,5)
+        shrp    $Xn=$Xn,$Xn,31              // ROTATE(x[0]^x[2]^x[8]^x[13],1)
+        mux2    $X[$i]=$X[$i],0x44      };;
 
___
 }
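The "forward Xupdate" instructions compute the next round's message-schedule word in this round's spare slots. The schedule is the standard SHA-1 recurrence over a 16-entry circular buffer; as a Perl sketch (Xupdate here is an illustrative helper, not part of the script):

    # W[j] = ROTL1(W[j-16] ^ W[j-14] ^ W[j-8] ^ W[j-3]); with a 16-slot
    # buffer, W[j-16] sits in X[$j%16], W[j-14] in X[($j+2)%16], W[j-8] in
    # X[($j+8)%16] and W[j-3] in X[($j+13)%16] -- the indices the asm uses.
    sub Xupdate {
        my ($X, $j) = @_;               # $X: ref to the 16-word array
        my $w = $X->[$j % 16] ^ $X->[($j + 2) % 16]
              ^ $X->[($j + 8) % 16] ^ $X->[($j + 13) % 16];
        $X->[$j % 16] = (($w << 1) | ($w >> 31)) & 0xffffffff;
    }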
@@ -114,27 +113,28 @@ ___
 
 sub BODY_16_19 {
 local   *code=shift;
-local   ($i,$a,$b,$c,$d,$e,$f)=@_;
+my      ($i,$a,$b,$c,$d,$e)=@_;
+my      $j=$i+1;
+my      $Xn=@X[$j%16];
 
 $code.=<<___;
-{ .mmi; mov     $X[$i&0xf]=$f               // Xupdate
-        and     tmp0=$c,$b
+{ .mib; add     $e=$e,$K_00_19              // e+=K_00_19
         dep.z   tmp5=$a,5,27    }           // a<<5
-{ .mmi; andcm   tmp1=$d,$b
-        add     tmp4=$e,$K_00_19        };;
-{ .mmi; or      tmp0=tmp0,tmp1              // F_00_19(b,c,d)=(b&c)|(~b&d)
-        add     $f=$f,tmp4                  // f+=e+K_00_19
+{ .mib; andcm   tmp1=$d,$b
+        and     tmp0=$c,$b      };;
+{ .mmi; add     $e=$e,$X[$i%16]             // e+=Xupdate
+        or      tmp0=tmp0,tmp1              // F_00_19(b,c,d)=(b&c)|(~b&d)
         extr.u  tmp1=$a,27,5    }           // a>>27
-{ .mmi; xor     tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]  // +1
-        xor     tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1
+{ .mmi; xor     $Xn=$Xn,$X[($j+2)%16]       // forward Xupdate
+        xor     tmp3=$X[($j+8)%16],$X[($j+13)%16]       // forward Xupdate
         nop.i   0       };;
-{ .mmi; add     $f=$f,tmp0                  // f+=F_00_19(b,c,d)
-        xor     tmp2=tmp2,tmp3              // +1
+{ .mmi; add     $e=$e,tmp0                  // f+=F_00_19(b,c,d)
+        xor     $Xn=$Xn,tmp3                // forward Xupdate
         shrp    $b=tmp6,tmp6,2  }           // b=ROTATE(b,30)
 { .mmi; or      tmp1=tmp1,tmp5              // ROTATE(a,5)
         mux2    tmp6=$a,0x44    };;         // see b in next iteration
-{ .mii; add     $f=$f,tmp1                  // f+=ROTATE(a,5)
-        shrp    $e=tmp2,tmp2,31             // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
+{ .mii; add     $e=$e,tmp1                  // e+=ROTATE(a,5)
+        shrp    $Xn=$Xn,$Xn,31              // ROTATE(x[0]^x[2]^x[8]^x[13],1)
         nop.i   0       };;
 
___
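The theme of these hunks is dropping the sixth rotating register ($T, passed into each body as $f): since a round consumes the old e entirely, e itself can serve as the accumulator for K, the message word, F(b,c,d) and ROTATE(a,5). One round then has this shape (an illustrative Perl sketch, not code from the module):

    sub rotl { my ($v, $n) = @_; (($v << $n) | ($v >> (32 - $n))) & 0xffffffff }

    # e is updated in place; the returned order matches the round loop's
    # unshift(@V,pop(@V)) rotation of five names instead of six.
    sub round {
        my ($F, $k, $x, $a, $b, $c, $d, $e) = @_;
        $e = ($e + $k + $x + $F->($b, $c, $d) + rotl($a, 5)) & 0xffffffff;
        $b = rotl($b, 30);
        return ($e, $a, $b, $c, $d);
    }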
@@ -142,49 +142,47 @@ ___
 
 sub BODY_20_39 {
 local   *code=shift;
-local   ($i,$a,$b,$c,$d,$e,$f,$Konst)=@_;
+my      ($i,$a,$b,$c,$d,$e,$Konst)=@_;
        $Konst = $K_20_39 if (!defined($Konst));
+my      $j=$i+1;
+my      $Xn=@X[$j%16];
 
 if ($i<79) {
 $code.=<<___;
-{ .mib; mov     $X[$i&0xf]=$f               // Xupdate
+{ .mib; add     $e=$e,$Konst                // e+=K_XX_XX
         dep.z   tmp5=$a,5,27    }           // a<<5
 { .mib; xor     tmp0=$c,$b
-        add     tmp4=$e,$Konst  };;
-{ .mmi; xor     tmp0=tmp0,$d                // F_20_39(b,c,d)=b^c^d
-        add     $f=$f,tmp4                  // f+=e+K_20_39
+        xor     $Xn=$Xn,$X[($j+2)%16]   };; // forward Xupdate
+{ .mib; add     $e=$e,$X[$i%16]             // e+=Xupdate
         extr.u  tmp1=$a,27,5    }           // a>>27
-{ .mmi; xor     tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]  // +1
-        xor     tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1
-        nop.i   0       };;
-{ .mmi; add     $f=$f,tmp0                  // f+=F_20_39(b,c,d)
-        xor     tmp2=tmp2,tmp3              // +1
+{ .mib; xor     tmp0=tmp0,$d                // F_20_39(b,c,d)=b^c^d
+        xor     $Xn=$Xn,$X[($j+8)%16]   };; // forward Xupdate
+{ .mmi; add     $e=$e,tmp0                  // e+=F_20_39(b,c,d)
+        xor     $Xn=$Xn,$X[($j+13)%16]      // forward Xupdate
         shrp    $b=tmp6,tmp6,2  }           // b=ROTATE(b,30)
 { .mmi; or      tmp1=tmp1,tmp5              // ROTATE(a,5)
         mux2    tmp6=$a,0x44    };;         // see b in next iteration
-{ .mii; add     $f=$f,tmp1                  // f+=ROTATE(a,5)
-        shrp    $e=tmp2,tmp2,31             // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
+{ .mii; add     $e=$e,tmp1                  // e+=ROTATE(a,5)
+        shrp    $Xn=$Xn,$Xn,31              // ROTATE(x[0]^x[2]^x[8]^x[13],1)
         nop.i   0       };;
 
___
 }
 else {
 $code.=<<___;
-{ .mib; mov     $X[$i&0xf]=$f               // Xupdate
+{ .mib; add     $e=$e,$Konst                // e+=K_60_79
         dep.z   tmp5=$a,5,27    }           // a<<5
 { .mib; xor     tmp0=$c,$b
-        add     tmp4=$e,$Konst  };;
-{ .mib; xor     tmp0=tmp0,$d                // F_20_39(b,c,d)=b^c^d
-        extr.u  tmp1=$a,27,5    }           // a>>27
-{ .mib; add     $f=$f,tmp4                  // f+=e+K_20_39
         add     $h1=$h1,$a      };;         // wrap up
-{ .mmi; add     $f=$f,tmp0                  // f+=F_20_39(b,c,d)
-        shrp    $b=tmp6,tmp6,2  }           // b=ROTATE(b,30) ;;?
-{ .mmi; or      tmp1=tmp1,tmp5              // ROTATE(a,5)
+{ .mib; add     $e=$e,$X[$i%16]             // e+=Xupdate
+        extr.u  tmp1=$a,27,5    }           // a>>27
+{ .mib; xor     tmp0=tmp0,$d                // F_20_39(b,c,d)=b^c^d
         add     $h3=$h3,$c      };;         // wrap up
-{ .mib; add     tmp3=1,inp                  // used in unaligned codepath
-        add     $f=$f,tmp1      }           // f+=ROTATE(a,5)
-{ .mib; add     $h2=$h2,$b                  // wrap up
+{ .mmi; add     $e=$e,tmp0                  // e+=F_20_39(b,c,d)
+        or      tmp1=tmp1,tmp5              // ROTATE(a,5)
+        shrp    $b=tmp6,tmp6,2  };;         // b=ROTATE(b,30) ;;?
+{ .mmi; add     $e=$e,tmp1                  // e+=ROTATE(a,5)
+        add     tmp3=1,inp                  // used in unaligned codepath
         add     $h4=$h4,$d      };;         // wrap up
 
___
@@ -193,29 +191,29 @@ ___
 
 sub BODY_40_59 {
 local   *code=shift;
-local   ($i,$a,$b,$c,$d,$e,$f)=@_;
+my      ($i,$a,$b,$c,$d,$e)=@_;
+my      $j=$i+1;
+my      $Xn=@X[$j%16];
 
 $code.=<<___;
-{ .mmi; mov     $X[$i&0xf]=$f               // Xupdate
-        and     tmp0=$c,$b
+{ .mib; add     $e=$e,$K_40_59              // e+=K_40_59
         dep.z   tmp5=$a,5,27    }           // a<<5
-{ .mmi; and     tmp1=$d,$b
-        add     tmp4=$e,$K_40_59        };;
-{ .mmi; or      tmp0=tmp0,tmp1              // (b&c)|(b&d)
-        add     $f=$f,tmp4                  // f+=e+K_40_59
+{ .mib; and     tmp1=$c,$d
+        xor     tmp0=$c,$d      };;
+{ .mmi; add     $e=$e,$X[$i%16]             // e+=Xupdate
+        add     tmp5=tmp5,tmp1              // a<<5+(c&d)
         extr.u  tmp1=$a,27,5    }           // a>>27
-{ .mmi; and     tmp4=$c,$d
-        xor     tmp2=$X[($i+0+1)&0xf],$X[($i+2+1)&0xf]  // +1
-        xor     tmp3=$X[($i+8+1)&0xf],$X[($i+13+1)&0xf] // +1
-        };;
-{ .mmi; or      tmp1=tmp1,tmp5              // ROTATE(a,5)
-        xor     tmp2=tmp2,tmp3              // +1
+{ .mmi; and     tmp0=tmp0,$b
+        xor     $Xn=$Xn,$X[($j+2)%16]       // forward Xupdate
+        xor     tmp3=$X[($j+8)%16],$X[($j+13)%16]       };; // forward Xupdate
+{ .mmi; add     $e=$e,tmp0                  // e+=b&(c^d)
+        add     tmp5=tmp5,tmp1              // ROTATE(a,5)+(c&d)
         shrp    $b=tmp6,tmp6,2  }           // b=ROTATE(b,30)
-{ .mmi; or      tmp0=tmp0,tmp4              // F_40_59(b,c,d)=(b&c)|(b&d)|(c&d)
+{ .mmi; xor     $Xn=$Xn,tmp3
         mux2    tmp6=$a,0x44    };;         // see b in next iteration
-{ .mii; add     $f=$f,tmp0                  // f+=F_40_59(b,c,d)
-        shrp    $e=tmp2,tmp2,31;;           // f+1=ROTATE(x[0]^x[2]^x[8]^x[13],1)
-        add     $f=$f,tmp1      };;         // f+=ROTATE(a,5)
+{ .mii; add     $e=$e,tmp5                  // e+=ROTATE(a,5)+(c&d)
+        shrp    $Xn=$Xn,$Xn,31              // ROTATE(x[0]^x[2]^x[8]^x[13],1)
+        nop.i   0x0     };;
 
___
 }
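Besides the e-accumulation, BODY_40_59 swaps the three-term majority for an arithmetic form: (b&c)|(b&d)|(c&d) equals (b&(c^d)) + (c&d), and the two summands are bitwise disjoint, which is why the code can pre-add (c&d) into the a<<5 value held in tmp5. A brute-force per-bit check of the identity:

    for my $b (0, 1) { for my $c (0, 1) { for my $d (0, 1) {
        my $maj = ($b & $c) | ($b & $d) | ($c & $d);
        my $alt = ($b & ($c ^ $d)) + ($c & $d);
        die "identity fails at ($b,$c,$d)\n" if $maj != $alt;
    }}}
    print "majority identity holds\n";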
@@ -237,7 +235,7 @@ inp=r33;   // in1
 .align 32
 sha1_block_data_order:
        .prologue
-{ .mmi; alloc   tmp1=ar.pfs,3,15,0,0
+{ .mmi; alloc   tmp1=ar.pfs,3,14,0,0
        $ADDP   tmp0=4,ctx
        .save   ar.lc,r3
        mov     r3=ar.lc        }
@@ -245,8 +243,8 @@ sha1_block_data_order:
        $ADDP   inp=0,inp
        mov     r2=pr   };;
 tmp4=in2;
-tmp5=loc13;
-tmp6=loc14;
+tmp5=loc12;
+tmp6=loc13;
        .body
 { .mlx; ld4     $h0=[ctx],8
        movl    $K_00_19=0x5a827999     }
@@ -273,7 +271,7 @@ tmp6=loc14;
 
___
 
-{ my $i,@V=($A,$B,$C,$D,$E,$T);
+{ my $i,@V=($A,$B,$C,$D,$E);
 
 for($i=0;$i<16;$i++)   { &BODY_00_15(\$code,$i,@V); unshift(@V,pop(@V)); }
 for(;$i<20;$i++)       { &BODY_16_19(\$code,$i,@V); unshift(@V,pop(@V)); }
@@ -281,12 +279,12 @@ ___
 for(;$i<60;$i++)       { &BODY_40_59(\$code,$i,@V); unshift(@V,pop(@V)); }
 for(;$i<80;$i++)       { &BODY_60_79(\$code,$i,@V); unshift(@V,pop(@V)); }
 
-(($V[5] eq $D) and ($V[0] eq $E)) or die;      # double-check
+(($V[0] eq $A) and ($V[4] eq $E)) or die;      # double-check
 }
 
 $code.=<<___;
-{ .mmb; add     $h0=$h0,$E
-        nop.m   0
+{ .mmb; add     $h0=$h0,$A
+        add     $h2=$h2,$C
         br.ctop.dptk.many       .Ldtop  };;
 .Ldend:
 { .mmi; add     tmp0=4,ctx
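The revised double-check falls out of the shorter rotation: 80 rounds of unshift(@V,pop(@V)) over five names is a whole number of cycles (80 % 5 == 0), so @V ends where it began and $V[0]/$V[4] are again $A/$E; the old six-name list came to rest two positions rotated (80 % 6 == 2), which is what the old ($V[5] eq $D) test encoded. A quick demonstration:

    my @V = qw(A B C D E);
    unshift(@V, pop(@V)) for 1 .. 80;
    print "@V\n";   # prints "A B C D E", as the new assertion expects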