diff options
author | miod <> | 2014-05-01 19:06:46 +0000 |
---|---|---|
committer | miod <> | 2014-05-01 19:06:46 +0000 |
commit | a6c5862b9505a86f8d6f76d0f4cf1ec8c18b63d6 (patch) | |
tree | 60af04beb29a22b281d400bbf428b80059d962c7 /src/lib | |
parent | 835fdf4f61033948fa1b34ef14e7bb8754a4c4d4 (diff) | |
download | openbsd-a6c5862b9505a86f8d6f76d0f4cf1ec8c18b63d6.tar.gz openbsd-a6c5862b9505a86f8d6f76d0f4cf1ec8c18b63d6.tar.bz2 openbsd-a6c5862b9505a86f8d6f76d0f4cf1ec8c18b63d6.zip |
dead meat
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/libcrypto/bn/asm/mips3-mont.pl | 327 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/mips3.s | 2201 | ||||
-rw-r--r-- | src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl | 327 | ||||
-rw-r--r-- | src/lib/libssl/src/crypto/bn/asm/mips3.s | 2201 |
4 files changed, 0 insertions, 5056 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips3-mont.pl b/src/lib/libcrypto/bn/asm/mips3-mont.pl deleted file mode 100644 index 8f9156e02a..0000000000 --- a/src/lib/libcrypto/bn/asm/mips3-mont.pl +++ /dev/null | |||
@@ -1,327 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # This module doesn't present direct interest for OpenSSL, because it | ||
11 | # doesn't provide better performance for longer keys. While 512-bit | ||
12 | # RSA private key operations are 40% faster, 1024-bit ones are hardly | ||
13 | # faster at all, while longer key operations are slower by up to 20%. | ||
14 | # It might be of interest to embedded system developers though, as | ||
15 | # it's smaller than 1KB, yet offers ~3x improvement over compiler | ||
16 | # generated code. | ||
17 | # | ||
18 | # The module targets N32 and N64 MIPS ABIs and currently is a bit | ||
19 | # IRIX-centric, i.e. is likely to require adaptation for other OSes. | ||
20 | |||
21 | # int bn_mul_mont( | ||
22 | $rp="a0"; # BN_ULONG *rp, | ||
23 | $ap="a1"; # const BN_ULONG *ap, | ||
24 | $bp="a2"; # const BN_ULONG *bp, | ||
25 | $np="a3"; # const BN_ULONG *np, | ||
26 | $n0="a4"; # const BN_ULONG *n0, | ||
27 | $num="a5"; # int num); | ||
28 | |||
# Working registers referenced by name inside the assembly template below.
29 | $lo0="a6"; | ||
30 | $hi0="a7"; | ||
31 | $lo1="v0"; | ||
32 | $hi1="v1"; | ||
33 | $aj="t0"; | ||
34 | $bi="t1"; | ||
35 | $nj="t2"; | ||
36 | $tp="t3"; | ||
37 | $alo="s0"; | ||
38 | $ahi="s1"; | ||
39 | $nlo="s2"; | ||
40 | $nhi="s3"; | ||
41 | $tj="s4"; | ||
42 | $i="s5"; | ||
43 | $j="s6"; | ||
44 | $fp="t8"; | ||
45 | $m1="t9"; | ||
46 | |||
# NOTE(review): $FRAME is never referenced in this script; the template's
# prologue and epilogue hard-code a 64-byte save area instead.
47 | $FRAME=8*(2+8); | ||
48 | |||
# Assembly template for bn_mul_mont; the register variables above are
# interpolated into it.  As written, the routine:
#   - returns 0 in v0 without doing any work when num is less than 4;
#   - saves s0-s7 in the 64-byte area addressed by the frame pointer;
#   - carves num*8+16 bytes of scratch off the stack and rounds sp down
#     to a 4096-byte boundary;
#   - runs the .L1st pass for bp[0], then .Louter/.Linner for the
#     remaining words of bp;
#   - subtracts np from the scratch vector into rp (.Lsub), then copies
#     either the scratch vector or rp back into rp while zeroing the
#     scratch vector (.Lcopy);
#   - restores s0-s7 and returns 1 in v0.
49 | $code=<<___; | ||
50 | #include <asm.h> | ||
51 | #include <regdef.h> | ||
52 | |||
53 | .text | ||
54 | |||
55 | .set noat | ||
56 | .set reorder | ||
57 | |||
58 | .align 5 | ||
59 | .globl bn_mul_mont | ||
60 | .ent bn_mul_mont | ||
61 | bn_mul_mont: | ||
62 | .set noreorder | ||
63 | PTR_SUB sp,64 | ||
64 | move $fp,sp | ||
65 | .frame $fp,64,ra | ||
66 | slt AT,$num,4 | ||
67 | li v0,0 | ||
68 | beqzl AT,.Lproceed | ||
69 | nop | ||
70 | jr ra | ||
71 | PTR_ADD sp,$fp,64 | ||
72 | .set reorder | ||
73 | .align 5 | ||
74 | .Lproceed: | ||
75 | ld $n0,0($n0) | ||
76 | ld $bi,0($bp) # bp[0] | ||
77 | ld $aj,0($ap) # ap[0] | ||
78 | ld $nj,0($np) # np[0] | ||
79 | PTR_SUB sp,16 # place for two extra words | ||
80 | sll $num,3 | ||
81 | li AT,-4096 | ||
82 | PTR_SUB sp,$num | ||
83 | and sp,AT | ||
84 | |||
85 | sd s0,0($fp) | ||
86 | sd s1,8($fp) | ||
87 | sd s2,16($fp) | ||
88 | sd s3,24($fp) | ||
89 | sd s4,32($fp) | ||
90 | sd s5,40($fp) | ||
91 | sd s6,48($fp) | ||
92 | sd s7,56($fp) | ||
93 | |||
94 | dmultu $aj,$bi | ||
95 | ld $alo,8($ap) | ||
96 | ld $nlo,8($np) | ||
97 | mflo $lo0 | ||
98 | mfhi $hi0 | ||
99 | dmultu $lo0,$n0 | ||
100 | mflo $m1 | ||
101 | |||
102 | dmultu $alo,$bi | ||
103 | mflo $alo | ||
104 | mfhi $ahi | ||
105 | |||
106 | dmultu $nj,$m1 | ||
107 | mflo $lo1 | ||
108 | mfhi $hi1 | ||
109 | dmultu $nlo,$m1 | ||
110 | daddu $lo1,$lo0 | ||
111 | sltu AT,$lo1,$lo0 | ||
112 | daddu $hi1,AT | ||
113 | mflo $nlo | ||
114 | mfhi $nhi | ||
115 | |||
116 | move $tp,sp | ||
117 | li $j,16 | ||
118 | .align 4 | ||
119 | .L1st: | ||
120 | .set noreorder | ||
121 | PTR_ADD $aj,$ap,$j | ||
122 | ld $aj,($aj) | ||
123 | PTR_ADD $nj,$np,$j | ||
124 | ld $nj,($nj) | ||
125 | |||
126 | dmultu $aj,$bi | ||
127 | daddu $lo0,$alo,$hi0 | ||
128 | daddu $lo1,$nlo,$hi1 | ||
129 | sltu AT,$lo0,$hi0 | ||
130 | sltu s7,$lo1,$hi1 | ||
131 | daddu $hi0,$ahi,AT | ||
132 | daddu $hi1,$nhi,s7 | ||
133 | mflo $alo | ||
134 | mfhi $ahi | ||
135 | |||
136 | daddu $lo1,$lo0 | ||
137 | sltu AT,$lo1,$lo0 | ||
138 | dmultu $nj,$m1 | ||
139 | daddu $hi1,AT | ||
140 | addu $j,8 | ||
141 | sd $lo1,($tp) | ||
142 | sltu s7,$j,$num | ||
143 | mflo $nlo | ||
144 | mfhi $nhi | ||
145 | |||
146 | bnez s7,.L1st | ||
147 | PTR_ADD $tp,8 | ||
148 | .set reorder | ||
149 | |||
150 | daddu $lo0,$alo,$hi0 | ||
151 | sltu AT,$lo0,$hi0 | ||
152 | daddu $hi0,$ahi,AT | ||
153 | |||
154 | daddu $lo1,$nlo,$hi1 | ||
155 | sltu s7,$lo1,$hi1 | ||
156 | daddu $hi1,$nhi,s7 | ||
157 | daddu $lo1,$lo0 | ||
158 | sltu AT,$lo1,$lo0 | ||
159 | daddu $hi1,AT | ||
160 | |||
161 | sd $lo1,($tp) | ||
162 | |||
163 | daddu $hi1,$hi0 | ||
164 | sltu AT,$hi1,$hi0 | ||
165 | sd $hi1,8($tp) | ||
166 | sd AT,16($tp) | ||
167 | |||
168 | li $i,8 | ||
169 | .align 4 | ||
170 | .Louter: | ||
171 | PTR_ADD $bi,$bp,$i | ||
172 | ld $bi,($bi) | ||
173 | ld $aj,($ap) | ||
174 | ld $alo,8($ap) | ||
175 | ld $tj,(sp) | ||
176 | |||
177 | dmultu $aj,$bi | ||
178 | ld $nj,($np) | ||
179 | ld $nlo,8($np) | ||
180 | mflo $lo0 | ||
181 | mfhi $hi0 | ||
182 | daddu $lo0,$tj | ||
183 | dmultu $lo0,$n0 | ||
184 | sltu AT,$lo0,$tj | ||
185 | daddu $hi0,AT | ||
186 | mflo $m1 | ||
187 | |||
188 | dmultu $alo,$bi | ||
189 | mflo $alo | ||
190 | mfhi $ahi | ||
191 | |||
192 | dmultu $nj,$m1 | ||
193 | mflo $lo1 | ||
194 | mfhi $hi1 | ||
195 | |||
196 | dmultu $nlo,$m1 | ||
197 | daddu $lo1,$lo0 | ||
198 | sltu AT,$lo1,$lo0 | ||
199 | daddu $hi1,AT | ||
200 | mflo $nlo | ||
201 | mfhi $nhi | ||
202 | |||
203 | move $tp,sp | ||
204 | li $j,16 | ||
205 | ld $tj,8($tp) | ||
206 | .align 4 | ||
207 | .Linner: | ||
208 | .set noreorder | ||
209 | PTR_ADD $aj,$ap,$j | ||
210 | ld $aj,($aj) | ||
211 | PTR_ADD $nj,$np,$j | ||
212 | ld $nj,($nj) | ||
213 | |||
214 | dmultu $aj,$bi | ||
215 | daddu $lo0,$alo,$hi0 | ||
216 | daddu $lo1,$nlo,$hi1 | ||
217 | sltu AT,$lo0,$hi0 | ||
218 | sltu s7,$lo1,$hi1 | ||
219 | daddu $hi0,$ahi,AT | ||
220 | daddu $hi1,$nhi,s7 | ||
221 | mflo $alo | ||
222 | mfhi $ahi | ||
223 | |||
224 | daddu $lo0,$tj | ||
225 | addu $j,8 | ||
226 | dmultu $nj,$m1 | ||
227 | sltu AT,$lo0,$tj | ||
228 | daddu $lo1,$lo0 | ||
229 | daddu $hi0,AT | ||
230 | sltu s7,$lo1,$lo0 | ||
231 | ld $tj,16($tp) | ||
232 | daddu $hi1,s7 | ||
233 | sltu AT,$j,$num | ||
234 | mflo $nlo | ||
235 | mfhi $nhi | ||
236 | sd $lo1,($tp) | ||
237 | bnez AT,.Linner | ||
238 | PTR_ADD $tp,8 | ||
239 | .set reorder | ||
240 | |||
241 | daddu $lo0,$alo,$hi0 | ||
242 | sltu AT,$lo0,$hi0 | ||
243 | daddu $hi0,$ahi,AT | ||
244 | daddu $lo0,$tj | ||
245 | sltu s7,$lo0,$tj | ||
246 | daddu $hi0,s7 | ||
247 | |||
248 | ld $tj,16($tp) | ||
249 | daddu $lo1,$nlo,$hi1 | ||
250 | sltu AT,$lo1,$hi1 | ||
251 | daddu $hi1,$nhi,AT | ||
252 | daddu $lo1,$lo0 | ||
253 | sltu s7,$lo1,$lo0 | ||
254 | daddu $hi1,s7 | ||
255 | sd $lo1,($tp) | ||
256 | |||
257 | daddu $lo1,$hi1,$hi0 | ||
258 | sltu $hi1,$lo1,$hi0 | ||
259 | daddu $lo1,$tj | ||
260 | sltu AT,$lo1,$tj | ||
261 | daddu $hi1,AT | ||
262 | sd $lo1,8($tp) | ||
263 | sd $hi1,16($tp) | ||
264 | |||
265 | addu $i,8 | ||
266 | sltu s7,$i,$num | ||
267 | bnez s7,.Louter | ||
268 | |||
269 | .set noreorder | ||
270 | PTR_ADD $tj,sp,$num # &tp[num] | ||
271 | move $tp,sp | ||
272 | move $ap,sp | ||
273 | li $hi0,0 # clear borrow bit | ||
274 | |||
275 | .align 4 | ||
276 | .Lsub: ld $lo0,($tp) | ||
277 | ld $lo1,($np) | ||
278 | PTR_ADD $tp,8 | ||
279 | PTR_ADD $np,8 | ||
280 | dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] | ||
281 | sgtu AT,$lo1,$lo0 | ||
282 | dsubu $lo0,$lo1,$hi0 | ||
283 | sgtu $hi0,$lo0,$lo1 | ||
284 | sd $lo0,($rp) | ||
285 | or $hi0,AT | ||
286 | sltu AT,$tp,$tj | ||
287 | bnez AT,.Lsub | ||
288 | PTR_ADD $rp,8 | ||
289 | |||
290 | dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit | ||
291 | move $tp,sp | ||
292 | PTR_SUB $rp,$num # restore rp | ||
293 | not $hi1,$hi0 | ||
294 | |||
295 | and $ap,$hi0,sp | ||
296 | and $bp,$hi1,$rp | ||
297 | or $ap,$ap,$bp # ap=borrow?tp:rp | ||
298 | |||
299 | .align 4 | ||
300 | .Lcopy: ld $aj,($ap) | ||
301 | PTR_ADD $ap,8 | ||
302 | PTR_ADD $tp,8 | ||
303 | sd zero,-8($tp) | ||
304 | sltu AT,$tp,$tj | ||
305 | sd $aj,($rp) | ||
306 | bnez AT,.Lcopy | ||
307 | PTR_ADD $rp,8 | ||
308 | |||
309 | ld s0,0($fp) | ||
310 | ld s1,8($fp) | ||
311 | ld s2,16($fp) | ||
312 | ld s3,24($fp) | ||
313 | ld s4,32($fp) | ||
314 | ld s5,40($fp) | ||
315 | ld s6,48($fp) | ||
316 | ld s7,56($fp) | ||
317 | li v0,1 | ||
318 | jr ra | ||
319 | PTR_ADD sp,$fp,64 | ||
320 | .set reorder | ||
321 | END(bn_mul_mont) | ||
322 | .rdata | ||
323 | .asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>" | ||
324 | ___ | ||
325 | |||
# Write the generated assembly to standard output.
326 | print $code; | ||
327 | close STDOUT; | ||
diff --git a/src/lib/libcrypto/bn/asm/mips3.s b/src/lib/libcrypto/bn/asm/mips3.s deleted file mode 100644 index dca4105c7d..0000000000 --- a/src/lib/libcrypto/bn/asm/mips3.s +++ /dev/null | |||
@@ -1,2201 +0,0 @@ | |||
1 | .rdata | ||
2 | .asciiz "mips3.s, Version 1.1" | ||
3 | .asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | ||
4 | |||
5 | /* | ||
6 | * ==================================================================== | ||
7 | * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
8 | * project. | ||
9 | * | ||
10 | * Rights for redistribution and usage in source and binary forms are | ||
11 | * granted according to the OpenSSL license. Warranty of any kind is | ||
12 | * disclaimed. | ||
13 | * ==================================================================== | ||
14 | */ | ||
15 | |||
16 | /* | ||
17 | * This is my modest contribution to the OpenSSL project (see | ||
18 | * http://www.openssl.org/ for more information about it) and is | ||
19 | * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c | ||
20 | * module. For updates see http://fy.chalmers.se/~appro/hpe/. | ||
21 | * | ||
22 | * The module is designed to work with either of the "new" MIPS ABI(5), | ||
23 | * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under | ||
24 | * IRIX 5.x not only because it doesn't support new ABIs but also | ||
25 | * because 5.x kernels put R4x00 CPU into 32-bit mode and all those | ||
26 | * 64-bit instructions (daddu, dmultu, etc.) found below gonna only | ||
27 | * cause illegal instruction exception:-( | ||
28 | * | ||
29 | * In addition the code depends on preprocessor flags set up by MIPSpro | ||
30 | * compiler driver (either as or cc) and therefore (probably?) can't be | ||
31 | * compiled by the GNU assembler. GNU C driver manages fine though... | ||
32 | * I mean as long as -mmips-as is specified or is the default option, | ||
33 | * because then it simply invokes /usr/bin/as which in turn takes | ||
34 | * perfect care of the preprocessor definitions. Another neat feature | ||
35 | * offered by the MIPSpro assembler is an optimization pass. This gave | ||
36 | * me the opportunity to have the code looking more regular as all those | ||
37 | * architecture dependent instruction rescheduling details were left to | ||
38 | * the assembler. Cool, huh? | ||
39 | * | ||
40 | * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | ||
41 | * goes way over 3 times faster! | ||
42 | * | ||
43 | * <appro@fy.chalmers.se> | ||
44 | */ | ||
45 | #include <asm.h> | ||
46 | #include <regdef.h> | ||
47 | |||
/*
 * MOVNZ(cond,dst,src): copy src into dst when cond is non-zero, leave dst
 * untouched otherwise.  With _MIPS_ISA>=4 this is a single movn.  On older
 * ISAs it is emulated with a branch-likely to the next instruction (.+8)
 * whose delay slot holds the move, so the move only takes effect when the
 * branch condition (cond != 0) holds.
 */
48 | #if _MIPS_ISA>=4 | ||
49 | #define MOVNZ(cond,dst,src) \ | ||
50 | movn dst,src,cond | ||
51 | #else | ||
52 | #define MOVNZ(cond,dst,src) \ | ||
53 | .set noreorder; \ | ||
54 | bnezl cond,.+8; \ | ||
55 | move dst,src; \ | ||
56 | .set reorder | ||
57 | #endif | ||
58 | |||
59 | .text | ||
60 | |||
61 | .set noat | ||
62 | .set reorder | ||
63 | |||
/*
 * MINUS4 names register v1.  The word routines below load it with -4 and
 * AND it with the word count to test whether at least four words remain.
 * It is #undef'ed after bn_sub_words.
 */
64 | #define MINUS4 v1 | ||
65 | |||
66 | .align 5 | ||
/*
 * bn_mul_add_words: for each of a2 64-bit words, multiply the word read
 * through a1 by a3, add the word already stored at a0 plus the running
 * carry, write the low 64 bits back through a0 and carry the rest forward.
 * The carry is kept in v0 and is the return value; v0 is 0 when a2 <= 0.
 * The main loop handles four words per pass; the tail handles the
 * remaining one to three words.
 */
67 | LEAF(bn_mul_add_words) | ||
68 | .set noreorder | ||
69 | bgtzl a2,.L_bn_mul_add_words_proceed | ||
70 | ld t0,0(a1) | ||
71 | jr ra | ||
72 | move v0,zero | ||
73 | .set reorder | ||
74 | |||
75 | .L_bn_mul_add_words_proceed: | ||
76 | li MINUS4,-4 | ||
77 | and ta0,a2,MINUS4 | ||
78 | move v0,zero | ||
79 | beqz ta0,.L_bn_mul_add_words_tail | ||
80 | |||
/* Unrolled loop: four words per iteration; t0 is preloaded with 0(a1). */
81 | .L_bn_mul_add_words_loop: | ||
82 | dmultu t0,a3 | ||
83 | ld t1,0(a0) | ||
84 | ld t2,8(a1) | ||
85 | ld t3,8(a0) | ||
86 | ld ta0,16(a1) | ||
87 | ld ta1,16(a0) | ||
88 | daddu t1,v0 | ||
89 | sltu v0,t1,v0 /* All manuals say it "compares 32-bit | ||
90 | * values", but it seems to work fine | ||
91 | * even on 64-bit registers. */ | ||
92 | mflo AT | ||
93 | mfhi t0 | ||
94 | daddu t1,AT | ||
95 | daddu v0,t0 | ||
96 | sltu AT,t1,AT | ||
97 | sd t1,0(a0) | ||
98 | daddu v0,AT | ||
99 | |||
100 | dmultu t2,a3 | ||
101 | ld ta2,24(a1) | ||
102 | ld ta3,24(a0) | ||
103 | daddu t3,v0 | ||
104 | sltu v0,t3,v0 | ||
105 | mflo AT | ||
106 | mfhi t2 | ||
107 | daddu t3,AT | ||
108 | daddu v0,t2 | ||
109 | sltu AT,t3,AT | ||
110 | sd t3,8(a0) | ||
111 | daddu v0,AT | ||
112 | |||
113 | dmultu ta0,a3 | ||
114 | subu a2,4 | ||
115 | PTR_ADD a0,32 | ||
116 | PTR_ADD a1,32 | ||
117 | daddu ta1,v0 | ||
118 | sltu v0,ta1,v0 | ||
119 | mflo AT | ||
120 | mfhi ta0 | ||
121 | daddu ta1,AT | ||
122 | daddu v0,ta0 | ||
123 | sltu AT,ta1,AT | ||
124 | sd ta1,-16(a0) | ||
125 | daddu v0,AT | ||
126 | |||
127 | |||
128 | dmultu ta2,a3 | ||
129 | and ta0,a2,MINUS4 | ||
130 | daddu ta3,v0 | ||
131 | sltu v0,ta3,v0 | ||
132 | mflo AT | ||
133 | mfhi ta2 | ||
134 | daddu ta3,AT | ||
135 | daddu v0,ta2 | ||
136 | sltu AT,ta3,AT | ||
137 | sd ta3,-8(a0) | ||
138 | daddu v0,AT | ||
139 | .set noreorder | ||
140 | bgtzl ta0,.L_bn_mul_add_words_loop | ||
141 | ld t0,0(a1) | ||
142 | |||
143 | bnezl a2,.L_bn_mul_add_words_tail | ||
144 | ld t0,0(a1) | ||
145 | .set reorder | ||
146 | |||
147 | .L_bn_mul_add_words_return: | ||
148 | jr ra | ||
149 | |||
/* Tail: at most three leftover words, handled one at a time. */
150 | .L_bn_mul_add_words_tail: | ||
151 | dmultu t0,a3 | ||
152 | ld t1,0(a0) | ||
153 | subu a2,1 | ||
154 | daddu t1,v0 | ||
155 | sltu v0,t1,v0 | ||
156 | mflo AT | ||
157 | mfhi t0 | ||
158 | daddu t1,AT | ||
159 | daddu v0,t0 | ||
160 | sltu AT,t1,AT | ||
161 | sd t1,0(a0) | ||
162 | daddu v0,AT | ||
163 | beqz a2,.L_bn_mul_add_words_return | ||
164 | |||
165 | ld t0,8(a1) | ||
166 | dmultu t0,a3 | ||
167 | ld t1,8(a0) | ||
168 | subu a2,1 | ||
169 | daddu t1,v0 | ||
170 | sltu v0,t1,v0 | ||
171 | mflo AT | ||
172 | mfhi t0 | ||
173 | daddu t1,AT | ||
174 | daddu v0,t0 | ||
175 | sltu AT,t1,AT | ||
176 | sd t1,8(a0) | ||
177 | daddu v0,AT | ||
178 | beqz a2,.L_bn_mul_add_words_return | ||
179 | |||
180 | ld t0,16(a1) | ||
181 | dmultu t0,a3 | ||
182 | ld t1,16(a0) | ||
183 | daddu t1,v0 | ||
184 | sltu v0,t1,v0 | ||
185 | mflo AT | ||
186 | mfhi t0 | ||
187 | daddu t1,AT | ||
188 | daddu v0,t0 | ||
189 | sltu AT,t1,AT | ||
190 | sd t1,16(a0) | ||
191 | daddu v0,AT | ||
192 | jr ra | ||
193 | END(bn_mul_add_words) | ||
194 | |||
195 | .align 5 | ||
/*
 * bn_mul_words: for each of a2 64-bit words, multiply the word read
 * through a1 by a3, add the running carry, store the low 64 bits through
 * a0 and carry the high half (plus the add's carry-out) forward.  Unlike
 * bn_mul_add_words, the previous contents of the destination are not read.
 * The carry lives in v0 and is the return value; v0 is 0 when a2 <= 0.
 */
196 | LEAF(bn_mul_words) | ||
197 | .set noreorder | ||
198 | bgtzl a2,.L_bn_mul_words_proceed | ||
199 | ld t0,0(a1) | ||
200 | jr ra | ||
201 | move v0,zero | ||
202 | .set reorder | ||
203 | |||
204 | .L_bn_mul_words_proceed: | ||
205 | li MINUS4,-4 | ||
206 | and ta0,a2,MINUS4 | ||
207 | move v0,zero | ||
208 | beqz ta0,.L_bn_mul_words_tail | ||
209 | |||
/* Unrolled loop: four words per iteration; t0 is preloaded with 0(a1). */
210 | .L_bn_mul_words_loop: | ||
211 | dmultu t0,a3 | ||
212 | ld t2,8(a1) | ||
213 | ld ta0,16(a1) | ||
214 | ld ta2,24(a1) | ||
215 | mflo AT | ||
216 | mfhi t0 | ||
217 | daddu v0,AT | ||
218 | sltu t1,v0,AT | ||
219 | sd v0,0(a0) | ||
220 | daddu v0,t1,t0 | ||
221 | |||
222 | dmultu t2,a3 | ||
223 | subu a2,4 | ||
224 | PTR_ADD a0,32 | ||
225 | PTR_ADD a1,32 | ||
226 | mflo AT | ||
227 | mfhi t2 | ||
228 | daddu v0,AT | ||
229 | sltu t3,v0,AT | ||
230 | sd v0,-24(a0) | ||
231 | daddu v0,t3,t2 | ||
232 | |||
233 | dmultu ta0,a3 | ||
234 | mflo AT | ||
235 | mfhi ta0 | ||
236 | daddu v0,AT | ||
237 | sltu ta1,v0,AT | ||
238 | sd v0,-16(a0) | ||
239 | daddu v0,ta1,ta0 | ||
240 | |||
241 | |||
242 | dmultu ta2,a3 | ||
243 | and ta0,a2,MINUS4 | ||
244 | mflo AT | ||
245 | mfhi ta2 | ||
246 | daddu v0,AT | ||
247 | sltu ta3,v0,AT | ||
248 | sd v0,-8(a0) | ||
249 | daddu v0,ta3,ta2 | ||
250 | .set noreorder | ||
251 | bgtzl ta0,.L_bn_mul_words_loop | ||
252 | ld t0,0(a1) | ||
253 | |||
254 | bnezl a2,.L_bn_mul_words_tail | ||
255 | ld t0,0(a1) | ||
256 | .set reorder | ||
257 | |||
258 | .L_bn_mul_words_return: | ||
259 | jr ra | ||
260 | |||
/* Tail: at most three leftover words, handled one at a time. */
261 | .L_bn_mul_words_tail: | ||
262 | dmultu t0,a3 | ||
263 | subu a2,1 | ||
264 | mflo AT | ||
265 | mfhi t0 | ||
266 | daddu v0,AT | ||
267 | sltu t1,v0,AT | ||
268 | sd v0,0(a0) | ||
269 | daddu v0,t1,t0 | ||
270 | beqz a2,.L_bn_mul_words_return | ||
271 | |||
272 | ld t0,8(a1) | ||
273 | dmultu t0,a3 | ||
274 | subu a2,1 | ||
275 | mflo AT | ||
276 | mfhi t0 | ||
277 | daddu v0,AT | ||
278 | sltu t1,v0,AT | ||
279 | sd v0,8(a0) | ||
280 | daddu v0,t1,t0 | ||
281 | beqz a2,.L_bn_mul_words_return | ||
282 | |||
283 | ld t0,16(a1) | ||
284 | dmultu t0,a3 | ||
285 | mflo AT | ||
286 | mfhi t0 | ||
287 | daddu v0,AT | ||
288 | sltu t1,v0,AT | ||
289 | sd v0,16(a0) | ||
290 | daddu v0,t1,t0 | ||
291 | jr ra | ||
292 | END(bn_mul_words) | ||
293 | |||
294 | .align 5 | ||
/*
 * bn_sqr_words: for each of a2 64-bit words read through a1, store the
 * 128-bit square as two consecutive 64-bit words (low half first) through
 * a0, so the destination advances 16 bytes per input word.  No carry is
 * propagated between words.  v0 is cleared on entry and never modified
 * afterwards, so every return path yields 0.
 */
295 | LEAF(bn_sqr_words) | ||
296 | .set noreorder | ||
297 | bgtzl a2,.L_bn_sqr_words_proceed | ||
298 | ld t0,0(a1) | ||
299 | jr ra | ||
300 | move v0,zero | ||
301 | .set reorder | ||
302 | |||
303 | .L_bn_sqr_words_proceed: | ||
304 | li MINUS4,-4 | ||
305 | and ta0,a2,MINUS4 | ||
306 | move v0,zero | ||
307 | beqz ta0,.L_bn_sqr_words_tail | ||
308 | |||
/* Unrolled loop: four input words (eight output words) per iteration. */
309 | .L_bn_sqr_words_loop: | ||
310 | dmultu t0,t0 | ||
311 | ld t2,8(a1) | ||
312 | ld ta0,16(a1) | ||
313 | ld ta2,24(a1) | ||
314 | mflo t1 | ||
315 | mfhi t0 | ||
316 | sd t1,0(a0) | ||
317 | sd t0,8(a0) | ||
318 | |||
319 | dmultu t2,t2 | ||
320 | subu a2,4 | ||
321 | PTR_ADD a0,64 | ||
322 | PTR_ADD a1,32 | ||
323 | mflo t3 | ||
324 | mfhi t2 | ||
325 | sd t3,-48(a0) | ||
326 | sd t2,-40(a0) | ||
327 | |||
328 | dmultu ta0,ta0 | ||
329 | mflo ta1 | ||
330 | mfhi ta0 | ||
331 | sd ta1,-32(a0) | ||
332 | sd ta0,-24(a0) | ||
333 | |||
334 | |||
335 | dmultu ta2,ta2 | ||
336 | and ta0,a2,MINUS4 | ||
337 | mflo ta3 | ||
338 | mfhi ta2 | ||
339 | sd ta3,-16(a0) | ||
340 | sd ta2,-8(a0) | ||
341 | |||
342 | .set noreorder | ||
343 | bgtzl ta0,.L_bn_sqr_words_loop | ||
344 | ld t0,0(a1) | ||
345 | |||
346 | bnezl a2,.L_bn_sqr_words_tail | ||
347 | ld t0,0(a1) | ||
348 | .set reorder | ||
349 | |||
350 | .L_bn_sqr_words_return: | ||
351 | move v0,zero | ||
352 | jr ra | ||
353 | |||
/* Tail: at most three leftover input words, handled one at a time. */
354 | .L_bn_sqr_words_tail: | ||
355 | dmultu t0,t0 | ||
356 | subu a2,1 | ||
357 | mflo t1 | ||
358 | mfhi t0 | ||
359 | sd t1,0(a0) | ||
360 | sd t0,8(a0) | ||
361 | beqz a2,.L_bn_sqr_words_return | ||
362 | |||
363 | ld t0,8(a1) | ||
364 | dmultu t0,t0 | ||
365 | subu a2,1 | ||
366 | mflo t1 | ||
367 | mfhi t0 | ||
368 | sd t1,16(a0) | ||
369 | sd t0,24(a0) | ||
370 | beqz a2,.L_bn_sqr_words_return | ||
371 | |||
372 | ld t0,16(a1) | ||
373 | dmultu t0,t0 | ||
374 | mflo t1 | ||
375 | mfhi t0 | ||
376 | sd t1,32(a0) | ||
377 | sd t0,40(a0) | ||
378 | jr ra | ||
379 | END(bn_sqr_words) | ||
380 | |||
381 | .align 5 | ||
/*
 * bn_add_words: add a3 64-bit words read through a1 and a2 pairwise, with
 * carry propagation, storing each sum through a0.  The carry is kept in v0
 * and is the return value; v0 is 0 when a3 <= 0.  Each step forms two
 * partial carries (one from adding the two inputs, one from adding the
 * incoming carry) and sums them into the outgoing carry.
 */
382 | LEAF(bn_add_words) | ||
383 | .set noreorder | ||
384 | bgtzl a3,.L_bn_add_words_proceed | ||
385 | ld t0,0(a1) | ||
386 | jr ra | ||
387 | move v0,zero | ||
388 | .set reorder | ||
389 | |||
390 | .L_bn_add_words_proceed: | ||
391 | li MINUS4,-4 | ||
392 | and AT,a3,MINUS4 | ||
393 | move v0,zero | ||
394 | beqz AT,.L_bn_add_words_tail | ||
395 | |||
/* Unrolled loop: four words per iteration; t0 is preloaded with 0(a1). */
396 | .L_bn_add_words_loop: | ||
397 | ld ta0,0(a2) | ||
398 | subu a3,4 | ||
399 | ld t1,8(a1) | ||
400 | and AT,a3,MINUS4 | ||
401 | ld t2,16(a1) | ||
402 | PTR_ADD a2,32 | ||
403 | ld t3,24(a1) | ||
404 | PTR_ADD a0,32 | ||
405 | ld ta1,-24(a2) | ||
406 | PTR_ADD a1,32 | ||
407 | ld ta2,-16(a2) | ||
408 | ld ta3,-8(a2) | ||
409 | daddu ta0,t0 | ||
410 | sltu t8,ta0,t0 | ||
411 | daddu t0,ta0,v0 | ||
412 | sltu v0,t0,ta0 | ||
413 | sd t0,-32(a0) | ||
414 | daddu v0,t8 | ||
415 | |||
416 | daddu ta1,t1 | ||
417 | sltu t9,ta1,t1 | ||
418 | daddu t1,ta1,v0 | ||
419 | sltu v0,t1,ta1 | ||
420 | sd t1,-24(a0) | ||
421 | daddu v0,t9 | ||
422 | |||
423 | daddu ta2,t2 | ||
424 | sltu t8,ta2,t2 | ||
425 | daddu t2,ta2,v0 | ||
426 | sltu v0,t2,ta2 | ||
427 | sd t2,-16(a0) | ||
428 | daddu v0,t8 | ||
429 | |||
430 | daddu ta3,t3 | ||
431 | sltu t9,ta3,t3 | ||
432 | daddu t3,ta3,v0 | ||
433 | sltu v0,t3,ta3 | ||
434 | sd t3,-8(a0) | ||
435 | daddu v0,t9 | ||
436 | |||
437 | .set noreorder | ||
438 | bgtzl AT,.L_bn_add_words_loop | ||
439 | ld t0,0(a1) | ||
440 | |||
441 | bnezl a3,.L_bn_add_words_tail | ||
442 | ld t0,0(a1) | ||
443 | .set reorder | ||
444 | |||
445 | .L_bn_add_words_return: | ||
446 | jr ra | ||
447 | |||
/* Tail: at most three leftover words, handled one at a time. */
448 | .L_bn_add_words_tail: | ||
449 | ld ta0,0(a2) | ||
450 | daddu ta0,t0 | ||
451 | subu a3,1 | ||
452 | sltu t8,ta0,t0 | ||
453 | daddu t0,ta0,v0 | ||
454 | sltu v0,t0,ta0 | ||
455 | sd t0,0(a0) | ||
456 | daddu v0,t8 | ||
457 | beqz a3,.L_bn_add_words_return | ||
458 | |||
459 | ld t1,8(a1) | ||
460 | ld ta1,8(a2) | ||
461 | daddu ta1,t1 | ||
462 | subu a3,1 | ||
463 | sltu t9,ta1,t1 | ||
464 | daddu t1,ta1,v0 | ||
465 | sltu v0,t1,ta1 | ||
466 | sd t1,8(a0) | ||
467 | daddu v0,t9 | ||
468 | beqz a3,.L_bn_add_words_return | ||
469 | |||
470 | ld t2,16(a1) | ||
471 | ld ta2,16(a2) | ||
472 | daddu ta2,t2 | ||
473 | sltu t8,ta2,t2 | ||
474 | daddu t2,ta2,v0 | ||
475 | sltu v0,t2,ta2 | ||
476 | sd t2,16(a0) | ||
477 | daddu v0,t8 | ||
478 | jr ra | ||
479 | END(bn_add_words) | ||
480 | |||
481 | .align 5 | ||
/*
 * bn_sub_words: subtract a3 64-bit words read through a2 from the words
 * read through a1, with borrow propagation, storing each difference
 * through a0.  The borrow is kept in v0 and is the return value; v0 is 0
 * when a3 <= 0.
 *
 * Per word: the sltu result records whether the a1 word is below the a2
 * word, the first dsubu forms the raw difference and the second subtracts
 * the incoming borrow.  MOVNZ then replaces the borrow with the sltu
 * result only when the raw difference is non-zero; when it is zero the
 * incoming borrow is left in place.
 */
482 | LEAF(bn_sub_words) | ||
483 | .set noreorder | ||
484 | bgtzl a3,.L_bn_sub_words_proceed | ||
485 | ld t0,0(a1) | ||
486 | jr ra | ||
487 | move v0,zero | ||
488 | .set reorder | ||
489 | |||
490 | .L_bn_sub_words_proceed: | ||
491 | li MINUS4,-4 | ||
492 | and AT,a3,MINUS4 | ||
493 | move v0,zero | ||
494 | beqz AT,.L_bn_sub_words_tail | ||
495 | |||
/* Unrolled loop: four words per iteration; t0 is preloaded with 0(a1). */
496 | .L_bn_sub_words_loop: | ||
497 | ld ta0,0(a2) | ||
498 | subu a3,4 | ||
499 | ld t1,8(a1) | ||
500 | and AT,a3,MINUS4 | ||
501 | ld t2,16(a1) | ||
502 | PTR_ADD a2,32 | ||
503 | ld t3,24(a1) | ||
504 | PTR_ADD a0,32 | ||
505 | ld ta1,-24(a2) | ||
506 | PTR_ADD a1,32 | ||
507 | ld ta2,-16(a2) | ||
508 | ld ta3,-8(a2) | ||
509 | sltu t8,t0,ta0 | ||
510 | dsubu t0,ta0 | ||
511 | dsubu ta0,t0,v0 | ||
512 | sd ta0,-32(a0) | ||
513 | MOVNZ (t0,v0,t8) | ||
514 | |||
515 | sltu t9,t1,ta1 | ||
516 | dsubu t1,ta1 | ||
517 | dsubu ta1,t1,v0 | ||
518 | sd ta1,-24(a0) | ||
519 | MOVNZ (t1,v0,t9) | ||
520 | |||
521 | |||
522 | sltu t8,t2,ta2 | ||
523 | dsubu t2,ta2 | ||
524 | dsubu ta2,t2,v0 | ||
525 | sd ta2,-16(a0) | ||
526 | MOVNZ (t2,v0,t8) | ||
527 | |||
528 | sltu t9,t3,ta3 | ||
529 | dsubu t3,ta3 | ||
530 | dsubu ta3,t3,v0 | ||
531 | sd ta3,-8(a0) | ||
532 | MOVNZ (t3,v0,t9) | ||
533 | |||
534 | .set noreorder | ||
535 | bgtzl AT,.L_bn_sub_words_loop | ||
536 | ld t0,0(a1) | ||
537 | |||
538 | bnezl a3,.L_bn_sub_words_tail | ||
539 | ld t0,0(a1) | ||
540 | .set reorder | ||
541 | |||
542 | .L_bn_sub_words_return: | ||
543 | jr ra | ||
544 | |||
/* Tail: at most three leftover words, handled one at a time. */
545 | .L_bn_sub_words_tail: | ||
546 | ld ta0,0(a2) | ||
547 | subu a3,1 | ||
548 | sltu t8,t0,ta0 | ||
549 | dsubu t0,ta0 | ||
550 | dsubu ta0,t0,v0 | ||
551 | MOVNZ (t0,v0,t8) | ||
552 | sd ta0,0(a0) | ||
553 | beqz a3,.L_bn_sub_words_return | ||
554 | |||
555 | ld t1,8(a1) | ||
556 | subu a3,1 | ||
557 | ld ta1,8(a2) | ||
558 | sltu t9,t1,ta1 | ||
559 | dsubu t1,ta1 | ||
560 | dsubu ta1,t1,v0 | ||
561 | MOVNZ (t1,v0,t9) | ||
562 | sd ta1,8(a0) | ||
563 | beqz a3,.L_bn_sub_words_return | ||
564 | |||
565 | ld t2,16(a1) | ||
566 | ld ta2,16(a2) | ||
567 | sltu t8,t2,ta2 | ||
568 | dsubu t2,ta2 | ||
569 | dsubu ta2,t2,v0 | ||
570 | MOVNZ (t2,v0,t8) | ||
571 | sd ta2,16(a0) | ||
572 | jr ra | ||
573 | END(bn_sub_words) | ||
574 | |||
575 | #undef MINUS4 | ||
576 | |||
577 | .align 5 | ||
/*
 * bn_div_3_words: a0 points at the top word of a multi-word value; the
 * words at 0(a0), -8(a0) and -16(a0) are read.  a1 and a2 are two divisor
 * words (a2 is the one passed on to bn_div_words).
 *
 * If the word at 0(a0) equals a2 the routine returns -1 in v0 without
 * dividing.  Otherwise it calls bn_div_words on the top two words and a2,
 * then runs a correction loop that decrements the quotient estimate in v0
 * while comparing a1*v0 against the remainder left in v1 and the third
 * word.
 *
 * NOTE(review): the return address is parked in ta3 across the call and
 * the arguments in a3/ta2, which relies on bn_div_words not touching
 * those registers (see the in-line comment below).
 */
578 | LEAF(bn_div_3_words) | ||
579 | .set reorder | ||
580 | move a3,a0 /* we know that bn_div_words doesn't | ||
581 | * touch a3, ta2, ta3 and preserves a2 | ||
582 | * so that we can save two arguments | ||
583 | * and return address in registers | ||
584 | * instead of stack:-) | ||
585 | */ | ||
586 | ld a0,(a3) | ||
587 | move ta2,a1 | ||
588 | ld a1,-8(a3) | ||
589 | bne a0,a2,.L_bn_div_3_words_proceed | ||
590 | li v0,-1 | ||
591 | jr ra | ||
592 | .L_bn_div_3_words_proceed: | ||
593 | move ta3,ra | ||
594 | bal bn_div_words | ||
595 | move ra,ta3 | ||
596 | dmultu ta2,v0 | ||
597 | ld t2,-16(a3) | ||
598 | move ta0,zero | ||
599 | mfhi t1 | ||
600 | mflo t0 | ||
601 | sltu t8,t1,v1 | ||
602 | .L_bn_div_3_words_inner_loop: | ||
603 | bnez t8,.L_bn_div_3_words_inner_loop_done | ||
604 | sgeu AT,t2,t0 | ||
605 | seq t9,t1,v1 | ||
606 | and AT,t9 | ||
607 | sltu t3,t0,ta2 | ||
608 | daddu v1,a2 | ||
609 | dsubu t1,t3 | ||
610 | dsubu t0,ta2 | ||
611 | sltu t8,t1,v1 | ||
612 | sltu ta0,v1,a2 | ||
613 | or t8,ta0 | ||
614 | .set noreorder | ||
615 | beqzl AT,.L_bn_div_3_words_inner_loop | ||
616 | dsubu v0,1 | ||
617 | .set reorder | ||
618 | .L_bn_div_3_words_inner_loop_done: | ||
619 | jr ra | ||
620 | END(bn_div_3_words) | ||
621 | |||
622 | .align 5 | ||
/*
 * bn_div_words: divide the two-word value a0:a1 (a0 is the high word) by
 * a2.  Returns the 64-bit quotient in v0 and leaves the remainder in v1.
 *
 *  - a2 == 0: returns -1 in v0 immediately.
 *  - Otherwise a2 is shifted left until its top bit is set, counting the
 *    shift in t9; a0:a1 is shifted by the same amount.  If any of the bits
 *    shifted out of a0 are non-zero, 'break 6' is executed.
 *  - The quotient is then produced as two 32-bit halves.  Each half starts
 *    from an estimate (0xffffffff, or a0 divided by the top 32 bits of a2)
 *    and is decremented in the inner loop until the product a2*estimate no
 *    longer exceeds the current partial remainder.
 *  - Before returning, the remainder and a2 are shifted back right by t9.
 */
623 | LEAF(bn_div_words) | ||
624 | .set noreorder | ||
625 | bnezl a2,.L_bn_div_words_proceed | ||
626 | move v1,zero | ||
627 | jr ra | ||
628 | li v0,-1 /* I'd rather signal div-by-zero | ||
629 | * which can be done with 'break 7' */ | ||
630 | |||
/* Normalize: t9 starts at 0 (copied from v1) and counts the left shifts. */
631 | .L_bn_div_words_proceed: | ||
632 | bltz a2,.L_bn_div_words_body | ||
633 | move t9,v1 | ||
634 | dsll a2,1 | ||
635 | bgtz a2,.-4 | ||
636 | addu t9,1 | ||
637 | |||
638 | .set reorder | ||
639 | negu t1,t9 | ||
640 | li t2,-1 | ||
641 | dsll t2,t1 | ||
642 | and t2,a0 | ||
643 | dsrl AT,a1,t1 | ||
644 | .set noreorder | ||
645 | bnezl t2,.+8 | ||
646 | break 6 /* signal overflow */ | ||
647 | .set reorder | ||
648 | dsll a0,t9 | ||
649 | dsll a1,t9 | ||
650 | or a0,AT | ||
651 | |||
652 | #define QT ta0 | ||
653 | #define HH ta1 | ||
654 | #define DH v1 | ||
655 | .L_bn_div_words_body: | ||
656 | dsrl DH,a2,32 | ||
657 | sgeu AT,a0,a2 | ||
658 | .set noreorder | ||
659 | bnezl AT,.+8 | ||
660 | dsubu a0,a2 | ||
661 | .set reorder | ||
662 | |||
/* Upper 32 bits of the quotient. */
663 | li QT,-1 | ||
664 | dsrl HH,a0,32 | ||
665 | dsrl QT,32 /* q=0xffffffff */ | ||
666 | beq DH,HH,.L_bn_div_words_skip_div1 | ||
667 | ddivu zero,a0,DH | ||
668 | mflo QT | ||
669 | .L_bn_div_words_skip_div1: | ||
670 | dmultu a2,QT | ||
671 | dsll t3,a0,32 | ||
672 | dsrl AT,a1,32 | ||
673 | or t3,AT | ||
674 | mflo t0 | ||
675 | mfhi t1 | ||
676 | .L_bn_div_words_inner_loop1: | ||
677 | sltu t2,t3,t0 | ||
678 | seq t8,HH,t1 | ||
679 | sltu AT,HH,t1 | ||
680 | and t2,t8 | ||
681 | sltu v0,t0,a2 | ||
682 | or AT,t2 | ||
683 | .set noreorder | ||
684 | beqz AT,.L_bn_div_words_inner_loop1_done | ||
685 | dsubu t1,v0 | ||
686 | dsubu t0,a2 | ||
687 | b .L_bn_div_words_inner_loop1 | ||
688 | dsubu QT,1 | ||
689 | .set reorder | ||
690 | .L_bn_div_words_inner_loop1_done: | ||
691 | |||
692 | dsll a1,32 | ||
693 | dsubu a0,t3,t0 | ||
694 | dsll v0,QT,32 | ||
695 | |||
/* Lower 32 bits of the quotient; same procedure on the new remainder. */
696 | li QT,-1 | ||
697 | dsrl HH,a0,32 | ||
698 | dsrl QT,32 /* q=0xffffffff */ | ||
699 | beq DH,HH,.L_bn_div_words_skip_div2 | ||
700 | ddivu zero,a0,DH | ||
701 | mflo QT | ||
702 | .L_bn_div_words_skip_div2: | ||
703 | #undef DH | ||
704 | dmultu a2,QT | ||
705 | dsll t3,a0,32 | ||
706 | dsrl AT,a1,32 | ||
707 | or t3,AT | ||
708 | mflo t0 | ||
709 | mfhi t1 | ||
710 | .L_bn_div_words_inner_loop2: | ||
711 | sltu t2,t3,t0 | ||
712 | seq t8,HH,t1 | ||
713 | sltu AT,HH,t1 | ||
714 | and t2,t8 | ||
715 | sltu v1,t0,a2 | ||
716 | or AT,t2 | ||
717 | .set noreorder | ||
718 | beqz AT,.L_bn_div_words_inner_loop2_done | ||
719 | dsubu t1,v1 | ||
720 | dsubu t0,a2 | ||
721 | b .L_bn_div_words_inner_loop2 | ||
722 | dsubu QT,1 | ||
723 | .set reorder | ||
724 | .L_bn_div_words_inner_loop2_done: | ||
725 | #undef HH | ||
726 | |||
727 | dsubu a0,t3,t0 | ||
728 | or v0,QT | ||
729 | dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ | ||
730 | dsrl a2,t9 /* restore a2 */ | ||
731 | jr ra | ||
732 | #undef QT | ||
733 | END(bn_div_words) | ||
734 | |||
/*
 * Register aliases for the comba multiplication code that follows:
 *   a_0..a_7, b_0..b_7  the loaded words of the two inputs;
 *   t_1, t_2            low and high halves of the current product;
 *   c_1, c_2, c_3       the three column accumulators.
 * a_4..a_6 and b_4..b_6 are placed in s0-s5, and a_7/b_7 reuse the input
 * pointer registers a1/a2 (see the in-line notes).  FRAME_SIZE is the
 * number of bytes the routine subtracts from sp on entry.
 */
735 | #define a_0 t0 | ||
736 | #define a_1 t1 | ||
737 | #define a_2 t2 | ||
738 | #define a_3 t3 | ||
739 | #define b_0 ta0 | ||
740 | #define b_1 ta1 | ||
741 | #define b_2 ta2 | ||
742 | #define b_3 ta3 | ||
743 | |||
744 | #define a_4 s0 | ||
745 | #define a_5 s2 | ||
746 | #define a_6 s4 | ||
747 | #define a_7 a1 /* once we load a[7] we don't need a anymore */ | ||
748 | #define b_4 s1 | ||
749 | #define b_5 s3 | ||
750 | #define b_6 s5 | ||
751 | #define b_7 a2 /* once we load b[7] we don't need b anymore */ | ||
752 | |||
753 | #define t_1 t8 | ||
754 | #define t_2 t9 | ||
755 | |||
756 | #define c_1 v0 | ||
757 | #define c_2 v1 | ||
758 | #define c_3 a3 | ||
759 | |||
760 | #define FRAME_SIZE 48 | ||
761 | |||
762 | .align 5 | ||
763 | LEAF(bn_mul_comba8) | ||
764 | .set noreorder | ||
765 | PTR_SUB sp,FRAME_SIZE | ||
766 | .frame sp,64,ra | ||
767 | .set reorder | ||
768 | ld a_0,0(a1) /* If compiled with -mips3 option on | ||
769 | * R5000 box assembler barks on this | ||
770 | * line with "shouldn't have mult/div | ||
771 | * as last instruction in bb (R10K | ||
772 | * bug)" warning. If anybody out there | ||
773 | * has a clue about how to circumvent | ||
774 | * this do send me a note. | ||
775 | * <appro@fy.chalmers.se> | ||
776 | */ | ||
777 | ld b_0,0(a2) | ||
778 | ld a_1,8(a1) | ||
779 | ld a_2,16(a1) | ||
780 | ld a_3,24(a1) | ||
781 | ld b_1,8(a2) | ||
782 | ld b_2,16(a2) | ||
783 | ld b_3,24(a2) | ||
784 | dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
785 | sd s0,0(sp) | ||
786 | sd s1,8(sp) | ||
787 | sd s2,16(sp) | ||
788 | sd s3,24(sp) | ||
789 | sd s4,32(sp) | ||
790 | sd s5,40(sp) | ||
791 | mflo c_1 | ||
792 | mfhi c_2 | ||
793 | |||
794 | dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | ||
795 | ld a_4,32(a1) | ||
796 | ld a_5,40(a1) | ||
797 | ld a_6,48(a1) | ||
798 | ld a_7,56(a1) | ||
799 | ld b_4,32(a2) | ||
800 | ld b_5,40(a2) | ||
801 | mflo t_1 | ||
802 | mfhi t_2 | ||
803 | daddu c_2,t_1 | ||
804 | sltu AT,c_2,t_1 | ||
805 | daddu c_3,t_2,AT | ||
806 | dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | ||
807 | ld b_6,48(a2) | ||
808 | ld b_7,56(a2) | ||
809 | sd c_1,0(a0) /* r[0]=c1; */ | ||
810 | mflo t_1 | ||
811 | mfhi t_2 | ||
812 | daddu c_2,t_1 | ||
813 | sltu AT,c_2,t_1 | ||
814 | daddu t_2,AT | ||
815 | daddu c_3,t_2 | ||
816 | sltu c_1,c_3,t_2 | ||
817 | sd c_2,8(a0) /* r[1]=c2; */ | ||
818 | |||
819 | dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | ||
820 | mflo t_1 | ||
821 | mfhi t_2 | ||
822 | daddu c_3,t_1 | ||
823 | sltu AT,c_3,t_1 | ||
824 | daddu t_2,AT | ||
825 | daddu c_1,t_2 | ||
826 | dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
827 | mflo t_1 | ||
828 | mfhi t_2 | ||
829 | daddu c_3,t_1 | ||
830 | sltu AT,c_3,t_1 | ||
831 | daddu t_2,AT | ||
832 | daddu c_1,t_2 | ||
833 | sltu c_2,c_1,t_2 | ||
834 | dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | ||
835 | mflo t_1 | ||
836 | mfhi t_2 | ||
837 | daddu c_3,t_1 | ||
838 | sltu AT,c_3,t_1 | ||
839 | daddu t_2,AT | ||
840 | daddu c_1,t_2 | ||
841 | sltu AT,c_1,t_2 | ||
842 | daddu c_2,AT | ||
843 | sd c_3,16(a0) /* r[2]=c3; */ | ||
844 | |||
845 | dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | ||
846 | mflo t_1 | ||
847 | mfhi t_2 | ||
848 | daddu c_1,t_1 | ||
849 | sltu AT,c_1,t_1 | ||
850 | daddu t_2,AT | ||
851 | daddu c_2,t_2 | ||
852 | sltu c_3,c_2,t_2 | ||
853 | dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | ||
854 | mflo t_1 | ||
855 | mfhi t_2 | ||
856 | daddu c_1,t_1 | ||
857 | sltu AT,c_1,t_1 | ||
858 | daddu t_2,AT | ||
859 | daddu c_2,t_2 | ||
860 | sltu AT,c_2,t_2 | ||
861 | daddu c_3,AT | ||
862 | dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | ||
863 | mflo t_1 | ||
864 | mfhi t_2 | ||
865 | daddu c_1,t_1 | ||
866 | sltu AT,c_1,t_1 | ||
867 | daddu t_2,AT | ||
868 | daddu c_2,t_2 | ||
869 | sltu AT,c_2,t_2 | ||
870 | daddu c_3,AT | ||
871 | dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | ||
872 | mflo t_1 | ||
873 | mfhi t_2 | ||
874 | daddu c_1,t_1 | ||
875 | sltu AT,c_1,t_1 | ||
876 | daddu t_2,AT | ||
877 | daddu c_2,t_2 | ||
878 | sltu AT,c_2,t_2 | ||
879 | daddu c_3,AT | ||
880 | sd c_1,24(a0) /* r[3]=c1; */ | ||
881 | |||
882 | dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ | ||
883 | mflo t_1 | ||
884 | mfhi t_2 | ||
885 | daddu c_2,t_1 | ||
886 | sltu AT,c_2,t_1 | ||
887 | daddu t_2,AT | ||
888 | daddu c_3,t_2 | ||
889 | sltu c_1,c_3,t_2 | ||
890 | dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | ||
891 | mflo t_1 | ||
892 | mfhi t_2 | ||
893 | daddu c_2,t_1 | ||
894 | sltu AT,c_2,t_1 | ||
895 | daddu t_2,AT | ||
896 | daddu c_3,t_2 | ||
897 | sltu AT,c_3,t_2 | ||
898 | daddu c_1,AT | ||
899 | dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
900 | mflo t_1 | ||
901 | mfhi t_2 | ||
902 | daddu c_2,t_1 | ||
903 | sltu AT,c_2,t_1 | ||
904 | daddu t_2,AT | ||
905 | daddu c_3,t_2 | ||
906 | sltu AT,c_3,t_2 | ||
907 | daddu c_1,AT | ||
908 | dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | ||
909 | mflo t_1 | ||
910 | mfhi t_2 | ||
911 | daddu c_2,t_1 | ||
912 | sltu AT,c_2,t_1 | ||
913 | daddu t_2,AT | ||
914 | daddu c_3,t_2 | ||
915 | sltu AT,c_3,t_2 | ||
916 | daddu c_1,AT | ||
917 | dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ | ||
918 | mflo t_1 | ||
919 | mfhi t_2 | ||
920 | daddu c_2,t_1 | ||
921 | sltu AT,c_2,t_1 | ||
922 | daddu t_2,AT | ||
923 | daddu c_3,t_2 | ||
924 | sltu AT,c_3,t_2 | ||
925 | daddu c_1,AT | ||
926 | sd c_2,32(a0) /* r[4]=c2; */ | ||
927 | |||
928 | dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ | ||
929 | mflo t_1 | ||
930 | mfhi t_2 | ||
931 | daddu c_3,t_1 | ||
932 | sltu AT,c_3,t_1 | ||
933 | daddu t_2,AT | ||
934 | daddu c_1,t_2 | ||
935 | sltu c_2,c_1,t_2 | ||
936 | dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ | ||
937 | mflo t_1 | ||
938 | mfhi t_2 | ||
939 | daddu c_3,t_1 | ||
940 | sltu AT,c_3,t_1 | ||
941 | daddu t_2,AT | ||
942 | daddu c_1,t_2 | ||
943 | sltu AT,c_1,t_2 | ||
944 | daddu c_2,AT | ||
945 | dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | ||
946 | mflo t_1 | ||
947 | mfhi t_2 | ||
948 | daddu c_3,t_1 | ||
949 | sltu AT,c_3,t_1 | ||
950 | daddu t_2,AT | ||
951 | daddu c_1,t_2 | ||
952 | sltu AT,c_1,t_2 | ||
953 | daddu c_2,AT | ||
954 | dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | ||
955 | mflo t_1 | ||
956 | mfhi t_2 | ||
957 | daddu c_3,t_1 | ||
958 | sltu AT,c_3,t_1 | ||
959 | daddu t_2,AT | ||
960 | daddu c_1,t_2 | ||
961 | sltu AT,c_1,t_2 | ||
962 | daddu c_2,AT | ||
963 | dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ | ||
964 | mflo t_1 | ||
965 | mfhi t_2 | ||
966 | daddu c_3,t_1 | ||
967 | sltu AT,c_3,t_1 | ||
968 | daddu t_2,AT | ||
969 | daddu c_1,t_2 | ||
970 | sltu AT,c_1,t_2 | ||
971 | daddu c_2,AT | ||
972 | dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ | ||
973 | mflo t_1 | ||
974 | mfhi t_2 | ||
975 | daddu c_3,t_1 | ||
976 | sltu AT,c_3,t_1 | ||
977 | daddu t_2,AT | ||
978 | daddu c_1,t_2 | ||
979 | sltu AT,c_1,t_2 | ||
980 | daddu c_2,AT | ||
981 | sd c_3,40(a0) /* r[5]=c3; */ | ||
982 | |||
983 | dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ | ||
984 | mflo t_1 | ||
985 | mfhi t_2 | ||
986 | daddu c_1,t_1 | ||
987 | sltu AT,c_1,t_1 | ||
988 | daddu t_2,AT | ||
989 | daddu c_2,t_2 | ||
990 | sltu c_3,c_2,t_2 | ||
991 | dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ | ||
992 | mflo t_1 | ||
993 | mfhi t_2 | ||
994 | daddu c_1,t_1 | ||
995 | sltu AT,c_1,t_1 | ||
996 | daddu t_2,AT | ||
997 | daddu c_2,t_2 | ||
998 | sltu AT,c_2,t_2 | ||
999 | daddu c_3,AT | ||
1000 | dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ | ||
1001 | mflo t_1 | ||
1002 | mfhi t_2 | ||
1003 | daddu c_1,t_1 | ||
1004 | sltu AT,c_1,t_1 | ||
1005 | daddu t_2,AT | ||
1006 | daddu c_2,t_2 | ||
1007 | sltu AT,c_2,t_2 | ||
1008 | daddu c_3,AT | ||
1009 | dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
1010 | mflo t_1 | ||
1011 | mfhi t_2 | ||
1012 | daddu c_1,t_1 | ||
1013 | sltu AT,c_1,t_1 | ||
1014 | daddu t_2,AT | ||
1015 | daddu c_2,t_2 | ||
1016 | sltu AT,c_2,t_2 | ||
1017 | daddu c_3,AT | ||
1018 | dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ | ||
1019 | mflo t_1 | ||
1020 | mfhi t_2 | ||
1021 | daddu c_1,t_1 | ||
1022 | sltu AT,c_1,t_1 | ||
1023 | daddu t_2,AT | ||
1024 | daddu c_2,t_2 | ||
1025 | sltu AT,c_2,t_2 | ||
1026 | daddu c_3,AT | ||
1027 | dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ | ||
1028 | mflo t_1 | ||
1029 | mfhi t_2 | ||
1030 | daddu c_1,t_1 | ||
1031 | sltu AT,c_1,t_1 | ||
1032 | daddu t_2,AT | ||
1033 | daddu c_2,t_2 | ||
1034 | sltu AT,c_2,t_2 | ||
1035 | daddu c_3,AT | ||
1036 | dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ | ||
1037 | mflo t_1 | ||
1038 | mfhi t_2 | ||
1039 | daddu c_1,t_1 | ||
1040 | sltu AT,c_1,t_1 | ||
1041 | daddu t_2,AT | ||
1042 | daddu c_2,t_2 | ||
1043 | sltu AT,c_2,t_2 | ||
1044 | daddu c_3,AT | ||
1045 | sd c_1,48(a0) /* r[6]=c1; */ | ||
1046 | |||
1047 | dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ | ||
1048 | mflo t_1 | ||
1049 | mfhi t_2 | ||
1050 | daddu c_2,t_1 | ||
1051 | sltu AT,c_2,t_1 | ||
1052 | daddu t_2,AT | ||
1053 | daddu c_3,t_2 | ||
1054 | sltu c_1,c_3,t_2 | ||
1055 | dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ | ||
1056 | mflo t_1 | ||
1057 | mfhi t_2 | ||
1058 | daddu c_2,t_1 | ||
1059 | sltu AT,c_2,t_1 | ||
1060 | daddu t_2,AT | ||
1061 | daddu c_3,t_2 | ||
1062 | sltu AT,c_3,t_2 | ||
1063 | daddu c_1,AT | ||
1064 | dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ | ||
1065 | mflo t_1 | ||
1066 | mfhi t_2 | ||
1067 | daddu c_2,t_1 | ||
1068 | sltu AT,c_2,t_1 | ||
1069 | daddu t_2,AT | ||
1070 | daddu c_3,t_2 | ||
1071 | sltu AT,c_3,t_2 | ||
1072 | daddu c_1,AT | ||
1073 | dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ | ||
1074 | mflo t_1 | ||
1075 | mfhi t_2 | ||
1076 | daddu c_2,t_1 | ||
1077 | sltu AT,c_2,t_1 | ||
1078 | daddu t_2,AT | ||
1079 | daddu c_3,t_2 | ||
1080 | sltu AT,c_3,t_2 | ||
1081 | daddu c_1,AT | ||
1082 | dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ | ||
1083 | mflo t_1 | ||
1084 | mfhi t_2 | ||
1085 | daddu c_2,t_1 | ||
1086 | sltu AT,c_2,t_1 | ||
1087 | daddu t_2,AT | ||
1088 | daddu c_3,t_2 | ||
1089 | sltu AT,c_3,t_2 | ||
1090 | daddu c_1,AT | ||
1091 | dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ | ||
1092 | mflo t_1 | ||
1093 | mfhi t_2 | ||
1094 | daddu c_2,t_1 | ||
1095 | sltu AT,c_2,t_1 | ||
1096 | daddu t_2,AT | ||
1097 | daddu c_3,t_2 | ||
1098 | sltu AT,c_3,t_2 | ||
1099 | daddu c_1,AT | ||
1100 | dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ | ||
1101 | mflo t_1 | ||
1102 | mfhi t_2 | ||
1103 | daddu c_2,t_1 | ||
1104 | sltu AT,c_2,t_1 | ||
1105 | daddu t_2,AT | ||
1106 | daddu c_3,t_2 | ||
1107 | sltu AT,c_3,t_2 | ||
1108 | daddu c_1,AT | ||
1109 | dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ | ||
1110 | mflo t_1 | ||
1111 | mfhi t_2 | ||
1112 | daddu c_2,t_1 | ||
1113 | sltu AT,c_2,t_1 | ||
1114 | daddu t_2,AT | ||
1115 | daddu c_3,t_2 | ||
1116 | sltu AT,c_3,t_2 | ||
1117 | daddu c_1,AT | ||
1118 | sd c_2,56(a0) /* r[7]=c2; */ | ||
1119 | |||
1120 | dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ | ||
1121 | mflo t_1 | ||
1122 | mfhi t_2 | ||
1123 | daddu c_3,t_1 | ||
1124 | sltu AT,c_3,t_1 | ||
1125 | daddu t_2,AT | ||
1126 | daddu c_1,t_2 | ||
1127 | sltu c_2,c_1,t_2 | ||
1128 | dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ | ||
1129 | mflo t_1 | ||
1130 | mfhi t_2 | ||
1131 | daddu c_3,t_1 | ||
1132 | sltu AT,c_3,t_1 | ||
1133 | daddu t_2,AT | ||
1134 | daddu c_1,t_2 | ||
1135 | sltu AT,c_1,t_2 | ||
1136 | daddu c_2,AT | ||
1137 | dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ | ||
1138 | mflo t_1 | ||
1139 | mfhi t_2 | ||
1140 | daddu c_3,t_1 | ||
1141 | sltu AT,c_3,t_1 | ||
1142 | daddu t_2,AT | ||
1143 | daddu c_1,t_2 | ||
1144 | sltu AT,c_1,t_2 | ||
1145 | daddu c_2,AT | ||
1146 | dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | ||
1147 | mflo t_1 | ||
1148 | mfhi t_2 | ||
1149 | daddu c_3,t_1 | ||
1150 | sltu AT,c_3,t_1 | ||
1151 | daddu t_2,AT | ||
1152 | daddu c_1,t_2 | ||
1153 | sltu AT,c_1,t_2 | ||
1154 | daddu c_2,AT | ||
1155 | dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ | ||
1156 | mflo t_1 | ||
1157 | mfhi t_2 | ||
1158 | daddu c_3,t_1 | ||
1159 | sltu AT,c_3,t_1 | ||
1160 | daddu t_2,AT | ||
1161 | daddu c_1,t_2 | ||
1162 | sltu AT,c_1,t_2 | ||
1163 | daddu c_2,AT | ||
1164 | dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ | ||
1165 | mflo t_1 | ||
1166 | mfhi t_2 | ||
1167 | daddu c_3,t_1 | ||
1168 | sltu AT,c_3,t_1 | ||
1169 | daddu t_2,AT | ||
1170 | daddu c_1,t_2 | ||
1171 | sltu AT,c_1,t_2 | ||
1172 | daddu c_2,AT | ||
1173 | dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ | ||
1174 | mflo t_1 | ||
1175 | mfhi t_2 | ||
1176 | daddu c_3,t_1 | ||
1177 | sltu AT,c_3,t_1 | ||
1178 | daddu t_2,AT | ||
1179 | daddu c_1,t_2 | ||
1180 | sltu AT,c_1,t_2 | ||
1181 | daddu c_2,AT | ||
1182 | sd c_3,64(a0) /* r[8]=c3; */ | ||
1183 | |||
1184 | dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ | ||
1185 | mflo t_1 | ||
1186 | mfhi t_2 | ||
1187 | daddu c_1,t_1 | ||
1188 | sltu AT,c_1,t_1 | ||
1189 | daddu t_2,AT | ||
1190 | daddu c_2,t_2 | ||
1191 | sltu c_3,c_2,t_2 | ||
1192 | dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ | ||
1193 | mflo t_1 | ||
1194 | mfhi t_2 | ||
1195 | daddu c_1,t_1 | ||
1196 | sltu AT,c_1,t_1 | ||
1197 | daddu t_2,AT | ||
1198 | daddu c_2,t_2 | ||
1199 | sltu AT,c_2,t_2 | ||
1200 | daddu c_3,AT | ||
1201 | dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ | ||
1202 | mflo t_1 | ||
1203 | mfhi t_2 | ||
1204 | daddu c_1,t_1 | ||
1205 | sltu AT,c_1,t_1 | ||
1206 | daddu t_2,AT | ||
1207 | daddu c_2,t_2 | ||
1208 | sltu AT,c_2,t_2 | ||
1209 | daddu c_3,AT | ||
1210 | dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ | ||
1211 | mflo t_1 | ||
1212 | mfhi t_2 | ||
1213 | daddu c_1,t_1 | ||
1214 | sltu AT,c_1,t_1 | ||
1215 | daddu t_2,AT | ||
1216 | daddu c_2,t_2 | ||
1217 | sltu AT,c_2,t_2 | ||
1218 | daddu c_3,AT | ||
1219 | dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ | ||
1220 | mflo t_1 | ||
1221 | mfhi t_2 | ||
1222 | daddu c_1,t_1 | ||
1223 | sltu AT,c_1,t_1 | ||
1224 | daddu t_2,AT | ||
1225 | daddu c_2,t_2 | ||
1226 | sltu AT,c_2,t_2 | ||
1227 | daddu c_3,AT | ||
1228 | dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ | ||
1229 | mflo t_1 | ||
1230 | mfhi t_2 | ||
1231 | daddu c_1,t_1 | ||
1232 | sltu AT,c_1,t_1 | ||
1233 | daddu t_2,AT | ||
1234 | daddu c_2,t_2 | ||
1235 | sltu AT,c_2,t_2 | ||
1236 | daddu c_3,AT | ||
1237 | sd c_1,72(a0) /* r[9]=c1; */ | ||
1238 | |||
1239 | dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ | ||
1240 | mflo t_1 | ||
1241 | mfhi t_2 | ||
1242 | daddu c_2,t_1 | ||
1243 | sltu AT,c_2,t_1 | ||
1244 | daddu t_2,AT | ||
1245 | daddu c_3,t_2 | ||
1246 | sltu c_1,c_3,t_2 | ||
1247 | dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ | ||
1248 | mflo t_1 | ||
1249 | mfhi t_2 | ||
1250 | daddu c_2,t_1 | ||
1251 | sltu AT,c_2,t_1 | ||
1252 | daddu t_2,AT | ||
1253 | daddu c_3,t_2 | ||
1254 | sltu AT,c_3,t_2 | ||
1255 | daddu c_1,AT | ||
1256 | dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | ||
1257 | mflo t_1 | ||
1258 | mfhi t_2 | ||
1259 | daddu c_2,t_1 | ||
1260 | sltu AT,c_2,t_1 | ||
1261 | daddu t_2,AT | ||
1262 | daddu c_3,t_2 | ||
1263 | sltu AT,c_3,t_2 | ||
1264 | daddu c_1,AT | ||
1265 | dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ | ||
1266 | mflo t_1 | ||
1267 | mfhi t_2 | ||
1268 | daddu c_2,t_1 | ||
1269 | sltu AT,c_2,t_1 | ||
1270 | daddu t_2,AT | ||
1271 | daddu c_3,t_2 | ||
1272 | sltu AT,c_3,t_2 | ||
1273 | daddu c_1,AT | ||
1274 | dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ | ||
1275 | mflo t_1 | ||
1276 | mfhi t_2 | ||
1277 | daddu c_2,t_1 | ||
1278 | sltu AT,c_2,t_1 | ||
1279 | daddu t_2,AT | ||
1280 | daddu c_3,t_2 | ||
1281 | sltu AT,c_3,t_2 | ||
1282 | daddu c_1,AT | ||
1283 | sd c_2,80(a0) /* r[10]=c2; */ | ||
1284 | |||
1285 | dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ | ||
1286 | mflo t_1 | ||
1287 | mfhi t_2 | ||
1288 | daddu c_3,t_1 | ||
1289 | sltu AT,c_3,t_1 | ||
1290 | daddu t_2,AT | ||
1291 | daddu c_1,t_2 | ||
1292 | sltu c_2,c_1,t_2 | ||
1293 | dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ | ||
1294 | mflo t_1 | ||
1295 | mfhi t_2 | ||
1296 | daddu c_3,t_1 | ||
1297 | sltu AT,c_3,t_1 | ||
1298 | daddu t_2,AT | ||
1299 | daddu c_1,t_2 | ||
1300 | sltu AT,c_1,t_2 | ||
1301 | daddu c_2,AT | ||
1302 | dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ | ||
1303 | mflo t_1 | ||
1304 | mfhi t_2 | ||
1305 | daddu c_3,t_1 | ||
1306 | sltu AT,c_3,t_1 | ||
1307 | daddu t_2,AT | ||
1308 | daddu c_1,t_2 | ||
1309 | sltu AT,c_1,t_2 | ||
1310 | daddu c_2,AT | ||
1311 | dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ | ||
1312 | mflo t_1 | ||
1313 | mfhi t_2 | ||
1314 | daddu c_3,t_1 | ||
1315 | sltu AT,c_3,t_1 | ||
1316 | daddu t_2,AT | ||
1317 | daddu c_1,t_2 | ||
1318 | sltu AT,c_1,t_2 | ||
1319 | daddu c_2,AT | ||
1320 | sd c_3,88(a0) /* r[11]=c3; */ | ||
1321 | |||
1322 | dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ | ||
1323 | mflo t_1 | ||
1324 | mfhi t_2 | ||
1325 | daddu c_1,t_1 | ||
1326 | sltu AT,c_1,t_1 | ||
1327 | daddu t_2,AT | ||
1328 | daddu c_2,t_2 | ||
1329 | sltu c_3,c_2,t_2 | ||
1330 | dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | ||
1331 | mflo t_1 | ||
1332 | mfhi t_2 | ||
1333 | daddu c_1,t_1 | ||
1334 | sltu AT,c_1,t_1 | ||
1335 | daddu t_2,AT | ||
1336 | daddu c_2,t_2 | ||
1337 | sltu AT,c_2,t_2 | ||
1338 | daddu c_3,AT | ||
1339 | dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ | ||
1340 | mflo t_1 | ||
1341 | mfhi t_2 | ||
1342 | daddu c_1,t_1 | ||
1343 | sltu AT,c_1,t_1 | ||
1344 | daddu t_2,AT | ||
1345 | daddu c_2,t_2 | ||
1346 | sltu AT,c_2,t_2 | ||
1347 | daddu c_3,AT | ||
1348 | sd c_1,96(a0) /* r[12]=c1; */ | ||
1349 | |||
1350 | dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ | ||
1351 | mflo t_1 | ||
1352 | mfhi t_2 | ||
1353 | daddu c_2,t_1 | ||
1354 | sltu AT,c_2,t_1 | ||
1355 | daddu t_2,AT | ||
1356 | daddu c_3,t_2 | ||
1357 | sltu c_1,c_3,t_2 | ||
1358 | dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ | ||
1359 | mflo t_1 | ||
1360 | mfhi t_2 | ||
1361 | daddu c_2,t_1 | ||
1362 | sltu AT,c_2,t_1 | ||
1363 | daddu t_2,AT | ||
1364 | daddu c_3,t_2 | ||
1365 | sltu AT,c_3,t_2 | ||
1366 | daddu c_1,AT | ||
1367 | sd c_2,104(a0) /* r[13]=c2; */ | ||
1368 | |||
1369 | dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | ||
1370 | ld s0,0(sp) | ||
1371 | ld s1,8(sp) | ||
1372 | ld s2,16(sp) | ||
1373 | ld s3,24(sp) | ||
1374 | ld s4,32(sp) | ||
1375 | ld s5,40(sp) | ||
1376 | mflo t_1 | ||
1377 | mfhi t_2 | ||
1378 | daddu c_3,t_1 | ||
1379 | sltu AT,c_3,t_1 | ||
1380 | daddu t_2,AT | ||
1381 | daddu c_1,t_2 | ||
1382 | sd c_3,112(a0) /* r[14]=c3; */ | ||
1383 | sd c_1,120(a0) /* r[15]=c1; */ | ||
1384 | |||
1385 | PTR_ADD sp,FRAME_SIZE | ||
1386 | |||
1387 | jr ra | ||
1388 | END(bn_mul_comba8) | ||
1389 | |||
1390 | .align 5 | ||
1391 | LEAF(bn_mul_comba4) /* 4x4-word comba multiply on 64-bit words: a0 -> r (r[0..7] written, least-significant word first), a1 -> a, a2 -> b (four words read from each) */ | ||
1392 | .set reorder | ||
1393 | ld a_0,0(a1) | ||
1394 | ld b_0,0(a2) | ||
1395 | ld a_1,8(a1) | ||
1396 | ld a_2,16(a1) | ||
1397 | dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
1398 | ld a_3,24(a1) /* remaining loads sit between dmultu and mflo, presumably to overlap the multiply latency */ | ||
1399 | ld b_1,8(a2) | ||
1400 | ld b_2,16(a2) | ||
1401 | ld b_3,24(a2) /* in source order, every input word is loaded before the first store to r */ | ||
1402 | mflo c_1 /* column 0 has a single product: c1 = lo, c2 = hi */ | ||
1403 | mfhi c_2 | ||
1404 | sd c_1,0(a0) /* r[0]=c1; */ | ||
1405 | |||
1406 | dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | ||
1407 | mflo t_1 | ||
1408 | mfhi t_2 | ||
1409 | daddu c_2,t_1 /* c2 += lo */ | ||
1410 | sltu AT,c_2,t_1 /* AT = 1 if the add wrapped (sum is below the addend), else 0 */ | ||
1411 | daddu c_3,t_2,AT /* first write of c3: assigned hi + carry rather than accumulated */ | ||
1412 | dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | ||
1413 | mflo t_1 | ||
1414 | mfhi t_2 | ||
1415 | daddu c_2,t_1 | ||
1416 | sltu AT,c_2,t_1 | ||
1417 | daddu t_2,AT /* fold the low-word carry into hi before adding it to the middle word */ | ||
1418 | daddu c_3,t_2 | ||
1419 | sltu c_1,c_3,t_2 /* c1 restarts as the carry out of c3; its old value is already stored in r[0] */ | ||
1420 | sd c_2,8(a0) /* r[1]=c2; */ | ||
1421 | |||
1422 | dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | ||
1423 | mflo t_1 | ||
1424 | mfhi t_2 | ||
1425 | daddu c_3,t_1 | ||
1426 | sltu AT,c_3,t_1 | ||
1427 | daddu t_2,AT | ||
1428 | daddu c_1,t_2 /* no carry out of c1 is recorded for this first product (pending sum < 2*2^64 and product <= (2^64-1)^2, so it fits in two words); c2 is first written by the sltu after the next product */ | ||
1429 | dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
1430 | mflo t_1 | ||
1431 | mfhi t_2 | ||
1432 | daddu c_3,t_1 | ||
1433 | sltu AT,c_3,t_1 | ||
1434 | daddu t_2,AT | ||
1435 | daddu c_1,t_2 | ||
1436 | sltu c_2,c_1,t_2 /* c2 restarts as the carry out of c1; its old value is already stored in r[1] */ | ||
1437 | dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | ||
1438 | mflo t_1 | ||
1439 | mfhi t_2 | ||
1440 | daddu c_3,t_1 | ||
1441 | sltu AT,c_3,t_1 | ||
1442 | daddu t_2,AT | ||
1443 | daddu c_1,t_2 | ||
1444 | sltu AT,c_1,t_2 | ||
1445 | daddu c_2,AT /* later products in a column accumulate their carry into the top word */ | ||
1446 | sd c_3,16(a0) /* r[2]=c3; */ | ||
1447 | |||
1448 | dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | ||
1449 | mflo t_1 | ||
1450 | mfhi t_2 | ||
1451 | daddu c_1,t_1 | ||
1452 | sltu AT,c_1,t_1 | ||
1453 | daddu t_2,AT | ||
1454 | daddu c_2,t_2 | ||
1455 | sltu c_3,c_2,t_2 /* c3 restarts as the carry out of c2; its old value is already stored in r[2] */ | ||
1456 | dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | ||
1457 | mflo t_1 | ||
1458 | mfhi t_2 | ||
1459 | daddu c_1,t_1 | ||
1460 | sltu AT,c_1,t_1 | ||
1461 | daddu t_2,AT | ||
1462 | daddu c_2,t_2 | ||
1463 | sltu AT,c_2,t_2 | ||
1464 | daddu c_3,AT | ||
1465 | dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | ||
1466 | mflo t_1 | ||
1467 | mfhi t_2 | ||
1468 | daddu c_1,t_1 | ||
1469 | sltu AT,c_1,t_1 | ||
1470 | daddu t_2,AT | ||
1471 | daddu c_2,t_2 | ||
1472 | sltu AT,c_2,t_2 | ||
1473 | daddu c_3,AT | ||
1474 | dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | ||
1475 | mflo t_1 | ||
1476 | mfhi t_2 | ||
1477 | daddu c_1,t_1 | ||
1478 | sltu AT,c_1,t_1 | ||
1479 | daddu t_2,AT | ||
1480 | daddu c_2,t_2 | ||
1481 | sltu AT,c_2,t_2 | ||
1482 | daddu c_3,AT | ||
1483 | sd c_1,24(a0) /* r[3]=c1; */ | ||
1484 | |||
1485 | dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | ||
1486 | mflo t_1 | ||
1487 | mfhi t_2 | ||
1488 | daddu c_2,t_1 | ||
1489 | sltu AT,c_2,t_1 | ||
1490 | daddu t_2,AT | ||
1491 | daddu c_3,t_2 | ||
1492 | sltu c_1,c_3,t_2 /* c1 restarts as the carry out of c3; its old value is already stored in r[3] */ | ||
1493 | dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
1494 | mflo t_1 | ||
1495 | mfhi t_2 | ||
1496 | daddu c_2,t_1 | ||
1497 | sltu AT,c_2,t_1 | ||
1498 | daddu t_2,AT | ||
1499 | daddu c_3,t_2 | ||
1500 | sltu AT,c_3,t_2 | ||
1501 | daddu c_1,AT | ||
1502 | dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | ||
1503 | mflo t_1 | ||
1504 | mfhi t_2 | ||
1505 | daddu c_2,t_1 | ||
1506 | sltu AT,c_2,t_1 | ||
1507 | daddu t_2,AT | ||
1508 | daddu c_3,t_2 | ||
1509 | sltu AT,c_3,t_2 | ||
1510 | daddu c_1,AT | ||
1511 | sd c_2,32(a0) /* r[4]=c2; */ | ||
1512 | |||
1513 | dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | ||
1514 | mflo t_1 | ||
1515 | mfhi t_2 | ||
1516 | daddu c_3,t_1 | ||
1517 | sltu AT,c_3,t_1 | ||
1518 | daddu t_2,AT | ||
1519 | daddu c_1,t_2 | ||
1520 | sltu c_2,c_1,t_2 /* c2 restarts as the carry out of c1; its old value is already stored in r[4] */ | ||
1521 | dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | ||
1522 | mflo t_1 | ||
1523 | mfhi t_2 | ||
1524 | daddu c_3,t_1 | ||
1525 | sltu AT,c_3,t_1 | ||
1526 | daddu t_2,AT | ||
1527 | daddu c_1,t_2 | ||
1528 | sltu AT,c_1,t_2 | ||
1529 | daddu c_2,AT | ||
1530 | sd c_3,40(a0) /* r[5]=c3; */ | ||
1531 | |||
1532 | dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
1533 | mflo t_1 | ||
1534 | mfhi t_2 | ||
1535 | daddu c_1,t_1 | ||
1536 | sltu AT,c_1,t_1 | ||
1537 | daddu t_2,AT | ||
1538 | daddu c_2,t_2 /* last column: c2 becomes the top result word and no carry into c3 is computed */ | ||
1539 | sd c_1,48(a0) /* r[6]=c1; */ | ||
1540 | sd c_2,56(a0) /* r[7]=c2; */ | ||
1541 | |||
1542 | jr ra /* return; no stack adjustment or register save/restore appears in this routine */ | ||
1543 | END(bn_mul_comba4) | ||
1544 | |||
1545 | #undef a_4 /* drop the a_4..a_7 names used by the multiply routines above; they are redefined just below */ | ||
1546 | #undef a_5 /* after the redefinition a_4..a_7 expand to b_0..b_3 */ | ||
1547 | #undef a_6 /* the visible part of bn_sqr_comba8 loads only through a1 and names no b_* operand itself */ | ||
1548 | #undef a_7 /* NOTE(review): presumably done so a[4..7] can live in the otherwise unused b_0..b_3 registers - confirm against the register #defines earlier in the file */ | ||
1549 | #define a_4 b_0 | ||
1550 | #define a_5 b_1 | ||
1551 | #define a_6 b_2 | ||
1552 | #define a_7 b_3 | ||
1553 | |||
1554 | .align 5 | ||
1555 | LEAF(bn_sqr_comba8) | ||
1556 | .set reorder | ||
1557 | ld a_0,0(a1) | ||
1558 | ld a_1,8(a1) | ||
1559 | ld a_2,16(a1) | ||
1560 | ld a_3,24(a1) | ||
1561 | |||
1562 | dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
1563 | ld a_4,32(a1) | ||
1564 | ld a_5,40(a1) | ||
1565 | ld a_6,48(a1) | ||
1566 | ld a_7,56(a1) | ||
1567 | mflo c_1 | ||
1568 | mfhi c_2 | ||
1569 | sd c_1,0(a0) | ||
1570 | |||
1571 | dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | ||
1572 | mflo t_1 | ||
1573 | mfhi t_2 | ||
1574 | slt c_1,t_2,zero | ||
1575 | dsll t_2,1 | ||
1576 | slt a2,t_1,zero | ||
1577 | daddu t_2,a2 | ||
1578 | dsll t_1,1 | ||
1579 | daddu c_2,t_1 | ||
1580 | sltu AT,c_2,t_1 | ||
1581 | daddu c_3,t_2,AT | ||
1582 | sd c_2,8(a0) | ||
1583 | |||
1584 | dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | ||
1585 | mflo t_1 | ||
1586 | mfhi t_2 | ||
1587 | slt c_2,t_2,zero | ||
1588 | dsll t_2,1 | ||
1589 | slt a2,t_1,zero | ||
1590 | daddu t_2,a2 | ||
1591 | dsll t_1,1 | ||
1592 | daddu c_3,t_1 | ||
1593 | sltu AT,c_3,t_1 | ||
1594 | daddu t_2,AT | ||
1595 | daddu c_1,t_2 | ||
1596 | sltu AT,c_1,t_2 | ||
1597 | daddu c_2,AT | ||
1598 | dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
1599 | mflo t_1 | ||
1600 | mfhi t_2 | ||
1601 | daddu c_3,t_1 | ||
1602 | sltu AT,c_3,t_1 | ||
1603 | daddu t_2,AT | ||
1604 | daddu c_1,t_2 | ||
1605 | sltu AT,c_1,t_2 | ||
1606 | daddu c_2,AT | ||
1607 | sd c_3,16(a0) | ||
1608 | |||
1609 | dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | ||
1610 | mflo t_1 | ||
1611 | mfhi t_2 | ||
1612 | slt c_3,t_2,zero | ||
1613 | dsll t_2,1 | ||
1614 | slt a2,t_1,zero | ||
1615 | daddu t_2,a2 | ||
1616 | dsll t_1,1 | ||
1617 | daddu c_1,t_1 | ||
1618 | sltu AT,c_1,t_1 | ||
1619 | daddu t_2,AT | ||
1620 | daddu c_2,t_2 | ||
1621 | sltu AT,c_2,t_2 | ||
1622 | daddu c_3,AT | ||
1623 | dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ | ||
1624 | mflo t_1 | ||
1625 | mfhi t_2 | ||
1626 | slt AT,t_2,zero | ||
1627 | daddu c_3,AT | ||
1628 | dsll t_2,1 | ||
1629 | slt a2,t_1,zero | ||
1630 | daddu t_2,a2 | ||
1631 | dsll t_1,1 | ||
1632 | daddu c_1,t_1 | ||
1633 | sltu AT,c_1,t_1 | ||
1634 | daddu t_2,AT | ||
1635 | daddu c_2,t_2 | ||
1636 | sltu AT,c_2,t_2 | ||
1637 | daddu c_3,AT | ||
1638 | sd c_1,24(a0) | ||
1639 | |||
1640 | dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ | ||
1641 | mflo t_1 | ||
1642 | mfhi t_2 | ||
1643 | slt c_1,t_2,zero | ||
1644 | dsll t_2,1 | ||
1645 | slt a2,t_1,zero | ||
1646 | daddu t_2,a2 | ||
1647 | dsll t_1,1 | ||
1648 | daddu c_2,t_1 | ||
1649 | sltu AT,c_2,t_1 | ||
1650 | daddu t_2,AT | ||
1651 | daddu c_3,t_2 | ||
1652 | sltu AT,c_3,t_2 | ||
1653 | daddu c_1,AT | ||
1654 | dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | ||
1655 | mflo t_1 | ||
1656 | mfhi t_2 | ||
1657 | slt AT,t_2,zero | ||
1658 | daddu c_1,AT | ||
1659 | dsll t_2,1 | ||
1660 | slt a2,t_1,zero | ||
1661 | daddu t_2,a2 | ||
1662 | dsll t_1,1 | ||
1663 | daddu c_2,t_1 | ||
1664 | sltu AT,c_2,t_1 | ||
1665 | daddu t_2,AT | ||
1666 | daddu c_3,t_2 | ||
1667 | sltu AT,c_3,t_2 | ||
1668 | daddu c_1,AT | ||
1669 | dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
1670 | mflo t_1 | ||
1671 | mfhi t_2 | ||
1672 | daddu c_2,t_1 | ||
1673 | sltu AT,c_2,t_1 | ||
1674 | daddu t_2,AT | ||
1675 | daddu c_3,t_2 | ||
1676 | sltu AT,c_3,t_2 | ||
1677 | daddu c_1,AT | ||
1678 | sd c_2,32(a0) | ||
1679 | |||
1680 | dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ | ||
1681 | mflo t_1 | ||
1682 | mfhi t_2 | ||
1683 | slt c_2,t_2,zero | ||
1684 | dsll t_2,1 | ||
1685 | slt a2,t_1,zero | ||
1686 | daddu t_2,a2 | ||
1687 | dsll t_1,1 | ||
1688 | daddu c_3,t_1 | ||
1689 | sltu AT,c_3,t_1 | ||
1690 | daddu t_2,AT | ||
1691 | daddu c_1,t_2 | ||
1692 | sltu AT,c_1,t_2 | ||
1693 | daddu c_2,AT | ||
1694 | dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ | ||
1695 | mflo t_1 | ||
1696 | mfhi t_2 | ||
1697 | slt AT,t_2,zero | ||
1698 | daddu c_2,AT | ||
1699 | dsll t_2,1 | ||
1700 | slt a2,t_1,zero | ||
1701 | daddu t_2,a2 | ||
1702 | dsll t_1,1 | ||
1703 | daddu c_3,t_1 | ||
1704 | sltu AT,c_3,t_1 | ||
1705 | daddu t_2,AT | ||
1706 | daddu c_1,t_2 | ||
1707 | sltu AT,c_1,t_2 | ||
1708 | daddu c_2,AT | ||
1709 | dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | ||
1710 | mflo t_1 | ||
1711 | mfhi t_2 | ||
1712 | slt AT,t_2,zero | ||
1713 | daddu c_2,AT | ||
1714 | dsll t_2,1 | ||
1715 | slt a2,t_1,zero | ||
1716 | daddu t_2,a2 | ||
1717 | dsll t_1,1 | ||
1718 | daddu c_3,t_1 | ||
1719 | sltu AT,c_3,t_1 | ||
1720 | daddu t_2,AT | ||
1721 | daddu c_1,t_2 | ||
1722 | sltu AT,c_1,t_2 | ||
1723 | daddu c_2,AT | ||
1724 | sd c_3,40(a0) | ||
1725 | |||
1726 | dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ | ||
1727 | mflo t_1 | ||
1728 | mfhi t_2 | ||
1729 | slt c_3,t_2,zero | ||
1730 | dsll t_2,1 | ||
1731 | slt a2,t_1,zero | ||
1732 | daddu t_2,a2 | ||
1733 | dsll t_1,1 | ||
1734 | daddu c_1,t_1 | ||
1735 | sltu AT,c_1,t_1 | ||
1736 | daddu t_2,AT | ||
1737 | daddu c_2,t_2 | ||
1738 | sltu AT,c_2,t_2 | ||
1739 | daddu c_3,AT | ||
1740 | dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ | ||
1741 | mflo t_1 | ||
1742 | mfhi t_2 | ||
1743 | slt AT,t_2,zero | ||
1744 | daddu c_3,AT | ||
1745 | dsll t_2,1 | ||
1746 | slt a2,t_1,zero | ||
1747 | daddu t_2,a2 | ||
1748 | dsll t_1,1 | ||
1749 | daddu c_1,t_1 | ||
1750 | sltu AT,c_1,t_1 | ||
1751 | daddu t_2,AT | ||
1752 | daddu c_2,t_2 | ||
1753 | sltu AT,c_2,t_2 | ||
1754 | daddu c_3,AT | ||
1755 | dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ | ||
1756 | mflo t_1 | ||
1757 | mfhi t_2 | ||
1758 | slt AT,t_2,zero | ||
1759 | daddu c_3,AT | ||
1760 | dsll t_2,1 | ||
1761 | slt a2,t_1,zero | ||
1762 | daddu t_2,a2 | ||
1763 | dsll t_1,1 | ||
1764 | daddu c_1,t_1 | ||
1765 | sltu AT,c_1,t_1 | ||
1766 | daddu t_2,AT | ||
1767 | daddu c_2,t_2 | ||
1768 | sltu AT,c_2,t_2 | ||
1769 | daddu c_3,AT | ||
1770 | dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
1771 | mflo t_1 | ||
1772 | mfhi t_2 | ||
1773 | daddu c_1,t_1 | ||
1774 | sltu AT,c_1,t_1 | ||
1775 | daddu t_2,AT | ||
1776 | daddu c_2,t_2 | ||
1777 | sltu AT,c_2,t_2 | ||
1778 | daddu c_3,AT | ||
1779 | sd c_1,48(a0) | ||
1780 | |||
1781 | dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ | ||
1782 | mflo t_1 | ||
1783 | mfhi t_2 | ||
1784 | slt c_1,t_2,zero | ||
1785 | dsll t_2,1 | ||
1786 | slt a2,t_1,zero | ||
1787 | daddu t_2,a2 | ||
1788 | dsll t_1,1 | ||
1789 | daddu c_2,t_1 | ||
1790 | sltu AT,c_2,t_1 | ||
1791 | daddu t_2,AT | ||
1792 | daddu c_3,t_2 | ||
1793 | sltu AT,c_3,t_2 | ||
1794 | daddu c_1,AT | ||
1795 | dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ | ||
1796 | mflo t_1 | ||
1797 | mfhi t_2 | ||
1798 | slt AT,t_2,zero | ||
1799 | daddu c_1,AT | ||
1800 | dsll t_2,1 | ||
1801 | slt a2,t_1,zero | ||
1802 | daddu t_2,a2 | ||
1803 | dsll t_1,1 | ||
1804 | daddu c_2,t_1 | ||
1805 | sltu AT,c_2,t_1 | ||
1806 | daddu t_2,AT | ||
1807 | daddu c_3,t_2 | ||
1808 | sltu AT,c_3,t_2 | ||
1809 | daddu c_1,AT | ||
1810 | dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ | ||
1811 | mflo t_1 | ||
1812 | mfhi t_2 | ||
1813 | slt AT,t_2,zero | ||
1814 | daddu c_1,AT | ||
1815 | dsll t_2,1 | ||
1816 | slt a2,t_1,zero | ||
1817 | daddu t_2,a2 | ||
1818 | dsll t_1,1 | ||
1819 | daddu c_2,t_1 | ||
1820 | sltu AT,c_2,t_1 | ||
1821 | daddu t_2,AT | ||
1822 | daddu c_3,t_2 | ||
1823 | sltu AT,c_3,t_2 | ||
1824 | daddu c_1,AT | ||
1825 | dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ | ||
1826 | mflo t_1 | ||
1827 | mfhi t_2 | ||
1828 | slt AT,t_2,zero | ||
1829 | daddu c_1,AT | ||
1830 | dsll t_2,1 | ||
1831 | slt a2,t_1,zero | ||
1832 | daddu t_2,a2 | ||
1833 | dsll t_1,1 | ||
1834 | daddu c_2,t_1 | ||
1835 | sltu AT,c_2,t_1 | ||
1836 | daddu t_2,AT | ||
1837 | daddu c_3,t_2 | ||
1838 | sltu AT,c_3,t_2 | ||
1839 | daddu c_1,AT | ||
1840 | sd c_2,56(a0) | ||
1841 | |||
1842 | dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ | ||
1843 | mflo t_1 | ||
1844 | mfhi t_2 | ||
1845 | slt c_2,t_2,zero | ||
1846 | dsll t_2,1 | ||
1847 | slt a2,t_1,zero | ||
1848 | daddu t_2,a2 | ||
1849 | dsll t_1,1 | ||
1850 | daddu c_3,t_1 | ||
1851 | sltu AT,c_3,t_1 | ||
1852 | daddu t_2,AT | ||
1853 | daddu c_1,t_2 | ||
1854 | sltu AT,c_1,t_2 | ||
1855 | daddu c_2,AT | ||
1856 | dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ | ||
1857 | mflo t_1 | ||
1858 | mfhi t_2 | ||
1859 | slt AT,t_2,zero | ||
1860 | daddu c_2,AT | ||
1861 | dsll t_2,1 | ||
1862 | slt a2,t_1,zero | ||
1863 | daddu t_2,a2 | ||
1864 | dsll t_1,1 | ||
1865 | daddu c_3,t_1 | ||
1866 | sltu AT,c_3,t_1 | ||
1867 | daddu t_2,AT | ||
1868 | daddu c_1,t_2 | ||
1869 | sltu AT,c_1,t_2 | ||
1870 | daddu c_2,AT | ||
1871 | dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ | ||
1872 | mflo t_1 | ||
1873 | mfhi t_2 | ||
1874 | slt AT,t_2,zero | ||
1875 | daddu c_2,AT | ||
1876 | dsll t_2,1 | ||
1877 | slt a2,t_1,zero | ||
1878 | daddu t_2,a2 | ||
1879 | dsll t_1,1 | ||
1880 | daddu c_3,t_1 | ||
1881 | sltu AT,c_3,t_1 | ||
1882 | daddu t_2,AT | ||
1883 | daddu c_1,t_2 | ||
1884 | sltu AT,c_1,t_2 | ||
1885 | daddu c_2,AT | ||
1886 | dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | ||
1887 | mflo t_1 | ||
1888 | mfhi t_2 | ||
1889 | daddu c_3,t_1 | ||
1890 | sltu AT,c_3,t_1 | ||
1891 | daddu t_2,AT | ||
1892 | daddu c_1,t_2 | ||
1893 | sltu AT,c_1,t_2 | ||
1894 | daddu c_2,AT | ||
1895 | sd c_3,64(a0) | ||
1896 | |||
1897 | dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ | ||
1898 | mflo t_1 | ||
1899 | mfhi t_2 | ||
1900 | slt c_3,t_2,zero | ||
1901 | dsll t_2,1 | ||
1902 | slt a2,t_1,zero | ||
1903 | daddu t_2,a2 | ||
1904 | dsll t_1,1 | ||
1905 | daddu c_1,t_1 | ||
1906 | sltu AT,c_1,t_1 | ||
1907 | daddu t_2,AT | ||
1908 | daddu c_2,t_2 | ||
1909 | sltu AT,c_2,t_2 | ||
1910 | daddu c_3,AT | ||
1911 | dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ | ||
1912 | mflo t_1 | ||
1913 | mfhi t_2 | ||
1914 | slt AT,t_2,zero | ||
1915 | daddu c_3,AT | ||
1916 | dsll t_2,1 | ||
1917 | slt a2,t_1,zero | ||
1918 | daddu t_2,a2 | ||
1919 | dsll t_1,1 | ||
1920 | daddu c_1,t_1 | ||
1921 | sltu AT,c_1,t_1 | ||
1922 | daddu t_2,AT | ||
1923 | daddu c_2,t_2 | ||
1924 | sltu AT,c_2,t_2 | ||
1925 | daddu c_3,AT | ||
1926 | dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ | ||
1927 | mflo t_1 | ||
1928 | mfhi t_2 | ||
1929 | slt AT,t_2,zero | ||
1930 | daddu c_3,AT | ||
1931 | dsll t_2,1 | ||
1932 | slt a2,t_1,zero | ||
1933 | daddu t_2,a2 | ||
1934 | dsll t_1,1 | ||
1935 | daddu c_1,t_1 | ||
1936 | sltu AT,c_1,t_1 | ||
1937 | daddu t_2,AT | ||
1938 | daddu c_2,t_2 | ||
1939 | sltu AT,c_2,t_2 | ||
1940 | daddu c_3,AT | ||
1941 | sd c_1,72(a0) | ||
1942 | |||
1943 | dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ | ||
1944 | mflo t_1 | ||
1945 | mfhi t_2 | ||
1946 | slt c_1,t_2,zero | ||
1947 | dsll t_2,1 | ||
1948 | slt a2,t_1,zero | ||
1949 | daddu t_2,a2 | ||
1950 | dsll t_1,1 | ||
1951 | daddu c_2,t_1 | ||
1952 | sltu AT,c_2,t_1 | ||
1953 | daddu t_2,AT | ||
1954 | daddu c_3,t_2 | ||
1955 | sltu AT,c_3,t_2 | ||
1956 | daddu c_1,AT | ||
1957 | dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ | ||
1958 | mflo t_1 | ||
1959 | mfhi t_2 | ||
1960 | slt AT,t_2,zero | ||
1961 | daddu c_1,AT | ||
1962 | dsll t_2,1 | ||
1963 | slt a2,t_1,zero | ||
1964 | daddu t_2,a2 | ||
1965 | dsll t_1,1 | ||
1966 | daddu c_2,t_1 | ||
1967 | sltu AT,c_2,t_1 | ||
1968 | daddu t_2,AT | ||
1969 | daddu c_3,t_2 | ||
1970 | sltu AT,c_3,t_2 | ||
1971 | daddu c_1,AT | ||
1972 | dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | ||
1973 | mflo t_1 | ||
1974 | mfhi t_2 | ||
1975 | daddu c_2,t_1 | ||
1976 | sltu AT,c_2,t_1 | ||
1977 | daddu t_2,AT | ||
1978 | daddu c_3,t_2 | ||
1979 | sltu AT,c_3,t_2 | ||
1980 | daddu c_1,AT | ||
1981 | sd c_2,80(a0) | ||
1982 | |||
1983 | dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ | ||
1984 | mflo t_1 | ||
1985 | mfhi t_2 | ||
1986 | slt c_2,t_2,zero | ||
1987 | dsll t_2,1 | ||
1988 | slt a2,t_1,zero | ||
1989 | daddu t_2,a2 | ||
1990 | dsll t_1,1 | ||
1991 | daddu c_3,t_1 | ||
1992 | sltu AT,c_3,t_1 | ||
1993 | daddu t_2,AT | ||
1994 | daddu c_1,t_2 | ||
1995 | sltu AT,c_1,t_2 | ||
1996 | daddu c_2,AT | ||
1997 | dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ | ||
1998 | mflo t_1 | ||
1999 | mfhi t_2 | ||
2000 | slt AT,t_2,zero | ||
2001 | daddu c_2,AT | ||
2002 | dsll t_2,1 | ||
2003 | slt a2,t_1,zero | ||
2004 | daddu t_2,a2 | ||
2005 | dsll t_1,1 | ||
2006 | daddu c_3,t_1 | ||
2007 | sltu AT,c_3,t_1 | ||
2008 | daddu t_2,AT | ||
2009 | daddu c_1,t_2 | ||
2010 | sltu AT,c_1,t_2 | ||
2011 | daddu c_2,AT | ||
2012 | sd c_3,88(a0) | ||
2013 | |||
2014 | dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ | ||
2015 | mflo t_1 | ||
2016 | mfhi t_2 | ||
2017 | slt c_3,t_2,zero | ||
2018 | dsll t_2,1 | ||
2019 | slt a2,t_1,zero | ||
2020 | daddu t_2,a2 | ||
2021 | dsll t_1,1 | ||
2022 | daddu c_1,t_1 | ||
2023 | sltu AT,c_1,t_1 | ||
2024 | daddu t_2,AT | ||
2025 | daddu c_2,t_2 | ||
2026 | sltu AT,c_2,t_2 | ||
2027 | daddu c_3,AT | ||
2028 | dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | ||
2029 | mflo t_1 | ||
2030 | mfhi t_2 | ||
2031 | daddu c_1,t_1 | ||
2032 | sltu AT,c_1,t_1 | ||
2033 | daddu t_2,AT | ||
2034 | daddu c_2,t_2 | ||
2035 | sltu AT,c_2,t_2 | ||
2036 | daddu c_3,AT | ||
2037 | sd c_1,96(a0) | ||
2038 | |||
2039 | dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ | ||
2040 | mflo t_1 | ||
2041 | mfhi t_2 | ||
2042 | slt c_1,t_2,zero | ||
2043 | dsll t_2,1 | ||
2044 | slt a2,t_1,zero | ||
2045 | daddu t_2,a2 | ||
2046 | dsll t_1,1 | ||
2047 | daddu c_2,t_1 | ||
2048 | sltu AT,c_2,t_1 | ||
2049 | daddu t_2,AT | ||
2050 | daddu c_3,t_2 | ||
2051 | sltu AT,c_3,t_2 | ||
2052 | daddu c_1,AT | ||
2053 | sd c_2,104(a0) | ||
2054 | |||
2055 | dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | ||
2056 | mflo t_1 | ||
2057 | mfhi t_2 | ||
2058 | daddu c_3,t_1 | ||
2059 | sltu AT,c_3,t_1 | ||
2060 | daddu t_2,AT | ||
2061 | daddu c_1,t_2 | ||
2062 | sd c_3,112(a0) | ||
2063 | sd c_1,120(a0) | ||
2064 | |||
2065 | jr ra | ||
2066 | END(bn_sqr_comba8) | ||
2067 | |||
2068 | .align 5 | ||
2069 | LEAF(bn_sqr_comba4) /* stores eight 64-bit words at 0..56(a0), the column-by-column square of the four words at 0..24(a1); a2 is overwritten as scratch. a_N, c_N and t_N are register aliases defined outside this excerpt. */ | ||
2070 | .set reorder | ||
2071 | ld a_0,0(a1) /* load the four input words */ | ||
2072 | ld a_1,8(a1) | ||
2073 | ld a_2,16(a1) | ||
2074 | ld a_3,24(a1) | ||
2075 | dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
2076 | mflo c_1 /* first column: the product itself seeds the accumulators */ | ||
2077 | mfhi c_2 | ||
2078 | sd c_1,0(a0) | ||
2079 | |||
2080 | dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | ||
2081 | mflo t_1 | ||
2082 | mfhi t_2 | ||
2083 | slt c_1,t_2,zero /* c1 = bit 63 of the high word, which the doubling below shifts out */ | ||
2084 | dsll t_2,1 /* high word * 2 */ | ||
2085 | slt a2,t_1,zero /* a2 = bit 63 of the low word */ | ||
2086 | daddu t_2,a2 /* move that bit into the doubled high word */ | ||
2087 | dsll t_1,1 /* low word * 2 */ | ||
2088 | daddu c_2,t_1 | ||
2089 | sltu AT,c_2,t_1 /* AT = carry out of the addition above */ | ||
2090 | daddu c_3,t_2,AT /* NOTE(review): if t_2 is all ones and AT is 1 this sum wraps to zero and nothing is added to c_1 -- confirm against the CVE-2014-3570 fix for the MIPS squaring code */ | ||
2091 | sd c_2,8(a0) | ||
2092 | |||
2093 | dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | ||
2094 | mflo t_1 | ||
2095 | mfhi t_2 | ||
2096 | slt c_2,t_2,zero /* first product of a column overwrites the top accumulator instead of adding to it */ | ||
2097 | dsll t_2,1 | ||
2098 | slt a2,t_1,zero | ||
2099 | daddu t_2,a2 | ||
2100 | dsll t_1,1 | ||
2101 | daddu c_3,t_1 | ||
2102 | sltu AT,c_3,t_1 | ||
2103 | daddu t_2,AT /* NOTE(review): same possible wrap as above when t_2 is all ones; the carry would not reach c_2 */ | ||
2104 | daddu c_1,t_2 | ||
2105 | sltu AT,c_1,t_2 | ||
2106 | daddu c_2,AT | ||
2107 | dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
2108 | mflo t_1 | ||
2109 | mfhi t_2 | ||
2110 | daddu c_3,t_1 /* plain (undoubled) accumulate: add low word ... */ | ||
2111 | sltu AT,c_3,t_1 | ||
2112 | daddu t_2,AT /* ... fold its carry into the high word ... */ | ||
2113 | daddu c_1,t_2 | ||
2114 | sltu AT,c_1,t_2 | ||
2115 | daddu c_2,AT /* ... and propagate the last carry into the top accumulator */ | ||
2116 | sd c_3,16(a0) | ||
2117 | |||
2118 | dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | ||
2119 | mflo t_1 | ||
2120 | mfhi t_2 | ||
2121 | slt c_3,t_2,zero | ||
2122 | dsll t_2,1 | ||
2123 | slt a2,t_1,zero | ||
2124 | daddu t_2,a2 | ||
2125 | dsll t_1,1 | ||
2126 | daddu c_1,t_1 | ||
2127 | sltu AT,c_1,t_1 | ||
2128 | daddu t_2,AT | ||
2129 | daddu c_2,t_2 | ||
2130 | sltu AT,c_2,t_2 | ||
2131 | daddu c_3,AT | ||
2132 | dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ | ||
2133 | mflo t_1 | ||
2134 | mfhi t_2 | ||
2135 | slt AT,t_2,zero /* later products of a column add the shifted-out bit rather than overwrite */ | ||
2136 | daddu c_3,AT | ||
2137 | dsll t_2,1 | ||
2138 | slt a2,t_1,zero | ||
2139 | daddu t_2,a2 | ||
2140 | dsll t_1,1 | ||
2141 | daddu c_1,t_1 | ||
2142 | sltu AT,c_1,t_1 | ||
2143 | daddu t_2,AT | ||
2144 | daddu c_2,t_2 | ||
2145 | sltu AT,c_2,t_2 | ||
2146 | daddu c_3,AT | ||
2147 | sd c_1,24(a0) | ||
2148 | |||
2149 | dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | ||
2150 | mflo t_1 | ||
2151 | mfhi t_2 | ||
2152 | slt c_1,t_2,zero | ||
2153 | dsll t_2,1 | ||
2154 | slt a2,t_1,zero | ||
2155 | daddu t_2,a2 | ||
2156 | dsll t_1,1 | ||
2157 | daddu c_2,t_1 | ||
2158 | sltu AT,c_2,t_1 | ||
2159 | daddu t_2,AT | ||
2160 | daddu c_3,t_2 | ||
2161 | sltu AT,c_3,t_2 | ||
2162 | daddu c_1,AT | ||
2163 | dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
2164 | mflo t_1 | ||
2165 | mfhi t_2 | ||
2166 | daddu c_2,t_1 | ||
2167 | sltu AT,c_2,t_1 | ||
2168 | daddu t_2,AT | ||
2169 | daddu c_3,t_2 | ||
2170 | sltu AT,c_3,t_2 | ||
2171 | daddu c_1,AT | ||
2172 | sd c_2,32(a0) | ||
2173 | |||
2174 | dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | ||
2175 | mflo t_1 | ||
2176 | mfhi t_2 | ||
2177 | slt c_2,t_2,zero | ||
2178 | dsll t_2,1 | ||
2179 | slt a2,t_1,zero | ||
2180 | daddu t_2,a2 | ||
2181 | dsll t_1,1 | ||
2182 | daddu c_3,t_1 | ||
2183 | sltu AT,c_3,t_1 | ||
2184 | daddu t_2,AT | ||
2185 | daddu c_1,t_2 | ||
2186 | sltu AT,c_1,t_2 | ||
2187 | daddu c_2,AT | ||
2188 | sd c_3,40(a0) | ||
2189 | |||
2190 | dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
2191 | mflo t_1 | ||
2192 | mfhi t_2 | ||
2193 | daddu c_1,t_1 | ||
2194 | sltu AT,c_1,t_1 | ||
2195 | daddu t_2,AT | ||
2196 | daddu c_2,t_2 /* last column: no further carry is tracked */ | ||
2197 | sd c_1,48(a0) | ||
2198 | sd c_2,56(a0) | ||
2199 | |||
2200 | jr ra | ||
2201 | END(bn_sqr_comba4) | ||
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl b/src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl deleted file mode 100644 index 8f9156e02a..0000000000 --- a/src/lib/libssl/src/crypto/bn/asm/mips3-mont.pl +++ /dev/null | |||
@@ -1,327 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # This module doesn't present direct interest for OpenSSL, because it | ||
11 | # doesn't provide better performance for longer keys. While 512-bit | ||
12 | # RSA private key operations are 40% faster, 1024-bit ones are hardly | ||
13 | # faster at all, while longer key operations are slower by up to 20%. | ||
14 | # It might be of interest to embedded system developers though, as | ||
15 | # it's smaller than 1KB, yet offers ~3x improvement over compiler | ||
16 | # generated code. | ||
17 | # | ||
18 | # The module targets N32 and N64 MIPS ABIs and currently is a bit | ||
19 | # IRIX-centric, i.e. is likely to require adaptation for other OSes. | ||
20 | |||
# Each variable below holds a MIPS register name; the names are interpolated
# into the assembly template assigned to $code.
21 | # int bn_mul_mont( | ||
22 | $rp="a0"; # BN_ULONG *rp, | ||
23 | $ap="a1"; # const BN_ULONG *ap, | ||
24 | $bp="a2"; # const BN_ULONG *bp, | ||
25 | $np="a3"; # const BN_ULONG *np, | ||
26 | $n0="a4"; # const BN_ULONG *n0, | ||
27 | $num="a5"; # int num); | ||
28 | |||
# Registers for intermediate values. $alo..$j are s0-s6, which the template
# saves on entry and restores before returning (it also saves s7, which it
# uses directly as a scratch register).
29 | $lo0="a6"; | ||
30 | $hi0="a7"; | ||
31 | $lo1="v0"; | ||
32 | $hi1="v1"; | ||
33 | $aj="t0"; | ||
34 | $bi="t1"; | ||
35 | $nj="t2"; | ||
36 | $tp="t3"; | ||
37 | $alo="s0"; | ||
38 | $ahi="s1"; | ||
39 | $nlo="s2"; | ||
40 | $nhi="s3"; | ||
41 | $tj="s4"; | ||
42 | $i="s5"; | ||
43 | $j="s6"; | ||
44 | $fp="t8"; | ||
45 | $m1="t9"; | ||
46 | |||
# NOTE(review): $FRAME is not referenced by the template in this file, which
# hard-codes 64 bytes for the register save area.
47 | $FRAME=8*(2+8); | ||
48 | |||
# Assembly template for bn_mul_mont. Perl interpolates the $-variables with
# the register names chosen earlier in the script. As written, the routine
# returns 0 in v0 without doing any work when num < 4, and returns 1 in v0
# after storing the result through $rp.
49 | $code=<<___; | ||
50 | #include <asm.h> | ||
51 | #include <regdef.h> | ||
52 | |||
53 | .text | ||
54 | |||
55 | .set noat | ||
56 | .set reorder | ||
57 | |||
58 | .align 5 | ||
59 | .globl bn_mul_mont | ||
60 | .ent bn_mul_mont | ||
61 | bn_mul_mont: | ||
62 | .set noreorder | ||
63 | PTR_SUB sp,64 | ||
64 | move $fp,sp | ||
65 | .frame $fp,64,ra | ||
66 | slt AT,$num,4 | ||
67 | li v0,0 | ||
68 | beqzl AT,.Lproceed | ||
69 | nop | ||
70 | jr ra | ||
71 | PTR_ADD sp,$fp,64 | ||
72 | .set reorder | ||
73 | .align 5 | ||
74 | .Lproceed: | ||
75 | ld $n0,0($n0) | ||
76 | ld $bi,0($bp) # bp[0] | ||
77 | ld $aj,0($ap) # ap[0] | ||
78 | ld $nj,0($np) # np[0] | ||
79 | PTR_SUB sp,16 # place for two extra words | ||
80 | sll $num,3 | ||
81 | li AT,-4096 | ||
82 | PTR_SUB sp,$num | ||
83 | and sp,AT | ||
84 | |||
85 | sd s0,0($fp) | ||
86 | sd s1,8($fp) | ||
87 | sd s2,16($fp) | ||
88 | sd s3,24($fp) | ||
89 | sd s4,32($fp) | ||
90 | sd s5,40($fp) | ||
91 | sd s6,48($fp) | ||
92 | sd s7,56($fp) | ||
93 | |||
94 | dmultu $aj,$bi | ||
95 | ld $alo,8($ap) | ||
96 | ld $nlo,8($np) | ||
97 | mflo $lo0 | ||
98 | mfhi $hi0 | ||
99 | dmultu $lo0,$n0 | ||
100 | mflo $m1 | ||
101 | |||
102 | dmultu $alo,$bi | ||
103 | mflo $alo | ||
104 | mfhi $ahi | ||
105 | |||
106 | dmultu $nj,$m1 | ||
107 | mflo $lo1 | ||
108 | mfhi $hi1 | ||
109 | dmultu $nlo,$m1 | ||
110 | daddu $lo1,$lo0 | ||
111 | sltu AT,$lo1,$lo0 | ||
112 | daddu $hi1,AT | ||
113 | mflo $nlo | ||
114 | mfhi $nhi | ||
115 | |||
116 | move $tp,sp | ||
117 | li $j,16 | ||
118 | .align 4 | ||
119 | .L1st: | ||
120 | .set noreorder | ||
121 | PTR_ADD $aj,$ap,$j | ||
122 | ld $aj,($aj) | ||
123 | PTR_ADD $nj,$np,$j | ||
124 | ld $nj,($nj) | ||
125 | |||
126 | dmultu $aj,$bi | ||
127 | daddu $lo0,$alo,$hi0 | ||
128 | daddu $lo1,$nlo,$hi1 | ||
129 | sltu AT,$lo0,$hi0 | ||
130 | sltu s7,$lo1,$hi1 | ||
131 | daddu $hi0,$ahi,AT | ||
132 | daddu $hi1,$nhi,s7 | ||
133 | mflo $alo | ||
134 | mfhi $ahi | ||
135 | |||
136 | daddu $lo1,$lo0 | ||
137 | sltu AT,$lo1,$lo0 | ||
138 | dmultu $nj,$m1 | ||
139 | daddu $hi1,AT | ||
140 | addu $j,8 | ||
141 | sd $lo1,($tp) | ||
142 | sltu s7,$j,$num | ||
143 | mflo $nlo | ||
144 | mfhi $nhi | ||
145 | |||
146 | bnez s7,.L1st | ||
147 | PTR_ADD $tp,8 | ||
148 | .set reorder | ||
149 | |||
150 | daddu $lo0,$alo,$hi0 | ||
151 | sltu AT,$lo0,$hi0 | ||
152 | daddu $hi0,$ahi,AT | ||
153 | |||
154 | daddu $lo1,$nlo,$hi1 | ||
155 | sltu s7,$lo1,$hi1 | ||
156 | daddu $hi1,$nhi,s7 | ||
157 | daddu $lo1,$lo0 | ||
158 | sltu AT,$lo1,$lo0 | ||
159 | daddu $hi1,AT | ||
160 | |||
161 | sd $lo1,($tp) | ||
162 | |||
163 | daddu $hi1,$hi0 | ||
164 | sltu AT,$hi1,$hi0 | ||
165 | sd $hi1,8($tp) | ||
166 | sd AT,16($tp) | ||
167 | |||
168 | li $i,8 | ||
169 | .align 4 | ||
170 | .Louter: | ||
171 | PTR_ADD $bi,$bp,$i | ||
172 | ld $bi,($bi) | ||
173 | ld $aj,($ap) | ||
174 | ld $alo,8($ap) | ||
175 | ld $tj,(sp) | ||
176 | |||
177 | dmultu $aj,$bi | ||
178 | ld $nj,($np) | ||
179 | ld $nlo,8($np) | ||
180 | mflo $lo0 | ||
181 | mfhi $hi0 | ||
182 | daddu $lo0,$tj | ||
183 | dmultu $lo0,$n0 | ||
184 | sltu AT,$lo0,$tj | ||
185 | daddu $hi0,AT | ||
186 | mflo $m1 | ||
187 | |||
188 | dmultu $alo,$bi | ||
189 | mflo $alo | ||
190 | mfhi $ahi | ||
191 | |||
192 | dmultu $nj,$m1 | ||
193 | mflo $lo1 | ||
194 | mfhi $hi1 | ||
195 | |||
196 | dmultu $nlo,$m1 | ||
197 | daddu $lo1,$lo0 | ||
198 | sltu AT,$lo1,$lo0 | ||
199 | daddu $hi1,AT | ||
200 | mflo $nlo | ||
201 | mfhi $nhi | ||
202 | |||
203 | move $tp,sp | ||
204 | li $j,16 | ||
205 | ld $tj,8($tp) | ||
206 | .align 4 | ||
207 | .Linner: | ||
208 | .set noreorder | ||
209 | PTR_ADD $aj,$ap,$j | ||
210 | ld $aj,($aj) | ||
211 | PTR_ADD $nj,$np,$j | ||
212 | ld $nj,($nj) | ||
213 | |||
214 | dmultu $aj,$bi | ||
215 | daddu $lo0,$alo,$hi0 | ||
216 | daddu $lo1,$nlo,$hi1 | ||
217 | sltu AT,$lo0,$hi0 | ||
218 | sltu s7,$lo1,$hi1 | ||
219 | daddu $hi0,$ahi,AT | ||
220 | daddu $hi1,$nhi,s7 | ||
221 | mflo $alo | ||
222 | mfhi $ahi | ||
223 | |||
224 | daddu $lo0,$tj | ||
225 | addu $j,8 | ||
226 | dmultu $nj,$m1 | ||
227 | sltu AT,$lo0,$tj | ||
228 | daddu $lo1,$lo0 | ||
229 | daddu $hi0,AT | ||
230 | sltu s7,$lo1,$lo0 | ||
231 | ld $tj,16($tp) | ||
232 | daddu $hi1,s7 | ||
233 | sltu AT,$j,$num | ||
234 | mflo $nlo | ||
235 | mfhi $nhi | ||
236 | sd $lo1,($tp) | ||
237 | bnez AT,.Linner | ||
238 | PTR_ADD $tp,8 | ||
239 | .set reorder | ||
240 | |||
241 | daddu $lo0,$alo,$hi0 | ||
242 | sltu AT,$lo0,$hi0 | ||
243 | daddu $hi0,$ahi,AT | ||
244 | daddu $lo0,$tj | ||
245 | sltu s7,$lo0,$tj | ||
246 | daddu $hi0,s7 | ||
247 | |||
248 | ld $tj,16($tp) | ||
249 | daddu $lo1,$nlo,$hi1 | ||
250 | sltu AT,$lo1,$hi1 | ||
251 | daddu $hi1,$nhi,AT | ||
252 | daddu $lo1,$lo0 | ||
253 | sltu s7,$lo1,$lo0 | ||
254 | daddu $hi1,s7 | ||
255 | sd $lo1,($tp) | ||
256 | |||
257 | daddu $lo1,$hi1,$hi0 | ||
258 | sltu $hi1,$lo1,$hi0 | ||
259 | daddu $lo1,$tj | ||
260 | sltu AT,$lo1,$tj | ||
261 | daddu $hi1,AT | ||
262 | sd $lo1,8($tp) | ||
263 | sd $hi1,16($tp) | ||
264 | |||
265 | addu $i,8 | ||
266 | sltu s7,$i,$num | ||
267 | bnez s7,.Louter | ||
268 | |||
269 | .set noreorder | ||
270 | PTR_ADD $tj,sp,$num # &tp[num] | ||
271 | move $tp,sp | ||
272 | move $ap,sp | ||
273 | li $hi0,0 # clear borrow bit | ||
274 | |||
275 | .align 4 | ||
276 | .Lsub: ld $lo0,($tp) | ||
277 | ld $lo1,($np) | ||
278 | PTR_ADD $tp,8 | ||
279 | PTR_ADD $np,8 | ||
280 | dsubu $lo1,$lo0,$lo1 # tp[i]-np[i] | ||
281 | sgtu AT,$lo1,$lo0 | ||
282 | dsubu $lo0,$lo1,$hi0 | ||
283 | sgtu $hi0,$lo0,$lo1 | ||
284 | sd $lo0,($rp) | ||
285 | or $hi0,AT | ||
286 | sltu AT,$tp,$tj | ||
287 | bnez AT,.Lsub | ||
288 | PTR_ADD $rp,8 | ||
289 | |||
290 | dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit | ||
291 | move $tp,sp | ||
292 | PTR_SUB $rp,$num # restore rp | ||
293 | not $hi1,$hi0 | ||
294 | |||
295 | and $ap,$hi0,sp | ||
296 | and $bp,$hi1,$rp | ||
297 | or $ap,$ap,$bp # ap=borrow?tp:rp | ||
298 | |||
299 | .align 4 | ||
300 | .Lcopy: ld $aj,($ap) | ||
301 | PTR_ADD $ap,8 | ||
302 | PTR_ADD $tp,8 | ||
303 | sd zero,-8($tp) | ||
304 | sltu AT,$tp,$tj | ||
305 | sd $aj,($rp) | ||
306 | bnez AT,.Lcopy | ||
307 | PTR_ADD $rp,8 | ||
308 | |||
309 | ld s0,0($fp) | ||
310 | ld s1,8($fp) | ||
311 | ld s2,16($fp) | ||
312 | ld s3,24($fp) | ||
313 | ld s4,32($fp) | ||
314 | ld s5,40($fp) | ||
315 | ld s6,48($fp) | ||
316 | ld s7,56($fp) | ||
317 | li v0,1 | ||
318 | jr ra | ||
319 | PTR_ADD sp,$fp,64 | ||
320 | .set reorder | ||
321 | END(bn_mul_mont) | ||
322 | .rdata | ||
323 | .asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>" | ||
324 | ___ | ||
325 | |||
# Write the generated assembly to standard output, then close it.
326 | print $code; | ||
327 | close STDOUT; | ||
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips3.s b/src/lib/libssl/src/crypto/bn/asm/mips3.s deleted file mode 100644 index dca4105c7d..0000000000 --- a/src/lib/libssl/src/crypto/bn/asm/mips3.s +++ /dev/null | |||
@@ -1,2201 +0,0 @@ | |||
1 | .rdata | ||
2 | .asciiz "mips3.s, Version 1.1" | ||
3 | .asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" | ||
4 | |||
5 | /* | ||
6 | * ==================================================================== | ||
7 | * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
8 | * project. | ||
9 | * | ||
10 | * Rights for redistribution and usage in source and binary forms are | ||
11 | * granted according to the OpenSSL license. Warranty of any kind is | ||
12 | * disclaimed. | ||
13 | * ==================================================================== | ||
14 | */ | ||
15 | |||
16 | /* | ||
17 | * This is my modest contribution to the OpenSSL project (see | ||
18 | * http://www.openssl.org/ for more information about it) and is | ||
19 | * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c | ||
20 | * module. For updates see http://fy.chalmers.se/~appro/hpe/. | ||
21 | * | ||
22 | * The module is designed to work with either of the "new" MIPS ABI(5), | ||
23 | * namely N32 or N64, offered by IRIX 6.x. It's not meant to work under | ||
24 | * IRIX 5.x not only because it doesn't support new ABIs but also | ||
25 | * because 5.x kernels put R4x00 CPU into 32-bit mode and all those | ||
26 | * 64-bit instructions (daddu, dmultu, etc.) found below gonna only | ||
27 | * cause illegal instruction exception:-( | ||
28 | * | ||
29 | * In addition the code depends on preprocessor flags set up by MIPSpro | ||
30 | * compiler driver (either as or cc) and therefore (probably?) can't be | ||
31 | * compiled by the GNU assembler. GNU C driver manages fine though... | ||
32 | * I mean as long as -mmips-as is specified or is the default option, | ||
33 | * because then it simply invokes /usr/bin/as which in turn takes | ||
34 | * perfect care of the preprocessor definitions. Another neat feature | ||
35 | * offered by the MIPSpro assembler is an optimization pass. This gave | ||
36 | * me the opportunity to have the code looking more regular as all those | ||
37 | * architecture dependent instruction rescheduling details were left to | ||
38 | * the assembler. Cool, huh? | ||
39 | * | ||
40 | * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | ||
41 | * goes way over 3 times faster! | ||
42 | * | ||
43 | * <appro@fy.chalmers.se> | ||
44 | */ | ||
45 | #include <asm.h> | ||
46 | #include <regdef.h> | ||
47 | |||
48 | #if _MIPS_ISA>=4 /* MOVNZ(cond,dst,src): copy src into dst when cond is non-zero, otherwise leave dst unchanged */ | ||
49 | #define MOVNZ(cond,dst,src) \ | ||
50 | movn dst,src,cond | ||
51 | #else /* ISA level below 4: emulate the conditional move with a branch-likely over its own delay slot, so the move runs only when cond is non-zero */ | ||
52 | #define MOVNZ(cond,dst,src) \ | ||
53 | .set noreorder; \ | ||
54 | bnezl cond,.+8; \ | ||
55 | move dst,src; \ | ||
56 | .set reorder | ||
57 | #endif | ||
58 | |||
59 | .text | ||
60 | |||
61 | .set noat | ||
62 | .set reorder | ||
63 | |||
64 | #define MINUS4 v1 | ||
65 | |||
66 | .align 5 | ||
67 | LEAF(bn_mul_add_words) /* for each of a2 words: (a0)[i] += (a1)[i] * a3, with v0 carrying between words; v0 holds the final carry on return */ | ||
68 | .set noreorder | ||
69 | bgtzl a2,.L_bn_mul_add_words_proceed /* count > 0: continue; branch-likely, so the load in the delay slot runs only when taken */ | ||
70 | ld t0,0(a1) | ||
71 | jr ra /* count <= 0: return 0 */ | ||
72 | move v0,zero | ||
73 | .set reorder | ||
74 | |||
75 | .L_bn_mul_add_words_proceed: | ||
76 | li MINUS4,-4 | ||
77 | and ta0,a2,MINUS4 /* ta0 = count with its low two bits cleared */ | ||
78 | move v0,zero /* carry = 0 */ | ||
79 | beqz ta0,.L_bn_mul_add_words_tail /* fewer than four words: only the tail runs */ | ||
80 | |||
81 | .L_bn_mul_add_words_loop: | ||
82 | dmultu t0,a3 /* main loop handles four words per iteration; t0 was preloaded by the branch that got here */ | ||
83 | ld t1,0(a0) | ||
84 | ld t2,8(a1) | ||
85 | ld t3,8(a0) | ||
86 | ld ta0,16(a1) | ||
87 | ld ta1,16(a0) | ||
88 | daddu t1,v0 /* destination word + incoming carry */ | ||
89 | sltu v0,t1,v0 /* All manuals say it "compares 32-bit | ||
90 | * values", but it seems to work fine | ||
91 | * even on 64-bit registers. */ | ||
92 | mflo AT | ||
93 | mfhi t0 | ||
94 | daddu t1,AT /* + low half of the product */ | ||
95 | daddu v0,t0 /* new carry = high half + the two carry-outs */ | ||
96 | sltu AT,t1,AT | ||
97 | sd t1,0(a0) | ||
98 | daddu v0,AT | ||
99 | |||
100 | dmultu t2,a3 | ||
101 | ld ta2,24(a1) | ||
102 | ld ta3,24(a0) | ||
103 | daddu t3,v0 | ||
104 | sltu v0,t3,v0 | ||
105 | mflo AT | ||
106 | mfhi t2 | ||
107 | daddu t3,AT | ||
108 | daddu v0,t2 | ||
109 | sltu AT,t3,AT | ||
110 | sd t3,8(a0) | ||
111 | daddu v0,AT | ||
112 | |||
113 | dmultu ta0,a3 | ||
114 | subu a2,4 | ||
115 | PTR_ADD a0,32 /* pointers advance here, so the remaining two stores use negative offsets */ | ||
116 | PTR_ADD a1,32 | ||
117 | daddu ta1,v0 | ||
118 | sltu v0,ta1,v0 | ||
119 | mflo AT | ||
120 | mfhi ta0 | ||
121 | daddu ta1,AT | ||
122 | daddu v0,ta0 | ||
123 | sltu AT,ta1,AT | ||
124 | sd ta1,-16(a0) | ||
125 | daddu v0,AT | ||
126 | |||
127 | |||
128 | dmultu ta2,a3 | ||
129 | and ta0,a2,MINUS4 /* non-zero while at least four words remain */ | ||
130 | daddu ta3,v0 | ||
131 | sltu v0,ta3,v0 | ||
132 | mflo AT | ||
133 | mfhi ta2 | ||
134 | daddu ta3,AT | ||
135 | daddu v0,ta2 | ||
136 | sltu AT,ta3,AT | ||
137 | sd ta3,-8(a0) | ||
138 | daddu v0,AT | ||
139 | .set noreorder | ||
140 | bgtzl ta0,.L_bn_mul_add_words_loop | ||
141 | ld t0,0(a1) | ||
142 | |||
143 | bnezl a2,.L_bn_mul_add_words_tail /* one to three words left over */ | ||
144 | ld t0,0(a1) | ||
145 | .set reorder | ||
146 | |||
147 | .L_bn_mul_add_words_return: | ||
148 | jr ra | ||
149 | |||
150 | .L_bn_mul_add_words_tail: | ||
151 | dmultu t0,a3 /* tail: up to three words, one at a time, returning as soon as the count reaches zero */ | ||
152 | ld t1,0(a0) | ||
153 | subu a2,1 | ||
154 | daddu t1,v0 | ||
155 | sltu v0,t1,v0 | ||
156 | mflo AT | ||
157 | mfhi t0 | ||
158 | daddu t1,AT | ||
159 | daddu v0,t0 | ||
160 | sltu AT,t1,AT | ||
161 | sd t1,0(a0) | ||
162 | daddu v0,AT | ||
163 | beqz a2,.L_bn_mul_add_words_return | ||
164 | |||
165 | ld t0,8(a1) | ||
166 | dmultu t0,a3 | ||
167 | ld t1,8(a0) | ||
168 | subu a2,1 | ||
169 | daddu t1,v0 | ||
170 | sltu v0,t1,v0 | ||
171 | mflo AT | ||
172 | mfhi t0 | ||
173 | daddu t1,AT | ||
174 | daddu v0,t0 | ||
175 | sltu AT,t1,AT | ||
176 | sd t1,8(a0) | ||
177 | daddu v0,AT | ||
178 | beqz a2,.L_bn_mul_add_words_return | ||
179 | |||
180 | ld t0,16(a1) | ||
181 | dmultu t0,a3 | ||
182 | ld t1,16(a0) | ||
183 | daddu t1,v0 | ||
184 | sltu v0,t1,v0 | ||
185 | mflo AT | ||
186 | mfhi t0 | ||
187 | daddu t1,AT | ||
188 | daddu v0,t0 | ||
189 | sltu AT,t1,AT | ||
190 | sd t1,16(a0) | ||
191 | daddu v0,AT | ||
192 | jr ra | ||
193 | END(bn_mul_add_words) | ||
194 | |||
195 | .align 5 | ||
196 | LEAF(bn_mul_words) /* for each of a2 words: (a0)[i] = low 64 bits of (a1)[i] * a3 + carry; v0 is the running carry and holds the final carry on return */ | ||
197 | .set noreorder | ||
198 | bgtzl a2,.L_bn_mul_words_proceed /* count > 0: continue; the delay-slot load runs only when taken */ | ||
199 | ld t0,0(a1) | ||
200 | jr ra /* count <= 0: return 0 */ | ||
201 | move v0,zero | ||
202 | .set reorder | ||
203 | |||
204 | .L_bn_mul_words_proceed: | ||
205 | li MINUS4,-4 | ||
206 | and ta0,a2,MINUS4 /* ta0 = count with its low two bits cleared */ | ||
207 | move v0,zero /* carry = 0 */ | ||
208 | beqz ta0,.L_bn_mul_words_tail /* fewer than four words: only the tail runs */ | ||
209 | |||
210 | .L_bn_mul_words_loop: | ||
211 | dmultu t0,a3 /* main loop handles four words per iteration */ | ||
212 | ld t2,8(a1) | ||
213 | ld ta0,16(a1) | ||
214 | ld ta2,24(a1) | ||
215 | mflo AT | ||
216 | mfhi t0 | ||
217 | daddu v0,AT /* carry + low half of the product */ | ||
218 | sltu t1,v0,AT /* t1 = carry out of that addition */ | ||
219 | sd v0,0(a0) | ||
220 | daddu v0,t1,t0 /* next carry = high half + carry-out */ | ||
221 | |||
222 | dmultu t2,a3 | ||
223 | subu a2,4 | ||
224 | PTR_ADD a0,32 /* pointers advance here, so the remaining stores use negative offsets */ | ||
225 | PTR_ADD a1,32 | ||
226 | mflo AT | ||
227 | mfhi t2 | ||
228 | daddu v0,AT | ||
229 | sltu t3,v0,AT | ||
230 | sd v0,-24(a0) | ||
231 | daddu v0,t3,t2 | ||
232 | |||
233 | dmultu ta0,a3 | ||
234 | mflo AT | ||
235 | mfhi ta0 | ||
236 | daddu v0,AT | ||
237 | sltu ta1,v0,AT | ||
238 | sd v0,-16(a0) | ||
239 | daddu v0,ta1,ta0 | ||
240 | |||
241 | |||
242 | dmultu ta2,a3 | ||
243 | and ta0,a2,MINUS4 /* non-zero while at least four words remain */ | ||
244 | mflo AT | ||
245 | mfhi ta2 | ||
246 | daddu v0,AT | ||
247 | sltu ta3,v0,AT | ||
248 | sd v0,-8(a0) | ||
249 | daddu v0,ta3,ta2 | ||
250 | .set noreorder | ||
251 | bgtzl ta0,.L_bn_mul_words_loop | ||
252 | ld t0,0(a1) | ||
253 | |||
254 | bnezl a2,.L_bn_mul_words_tail /* one to three words left over */ | ||
255 | ld t0,0(a1) | ||
256 | .set reorder | ||
257 | |||
258 | .L_bn_mul_words_return: | ||
259 | jr ra | ||
260 | |||
261 | .L_bn_mul_words_tail: | ||
262 | dmultu t0,a3 /* tail: up to three words, one at a time */ | ||
263 | subu a2,1 | ||
264 | mflo AT | ||
265 | mfhi t0 | ||
266 | daddu v0,AT | ||
267 | sltu t1,v0,AT | ||
268 | sd v0,0(a0) | ||
269 | daddu v0,t1,t0 | ||
270 | beqz a2,.L_bn_mul_words_return | ||
271 | |||
272 | ld t0,8(a1) | ||
273 | dmultu t0,a3 | ||
274 | subu a2,1 | ||
275 | mflo AT | ||
276 | mfhi t0 | ||
277 | daddu v0,AT | ||
278 | sltu t1,v0,AT | ||
279 | sd v0,8(a0) | ||
280 | daddu v0,t1,t0 | ||
281 | beqz a2,.L_bn_mul_words_return | ||
282 | |||
283 | ld t0,16(a1) | ||
284 | dmultu t0,a3 | ||
285 | mflo AT | ||
286 | mfhi t0 | ||
287 | daddu v0,AT | ||
288 | sltu t1,v0,AT | ||
289 | sd v0,16(a0) | ||
290 | daddu v0,t1,t0 | ||
291 | jr ra | ||
292 | END(bn_mul_words) | ||
293 | |||
294 | .align 5 | ||
295 | LEAF(bn_sqr_words) /* for each of a2 input words at a1, stores the low then the high half of its square as two consecutive words at a0; v0 is zero on every return path */ | ||
296 | .set noreorder | ||
297 | bgtzl a2,.L_bn_sqr_words_proceed /* count > 0: continue; the delay-slot load runs only when taken */ | ||
298 | ld t0,0(a1) | ||
299 | jr ra /* count <= 0: return 0 */ | ||
300 | move v0,zero | ||
301 | .set reorder | ||
302 | |||
303 | .L_bn_sqr_words_proceed: | ||
304 | li MINUS4,-4 | ||
305 | and ta0,a2,MINUS4 /* ta0 = count with its low two bits cleared */ | ||
306 | move v0,zero /* v0 is not written again except by the return label */ | ||
307 | beqz ta0,.L_bn_sqr_words_tail /* fewer than four words: only the tail runs */ | ||
308 | |||
309 | .L_bn_sqr_words_loop: | ||
310 | dmultu t0,t0 /* main loop squares four input words per iteration */ | ||
311 | ld t2,8(a1) | ||
312 | ld ta0,16(a1) | ||
313 | ld ta2,24(a1) | ||
314 | mflo t1 | ||
315 | mfhi t0 | ||
316 | sd t1,0(a0) | ||
317 | sd t0,8(a0) | ||
318 | |||
319 | dmultu t2,t2 | ||
320 | subu a2,4 | ||
321 | PTR_ADD a0,64 /* output advances 64 bytes per 32 bytes of input: two result words per input word */ | ||
322 | PTR_ADD a1,32 | ||
323 | mflo t3 | ||
324 | mfhi t2 | ||
325 | sd t3,-48(a0) | ||
326 | sd t2,-40(a0) | ||
327 | |||
328 | dmultu ta0,ta0 | ||
329 | mflo ta1 | ||
330 | mfhi ta0 | ||
331 | sd ta1,-32(a0) | ||
332 | sd ta0,-24(a0) | ||
333 | |||
334 | |||
335 | dmultu ta2,ta2 | ||
336 | and ta0,a2,MINUS4 /* non-zero while at least four words remain */ | ||
337 | mflo ta3 | ||
338 | mfhi ta2 | ||
339 | sd ta3,-16(a0) | ||
340 | sd ta2,-8(a0) | ||
341 | |||
342 | .set noreorder | ||
343 | bgtzl ta0,.L_bn_sqr_words_loop | ||
344 | ld t0,0(a1) | ||
345 | |||
346 | bnezl a2,.L_bn_sqr_words_tail /* one to three words left over */ | ||
347 | ld t0,0(a1) | ||
348 | .set reorder | ||
349 | |||
350 | .L_bn_sqr_words_return: | ||
351 | move v0,zero | ||
352 | jr ra | ||
353 | |||
354 | .L_bn_sqr_words_tail: | ||
355 | dmultu t0,t0 /* tail: up to three words, one at a time */ | ||
356 | subu a2,1 | ||
357 | mflo t1 | ||
358 | mfhi t0 | ||
359 | sd t1,0(a0) | ||
360 | sd t0,8(a0) | ||
361 | beqz a2,.L_bn_sqr_words_return | ||
362 | |||
363 | ld t0,8(a1) | ||
364 | dmultu t0,t0 | ||
365 | subu a2,1 | ||
366 | mflo t1 | ||
367 | mfhi t0 | ||
368 | sd t1,16(a0) | ||
369 | sd t0,24(a0) | ||
370 | beqz a2,.L_bn_sqr_words_return | ||
371 | |||
372 | ld t0,16(a1) | ||
373 | dmultu t0,t0 | ||
374 | mflo t1 | ||
375 | mfhi t0 | ||
376 | sd t1,32(a0) | ||
377 | sd t0,40(a0) | ||
378 | jr ra | ||
379 | END(bn_sqr_words) | ||
380 | |||
381 | .align 5 | ||
382 | LEAF(bn_add_words) /* for each of a3 words: (a0)[i] = (a1)[i] + (a2)[i] + carry; v0 is the running carry and holds the final carry on return */ | ||
383 | .set noreorder | ||
384 | bgtzl a3,.L_bn_add_words_proceed /* count > 0: continue; the delay-slot load runs only when taken */ | ||
385 | ld t0,0(a1) | ||
386 | jr ra /* count <= 0: return 0 */ | ||
387 | move v0,zero | ||
388 | .set reorder | ||
389 | |||
390 | .L_bn_add_words_proceed: | ||
391 | li MINUS4,-4 | ||
392 | and AT,a3,MINUS4 /* AT = count with its low two bits cleared */ | ||
393 | move v0,zero /* carry = 0 */ | ||
394 | beqz AT,.L_bn_add_words_tail /* fewer than four words: only the tail runs */ | ||
395 | |||
396 | .L_bn_add_words_loop: | ||
397 | ld ta0,0(a2) /* main loop handles four words per iteration; loads and pointer updates are interleaved */ | ||
398 | subu a3,4 | ||
399 | ld t1,8(a1) | ||
400 | and AT,a3,MINUS4 /* non-zero while at least four words remain after this group */ | ||
401 | ld t2,16(a1) | ||
402 | PTR_ADD a2,32 | ||
403 | ld t3,24(a1) | ||
404 | PTR_ADD a0,32 | ||
405 | ld ta1,-24(a2) | ||
406 | PTR_ADD a1,32 | ||
407 | ld ta2,-16(a2) | ||
408 | ld ta3,-8(a2) | ||
409 | daddu ta0,t0 /* sum of the two operand words */ | ||
410 | sltu t8,ta0,t0 /* t8 = carry out of that sum */ | ||
411 | daddu t0,ta0,v0 /* + incoming carry */ | ||
412 | sltu v0,t0,ta0 /* carry out of adding the incoming carry */ | ||
413 | sd t0,-32(a0) | ||
414 | daddu v0,t8 /* outgoing carry combines both carry-outs */ | ||
415 | |||
416 | daddu ta1,t1 | ||
417 | sltu t9,ta1,t1 | ||
418 | daddu t1,ta1,v0 | ||
419 | sltu v0,t1,ta1 | ||
420 | sd t1,-24(a0) | ||
421 | daddu v0,t9 | ||
422 | |||
423 | daddu ta2,t2 | ||
424 | sltu t8,ta2,t2 | ||
425 | daddu t2,ta2,v0 | ||
426 | sltu v0,t2,ta2 | ||
427 | sd t2,-16(a0) | ||
428 | daddu v0,t8 | ||
429 | |||
430 | daddu ta3,t3 | ||
431 | sltu t9,ta3,t3 | ||
432 | daddu t3,ta3,v0 | ||
433 | sltu v0,t3,ta3 | ||
434 | sd t3,-8(a0) | ||
435 | daddu v0,t9 | ||
436 | |||
437 | .set noreorder | ||
438 | bgtzl AT,.L_bn_add_words_loop | ||
439 | ld t0,0(a1) | ||
440 | |||
441 | bnezl a3,.L_bn_add_words_tail /* one to three words left over */ | ||
442 | ld t0,0(a1) | ||
443 | .set reorder | ||
444 | |||
445 | .L_bn_add_words_return: | ||
446 | jr ra | ||
447 | |||
448 | .L_bn_add_words_tail: | ||
449 | ld ta0,0(a2) /* tail: up to three words, one at a time */ | ||
450 | daddu ta0,t0 | ||
451 | subu a3,1 | ||
452 | sltu t8,ta0,t0 | ||
453 | daddu t0,ta0,v0 | ||
454 | sltu v0,t0,ta0 | ||
455 | sd t0,0(a0) | ||
456 | daddu v0,t8 | ||
457 | beqz a3,.L_bn_add_words_return | ||
458 | |||
459 | ld t1,8(a1) | ||
460 | ld ta1,8(a2) | ||
461 | daddu ta1,t1 | ||
462 | subu a3,1 | ||
463 | sltu t9,ta1,t1 | ||
464 | daddu t1,ta1,v0 | ||
465 | sltu v0,t1,ta1 | ||
466 | sd t1,8(a0) | ||
467 | daddu v0,t9 | ||
468 | beqz a3,.L_bn_add_words_return | ||
469 | |||
470 | ld t2,16(a1) | ||
471 | ld ta2,16(a2) | ||
472 | daddu ta2,t2 | ||
473 | sltu t8,ta2,t2 | ||
474 | daddu t2,ta2,v0 | ||
475 | sltu v0,t2,ta2 | ||
476 | sd t2,16(a0) | ||
477 | daddu v0,t8 | ||
478 | jr ra | ||
479 | END(bn_add_words) | ||
480 | |||
481 | .align 5 | ||
482 | LEAF(bn_sub_words) /* for each of a3 words: (a0)[i] = (a1)[i] - (a2)[i] - borrow; v0 is the running borrow and holds the final borrow on return */ | ||
483 | .set noreorder | ||
484 | bgtzl a3,.L_bn_sub_words_proceed /* count > 0: continue; the delay-slot load runs only when taken */ | ||
485 | ld t0,0(a1) | ||
486 | jr ra /* count <= 0: return 0 */ | ||
487 | move v0,zero | ||
488 | .set reorder | ||
489 | |||
490 | .L_bn_sub_words_proceed: | ||
491 | li MINUS4,-4 | ||
492 | and AT,a3,MINUS4 /* AT = count with its low two bits cleared */ | ||
493 | move v0,zero /* borrow = 0 */ | ||
494 | beqz AT,.L_bn_sub_words_tail /* fewer than four words: only the tail runs */ | ||
495 | |||
496 | .L_bn_sub_words_loop: | ||
497 | ld ta0,0(a2) /* main loop handles four words per iteration; loads and pointer updates are interleaved */ | ||
498 | subu a3,4 | ||
499 | ld t1,8(a1) | ||
500 | and AT,a3,MINUS4 /* non-zero while at least four words remain after this group */ | ||
501 | ld t2,16(a1) | ||
502 | PTR_ADD a2,32 | ||
503 | ld t3,24(a1) | ||
504 | PTR_ADD a0,32 | ||
505 | ld ta1,-24(a2) | ||
506 | PTR_ADD a1,32 | ||
507 | ld ta2,-16(a2) | ||
508 | ld ta3,-8(a2) | ||
509 | sltu t8,t0,ta0 /* t8 = 1 when the minuend word is smaller than the subtrahend word */ | ||
510 | dsubu t0,ta0 /* raw difference */ | ||
511 | dsubu ta0,t0,v0 /* minus the incoming borrow */ | ||
512 | sd ta0,-32(a0) | ||
513 | MOVNZ (t0,v0,t8) /* raw difference non-zero: borrow becomes t8; when it is zero the incoming borrow is kept */ | ||
514 | |||
515 | sltu t9,t1,ta1 | ||
516 | dsubu t1,ta1 | ||
517 | dsubu ta1,t1,v0 | ||
518 | sd ta1,-24(a0) | ||
519 | MOVNZ (t1,v0,t9) | ||
520 | |||
521 | |||
522 | sltu t8,t2,ta2 | ||
523 | dsubu t2,ta2 | ||
524 | dsubu ta2,t2,v0 | ||
525 | sd ta2,-16(a0) | ||
526 | MOVNZ (t2,v0,t8) | ||
527 | |||
528 | sltu t9,t3,ta3 | ||
529 | dsubu t3,ta3 | ||
530 | dsubu ta3,t3,v0 | ||
531 | sd ta3,-8(a0) | ||
532 | MOVNZ (t3,v0,t9) | ||
533 | |||
534 | .set noreorder | ||
535 | bgtzl AT,.L_bn_sub_words_loop | ||
536 | ld t0,0(a1) | ||
537 | |||
538 | bnezl a3,.L_bn_sub_words_tail /* one to three words left over */ | ||
539 | ld t0,0(a1) | ||
540 | .set reorder | ||
541 | |||
542 | .L_bn_sub_words_return: | ||
543 | jr ra | ||
544 | |||
545 | .L_bn_sub_words_tail: | ||
546 | ld ta0,0(a2) /* tail: up to three words, one at a time */ | ||
547 | subu a3,1 | ||
548 | sltu t8,t0,ta0 | ||
549 | dsubu t0,ta0 | ||
550 | dsubu ta0,t0,v0 | ||
551 | MOVNZ (t0,v0,t8) | ||
552 | sd ta0,0(a0) | ||
553 | beqz a3,.L_bn_sub_words_return | ||
554 | |||
555 | ld t1,8(a1) | ||
556 | subu a3,1 | ||
557 | ld ta1,8(a2) | ||
558 | sltu t9,t1,ta1 | ||
559 | dsubu t1,ta1 | ||
560 | dsubu ta1,t1,v0 | ||
561 | MOVNZ (t1,v0,t9) | ||
562 | sd ta1,8(a0) | ||
563 | beqz a3,.L_bn_sub_words_return | ||
564 | |||
565 | ld t2,16(a1) | ||
566 | ld ta2,16(a2) | ||
567 | sltu t8,t2,ta2 | ||
568 | dsubu t2,ta2 | ||
569 | dsubu ta2,t2,v0 | ||
570 | MOVNZ (t2,v0,t8) | ||
571 | sd ta2,16(a0) | ||
572 | jr ra | ||
573 | END(bn_sub_words) | ||
574 | |||
575 | #undef MINUS4 | ||
576 | |||
577 | .align 5 | ||
578 | LEAF(bn_div_3_words) /* a0 = pointer to a word, a1 and a2 = two further word arguments; result in v0. NOTE(review): after the call this code reads v0 and v1 as outputs of bn_div_words -- presumably quotient and remainder; confirm against bn_div_words */ | ||
579 | .set reorder | ||
580 | move a3,a0 /* we know that bn_div_words doesn't | ||
581 | * touch a3, ta2, ta3 and preserves a2 | ||
582 | * so that we can save two arguments | ||
583 | * and return address in registers | ||
584 | * instead of stack:-) | ||
585 | */ | ||
586 | ld a0,(a3) /* a0 = the word the incoming pointer refers to */ | ||
587 | move ta2,a1 /* keep the original a1 argument in ta2 */ | ||
588 | ld a1,-8(a3) /* a1 = the word 8 bytes below it */ | ||
589 | bne a0,a2,.L_bn_div_3_words_proceed | ||
590 | li v0,-1 /* loaded word equals a2: return all ones */ | ||
591 | jr ra | ||
592 | .L_bn_div_3_words_proceed: | ||
593 | move ta3,ra /* keep the return address in a register across the call */ | ||
594 | bal bn_div_words | ||
595 | move ra,ta3 | ||
596 | dmultu ta2,v0 /* saved a1 argument times the value bn_div_words left in v0 */ | ||
597 | ld t2,-16(a3) | ||
598 | move ta0,zero | ||
599 | mfhi t1 | ||
600 | mflo t0 | ||
601 | sltu t8,t1,v1 | ||
602 | .L_bn_div_3_words_inner_loop: | ||
603 | bnez t8,.L_bn_div_3_words_inner_loop_done | ||
604 | sgeu AT,t2,t0 | ||
605 | seq t9,t1,v1 | ||
606 | and AT,t9 | ||
607 | sltu t3,t0,ta2 /* borrow out of the low-word subtraction below */ | ||
608 | daddu v1,a2 | ||
609 | dsubu t1,t3 | ||
610 | dsubu t0,ta2 | ||
611 | sltu t8,t1,v1 | ||
612 | sltu ta0,v1,a2 /* ta0 = 1 when v1 + a2 wrapped around */ | ||
613 | or t8,ta0 | ||
614 | .set noreorder | ||
615 | beqzl AT,.L_bn_div_3_words_inner_loop | ||
616 | dsubu v0,1 /* branch-likely delay slot: v0 is decremented only when the loop repeats */ | ||
617 | .set reorder | ||
618 | .L_bn_div_3_words_inner_loop_done: | ||
619 | jr ra | ||
620 | END(bn_div_3_words) | ||
621 | |||
622 | .align 5 | ||
623 | LEAF(bn_div_words) /* In: a0 = high dividend word, a1 = low dividend word, a2 = divisor. Out: v0 = quotient, assembled from two 32-bit digits; v1 = remainder. Returns v0 = -1 when a2 is zero. a2 is shifted left for the computation and shifted back before returning. */ | ||
624 | .set noreorder | ||
625 | bnezl a2,.L_bn_div_words_proceed | ||
626 | move v1,zero /* delay slot of a branch-likely: runs only when a2 != 0 */ | ||
627 | jr ra | ||
628 | li v0,-1 /* I'd rather signal div-by-zero | ||
629 | * which can be done with 'break 7' */ | ||
630 | |||
631 | .L_bn_div_words_proceed: | ||
632 | bltz a2,.L_bn_div_words_body /* top bit already set: no shifting needed */ | ||
633 | move t9,v1 /* delay slot: shift count t9 = 0 */ | ||
634 | dsll a2,1 | ||
635 | bgtz a2,.-4 /* back to the dsll until the top bit of a2 is set */ | ||
636 | addu t9,1 /* delay slot: count every shift */ | ||
637 | |||
638 | .set reorder | ||
639 | negu t1,t9 | ||
640 | li t2,-1 | ||
641 | dsll t2,t1 /* shift amount is taken mod 64, i.e. 64-t9: t2 = mask of the top t9 bits */ | ||
642 | and t2,a0 /* nonzero if a0 has bits that the shift by t9 would push out */ | ||
643 | dsrl AT,a1,t1 /* AT = bits of a1 that move up into a0 */ | ||
644 | .set noreorder | ||
645 | bnezl t2,.+8 | ||
646 | break 6 /* signal overflow */ | ||
647 | .set reorder | ||
648 | dsll a0,t9 | ||
649 | dsll a1,t9 | ||
650 | or a0,AT /* (a0:a1) <<= t9 */ | ||
651 | |||
652 | #define QT ta0 | ||
653 | #define HH ta1 | ||
654 | #define DH v1 | ||
655 | .L_bn_div_words_body: | ||
656 | dsrl DH,a2,32 /* DH = upper 32 bits of the divisor */ | ||
657 | sgeu AT,a0,a2 | ||
658 | .set noreorder | ||
659 | bnezl AT,.+8 | ||
660 | dsubu a0,a2 /* delay slot of a branch-likely: a0 -= a2 only when a0 >= a2 */ | ||
661 | .set reorder | ||
662 | |||
663 | li QT,-1 | ||
664 | dsrl HH,a0,32 | ||
665 | dsrl QT,32 /* q=0xffffffff */ | ||
666 | beq DH,HH,.L_bn_div_words_skip_div1 /* keep q=0xffffffff when the upper halves match */ | ||
667 | ddivu zero,a0,DH | ||
668 | mflo QT /* otherwise estimate q = a0 / DH */ | ||
669 | .L_bn_div_words_skip_div1: | ||
670 | dmultu a2,QT | ||
671 | dsll t3,a0,32 | ||
672 | dsrl AT,a1,32 | ||
673 | or t3,AT /* t3 = low half of a0 followed by high half of a1 */ | ||
674 | mflo t0 | ||
675 | mfhi t1 /* (t1:t0) = a2 * q */ | ||
676 | .L_bn_div_words_inner_loop1: | ||
677 | sltu t2,t3,t0 | ||
678 | seq t8,HH,t1 | ||
679 | sltu AT,HH,t1 | ||
680 | and t2,t8 | ||
681 | sltu v0,t0,a2 /* borrow for the t0 -= a2 below */ | ||
682 | or AT,t2 /* AT = (t1:t0) > (HH:t3), i.e. q is still too large */ | ||
683 | .set noreorder | ||
684 | beqz AT,.L_bn_div_words_inner_loop1_done | ||
685 | dsubu t1,v0 /* delay slot: executed on both paths */ | ||
686 | dsubu t0,a2 | ||
687 | b .L_bn_div_words_inner_loop1 | ||
688 | dsubu QT,1 /* delay slot: q-- */ | ||
689 | .set reorder | ||
690 | .L_bn_div_words_inner_loop1_done: | ||
691 | |||
692 | dsll a1,32 | ||
693 | dsubu a0,t3,t0 /* partial remainder for the second digit */ | ||
694 | dsll v0,QT,32 /* first digit goes to the upper half of the result */ | ||
695 | |||
696 | li QT,-1 | ||
697 | dsrl HH,a0,32 | ||
698 | dsrl QT,32 /* q=0xffffffff */ | ||
699 | beq DH,HH,.L_bn_div_words_skip_div2 | ||
700 | ddivu zero,a0,DH | ||
701 | mflo QT | ||
702 | .L_bn_div_words_skip_div2: | ||
703 | #undef DH /* v1 is reused as a plain scratch register from here on */ | ||
704 | dmultu a2,QT | ||
705 | dsll t3,a0,32 | ||
706 | dsrl AT,a1,32 | ||
707 | or t3,AT | ||
708 | mflo t0 | ||
709 | mfhi t1 | ||
710 | .L_bn_div_words_inner_loop2: | ||
711 | sltu t2,t3,t0 | ||
712 | seq t8,HH,t1 | ||
713 | sltu AT,HH,t1 | ||
714 | and t2,t8 | ||
715 | sltu v1,t0,a2 /* v1 holds the borrow here (v0 already carries the first digit) */ | ||
716 | or AT,t2 | ||
717 | .set noreorder | ||
718 | beqz AT,.L_bn_div_words_inner_loop2_done | ||
719 | dsubu t1,v1 | ||
720 | dsubu t0,a2 | ||
721 | b .L_bn_div_words_inner_loop2 | ||
722 | dsubu QT,1 | ||
723 | .set reorder | ||
724 | .L_bn_div_words_inner_loop2_done: | ||
725 | #undef HH | ||
726 | |||
727 | dsubu a0,t3,t0 | ||
728 | or v0,QT /* second digit goes to the lower half of the result */ | ||
729 | dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ | ||
730 | dsrl a2,t9 /* restore a2 */ | ||
731 | jr ra | ||
732 | #undef QT | ||
733 | END(bn_div_words) | ||
734 | |||
735 | #define a_0 t0 /* a_N / b_N name the registers that hold input words a[N] / b[N] */ | ||
736 | #define a_1 t1 | ||
737 | #define a_2 t2 | ||
738 | #define a_3 t3 | ||
739 | #define b_0 ta0 | ||
740 | #define b_1 ta1 | ||
741 | #define b_2 ta2 | ||
742 | #define b_3 ta3 | ||
743 | |||
744 | #define a_4 s0 /* a[4..6] and b[4..6] live in s0-s5, which bn_mul_comba8 saves and reloads */ | ||
745 | #define a_5 s2 | ||
746 | #define a_6 s4 | ||
747 | #define a_7 a1 /* once we load a[7] we don't need a anymore */ | ||
748 | #define b_4 s1 | ||
749 | #define b_5 s3 | ||
750 | #define b_6 s5 | ||
751 | #define b_7 a2 /* once we load b[7] we don't need b anymore */ | ||
752 | |||
753 | #define t_1 t8 /* t_1 / t_2 receive the low / high product words (mflo / mfhi) */ | ||
754 | #define t_2 t9 | ||
755 | |||
756 | #define c_1 v0 /* c_1..c_3 form the rotating three-word column accumulator */ | ||
757 | #define c_2 v1 | ||
758 | #define c_3 a3 | ||
759 | |||
760 | #define FRAME_SIZE 48 /* six 8-byte slots: bn_mul_comba8 stores s0-s5 at 0..40(sp) */ | ||
761 | |||
762 | .align 5 | ||
763 | LEAF(bn_mul_comba8) /* In: a0 = r (16 result words written), a1 = a (8 words read), a2 = b (8 words read). Each result word is one column of the schoolbook product, accumulated through the rotating registers c_1..c_3. s0-s5 are saved on a FRAME_SIZE-byte stack frame and reloaded before returning. */ | ||
764 | .set noreorder | ||
765 | PTR_SUB sp,FRAME_SIZE /* allocate the register save area */ | ||
766 | .frame sp,FRAME_SIZE,ra /* declared frame size must match the PTR_SUB above */ | ||
767 | .set reorder | ||
768 | ld a_0,0(a1) /* If compiled with -mips3 option on | ||
769 | * R5000 box assembler barks on this | ||
770 | * line with "shouldn't have mult/div | ||
771 | * as last instruction in bb (R10K | ||
772 | * bug)" warning. If anybody out there | ||
773 | * has a clue about how to circumvent | ||
774 | * this do send me a note. | ||
775 | * <appro@fy.chalmers.se> | ||
776 | */ | ||
777 | ld b_0,0(a2) | ||
778 | ld a_1,8(a1) | ||
779 | ld a_2,16(a1) | ||
780 | ld a_3,24(a1) | ||
781 | ld b_1,8(a2) | ||
782 | ld b_2,16(a2) | ||
783 | ld b_3,24(a2) | ||
784 | dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
785 | sd s0,0(sp) /* s0-s5 are stored here and reloaded before the final jr */ | ||
786 | sd s1,8(sp) | ||
787 | sd s2,16(sp) | ||
788 | sd s3,24(sp) | ||
789 | sd s4,32(sp) | ||
790 | sd s5,40(sp) | ||
791 | mflo c_1 | ||
792 | mfhi c_2 | ||
793 | |||
794 | dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | ||
795 | ld a_4,32(a1) | ||
796 | ld a_5,40(a1) | ||
797 | ld a_6,48(a1) | ||
798 | ld a_7,56(a1) | ||
799 | ld b_4,32(a2) | ||
800 | ld b_5,40(a2) | ||
801 | mflo t_1 | ||
802 | mfhi t_2 | ||
803 | daddu c_2,t_1 | ||
804 | sltu AT,c_2,t_1 | ||
805 | daddu c_3,t_2,AT /* first write to c_3: initialise rather than accumulate */ | ||
806 | dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | ||
807 | ld b_6,48(a2) | ||
808 | ld b_7,56(a2) | ||
809 | sd c_1,0(a0) /* r[0]=c1; */ | ||
810 | mflo t_1 | ||
811 | mfhi t_2 | ||
812 | daddu c_2,t_1 | ||
813 | sltu AT,c_2,t_1 | ||
814 | daddu t_2,AT | ||
815 | daddu c_3,t_2 | ||
816 | sltu c_1,c_3,t_2 /* c_1 restarts as the carry out of c_3 */ | ||
817 | sd c_2,8(a0) /* r[1]=c2; */ | ||
818 | |||
819 | dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | ||
820 | mflo t_1 | ||
821 | mfhi t_2 | ||
822 | daddu c_3,t_1 | ||
823 | sltu AT,c_3,t_1 | ||
824 | daddu t_2,AT | ||
825 | daddu c_1,t_2 | ||
826 | dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
827 | mflo t_1 | ||
828 | mfhi t_2 | ||
829 | daddu c_3,t_1 | ||
830 | sltu AT,c_3,t_1 | ||
831 | daddu t_2,AT | ||
832 | daddu c_1,t_2 | ||
833 | sltu c_2,c_1,t_2 | ||
834 | dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | ||
835 | mflo t_1 | ||
836 | mfhi t_2 | ||
837 | daddu c_3,t_1 | ||
838 | sltu AT,c_3,t_1 | ||
839 | daddu t_2,AT | ||
840 | daddu c_1,t_2 | ||
841 | sltu AT,c_1,t_2 | ||
842 | daddu c_2,AT | ||
843 | sd c_3,16(a0) /* r[2]=c3; */ | ||
844 | |||
845 | dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | ||
846 | mflo t_1 | ||
847 | mfhi t_2 | ||
848 | daddu c_1,t_1 | ||
849 | sltu AT,c_1,t_1 | ||
850 | daddu t_2,AT | ||
851 | daddu c_2,t_2 | ||
852 | sltu c_3,c_2,t_2 | ||
853 | dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | ||
854 | mflo t_1 | ||
855 | mfhi t_2 | ||
856 | daddu c_1,t_1 | ||
857 | sltu AT,c_1,t_1 | ||
858 | daddu t_2,AT | ||
859 | daddu c_2,t_2 | ||
860 | sltu AT,c_2,t_2 | ||
861 | daddu c_3,AT | ||
862 | dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | ||
863 | mflo t_1 | ||
864 | mfhi t_2 | ||
865 | daddu c_1,t_1 | ||
866 | sltu AT,c_1,t_1 | ||
867 | daddu t_2,AT | ||
868 | daddu c_2,t_2 | ||
869 | sltu AT,c_2,t_2 | ||
870 | daddu c_3,AT | ||
871 | dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | ||
872 | mflo t_1 | ||
873 | mfhi t_2 | ||
874 | daddu c_1,t_1 | ||
875 | sltu AT,c_1,t_1 | ||
876 | daddu t_2,AT | ||
877 | daddu c_2,t_2 | ||
878 | sltu AT,c_2,t_2 | ||
879 | daddu c_3,AT | ||
880 | sd c_1,24(a0) /* r[3]=c1; */ | ||
881 | |||
882 | dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ | ||
883 | mflo t_1 | ||
884 | mfhi t_2 | ||
885 | daddu c_2,t_1 | ||
886 | sltu AT,c_2,t_1 | ||
887 | daddu t_2,AT | ||
888 | daddu c_3,t_2 | ||
889 | sltu c_1,c_3,t_2 | ||
890 | dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | ||
891 | mflo t_1 | ||
892 | mfhi t_2 | ||
893 | daddu c_2,t_1 | ||
894 | sltu AT,c_2,t_1 | ||
895 | daddu t_2,AT | ||
896 | daddu c_3,t_2 | ||
897 | sltu AT,c_3,t_2 | ||
898 | daddu c_1,AT | ||
899 | dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
900 | mflo t_1 | ||
901 | mfhi t_2 | ||
902 | daddu c_2,t_1 | ||
903 | sltu AT,c_2,t_1 | ||
904 | daddu t_2,AT | ||
905 | daddu c_3,t_2 | ||
906 | sltu AT,c_3,t_2 | ||
907 | daddu c_1,AT | ||
908 | dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | ||
909 | mflo t_1 | ||
910 | mfhi t_2 | ||
911 | daddu c_2,t_1 | ||
912 | sltu AT,c_2,t_1 | ||
913 | daddu t_2,AT | ||
914 | daddu c_3,t_2 | ||
915 | sltu AT,c_3,t_2 | ||
916 | daddu c_1,AT | ||
917 | dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ | ||
918 | mflo t_1 | ||
919 | mfhi t_2 | ||
920 | daddu c_2,t_1 | ||
921 | sltu AT,c_2,t_1 | ||
922 | daddu t_2,AT | ||
923 | daddu c_3,t_2 | ||
924 | sltu AT,c_3,t_2 | ||
925 | daddu c_1,AT | ||
926 | sd c_2,32(a0) /* r[4]=c2; */ | ||
927 | |||
928 | dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ | ||
929 | mflo t_1 | ||
930 | mfhi t_2 | ||
931 | daddu c_3,t_1 | ||
932 | sltu AT,c_3,t_1 | ||
933 | daddu t_2,AT | ||
934 | daddu c_1,t_2 | ||
935 | sltu c_2,c_1,t_2 | ||
936 | dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ | ||
937 | mflo t_1 | ||
938 | mfhi t_2 | ||
939 | daddu c_3,t_1 | ||
940 | sltu AT,c_3,t_1 | ||
941 | daddu t_2,AT | ||
942 | daddu c_1,t_2 | ||
943 | sltu AT,c_1,t_2 | ||
944 | daddu c_2,AT | ||
945 | dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | ||
946 | mflo t_1 | ||
947 | mfhi t_2 | ||
948 | daddu c_3,t_1 | ||
949 | sltu AT,c_3,t_1 | ||
950 | daddu t_2,AT | ||
951 | daddu c_1,t_2 | ||
952 | sltu AT,c_1,t_2 | ||
953 | daddu c_2,AT | ||
954 | dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | ||
955 | mflo t_1 | ||
956 | mfhi t_2 | ||
957 | daddu c_3,t_1 | ||
958 | sltu AT,c_3,t_1 | ||
959 | daddu t_2,AT | ||
960 | daddu c_1,t_2 | ||
961 | sltu AT,c_1,t_2 | ||
962 | daddu c_2,AT | ||
963 | dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ | ||
964 | mflo t_1 | ||
965 | mfhi t_2 | ||
966 | daddu c_3,t_1 | ||
967 | sltu AT,c_3,t_1 | ||
968 | daddu t_2,AT | ||
969 | daddu c_1,t_2 | ||
970 | sltu AT,c_1,t_2 | ||
971 | daddu c_2,AT | ||
972 | dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ | ||
973 | mflo t_1 | ||
974 | mfhi t_2 | ||
975 | daddu c_3,t_1 | ||
976 | sltu AT,c_3,t_1 | ||
977 | daddu t_2,AT | ||
978 | daddu c_1,t_2 | ||
979 | sltu AT,c_1,t_2 | ||
980 | daddu c_2,AT | ||
981 | sd c_3,40(a0) /* r[5]=c3; */ | ||
982 | |||
983 | dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ | ||
984 | mflo t_1 | ||
985 | mfhi t_2 | ||
986 | daddu c_1,t_1 | ||
987 | sltu AT,c_1,t_1 | ||
988 | daddu t_2,AT | ||
989 | daddu c_2,t_2 | ||
990 | sltu c_3,c_2,t_2 | ||
991 | dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ | ||
992 | mflo t_1 | ||
993 | mfhi t_2 | ||
994 | daddu c_1,t_1 | ||
995 | sltu AT,c_1,t_1 | ||
996 | daddu t_2,AT | ||
997 | daddu c_2,t_2 | ||
998 | sltu AT,c_2,t_2 | ||
999 | daddu c_3,AT | ||
1000 | dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ | ||
1001 | mflo t_1 | ||
1002 | mfhi t_2 | ||
1003 | daddu c_1,t_1 | ||
1004 | sltu AT,c_1,t_1 | ||
1005 | daddu t_2,AT | ||
1006 | daddu c_2,t_2 | ||
1007 | sltu AT,c_2,t_2 | ||
1008 | daddu c_3,AT | ||
1009 | dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
1010 | mflo t_1 | ||
1011 | mfhi t_2 | ||
1012 | daddu c_1,t_1 | ||
1013 | sltu AT,c_1,t_1 | ||
1014 | daddu t_2,AT | ||
1015 | daddu c_2,t_2 | ||
1016 | sltu AT,c_2,t_2 | ||
1017 | daddu c_3,AT | ||
1018 | dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ | ||
1019 | mflo t_1 | ||
1020 | mfhi t_2 | ||
1021 | daddu c_1,t_1 | ||
1022 | sltu AT,c_1,t_1 | ||
1023 | daddu t_2,AT | ||
1024 | daddu c_2,t_2 | ||
1025 | sltu AT,c_2,t_2 | ||
1026 | daddu c_3,AT | ||
1027 | dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ | ||
1028 | mflo t_1 | ||
1029 | mfhi t_2 | ||
1030 | daddu c_1,t_1 | ||
1031 | sltu AT,c_1,t_1 | ||
1032 | daddu t_2,AT | ||
1033 | daddu c_2,t_2 | ||
1034 | sltu AT,c_2,t_2 | ||
1035 | daddu c_3,AT | ||
1036 | dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ | ||
1037 | mflo t_1 | ||
1038 | mfhi t_2 | ||
1039 | daddu c_1,t_1 | ||
1040 | sltu AT,c_1,t_1 | ||
1041 | daddu t_2,AT | ||
1042 | daddu c_2,t_2 | ||
1043 | sltu AT,c_2,t_2 | ||
1044 | daddu c_3,AT | ||
1045 | sd c_1,48(a0) /* r[6]=c1; */ | ||
1046 | |||
1047 | dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ | ||
1048 | mflo t_1 | ||
1049 | mfhi t_2 | ||
1050 | daddu c_2,t_1 | ||
1051 | sltu AT,c_2,t_1 | ||
1052 | daddu t_2,AT | ||
1053 | daddu c_3,t_2 | ||
1054 | sltu c_1,c_3,t_2 | ||
1055 | dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ | ||
1056 | mflo t_1 | ||
1057 | mfhi t_2 | ||
1058 | daddu c_2,t_1 | ||
1059 | sltu AT,c_2,t_1 | ||
1060 | daddu t_2,AT | ||
1061 | daddu c_3,t_2 | ||
1062 | sltu AT,c_3,t_2 | ||
1063 | daddu c_1,AT | ||
1064 | dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ | ||
1065 | mflo t_1 | ||
1066 | mfhi t_2 | ||
1067 | daddu c_2,t_1 | ||
1068 | sltu AT,c_2,t_1 | ||
1069 | daddu t_2,AT | ||
1070 | daddu c_3,t_2 | ||
1071 | sltu AT,c_3,t_2 | ||
1072 | daddu c_1,AT | ||
1073 | dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ | ||
1074 | mflo t_1 | ||
1075 | mfhi t_2 | ||
1076 | daddu c_2,t_1 | ||
1077 | sltu AT,c_2,t_1 | ||
1078 | daddu t_2,AT | ||
1079 | daddu c_3,t_2 | ||
1080 | sltu AT,c_3,t_2 | ||
1081 | daddu c_1,AT | ||
1082 | dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ | ||
1083 | mflo t_1 | ||
1084 | mfhi t_2 | ||
1085 | daddu c_2,t_1 | ||
1086 | sltu AT,c_2,t_1 | ||
1087 | daddu t_2,AT | ||
1088 | daddu c_3,t_2 | ||
1089 | sltu AT,c_3,t_2 | ||
1090 | daddu c_1,AT | ||
1091 | dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ | ||
1092 | mflo t_1 | ||
1093 | mfhi t_2 | ||
1094 | daddu c_2,t_1 | ||
1095 | sltu AT,c_2,t_1 | ||
1096 | daddu t_2,AT | ||
1097 | daddu c_3,t_2 | ||
1098 | sltu AT,c_3,t_2 | ||
1099 | daddu c_1,AT | ||
1100 | dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ | ||
1101 | mflo t_1 | ||
1102 | mfhi t_2 | ||
1103 | daddu c_2,t_1 | ||
1104 | sltu AT,c_2,t_1 | ||
1105 | daddu t_2,AT | ||
1106 | daddu c_3,t_2 | ||
1107 | sltu AT,c_3,t_2 | ||
1108 | daddu c_1,AT | ||
1109 | dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ | ||
1110 | mflo t_1 | ||
1111 | mfhi t_2 | ||
1112 | daddu c_2,t_1 | ||
1113 | sltu AT,c_2,t_1 | ||
1114 | daddu t_2,AT | ||
1115 | daddu c_3,t_2 | ||
1116 | sltu AT,c_3,t_2 | ||
1117 | daddu c_1,AT | ||
1118 | sd c_2,56(a0) /* r[7]=c2; */ | ||
1119 | |||
1120 | dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ | ||
1121 | mflo t_1 | ||
1122 | mfhi t_2 | ||
1123 | daddu c_3,t_1 | ||
1124 | sltu AT,c_3,t_1 | ||
1125 | daddu t_2,AT | ||
1126 | daddu c_1,t_2 | ||
1127 | sltu c_2,c_1,t_2 | ||
1128 | dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ | ||
1129 | mflo t_1 | ||
1130 | mfhi t_2 | ||
1131 | daddu c_3,t_1 | ||
1132 | sltu AT,c_3,t_1 | ||
1133 | daddu t_2,AT | ||
1134 | daddu c_1,t_2 | ||
1135 | sltu AT,c_1,t_2 | ||
1136 | daddu c_2,AT | ||
1137 | dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ | ||
1138 | mflo t_1 | ||
1139 | mfhi t_2 | ||
1140 | daddu c_3,t_1 | ||
1141 | sltu AT,c_3,t_1 | ||
1142 | daddu t_2,AT | ||
1143 | daddu c_1,t_2 | ||
1144 | sltu AT,c_1,t_2 | ||
1145 | daddu c_2,AT | ||
1146 | dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | ||
1147 | mflo t_1 | ||
1148 | mfhi t_2 | ||
1149 | daddu c_3,t_1 | ||
1150 | sltu AT,c_3,t_1 | ||
1151 | daddu t_2,AT | ||
1152 | daddu c_1,t_2 | ||
1153 | sltu AT,c_1,t_2 | ||
1154 | daddu c_2,AT | ||
1155 | dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ | ||
1156 | mflo t_1 | ||
1157 | mfhi t_2 | ||
1158 | daddu c_3,t_1 | ||
1159 | sltu AT,c_3,t_1 | ||
1160 | daddu t_2,AT | ||
1161 | daddu c_1,t_2 | ||
1162 | sltu AT,c_1,t_2 | ||
1163 | daddu c_2,AT | ||
1164 | dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ | ||
1165 | mflo t_1 | ||
1166 | mfhi t_2 | ||
1167 | daddu c_3,t_1 | ||
1168 | sltu AT,c_3,t_1 | ||
1169 | daddu t_2,AT | ||
1170 | daddu c_1,t_2 | ||
1171 | sltu AT,c_1,t_2 | ||
1172 | daddu c_2,AT | ||
1173 | dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ | ||
1174 | mflo t_1 | ||
1175 | mfhi t_2 | ||
1176 | daddu c_3,t_1 | ||
1177 | sltu AT,c_3,t_1 | ||
1178 | daddu t_2,AT | ||
1179 | daddu c_1,t_2 | ||
1180 | sltu AT,c_1,t_2 | ||
1181 | daddu c_2,AT | ||
1182 | sd c_3,64(a0) /* r[8]=c3; */ | ||
1183 | |||
1184 | dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ | ||
1185 | mflo t_1 | ||
1186 | mfhi t_2 | ||
1187 | daddu c_1,t_1 | ||
1188 | sltu AT,c_1,t_1 | ||
1189 | daddu t_2,AT | ||
1190 | daddu c_2,t_2 | ||
1191 | sltu c_3,c_2,t_2 | ||
1192 | dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ | ||
1193 | mflo t_1 | ||
1194 | mfhi t_2 | ||
1195 | daddu c_1,t_1 | ||
1196 | sltu AT,c_1,t_1 | ||
1197 | daddu t_2,AT | ||
1198 | daddu c_2,t_2 | ||
1199 | sltu AT,c_2,t_2 | ||
1200 | daddu c_3,AT | ||
1201 | dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ | ||
1202 | mflo t_1 | ||
1203 | mfhi t_2 | ||
1204 | daddu c_1,t_1 | ||
1205 | sltu AT,c_1,t_1 | ||
1206 | daddu t_2,AT | ||
1207 | daddu c_2,t_2 | ||
1208 | sltu AT,c_2,t_2 | ||
1209 | daddu c_3,AT | ||
1210 | dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ | ||
1211 | mflo t_1 | ||
1212 | mfhi t_2 | ||
1213 | daddu c_1,t_1 | ||
1214 | sltu AT,c_1,t_1 | ||
1215 | daddu t_2,AT | ||
1216 | daddu c_2,t_2 | ||
1217 | sltu AT,c_2,t_2 | ||
1218 | daddu c_3,AT | ||
1219 | dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ | ||
1220 | mflo t_1 | ||
1221 | mfhi t_2 | ||
1222 | daddu c_1,t_1 | ||
1223 | sltu AT,c_1,t_1 | ||
1224 | daddu t_2,AT | ||
1225 | daddu c_2,t_2 | ||
1226 | sltu AT,c_2,t_2 | ||
1227 | daddu c_3,AT | ||
1228 | dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ | ||
1229 | mflo t_1 | ||
1230 | mfhi t_2 | ||
1231 | daddu c_1,t_1 | ||
1232 | sltu AT,c_1,t_1 | ||
1233 | daddu t_2,AT | ||
1234 | daddu c_2,t_2 | ||
1235 | sltu AT,c_2,t_2 | ||
1236 | daddu c_3,AT | ||
1237 | sd c_1,72(a0) /* r[9]=c1; */ | ||
1238 | |||
1239 | dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ | ||
1240 | mflo t_1 | ||
1241 | mfhi t_2 | ||
1242 | daddu c_2,t_1 | ||
1243 | sltu AT,c_2,t_1 | ||
1244 | daddu t_2,AT | ||
1245 | daddu c_3,t_2 | ||
1246 | sltu c_1,c_3,t_2 | ||
1247 | dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ | ||
1248 | mflo t_1 | ||
1249 | mfhi t_2 | ||
1250 | daddu c_2,t_1 | ||
1251 | sltu AT,c_2,t_1 | ||
1252 | daddu t_2,AT | ||
1253 | daddu c_3,t_2 | ||
1254 | sltu AT,c_3,t_2 | ||
1255 | daddu c_1,AT | ||
1256 | dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | ||
1257 | mflo t_1 | ||
1258 | mfhi t_2 | ||
1259 | daddu c_2,t_1 | ||
1260 | sltu AT,c_2,t_1 | ||
1261 | daddu t_2,AT | ||
1262 | daddu c_3,t_2 | ||
1263 | sltu AT,c_3,t_2 | ||
1264 | daddu c_1,AT | ||
1265 | dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ | ||
1266 | mflo t_1 | ||
1267 | mfhi t_2 | ||
1268 | daddu c_2,t_1 | ||
1269 | sltu AT,c_2,t_1 | ||
1270 | daddu t_2,AT | ||
1271 | daddu c_3,t_2 | ||
1272 | sltu AT,c_3,t_2 | ||
1273 | daddu c_1,AT | ||
1274 | dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ | ||
1275 | mflo t_1 | ||
1276 | mfhi t_2 | ||
1277 | daddu c_2,t_1 | ||
1278 | sltu AT,c_2,t_1 | ||
1279 | daddu t_2,AT | ||
1280 | daddu c_3,t_2 | ||
1281 | sltu AT,c_3,t_2 | ||
1282 | daddu c_1,AT | ||
1283 | sd c_2,80(a0) /* r[10]=c2; */ | ||
1284 | |||
1285 | dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ | ||
1286 | mflo t_1 | ||
1287 | mfhi t_2 | ||
1288 | daddu c_3,t_1 | ||
1289 | sltu AT,c_3,t_1 | ||
1290 | daddu t_2,AT | ||
1291 | daddu c_1,t_2 | ||
1292 | sltu c_2,c_1,t_2 | ||
1293 | dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ | ||
1294 | mflo t_1 | ||
1295 | mfhi t_2 | ||
1296 | daddu c_3,t_1 | ||
1297 | sltu AT,c_3,t_1 | ||
1298 | daddu t_2,AT | ||
1299 | daddu c_1,t_2 | ||
1300 | sltu AT,c_1,t_2 | ||
1301 | daddu c_2,AT | ||
1302 | dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ | ||
1303 | mflo t_1 | ||
1304 | mfhi t_2 | ||
1305 | daddu c_3,t_1 | ||
1306 | sltu AT,c_3,t_1 | ||
1307 | daddu t_2,AT | ||
1308 | daddu c_1,t_2 | ||
1309 | sltu AT,c_1,t_2 | ||
1310 | daddu c_2,AT | ||
1311 | dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ | ||
1312 | mflo t_1 | ||
1313 | mfhi t_2 | ||
1314 | daddu c_3,t_1 | ||
1315 | sltu AT,c_3,t_1 | ||
1316 | daddu t_2,AT | ||
1317 | daddu c_1,t_2 | ||
1318 | sltu AT,c_1,t_2 | ||
1319 | daddu c_2,AT | ||
1320 | sd c_3,88(a0) /* r[11]=c3; */ | ||
1321 | |||
1322 | dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ | ||
1323 | mflo t_1 | ||
1324 | mfhi t_2 | ||
1325 | daddu c_1,t_1 | ||
1326 | sltu AT,c_1,t_1 | ||
1327 | daddu t_2,AT | ||
1328 | daddu c_2,t_2 | ||
1329 | sltu c_3,c_2,t_2 | ||
1330 | dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | ||
1331 | mflo t_1 | ||
1332 | mfhi t_2 | ||
1333 | daddu c_1,t_1 | ||
1334 | sltu AT,c_1,t_1 | ||
1335 | daddu t_2,AT | ||
1336 | daddu c_2,t_2 | ||
1337 | sltu AT,c_2,t_2 | ||
1338 | daddu c_3,AT | ||
1339 | dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ | ||
1340 | mflo t_1 | ||
1341 | mfhi t_2 | ||
1342 | daddu c_1,t_1 | ||
1343 | sltu AT,c_1,t_1 | ||
1344 | daddu t_2,AT | ||
1345 | daddu c_2,t_2 | ||
1346 | sltu AT,c_2,t_2 | ||
1347 | daddu c_3,AT | ||
1348 | sd c_1,96(a0) /* r[12]=c1; */ | ||
1349 | |||
1350 | dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ | ||
1351 | mflo t_1 | ||
1352 | mfhi t_2 | ||
1353 | daddu c_2,t_1 | ||
1354 | sltu AT,c_2,t_1 | ||
1355 | daddu t_2,AT | ||
1356 | daddu c_3,t_2 | ||
1357 | sltu c_1,c_3,t_2 | ||
1358 | dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ | ||
1359 | mflo t_1 | ||
1360 | mfhi t_2 | ||
1361 | daddu c_2,t_1 | ||
1362 | sltu AT,c_2,t_1 | ||
1363 | daddu t_2,AT | ||
1364 | daddu c_3,t_2 | ||
1365 | sltu AT,c_3,t_2 | ||
1366 | daddu c_1,AT | ||
1367 | sd c_2,104(a0) /* r[13]=c2; */ | ||
1368 | |||
1369 | dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | ||
1370 | ld s0,0(sp) /* a[4..6] and b[4..6] are no longer needed: reload s0-s5 */ | ||
1371 | ld s1,8(sp) | ||
1372 | ld s2,16(sp) | ||
1373 | ld s3,24(sp) | ||
1374 | ld s4,32(sp) | ||
1375 | ld s5,40(sp) | ||
1376 | mflo t_1 | ||
1377 | mfhi t_2 | ||
1378 | daddu c_3,t_1 | ||
1379 | sltu AT,c_3,t_1 | ||
1380 | daddu t_2,AT | ||
1381 | daddu c_1,t_2 | ||
1382 | sd c_3,112(a0) /* r[14]=c3; */ | ||
1383 | sd c_1,120(a0) /* r[15]=c1; */ | ||
1384 | |||
1385 | PTR_ADD sp,FRAME_SIZE /* release the save area */ | ||
1386 | |||
1387 | jr ra | ||
1388 | END(bn_mul_comba8) | ||
1389 | |||
1390 | .align 5 | ||
1391 | LEAF(bn_mul_comba4) /* In: a0 = r (8 result words written), a1 = a (4 words read), a2 = b (4 words read). Same column-by-column scheme as bn_mul_comba8, but only t- and a-registers are used, so no stack frame is set up. */ | ||
1392 | .set reorder | ||
1393 | ld a_0,0(a1) | ||
1394 | ld b_0,0(a2) | ||
1395 | ld a_1,8(a1) | ||
1396 | ld a_2,16(a1) | ||
1397 | dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
1398 | ld a_3,24(a1) | ||
1399 | ld b_1,8(a2) | ||
1400 | ld b_2,16(a2) | ||
1401 | ld b_3,24(a2) | ||
1402 | mflo c_1 | ||
1403 | mfhi c_2 | ||
1404 | sd c_1,0(a0) /* r[0]=c1; */ | ||
1405 | |||
1406 | dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ | ||
1407 | mflo t_1 | ||
1408 | mfhi t_2 | ||
1409 | daddu c_2,t_1 | ||
1410 | sltu AT,c_2,t_1 | ||
1411 | daddu c_3,t_2,AT /* first write to c_3: initialise rather than accumulate */ | ||
1412 | dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ | ||
1413 | mflo t_1 | ||
1414 | mfhi t_2 | ||
1415 | daddu c_2,t_1 | ||
1416 | sltu AT,c_2,t_1 | ||
1417 | daddu t_2,AT | ||
1418 | daddu c_3,t_2 | ||
1419 | sltu c_1,c_3,t_2 /* c_1 restarts as the carry out of c_3 */ | ||
1420 | sd c_2,8(a0) /* r[1]=c2; */ | ||
1421 | |||
1422 | dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ | ||
1423 | mflo t_1 | ||
1424 | mfhi t_2 | ||
1425 | daddu c_3,t_1 | ||
1426 | sltu AT,c_3,t_1 | ||
1427 | daddu t_2,AT | ||
1428 | daddu c_1,t_2 | ||
1429 | dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
1430 | mflo t_1 | ||
1431 | mfhi t_2 | ||
1432 | daddu c_3,t_1 | ||
1433 | sltu AT,c_3,t_1 | ||
1434 | daddu t_2,AT | ||
1435 | daddu c_1,t_2 | ||
1436 | sltu c_2,c_1,t_2 | ||
1437 | dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ | ||
1438 | mflo t_1 | ||
1439 | mfhi t_2 | ||
1440 | daddu c_3,t_1 | ||
1441 | sltu AT,c_3,t_1 | ||
1442 | daddu t_2,AT | ||
1443 | daddu c_1,t_2 | ||
1444 | sltu AT,c_1,t_2 | ||
1445 | daddu c_2,AT | ||
1446 | sd c_3,16(a0) /* r[2]=c3; */ | ||
1447 | |||
1448 | dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ | ||
1449 | mflo t_1 | ||
1450 | mfhi t_2 | ||
1451 | daddu c_1,t_1 | ||
1452 | sltu AT,c_1,t_1 | ||
1453 | daddu t_2,AT | ||
1454 | daddu c_2,t_2 | ||
1455 | sltu c_3,c_2,t_2 | ||
1456 | dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ | ||
1457 | mflo t_1 | ||
1458 | mfhi t_2 | ||
1459 | daddu c_1,t_1 | ||
1460 | sltu AT,c_1,t_1 | ||
1461 | daddu t_2,AT | ||
1462 | daddu c_2,t_2 | ||
1463 | sltu AT,c_2,t_2 | ||
1464 | daddu c_3,AT | ||
1465 | dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ | ||
1466 | mflo t_1 | ||
1467 | mfhi t_2 | ||
1468 | daddu c_1,t_1 | ||
1469 | sltu AT,c_1,t_1 | ||
1470 | daddu t_2,AT | ||
1471 | daddu c_2,t_2 | ||
1472 | sltu AT,c_2,t_2 | ||
1473 | daddu c_3,AT | ||
1474 | dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ | ||
1475 | mflo t_1 | ||
1476 | mfhi t_2 | ||
1477 | daddu c_1,t_1 | ||
1478 | sltu AT,c_1,t_1 | ||
1479 | daddu t_2,AT | ||
1480 | daddu c_2,t_2 | ||
1481 | sltu AT,c_2,t_2 | ||
1482 | daddu c_3,AT | ||
1483 | sd c_1,24(a0) /* r[3]=c1; */ | ||
1484 | |||
1485 | dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ | ||
1486 | mflo t_1 | ||
1487 | mfhi t_2 | ||
1488 | daddu c_2,t_1 | ||
1489 | sltu AT,c_2,t_1 | ||
1490 | daddu t_2,AT | ||
1491 | daddu c_3,t_2 | ||
1492 | sltu c_1,c_3,t_2 | ||
1493 | dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
1494 | mflo t_1 | ||
1495 | mfhi t_2 | ||
1496 | daddu c_2,t_1 | ||
1497 | sltu AT,c_2,t_1 | ||
1498 | daddu t_2,AT | ||
1499 | daddu c_3,t_2 | ||
1500 | sltu AT,c_3,t_2 | ||
1501 | daddu c_1,AT | ||
1502 | dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ | ||
1503 | mflo t_1 | ||
1504 | mfhi t_2 | ||
1505 | daddu c_2,t_1 | ||
1506 | sltu AT,c_2,t_1 | ||
1507 | daddu t_2,AT | ||
1508 | daddu c_3,t_2 | ||
1509 | sltu AT,c_3,t_2 | ||
1510 | daddu c_1,AT | ||
1511 | sd c_2,32(a0) /* r[4]=c2; */ | ||
1512 | |||
1513 | dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ | ||
1514 | mflo t_1 | ||
1515 | mfhi t_2 | ||
1516 | daddu c_3,t_1 | ||
1517 | sltu AT,c_3,t_1 | ||
1518 | daddu t_2,AT | ||
1519 | daddu c_1,t_2 | ||
1520 | sltu c_2,c_1,t_2 | ||
1521 | dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ | ||
1522 | mflo t_1 | ||
1523 | mfhi t_2 | ||
1524 | daddu c_3,t_1 | ||
1525 | sltu AT,c_3,t_1 | ||
1526 | daddu t_2,AT | ||
1527 | daddu c_1,t_2 | ||
1528 | sltu AT,c_1,t_2 | ||
1529 | daddu c_2,AT | ||
1530 | sd c_3,40(a0) /* r[5]=c3; */ | ||
1531 | |||
1532 | dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
1533 | mflo t_1 | ||
1534 | mfhi t_2 | ||
1535 | daddu c_1,t_1 | ||
1536 | sltu AT,c_1,t_1 | ||
1537 | daddu t_2,AT | ||
1538 | daddu c_2,t_2 | ||
1539 | sd c_1,48(a0) /* r[6]=c1; */ | ||
1540 | sd c_2,56(a0) /* r[7]=c2; */ | ||
1541 | |||
1542 | jr ra | ||
1543 | END(bn_mul_comba4) | ||
1544 | |||
1545 | #undef a_4 /* drop the a_4..a_7 register assignments made for bn_mul_comba8 */ | ||
1546 | #undef a_5 | ||
1547 | #undef a_6 | ||
1548 | #undef a_7 | ||
1549 | #define a_4 b_0 /* a[4..7] now share the registers named b_0..b_3 (ta0..ta3) */ | ||
1550 | #define a_5 b_1 | ||
1551 | #define a_6 b_2 | ||
1552 | #define a_7 b_3 | ||
1553 | |||
1554 | .align 5 | ||
1555 | LEAF(bn_sqr_comba8) | ||
1556 | .set reorder | ||
1557 | ld a_0,0(a1) | ||
1558 | ld a_1,8(a1) | ||
1559 | ld a_2,16(a1) | ||
1560 | ld a_3,24(a1) | ||
1561 | |||
1562 | dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
1563 | ld a_4,32(a1) | ||
1564 | ld a_5,40(a1) | ||
1565 | ld a_6,48(a1) | ||
1566 | ld a_7,56(a1) | ||
1567 | mflo c_1 | ||
1568 | mfhi c_2 | ||
1569 | sd c_1,0(a0) | ||
1570 | |||
1571 | dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | ||
1572 | mflo t_1 | ||
1573 | mfhi t_2 | ||
1574 | slt c_1,t_2,zero | ||
1575 | dsll t_2,1 | ||
1576 | slt a2,t_1,zero | ||
1577 | daddu t_2,a2 | ||
1578 | dsll t_1,1 | ||
1579 | daddu c_2,t_1 | ||
1580 | sltu AT,c_2,t_1 | ||
1581 | daddu c_3,t_2,AT | ||
1582 | sd c_2,8(a0) | ||
1583 | |||
1584 | dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | ||
1585 | mflo t_1 | ||
1586 | mfhi t_2 | ||
1587 | slt c_2,t_2,zero | ||
1588 | dsll t_2,1 | ||
1589 | slt a2,t_1,zero | ||
1590 | daddu t_2,a2 | ||
1591 | dsll t_1,1 | ||
1592 | daddu c_3,t_1 | ||
1593 | sltu AT,c_3,t_1 | ||
1594 | daddu t_2,AT | ||
1595 | daddu c_1,t_2 | ||
1596 | sltu AT,c_1,t_2 | ||
1597 | daddu c_2,AT | ||
1598 | dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
1599 | mflo t_1 | ||
1600 | mfhi t_2 | ||
1601 | daddu c_3,t_1 | ||
1602 | sltu AT,c_3,t_1 | ||
1603 | daddu t_2,AT | ||
1604 | daddu c_1,t_2 | ||
1605 | sltu AT,c_1,t_2 | ||
1606 | daddu c_2,AT | ||
1607 | sd c_3,16(a0) | ||
1608 | |||
1609 | dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | ||
1610 | mflo t_1 | ||
1611 | mfhi t_2 | ||
1612 | slt c_3,t_2,zero | ||
1613 | dsll t_2,1 | ||
1614 | slt a2,t_1,zero | ||
1615 | daddu t_2,a2 | ||
1616 | dsll t_1,1 | ||
1617 | daddu c_1,t_1 | ||
1618 | sltu AT,c_1,t_1 | ||
1619 | daddu t_2,AT | ||
1620 | daddu c_2,t_2 | ||
1621 | sltu AT,c_2,t_2 | ||
1622 | daddu c_3,AT | ||
1623 | dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ | ||
1624 | mflo t_1 | ||
1625 | mfhi t_2 | ||
1626 | slt AT,t_2,zero | ||
1627 | daddu c_3,AT | ||
1628 | dsll t_2,1 | ||
1629 | slt a2,t_1,zero | ||
1630 | daddu t_2,a2 | ||
1631 | dsll t_1,1 | ||
1632 | daddu c_1,t_1 | ||
1633 | sltu AT,c_1,t_1 | ||
1634 | daddu t_2,AT | ||
1635 | daddu c_2,t_2 | ||
1636 | sltu AT,c_2,t_2 | ||
1637 | daddu c_3,AT | ||
1638 | sd c_1,24(a0) | ||
1639 | |||
1640 | dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ | ||
1641 | mflo t_1 | ||
1642 | mfhi t_2 | ||
1643 | slt c_1,t_2,zero | ||
1644 | dsll t_2,1 | ||
1645 | slt a2,t_1,zero | ||
1646 | daddu t_2,a2 | ||
1647 | dsll t_1,1 | ||
1648 | daddu c_2,t_1 | ||
1649 | sltu AT,c_2,t_1 | ||
1650 | daddu t_2,AT | ||
1651 | daddu c_3,t_2 | ||
1652 | sltu AT,c_3,t_2 | ||
1653 | daddu c_1,AT | ||
1654 | dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | ||
1655 | mflo t_1 | ||
1656 | mfhi t_2 | ||
1657 | slt AT,t_2,zero | ||
1658 | daddu c_1,AT | ||
1659 | dsll t_2,1 | ||
1660 | slt a2,t_1,zero | ||
1661 | daddu t_2,a2 | ||
1662 | dsll t_1,1 | ||
1663 | daddu c_2,t_1 | ||
1664 | sltu AT,c_2,t_1 | ||
1665 | daddu t_2,AT | ||
1666 | daddu c_3,t_2 | ||
1667 | sltu AT,c_3,t_2 | ||
1668 | daddu c_1,AT | ||
1669 | dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
1670 | mflo t_1 | ||
1671 | mfhi t_2 | ||
1672 | daddu c_2,t_1 | ||
1673 | sltu AT,c_2,t_1 | ||
1674 | daddu t_2,AT | ||
1675 | daddu c_3,t_2 | ||
1676 | sltu AT,c_3,t_2 | ||
1677 | daddu c_1,AT | ||
1678 | sd c_2,32(a0) | ||
1679 | |||
1680 | dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ | ||
1681 | mflo t_1 | ||
1682 | mfhi t_2 | ||
1683 | slt c_2,t_2,zero | ||
1684 | dsll t_2,1 | ||
1685 | slt a2,t_1,zero | ||
1686 | daddu t_2,a2 | ||
1687 | dsll t_1,1 | ||
1688 | daddu c_3,t_1 | ||
1689 | sltu AT,c_3,t_1 | ||
1690 | daddu t_2,AT | ||
1691 | daddu c_1,t_2 | ||
1692 | sltu AT,c_1,t_2 | ||
1693 | daddu c_2,AT | ||
1694 | dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ | ||
1695 | mflo t_1 | ||
1696 | mfhi t_2 | ||
1697 | slt AT,t_2,zero | ||
1698 | daddu c_2,AT | ||
1699 | dsll t_2,1 | ||
1700 | slt a2,t_1,zero | ||
1701 | daddu t_2,a2 | ||
1702 | dsll t_1,1 | ||
1703 | daddu c_3,t_1 | ||
1704 | sltu AT,c_3,t_1 | ||
1705 | daddu t_2,AT | ||
1706 | daddu c_1,t_2 | ||
1707 | sltu AT,c_1,t_2 | ||
1708 | daddu c_2,AT | ||
1709 | dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | ||
1710 | mflo t_1 | ||
1711 | mfhi t_2 | ||
1712 | slt AT,t_2,zero | ||
1713 | daddu c_2,AT | ||
1714 | dsll t_2,1 | ||
1715 | slt a2,t_1,zero | ||
1716 | daddu t_2,a2 | ||
1717 | dsll t_1,1 | ||
1718 | daddu c_3,t_1 | ||
1719 | sltu AT,c_3,t_1 | ||
1720 | daddu t_2,AT | ||
1721 | daddu c_1,t_2 | ||
1722 | sltu AT,c_1,t_2 | ||
1723 | daddu c_2,AT | ||
1724 | sd c_3,40(a0) | ||
1725 | |||
1726 | dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ | ||
1727 | mflo t_1 | ||
1728 | mfhi t_2 | ||
1729 | slt c_3,t_2,zero | ||
1730 | dsll t_2,1 | ||
1731 | slt a2,t_1,zero | ||
1732 | daddu t_2,a2 | ||
1733 | dsll t_1,1 | ||
1734 | daddu c_1,t_1 | ||
1735 | sltu AT,c_1,t_1 | ||
1736 | daddu t_2,AT | ||
1737 | daddu c_2,t_2 | ||
1738 | sltu AT,c_2,t_2 | ||
1739 | daddu c_3,AT | ||
1740 | dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ | ||
1741 | mflo t_1 | ||
1742 | mfhi t_2 | ||
1743 | slt AT,t_2,zero | ||
1744 | daddu c_3,AT | ||
1745 | dsll t_2,1 | ||
1746 | slt a2,t_1,zero | ||
1747 | daddu t_2,a2 | ||
1748 | dsll t_1,1 | ||
1749 | daddu c_1,t_1 | ||
1750 | sltu AT,c_1,t_1 | ||
1751 | daddu t_2,AT | ||
1752 | daddu c_2,t_2 | ||
1753 | sltu AT,c_2,t_2 | ||
1754 | daddu c_3,AT | ||
1755 | dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ | ||
1756 | mflo t_1 | ||
1757 | mfhi t_2 | ||
1758 | slt AT,t_2,zero | ||
1759 | daddu c_3,AT | ||
1760 | dsll t_2,1 | ||
1761 | slt a2,t_1,zero | ||
1762 | daddu t_2,a2 | ||
1763 | dsll t_1,1 | ||
1764 | daddu c_1,t_1 | ||
1765 | sltu AT,c_1,t_1 | ||
1766 | daddu t_2,AT | ||
1767 | daddu c_2,t_2 | ||
1768 | sltu AT,c_2,t_2 | ||
1769 | daddu c_3,AT | ||
1770 | dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
1771 | mflo t_1 | ||
1772 | mfhi t_2 | ||
1773 | daddu c_1,t_1 | ||
1774 | sltu AT,c_1,t_1 | ||
1775 | daddu t_2,AT | ||
1776 | daddu c_2,t_2 | ||
1777 | sltu AT,c_2,t_2 | ||
1778 | daddu c_3,AT | ||
1779 | sd c_1,48(a0) | ||
1780 | |||
1781 | dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ | ||
1782 | mflo t_1 | ||
1783 | mfhi t_2 | ||
1784 | slt c_1,t_2,zero | ||
1785 | dsll t_2,1 | ||
1786 | slt a2,t_1,zero | ||
1787 | daddu t_2,a2 | ||
1788 | dsll t_1,1 | ||
1789 | daddu c_2,t_1 | ||
1790 | sltu AT,c_2,t_1 | ||
1791 | daddu t_2,AT | ||
1792 | daddu c_3,t_2 | ||
1793 | sltu AT,c_3,t_2 | ||
1794 | daddu c_1,AT | ||
1795 | dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ | ||
1796 | mflo t_1 | ||
1797 | mfhi t_2 | ||
1798 | slt AT,t_2,zero | ||
1799 | daddu c_1,AT | ||
1800 | dsll t_2,1 | ||
1801 | slt a2,t_1,zero | ||
1802 | daddu t_2,a2 | ||
1803 | dsll t_1,1 | ||
1804 | daddu c_2,t_1 | ||
1805 | sltu AT,c_2,t_1 | ||
1806 | daddu t_2,AT | ||
1807 | daddu c_3,t_2 | ||
1808 | sltu AT,c_3,t_2 | ||
1809 | daddu c_1,AT | ||
1810 | dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ | ||
1811 | mflo t_1 | ||
1812 | mfhi t_2 | ||
1813 | slt AT,t_2,zero | ||
1814 | daddu c_1,AT | ||
1815 | dsll t_2,1 | ||
1816 | slt a2,t_1,zero | ||
1817 | daddu t_2,a2 | ||
1818 | dsll t_1,1 | ||
1819 | daddu c_2,t_1 | ||
1820 | sltu AT,c_2,t_1 | ||
1821 | daddu t_2,AT | ||
1822 | daddu c_3,t_2 | ||
1823 | sltu AT,c_3,t_2 | ||
1824 | daddu c_1,AT | ||
1825 | dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ | ||
1826 | mflo t_1 | ||
1827 | mfhi t_2 | ||
1828 | slt AT,t_2,zero | ||
1829 | daddu c_1,AT | ||
1830 | dsll t_2,1 | ||
1831 | slt a2,t_1,zero | ||
1832 | daddu t_2,a2 | ||
1833 | dsll t_1,1 | ||
1834 | daddu c_2,t_1 | ||
1835 | sltu AT,c_2,t_1 | ||
1836 | daddu t_2,AT | ||
1837 | daddu c_3,t_2 | ||
1838 | sltu AT,c_3,t_2 | ||
1839 | daddu c_1,AT | ||
1840 | sd c_2,56(a0) | ||
1841 | |||
1842 | dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ | ||
1843 | mflo t_1 | ||
1844 | mfhi t_2 | ||
1845 | slt c_2,t_2,zero | ||
1846 | dsll t_2,1 | ||
1847 | slt a2,t_1,zero | ||
1848 | daddu t_2,a2 | ||
1849 | dsll t_1,1 | ||
1850 | daddu c_3,t_1 | ||
1851 | sltu AT,c_3,t_1 | ||
1852 | daddu t_2,AT | ||
1853 | daddu c_1,t_2 | ||
1854 | sltu AT,c_1,t_2 | ||
1855 | daddu c_2,AT | ||
1856 | dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ | ||
1857 | mflo t_1 | ||
1858 | mfhi t_2 | ||
1859 | slt AT,t_2,zero | ||
1860 | daddu c_2,AT | ||
1861 | dsll t_2,1 | ||
1862 | slt a2,t_1,zero | ||
1863 | daddu t_2,a2 | ||
1864 | dsll t_1,1 | ||
1865 | daddu c_3,t_1 | ||
1866 | sltu AT,c_3,t_1 | ||
1867 | daddu t_2,AT | ||
1868 | daddu c_1,t_2 | ||
1869 | sltu AT,c_1,t_2 | ||
1870 | daddu c_2,AT | ||
1871 | dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ | ||
1872 | mflo t_1 | ||
1873 | mfhi t_2 | ||
1874 | slt AT,t_2,zero | ||
1875 | daddu c_2,AT | ||
1876 | dsll t_2,1 | ||
1877 | slt a2,t_1,zero | ||
1878 | daddu t_2,a2 | ||
1879 | dsll t_1,1 | ||
1880 | daddu c_3,t_1 | ||
1881 | sltu AT,c_3,t_1 | ||
1882 | daddu t_2,AT | ||
1883 | daddu c_1,t_2 | ||
1884 | sltu AT,c_1,t_2 | ||
1885 | daddu c_2,AT | ||
1886 | dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ | ||
1887 | mflo t_1 | ||
1888 | mfhi t_2 | ||
1889 | daddu c_3,t_1 | ||
1890 | sltu AT,c_3,t_1 | ||
1891 | daddu t_2,AT | ||
1892 | daddu c_1,t_2 | ||
1893 | sltu AT,c_1,t_2 | ||
1894 | daddu c_2,AT | ||
1895 | sd c_3,64(a0) | ||
1896 | |||
1897 | dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ | ||
1898 | mflo t_1 | ||
1899 | mfhi t_2 | ||
1900 | slt c_3,t_2,zero | ||
1901 | dsll t_2,1 | ||
1902 | slt a2,t_1,zero | ||
1903 | daddu t_2,a2 | ||
1904 | dsll t_1,1 | ||
1905 | daddu c_1,t_1 | ||
1906 | sltu AT,c_1,t_1 | ||
1907 | daddu t_2,AT | ||
1908 | daddu c_2,t_2 | ||
1909 | sltu AT,c_2,t_2 | ||
1910 | daddu c_3,AT | ||
1911 | dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ | ||
1912 | mflo t_1 | ||
1913 | mfhi t_2 | ||
1914 | slt AT,t_2,zero | ||
1915 | daddu c_3,AT | ||
1916 | dsll t_2,1 | ||
1917 | slt a2,t_1,zero | ||
1918 | daddu t_2,a2 | ||
1919 | dsll t_1,1 | ||
1920 | daddu c_1,t_1 | ||
1921 | sltu AT,c_1,t_1 | ||
1922 | daddu t_2,AT | ||
1923 | daddu c_2,t_2 | ||
1924 | sltu AT,c_2,t_2 | ||
1925 | daddu c_3,AT | ||
1926 | dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ | ||
1927 | mflo t_1 | ||
1928 | mfhi t_2 | ||
1929 | slt AT,t_2,zero | ||
1930 | daddu c_3,AT | ||
1931 | dsll t_2,1 | ||
1932 | slt a2,t_1,zero | ||
1933 | daddu t_2,a2 | ||
1934 | dsll t_1,1 | ||
1935 | daddu c_1,t_1 | ||
1936 | sltu AT,c_1,t_1 | ||
1937 | daddu t_2,AT | ||
1938 | daddu c_2,t_2 | ||
1939 | sltu AT,c_2,t_2 | ||
1940 | daddu c_3,AT | ||
1941 | sd c_1,72(a0) | ||
1942 | |||
1943 | dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ | ||
1944 | mflo t_1 | ||
1945 | mfhi t_2 | ||
1946 | slt c_1,t_2,zero | ||
1947 | dsll t_2,1 | ||
1948 | slt a2,t_1,zero | ||
1949 | daddu t_2,a2 | ||
1950 | dsll t_1,1 | ||
1951 | daddu c_2,t_1 | ||
1952 | sltu AT,c_2,t_1 | ||
1953 | daddu t_2,AT | ||
1954 | daddu c_3,t_2 | ||
1955 | sltu AT,c_3,t_2 | ||
1956 | daddu c_1,AT | ||
1957 | dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ | ||
1958 | mflo t_1 | ||
1959 | mfhi t_2 | ||
1960 | slt AT,t_2,zero | ||
1961 | daddu c_1,AT | ||
1962 | dsll t_2,1 | ||
1963 | slt a2,t_1,zero | ||
1964 | daddu t_2,a2 | ||
1965 | dsll t_1,1 | ||
1966 | daddu c_2,t_1 | ||
1967 | sltu AT,c_2,t_1 | ||
1968 | daddu t_2,AT | ||
1969 | daddu c_3,t_2 | ||
1970 | sltu AT,c_3,t_2 | ||
1971 | daddu c_1,AT | ||
1972 | dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ | ||
1973 | mflo t_1 | ||
1974 | mfhi t_2 | ||
1975 | daddu c_2,t_1 | ||
1976 | sltu AT,c_2,t_1 | ||
1977 | daddu t_2,AT | ||
1978 | daddu c_3,t_2 | ||
1979 | sltu AT,c_3,t_2 | ||
1980 | daddu c_1,AT | ||
1981 | sd c_2,80(a0) | ||
1982 | |||
1983 | dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ | ||
1984 | mflo t_1 | ||
1985 | mfhi t_2 | ||
1986 | slt c_2,t_2,zero | ||
1987 | dsll t_2,1 | ||
1988 | slt a2,t_1,zero | ||
1989 | daddu t_2,a2 | ||
1990 | dsll t_1,1 | ||
1991 | daddu c_3,t_1 | ||
1992 | sltu AT,c_3,t_1 | ||
1993 | daddu t_2,AT | ||
1994 | daddu c_1,t_2 | ||
1995 | sltu AT,c_1,t_2 | ||
1996 | daddu c_2,AT | ||
1997 | dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ | ||
1998 | mflo t_1 | ||
1999 | mfhi t_2 | ||
2000 | slt AT,t_2,zero | ||
2001 | daddu c_2,AT | ||
2002 | dsll t_2,1 | ||
2003 | slt a2,t_1,zero | ||
2004 | daddu t_2,a2 | ||
2005 | dsll t_1,1 | ||
2006 | daddu c_3,t_1 | ||
2007 | sltu AT,c_3,t_1 | ||
2008 | daddu t_2,AT | ||
2009 | daddu c_1,t_2 | ||
2010 | sltu AT,c_1,t_2 | ||
2011 | daddu c_2,AT | ||
2012 | sd c_3,88(a0) | ||
2013 | |||
2014 | dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ | ||
2015 | mflo t_1 | ||
2016 | mfhi t_2 | ||
2017 | slt c_3,t_2,zero | ||
2018 | dsll t_2,1 | ||
2019 | slt a2,t_1,zero | ||
2020 | daddu t_2,a2 | ||
2021 | dsll t_1,1 | ||
2022 | daddu c_1,t_1 | ||
2023 | sltu AT,c_1,t_1 | ||
2024 | daddu t_2,AT | ||
2025 | daddu c_2,t_2 | ||
2026 | sltu AT,c_2,t_2 | ||
2027 | daddu c_3,AT | ||
2028 | dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ | ||
2029 | mflo t_1 | ||
2030 | mfhi t_2 | ||
2031 | daddu c_1,t_1 | ||
2032 | sltu AT,c_1,t_1 | ||
2033 | daddu t_2,AT | ||
2034 | daddu c_2,t_2 | ||
2035 | sltu AT,c_2,t_2 | ||
2036 | daddu c_3,AT | ||
2037 | sd c_1,96(a0) | ||
2038 | |||
2039 | dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ | ||
2040 | mflo t_1 | ||
2041 | mfhi t_2 | ||
2042 | slt c_1,t_2,zero | ||
2043 | dsll t_2,1 | ||
2044 | slt a2,t_1,zero | ||
2045 | daddu t_2,a2 | ||
2046 | dsll t_1,1 | ||
2047 | daddu c_2,t_1 | ||
2048 | sltu AT,c_2,t_1 | ||
2049 | daddu t_2,AT | ||
2050 | daddu c_3,t_2 | ||
2051 | sltu AT,c_3,t_2 | ||
2052 | daddu c_1,AT | ||
2053 | sd c_2,104(a0) | ||
2054 | |||
2055 | dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ | ||
2056 | mflo t_1 | ||
2057 | mfhi t_2 | ||
2058 | daddu c_3,t_1 | ||
2059 | sltu AT,c_3,t_1 | ||
2060 | daddu t_2,AT | ||
2061 | daddu c_1,t_2 | ||
2062 | sd c_3,112(a0) | ||
2063 | sd c_1,120(a0) | ||
2064 | |||
2065 | jr ra | ||
2066 | END(bn_sqr_comba8) | ||
2067 | |||
2068 | /* | ||
2069 | * bn_sqr_comba4: store the 8-doubleword square of the 4-doubleword | ||
2070 | * number at a1 into the buffer at a0 (ld 0..24(a1), sd 0..56(a0)). | ||
2071 | * c_1/c_2/c_3 rotate as the three-word column accumulator; t_1/t_2 | ||
2072 | * take LO/HI of each dmultu; a2 and AT are scratch. | ||
2073 | * | ||
2074 | * mul_add_c2 steps double a cross product before adding it: the bit | ||
2075 | * shifted out of HI goes to the top accumulator word and the bit | ||
2076 | * shifted out of LO becomes bit 0 of the doubled HI. | ||
2077 | * | ||
2078 | * The doubled HI can be all ones (e.g. a[0] = 2^64-2, a[1] = 2^63+1), | ||
2079 | * so adding the low-word carry to it can wrap to zero. Each step | ||
2080 | * therefore tests "sltu a2,<sum>,AT" after that addition and feeds | ||
2081 | * the result into the top accumulator word instead of losing it. | ||
2082 | */ | ||
2083 | .align 5 | ||
2084 | LEAF(bn_sqr_comba4) | ||
2085 | .set reorder | ||
2086 | ld a_0,0(a1) | ||
2087 | ld a_1,8(a1) | ||
2088 | ld a_2,16(a1) | ||
2089 | ld a_3,24(a1) | ||
2090 | dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ | ||
2091 | mflo c_1 | ||
2092 | mfhi c_2 | ||
2093 | sd c_1,0(a0) | ||
2094 | |||
2095 | dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ | ||
2096 | mflo t_1 | ||
2097 | mfhi t_2 | ||
2098 | slt c_1,t_2,zero /* bit shifted out of HI */ | ||
2099 | dsll t_2,1 | ||
2100 | slt a2,t_1,zero /* bit shifted out of LO */ | ||
2101 | daddu t_2,a2 | ||
2102 | dsll t_1,1 | ||
2103 | daddu c_2,t_1 | ||
2104 | sltu AT,c_2,t_1 | ||
2105 | daddu c_3,t_2,AT | ||
2106 | sltu a2,c_3,AT /* carry out of t_2+AT */ | ||
2107 | daddu c_1,a2 | ||
2108 | sd c_2,8(a0) | ||
2109 | |||
2110 | dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ | ||
2111 | mflo t_1 | ||
2112 | mfhi t_2 | ||
2113 | slt c_2,t_2,zero | ||
2114 | dsll t_2,1 | ||
2115 | slt a2,t_1,zero | ||
2116 | daddu t_2,a2 | ||
2117 | dsll t_1,1 | ||
2118 | daddu c_3,t_1 | ||
2119 | sltu AT,c_3,t_1 | ||
2120 | daddu t_2,AT | ||
2121 | sltu a2,t_2,AT /* carry out of t_2+AT */ | ||
2122 | daddu c_2,a2 | ||
2123 | daddu c_1,t_2 | ||
2124 | sltu AT,c_1,t_2 | ||
2125 | daddu c_2,AT | ||
2126 | dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ | ||
2127 | mflo t_1 | ||
2128 | mfhi t_2 | ||
2129 | daddu c_3,t_1 | ||
2130 | sltu AT,c_3,t_1 | ||
2131 | daddu t_2,AT | ||
2132 | daddu c_1,t_2 | ||
2133 | sltu AT,c_1,t_2 | ||
2134 | daddu c_2,AT | ||
2135 | sd c_3,16(a0) | ||
2136 | |||
2137 | dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ | ||
2138 | mflo t_1 | ||
2139 | mfhi t_2 | ||
2140 | slt c_3,t_2,zero | ||
2141 | dsll t_2,1 | ||
2142 | slt a2,t_1,zero | ||
2143 | daddu t_2,a2 | ||
2144 | dsll t_1,1 | ||
2145 | daddu c_1,t_1 | ||
2146 | sltu AT,c_1,t_1 | ||
2147 | daddu t_2,AT | ||
2148 | sltu a2,t_2,AT /* carry out of t_2+AT */ | ||
2149 | daddu c_3,a2 | ||
2150 | daddu c_2,t_2 | ||
2151 | sltu AT,c_2,t_2 | ||
2152 | daddu c_3,AT | ||
2153 | dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ | ||
2154 | mflo t_1 | ||
2155 | mfhi t_2 | ||
2156 | slt AT,t_2,zero | ||
2157 | daddu c_3,AT | ||
2158 | dsll t_2,1 | ||
2159 | slt a2,t_1,zero | ||
2160 | daddu t_2,a2 | ||
2161 | dsll t_1,1 | ||
2162 | daddu c_1,t_1 | ||
2163 | sltu AT,c_1,t_1 | ||
2164 | daddu t_2,AT | ||
2165 | sltu a2,t_2,AT /* carry out of t_2+AT */ | ||
2166 | daddu c_3,a2 | ||
2167 | daddu c_2,t_2 | ||
2168 | sltu AT,c_2,t_2 | ||
2169 | daddu c_3,AT | ||
2170 | sd c_1,24(a0) | ||
2171 | |||
2172 | dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ | ||
2173 | mflo t_1 | ||
2174 | mfhi t_2 | ||
2175 | slt c_1,t_2,zero | ||
2176 | dsll t_2,1 | ||
2177 | slt a2,t_1,zero | ||
2178 | daddu t_2,a2 | ||
2179 | dsll t_1,1 | ||
2180 | daddu c_2,t_1 | ||
2181 | sltu AT,c_2,t_1 | ||
2182 | daddu t_2,AT | ||
2183 | sltu a2,t_2,AT /* carry out of t_2+AT */ | ||
2184 | daddu c_1,a2 | ||
2185 | daddu c_3,t_2 | ||
2186 | sltu AT,c_3,t_2 | ||
2187 | daddu c_1,AT | ||
2188 | dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ | ||
2189 | mflo t_1 | ||
2190 | mfhi t_2 | ||
2191 | daddu c_2,t_1 | ||
2192 | sltu AT,c_2,t_1 | ||
2193 | daddu t_2,AT | ||
2194 | daddu c_3,t_2 | ||
2195 | sltu AT,c_3,t_2 | ||
2196 | daddu c_1,AT | ||
2197 | sd c_2,32(a0) | ||
2198 | |||
2199 | dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ | ||
2200 | mflo t_1 | ||
2201 | mfhi t_2 | ||
2202 | slt c_2,t_2,zero | ||
2203 | dsll t_2,1 | ||
2204 | slt a2,t_1,zero | ||
2205 | daddu t_2,a2 | ||
2206 | dsll t_1,1 | ||
2207 | daddu c_3,t_1 | ||
2208 | sltu AT,c_3,t_1 | ||
2209 | daddu t_2,AT | ||
2210 | sltu a2,t_2,AT /* carry out of t_2+AT */ | ||
2211 | daddu c_2,a2 | ||
2212 | daddu c_1,t_2 | ||
2213 | sltu AT,c_1,t_2 | ||
2214 | daddu c_2,AT | ||
2215 | sd c_3,40(a0) | ||
2216 | |||
2217 | dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ | ||
2218 | mflo t_1 | ||
2219 | mfhi t_2 | ||
2220 | daddu c_1,t_1 | ||
2221 | sltu AT,c_1,t_1 | ||
2222 | daddu t_2,AT | ||
2223 | daddu c_2,t_2 | ||
2224 | sd c_1,48(a0) | ||
2225 | sd c_2,56(a0) | ||
2226 | |||
2227 | jr ra | ||
2228 | END(bn_sqr_comba4) | ||