diff options
Diffstat (limited to 'src/lib/libcrypto/bn/asm/mips.pl')
-rw-r--r-- | src/lib/libcrypto/bn/asm/mips.pl | 2585 |
1 files changed, 0 insertions, 2585 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl deleted file mode 100644 index c162a3ec23..0000000000 --- a/src/lib/libcrypto/bn/asm/mips.pl +++ /dev/null | |||
@@ -1,2585 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. | ||
6 | # | ||
7 | # Rights for redistribution and usage in source and binary forms are | ||
8 | # granted according to the OpenSSL license. Warranty of any kind is | ||
9 | # disclaimed. | ||
10 | # ==================================================================== | ||
11 | |||
12 | |||
13 | # July 1999 | ||
14 | # | ||
15 | # This is a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c. | ||
16 | # | ||
17 | # The module is designed to work with either of the "new" MIPS ABI(5), | ||
18 | # namely N32 or N64, offered by IRIX 6.x. It's not meant to work under | ||
19 | # IRIX 5.x not only because it doesn't support new ABIs but also | ||
20 | # because 5.x kernels put R4x00 CPU into 32-bit mode and all those | ||
21 | # 64-bit instructions (daddu, dmultu, etc.) found below gonna only | ||
22 | # cause illegal instruction exception:-( | ||
23 | # | ||
24 | # In addition the code depends on preprocessor flags set up by MIPSpro | ||
25 | # compiler driver (either as or cc) and therefore (probably?) can't be | ||
26 | # compiled by the GNU assembler. GNU C driver manages fine though... | ||
27 | # I mean as long as -mmips-as is specified or is the default option, | ||
28 | # because then it simply invokes /usr/bin/as which in turn takes | ||
29 | # perfect care of the preprocessor definitions. Another neat feature | ||
30 | # offered by the MIPSpro assembler is an optimization pass. This gave | ||
31 | # me the opportunity to have the code looking more regular as all those | ||
32 | # architecture dependent instruction rescheduling details were left to | ||
33 | # the assembler. Cool, huh? | ||
34 | # | ||
35 | # Performance improvement is astonishing! 'apps/openssl speed rsa dsa' | ||
36 | # goes way over 3 times faster! | ||
37 | # | ||
38 | # <appro@fy.chalmers.se> | ||
39 | |||
40 | # October 2010 | ||
41 | # | ||
42 | # Adapt the module even for 32-bit ABIs and other OSes. The former was | ||
43 | # achieved by mechanical replacement of 64-bit arithmetic instructions | ||
44 | # such as dmultu, daddu, etc. with their 32-bit counterparts and | ||
45 | # adjusting offsets denoting multiples of BN_ULONG. Above mentioned | ||
46 | # >3x performance improvement naturally does not apply to 32-bit code | ||
47 | # [because there is no instruction 32-bit compiler can't use], one | ||
48 | # has to be content with 40-85% improvement depending on benchmark and | ||
49 | # key length, more for longer keys. | ||
50 | |||
51 | $flavour = shift; # ABI flavour; tested below against /64|n32/ and /nubi/ | ||
52 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} # skip arguments until one looks like a plain file name | ||
53 | open(STDOUT,">",$output) || die "can't open $output: $!" if (defined($output) && length($output)); # redirect only when a name was given; report failure instead of silently writing to the inherited stdout | ||
54 | |||
55 | if ($flavour !~ /64|n32/i) { # 32-bit ABIs: 32-bit instruction forms | ||
56 | $code = ".set mips2\n"; # only the 32-bit build starts the output with this directive | ||
57 | $LD = 'lw'; | ||
58 | $ST = 'sw'; | ||
59 | $REG_L = 'lw'; | ||
60 | $REG_S = 'sw'; | ||
61 | $ADDU = 'addu'; | ||
62 | $SUBU = 'subu'; | ||
63 | $PTR_ADD = 'addu'; | ||
64 | $PTR_SUB = 'subu'; | ||
65 | $MULTU = 'multu'; | ||
66 | $DIVU = 'divu'; | ||
67 | $SLL = 'sll'; | ||
68 | $SRL = 'srl'; | ||
69 | $BNSZ = 4; # byte step between consecutive BN_ULONG words | ||
70 | $SZREG = 4; # byte size of one register save slot on the stack | ||
71 | } else { # N32/64: doubleword instruction forms | ||
72 | $LD = 'ld'; | ||
73 | $ST = 'sd'; | ||
74 | $REG_L = 'ld'; | ||
75 | $REG_S = 'sd'; | ||
76 | $ADDU = 'daddu'; | ||
77 | $SUBU = 'dsubu'; | ||
78 | $PTR_ADD = 'daddu'; | ||
79 | $PTR_SUB = 'dsubu'; | ||
80 | $MULTU = 'dmultu'; | ||
81 | $DIVU = 'ddivu'; | ||
82 | $SLL = 'dsll'; | ||
83 | $SRL = 'dsrl'; | ||
84 | $BNSZ = 8; # byte step between consecutive BN_ULONG words | ||
85 | $SZREG = 8; # byte size of one register save slot on the stack | ||
86 | } | ||
87 | |||
88 | # Symbolic names for the general-purpose registers, using the N32/64 | ||
89 | # layout of the original module: $4-$11 are a0-a7 and $12-$15 are t0-t3. | ||
90 | ($zero,$at,$v0,$v1) = map { '$'.$_ } 0..3; | ||
91 | ($a0,$a1,$a2,$a3,$ta0,$ta1,$ta2,$ta3) = map { '$'.$_ } 4..11; | ||
92 | ($a4,$a5,$a6,$a7) = ($ta0,$ta1,$ta2,$ta3); # second name for $8-$11 | ||
93 | ($t0,$t1,$t2,$t3) = map { '$'.$_ } 12..15; | ||
94 | ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7) = map { '$'.$_ } 16..23; | ||
95 | ($t8,$t9) = map { '$'.$_ } 24,25; | ||
96 | ($gp,$sp,$fp,$ra) = map { '$'.$_ } 28..31; | ||
97 | # O32 needs no special adaptation. For NUBI the routines save and restore | ||
98 | # $v1 and $t0..$t3; aliasing $gp to $v1 makes the "$gp" store/load in each | ||
99 | # NUBI prologue/epilogue act on $v1. | ||
100 | $gp = $v1 if $flavour =~ /nubi/i; | ||
101 | # Register that each routine loads with the constant -4 ("li $minus4,-4"). | ||
102 | $minus4 = $v1; | ||
103 | |||
104 | $code.=<<___; | ||
105 | .rdata | ||
106 | .asciiz "mips3.s, Version 1.2" | ||
107 | .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>" | ||
108 | |||
109 | .text | ||
110 | .set noat | ||
111 | |||
112 | .align 5 | ||
113 | .globl bn_mul_add_words | ||
114 | .ent bn_mul_add_words | ||
115 | bn_mul_add_words: | ||
116 | .set noreorder | ||
117 | bgtz $a2,bn_mul_add_words_internal | ||
118 | move $v0,$zero | ||
119 | jr $ra | ||
120 | move $a0,$v0 | ||
121 | .end bn_mul_add_words | ||
122 | |||
123 | .align 5 | ||
124 | .ent bn_mul_add_words_internal | ||
125 | bn_mul_add_words_internal: | ||
126 | ___ | ||
127 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
128 | .frame $sp,6*$SZREG,$ra | ||
129 | .mask 0x8000f008,-$SZREG | ||
130 | .set noreorder | ||
131 | $PTR_SUB $sp,6*$SZREG | ||
132 | $REG_S $ra,5*$SZREG($sp) | ||
133 | $REG_S $t3,4*$SZREG($sp) | ||
134 | $REG_S $t2,3*$SZREG($sp) | ||
135 | $REG_S $t1,2*$SZREG($sp) | ||
136 | $REG_S $t0,1*$SZREG($sp) | ||
137 | $REG_S $gp,0*$SZREG($sp) | ||
138 | ___ | ||
139 | $code.=<<___; | ||
140 | .set reorder | ||
141 | li $minus4,-4 | ||
142 | and $ta0,$a2,$minus4 | ||
143 | $LD $t0,0($a1) | ||
144 | beqz $ta0,.L_bn_mul_add_words_tail | ||
145 | |||
146 | .L_bn_mul_add_words_loop: | ||
147 | $MULTU $t0,$a3 | ||
148 | $LD $t1,0($a0) | ||
149 | $LD $t2,$BNSZ($a1) | ||
150 | $LD $t3,$BNSZ($a0) | ||
151 | $LD $ta0,2*$BNSZ($a1) | ||
152 | $LD $ta1,2*$BNSZ($a0) | ||
153 | $ADDU $t1,$v0 | ||
154 | sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit | ||
155 | # values", but it seems to work fine | ||
156 | # even on 64-bit registers. | ||
157 | mflo $at | ||
158 | mfhi $t0 | ||
159 | $ADDU $t1,$at | ||
160 | $ADDU $v0,$t0 | ||
161 | $MULTU $t2,$a3 | ||
162 | sltu $at,$t1,$at | ||
163 | $ST $t1,0($a0) | ||
164 | $ADDU $v0,$at | ||
165 | |||
166 | $LD $ta2,3*$BNSZ($a1) | ||
167 | $LD $ta3,3*$BNSZ($a0) | ||
168 | $ADDU $t3,$v0 | ||
169 | sltu $v0,$t3,$v0 | ||
170 | mflo $at | ||
171 | mfhi $t2 | ||
172 | $ADDU $t3,$at | ||
173 | $ADDU $v0,$t2 | ||
174 | $MULTU $ta0,$a3 | ||
175 | sltu $at,$t3,$at | ||
176 | $ST $t3,$BNSZ($a0) | ||
177 | $ADDU $v0,$at | ||
178 | |||
179 | subu $a2,4 | ||
180 | $PTR_ADD $a0,4*$BNSZ | ||
181 | $PTR_ADD $a1,4*$BNSZ | ||
182 | $ADDU $ta1,$v0 | ||
183 | sltu $v0,$ta1,$v0 | ||
184 | mflo $at | ||
185 | mfhi $ta0 | ||
186 | $ADDU $ta1,$at | ||
187 | $ADDU $v0,$ta0 | ||
188 | $MULTU $ta2,$a3 | ||
189 | sltu $at,$ta1,$at | ||
190 | $ST $ta1,-2*$BNSZ($a0) | ||
191 | $ADDU $v0,$at | ||
192 | |||
193 | |||
194 | and $ta0,$a2,$minus4 | ||
195 | $ADDU $ta3,$v0 | ||
196 | sltu $v0,$ta3,$v0 | ||
197 | mflo $at | ||
198 | mfhi $ta2 | ||
199 | $ADDU $ta3,$at | ||
200 | $ADDU $v0,$ta2 | ||
201 | sltu $at,$ta3,$at | ||
202 | $ST $ta3,-$BNSZ($a0) | ||
203 | $ADDU $v0,$at | ||
204 | .set noreorder | ||
205 | bgtzl $ta0,.L_bn_mul_add_words_loop | ||
206 | $LD $t0,0($a1) | ||
207 | |||
208 | beqz $a2,.L_bn_mul_add_words_return | ||
209 | nop | ||
210 | |||
211 | .L_bn_mul_add_words_tail: | ||
212 | .set reorder | ||
213 | $LD $t0,0($a1) | ||
214 | $MULTU $t0,$a3 | ||
215 | $LD $t1,0($a0) | ||
216 | subu $a2,1 | ||
217 | $ADDU $t1,$v0 | ||
218 | sltu $v0,$t1,$v0 | ||
219 | mflo $at | ||
220 | mfhi $t0 | ||
221 | $ADDU $t1,$at | ||
222 | $ADDU $v0,$t0 | ||
223 | sltu $at,$t1,$at | ||
224 | $ST $t1,0($a0) | ||
225 | $ADDU $v0,$at | ||
226 | beqz $a2,.L_bn_mul_add_words_return | ||
227 | |||
228 | $LD $t0,$BNSZ($a1) | ||
229 | $MULTU $t0,$a3 | ||
230 | $LD $t1,$BNSZ($a0) | ||
231 | subu $a2,1 | ||
232 | $ADDU $t1,$v0 | ||
233 | sltu $v0,$t1,$v0 | ||
234 | mflo $at | ||
235 | mfhi $t0 | ||
236 | $ADDU $t1,$at | ||
237 | $ADDU $v0,$t0 | ||
238 | sltu $at,$t1,$at | ||
239 | $ST $t1,$BNSZ($a0) | ||
240 | $ADDU $v0,$at | ||
241 | beqz $a2,.L_bn_mul_add_words_return | ||
242 | |||
243 | $LD $t0,2*$BNSZ($a1) | ||
244 | $MULTU $t0,$a3 | ||
245 | $LD $t1,2*$BNSZ($a0) | ||
246 | $ADDU $t1,$v0 | ||
247 | sltu $v0,$t1,$v0 | ||
248 | mflo $at | ||
249 | mfhi $t0 | ||
250 | $ADDU $t1,$at | ||
251 | $ADDU $v0,$t0 | ||
252 | sltu $at,$t1,$at | ||
253 | $ST $t1,2*$BNSZ($a0) | ||
254 | $ADDU $v0,$at | ||
255 | |||
256 | .L_bn_mul_add_words_return: | ||
257 | .set noreorder | ||
258 | ___ | ||
259 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
260 | $REG_L $t3,4*$SZREG($sp) | ||
261 | $REG_L $t2,3*$SZREG($sp) | ||
262 | $REG_L $t1,2*$SZREG($sp) | ||
263 | $REG_L $t0,1*$SZREG($sp) | ||
264 | $REG_L $gp,0*$SZREG($sp) | ||
265 | $PTR_ADD $sp,6*$SZREG | ||
266 | ___ | ||
267 | $code.=<<___; | ||
268 | jr $ra | ||
269 | move $a0,$v0 | ||
270 | .end bn_mul_add_words_internal | ||
271 | |||
272 | .align 5 | ||
273 | .globl bn_mul_words | ||
274 | .ent bn_mul_words | ||
275 | bn_mul_words: | ||
276 | .set noreorder | ||
277 | bgtz $a2,bn_mul_words_internal | ||
278 | move $v0,$zero | ||
279 | jr $ra | ||
280 | move $a0,$v0 | ||
281 | .end bn_mul_words | ||
282 | |||
283 | .align 5 | ||
284 | .ent bn_mul_words_internal | ||
285 | bn_mul_words_internal: | ||
286 | ___ | ||
287 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
288 | .frame $sp,6*$SZREG,$ra | ||
289 | .mask 0x8000f008,-$SZREG | ||
290 | .set noreorder | ||
291 | $PTR_SUB $sp,6*$SZREG | ||
292 | $REG_S $ra,5*$SZREG($sp) | ||
293 | $REG_S $t3,4*$SZREG($sp) | ||
294 | $REG_S $t2,3*$SZREG($sp) | ||
295 | $REG_S $t1,2*$SZREG($sp) | ||
296 | $REG_S $t0,1*$SZREG($sp) | ||
297 | $REG_S $gp,0*$SZREG($sp) | ||
298 | ___ | ||
299 | $code.=<<___; | ||
300 | .set reorder | ||
301 | li $minus4,-4 | ||
302 | and $ta0,$a2,$minus4 | ||
303 | $LD $t0,0($a1) | ||
304 | beqz $ta0,.L_bn_mul_words_tail | ||
305 | |||
306 | .L_bn_mul_words_loop: | ||
307 | $MULTU $t0,$a3 | ||
308 | $LD $t2,$BNSZ($a1) | ||
309 | $LD $ta0,2*$BNSZ($a1) | ||
310 | $LD $ta2,3*$BNSZ($a1) | ||
311 | mflo $at | ||
312 | mfhi $t0 | ||
313 | $ADDU $v0,$at | ||
314 | sltu $t1,$v0,$at | ||
315 | $MULTU $t2,$a3 | ||
316 | $ST $v0,0($a0) | ||
317 | $ADDU $v0,$t1,$t0 | ||
318 | |||
319 | subu $a2,4 | ||
320 | $PTR_ADD $a0,4*$BNSZ | ||
321 | $PTR_ADD $a1,4*$BNSZ | ||
322 | mflo $at | ||
323 | mfhi $t2 | ||
324 | $ADDU $v0,$at | ||
325 | sltu $t3,$v0,$at | ||
326 | $MULTU $ta0,$a3 | ||
327 | $ST $v0,-3*$BNSZ($a0) | ||
328 | $ADDU $v0,$t3,$t2 | ||
329 | |||
330 | mflo $at | ||
331 | mfhi $ta0 | ||
332 | $ADDU $v0,$at | ||
333 | sltu $ta1,$v0,$at | ||
334 | $MULTU $ta2,$a3 | ||
335 | $ST $v0,-2*$BNSZ($a0) | ||
336 | $ADDU $v0,$ta1,$ta0 | ||
337 | |||
338 | and $ta0,$a2,$minus4 | ||
339 | mflo $at | ||
340 | mfhi $ta2 | ||
341 | $ADDU $v0,$at | ||
342 | sltu $ta3,$v0,$at | ||
343 | $ST $v0,-$BNSZ($a0) | ||
344 | $ADDU $v0,$ta3,$ta2 | ||
345 | .set noreorder | ||
346 | bgtzl $ta0,.L_bn_mul_words_loop | ||
347 | $LD $t0,0($a1) | ||
348 | |||
349 | beqz $a2,.L_bn_mul_words_return | ||
350 | nop | ||
351 | |||
352 | .L_bn_mul_words_tail: | ||
353 | .set reorder | ||
354 | $LD $t0,0($a1) | ||
355 | $MULTU $t0,$a3 | ||
356 | subu $a2,1 | ||
357 | mflo $at | ||
358 | mfhi $t0 | ||
359 | $ADDU $v0,$at | ||
360 | sltu $t1,$v0,$at | ||
361 | $ST $v0,0($a0) | ||
362 | $ADDU $v0,$t1,$t0 | ||
363 | beqz $a2,.L_bn_mul_words_return | ||
364 | |||
365 | $LD $t0,$BNSZ($a1) | ||
366 | $MULTU $t0,$a3 | ||
367 | subu $a2,1 | ||
368 | mflo $at | ||
369 | mfhi $t0 | ||
370 | $ADDU $v0,$at | ||
371 | sltu $t1,$v0,$at | ||
372 | $ST $v0,$BNSZ($a0) | ||
373 | $ADDU $v0,$t1,$t0 | ||
374 | beqz $a2,.L_bn_mul_words_return | ||
375 | |||
376 | $LD $t0,2*$BNSZ($a1) | ||
377 | $MULTU $t0,$a3 | ||
378 | mflo $at | ||
379 | mfhi $t0 | ||
380 | $ADDU $v0,$at | ||
381 | sltu $t1,$v0,$at | ||
382 | $ST $v0,2*$BNSZ($a0) | ||
383 | $ADDU $v0,$t1,$t0 | ||
384 | |||
385 | .L_bn_mul_words_return: | ||
386 | .set noreorder | ||
387 | ___ | ||
388 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
389 | $REG_L $t3,4*$SZREG($sp) | ||
390 | $REG_L $t2,3*$SZREG($sp) | ||
391 | $REG_L $t1,2*$SZREG($sp) | ||
392 | $REG_L $t0,1*$SZREG($sp) | ||
393 | $REG_L $gp,0*$SZREG($sp) | ||
394 | $PTR_ADD $sp,6*$SZREG | ||
395 | ___ | ||
396 | $code.=<<___; | ||
397 | jr $ra | ||
398 | move $a0,$v0 | ||
399 | .end bn_mul_words_internal | ||
400 | |||
401 | .align 5 | ||
402 | .globl bn_sqr_words | ||
403 | .ent bn_sqr_words | ||
404 | bn_sqr_words: | ||
405 | .set noreorder | ||
406 | bgtz $a2,bn_sqr_words_internal | ||
407 | move $v0,$zero | ||
408 | jr $ra | ||
409 | move $a0,$v0 | ||
410 | .end bn_sqr_words | ||
411 | |||
412 | .align 5 | ||
413 | .ent bn_sqr_words_internal | ||
414 | bn_sqr_words_internal: | ||
415 | ___ | ||
416 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
417 | .frame $sp,6*$SZREG,$ra | ||
418 | .mask 0x8000f008,-$SZREG | ||
419 | .set noreorder | ||
420 | $PTR_SUB $sp,6*$SZREG | ||
421 | $REG_S $ra,5*$SZREG($sp) | ||
422 | $REG_S $t3,4*$SZREG($sp) | ||
423 | $REG_S $t2,3*$SZREG($sp) | ||
424 | $REG_S $t1,2*$SZREG($sp) | ||
425 | $REG_S $t0,1*$SZREG($sp) | ||
426 | $REG_S $gp,0*$SZREG($sp) | ||
427 | ___ | ||
428 | $code.=<<___; | ||
429 | .set reorder | ||
430 | li $minus4,-4 | ||
431 | and $ta0,$a2,$minus4 | ||
432 | $LD $t0,0($a1) | ||
433 | beqz $ta0,.L_bn_sqr_words_tail | ||
434 | |||
435 | .L_bn_sqr_words_loop: | ||
436 | $MULTU $t0,$t0 | ||
437 | $LD $t2,$BNSZ($a1) | ||
438 | $LD $ta0,2*$BNSZ($a1) | ||
439 | $LD $ta2,3*$BNSZ($a1) | ||
440 | mflo $t1 | ||
441 | mfhi $t0 | ||
442 | $ST $t1,0($a0) | ||
443 | $ST $t0,$BNSZ($a0) | ||
444 | |||
445 | $MULTU $t2,$t2 | ||
446 | subu $a2,4 | ||
447 | $PTR_ADD $a0,8*$BNSZ | ||
448 | $PTR_ADD $a1,4*$BNSZ | ||
449 | mflo $t3 | ||
450 | mfhi $t2 | ||
451 | $ST $t3,-6*$BNSZ($a0) | ||
452 | $ST $t2,-5*$BNSZ($a0) | ||
453 | |||
454 | $MULTU $ta0,$ta0 | ||
455 | mflo $ta1 | ||
456 | mfhi $ta0 | ||
457 | $ST $ta1,-4*$BNSZ($a0) | ||
458 | $ST $ta0,-3*$BNSZ($a0) | ||
459 | |||
460 | |||
461 | $MULTU $ta2,$ta2 | ||
462 | and $ta0,$a2,$minus4 | ||
463 | mflo $ta3 | ||
464 | mfhi $ta2 | ||
465 | $ST $ta3,-2*$BNSZ($a0) | ||
466 | $ST $ta2,-$BNSZ($a0) | ||
467 | |||
468 | .set noreorder | ||
469 | bgtzl $ta0,.L_bn_sqr_words_loop | ||
470 | $LD $t0,0($a1) | ||
471 | |||
472 | beqz $a2,.L_bn_sqr_words_return | ||
473 | nop | ||
474 | |||
475 | .L_bn_sqr_words_tail: | ||
476 | .set reorder | ||
477 | $LD $t0,0($a1) | ||
478 | $MULTU $t0,$t0 | ||
479 | subu $a2,1 | ||
480 | mflo $t1 | ||
481 | mfhi $t0 | ||
482 | $ST $t1,0($a0) | ||
483 | $ST $t0,$BNSZ($a0) | ||
484 | beqz $a2,.L_bn_sqr_words_return | ||
485 | |||
486 | $LD $t0,$BNSZ($a1) | ||
487 | $MULTU $t0,$t0 | ||
488 | subu $a2,1 | ||
489 | mflo $t1 | ||
490 | mfhi $t0 | ||
491 | $ST $t1,2*$BNSZ($a0) | ||
492 | $ST $t0,3*$BNSZ($a0) | ||
493 | beqz $a2,.L_bn_sqr_words_return | ||
494 | |||
495 | $LD $t0,2*$BNSZ($a1) | ||
496 | $MULTU $t0,$t0 | ||
497 | mflo $t1 | ||
498 | mfhi $t0 | ||
499 | $ST $t1,4*$BNSZ($a0) | ||
500 | $ST $t0,5*$BNSZ($a0) | ||
501 | |||
502 | .L_bn_sqr_words_return: | ||
503 | .set noreorder | ||
504 | ___ | ||
505 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
506 | $REG_L $t3,4*$SZREG($sp) | ||
507 | $REG_L $t2,3*$SZREG($sp) | ||
508 | $REG_L $t1,2*$SZREG($sp) | ||
509 | $REG_L $t0,1*$SZREG($sp) | ||
510 | $REG_L $gp,0*$SZREG($sp) | ||
511 | $PTR_ADD $sp,6*$SZREG | ||
512 | ___ | ||
513 | $code.=<<___; | ||
514 | jr $ra | ||
515 | move $a0,$v0 | ||
516 | |||
517 | .end bn_sqr_words_internal | ||
518 | |||
519 | .align 5 | ||
520 | .globl bn_add_words | ||
521 | .ent bn_add_words | ||
522 | bn_add_words: | ||
523 | .set noreorder | ||
524 | bgtz $a3,bn_add_words_internal | ||
525 | move $v0,$zero | ||
526 | jr $ra | ||
527 | move $a0,$v0 | ||
528 | .end bn_add_words | ||
529 | |||
530 | .align 5 | ||
531 | .ent bn_add_words_internal | ||
532 | bn_add_words_internal: | ||
533 | ___ | ||
534 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
535 | .frame $sp,6*$SZREG,$ra | ||
536 | .mask 0x8000f008,-$SZREG | ||
537 | .set noreorder | ||
538 | $PTR_SUB $sp,6*$SZREG | ||
539 | $REG_S $ra,5*$SZREG($sp) | ||
540 | $REG_S $t3,4*$SZREG($sp) | ||
541 | $REG_S $t2,3*$SZREG($sp) | ||
542 | $REG_S $t1,2*$SZREG($sp) | ||
543 | $REG_S $t0,1*$SZREG($sp) | ||
544 | $REG_S $gp,0*$SZREG($sp) | ||
545 | ___ | ||
546 | $code.=<<___; | ||
547 | .set reorder | ||
548 | li $minus4,-4 | ||
549 | and $at,$a3,$minus4 | ||
550 | $LD $t0,0($a1) | ||
551 | beqz $at,.L_bn_add_words_tail | ||
552 | |||
553 | .L_bn_add_words_loop: | ||
554 | $LD $ta0,0($a2) | ||
555 | subu $a3,4 | ||
556 | $LD $t1,$BNSZ($a1) | ||
557 | and $at,$a3,$minus4 | ||
558 | $LD $t2,2*$BNSZ($a1) | ||
559 | $PTR_ADD $a2,4*$BNSZ | ||
560 | $LD $t3,3*$BNSZ($a1) | ||
561 | $PTR_ADD $a0,4*$BNSZ | ||
562 | $LD $ta1,-3*$BNSZ($a2) | ||
563 | $PTR_ADD $a1,4*$BNSZ | ||
564 | $LD $ta2,-2*$BNSZ($a2) | ||
565 | $LD $ta3,-$BNSZ($a2) | ||
566 | $ADDU $ta0,$t0 | ||
567 | sltu $t8,$ta0,$t0 | ||
568 | $ADDU $t0,$ta0,$v0 | ||
569 | sltu $v0,$t0,$ta0 | ||
570 | $ST $t0,-4*$BNSZ($a0) | ||
571 | $ADDU $v0,$t8 | ||
572 | |||
573 | $ADDU $ta1,$t1 | ||
574 | sltu $t9,$ta1,$t1 | ||
575 | $ADDU $t1,$ta1,$v0 | ||
576 | sltu $v0,$t1,$ta1 | ||
577 | $ST $t1,-3*$BNSZ($a0) | ||
578 | $ADDU $v0,$t9 | ||
579 | |||
580 | $ADDU $ta2,$t2 | ||
581 | sltu $t8,$ta2,$t2 | ||
582 | $ADDU $t2,$ta2,$v0 | ||
583 | sltu $v0,$t2,$ta2 | ||
584 | $ST $t2,-2*$BNSZ($a0) | ||
585 | $ADDU $v0,$t8 | ||
586 | |||
587 | $ADDU $ta3,$t3 | ||
588 | sltu $t9,$ta3,$t3 | ||
589 | $ADDU $t3,$ta3,$v0 | ||
590 | sltu $v0,$t3,$ta3 | ||
591 | $ST $t3,-$BNSZ($a0) | ||
592 | $ADDU $v0,$t9 | ||
593 | |||
594 | .set noreorder | ||
595 | bgtzl $at,.L_bn_add_words_loop | ||
596 | $LD $t0,0($a1) | ||
597 | |||
598 | beqz $a3,.L_bn_add_words_return | ||
599 | nop | ||
600 | |||
601 | .L_bn_add_words_tail: | ||
602 | .set reorder | ||
603 | $LD $t0,0($a1) | ||
604 | $LD $ta0,0($a2) | ||
605 | $ADDU $ta0,$t0 | ||
606 | subu $a3,1 | ||
607 | sltu $t8,$ta0,$t0 | ||
608 | $ADDU $t0,$ta0,$v0 | ||
609 | sltu $v0,$t0,$ta0 | ||
610 | $ST $t0,0($a0) | ||
611 | $ADDU $v0,$t8 | ||
612 | beqz $a3,.L_bn_add_words_return | ||
613 | |||
614 | $LD $t1,$BNSZ($a1) | ||
615 | $LD $ta1,$BNSZ($a2) | ||
616 | $ADDU $ta1,$t1 | ||
617 | subu $a3,1 | ||
618 | sltu $t9,$ta1,$t1 | ||
619 | $ADDU $t1,$ta1,$v0 | ||
620 | sltu $v0,$t1,$ta1 | ||
621 | $ST $t1,$BNSZ($a0) | ||
622 | $ADDU $v0,$t9 | ||
623 | beqz $a3,.L_bn_add_words_return | ||
624 | |||
625 | $LD $t2,2*$BNSZ($a1) | ||
626 | $LD $ta2,2*$BNSZ($a2) | ||
627 | $ADDU $ta2,$t2 | ||
628 | sltu $t8,$ta2,$t2 | ||
629 | $ADDU $t2,$ta2,$v0 | ||
630 | sltu $v0,$t2,$ta2 | ||
631 | $ST $t2,2*$BNSZ($a0) | ||
632 | $ADDU $v0,$t8 | ||
633 | |||
634 | .L_bn_add_words_return: | ||
635 | .set noreorder | ||
636 | ___ | ||
637 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
638 | $REG_L $t3,4*$SZREG($sp) | ||
639 | $REG_L $t2,3*$SZREG($sp) | ||
640 | $REG_L $t1,2*$SZREG($sp) | ||
641 | $REG_L $t0,1*$SZREG($sp) | ||
642 | $REG_L $gp,0*$SZREG($sp) | ||
643 | $PTR_ADD $sp,6*$SZREG | ||
644 | ___ | ||
645 | $code.=<<___; | ||
646 | jr $ra | ||
647 | move $a0,$v0 | ||
648 | |||
649 | .end bn_add_words_internal | ||
650 | |||
651 | .align 5 | ||
652 | .globl bn_sub_words | ||
653 | .ent bn_sub_words | ||
654 | bn_sub_words: | ||
655 | .set noreorder | ||
656 | bgtz $a3,bn_sub_words_internal | ||
657 | move $v0,$zero | ||
658 | jr $ra | ||
659 | move $a0,$zero | ||
660 | .end bn_sub_words | ||
661 | |||
662 | .align 5 | ||
663 | .ent bn_sub_words_internal | ||
664 | bn_sub_words_internal: | ||
665 | ___ | ||
666 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
667 | .frame $sp,6*$SZREG,$ra | ||
668 | .mask 0x8000f008,-$SZREG | ||
669 | .set noreorder | ||
670 | $PTR_SUB $sp,6*$SZREG | ||
671 | $REG_S $ra,5*$SZREG($sp) | ||
672 | $REG_S $t3,4*$SZREG($sp) | ||
673 | $REG_S $t2,3*$SZREG($sp) | ||
674 | $REG_S $t1,2*$SZREG($sp) | ||
675 | $REG_S $t0,1*$SZREG($sp) | ||
676 | $REG_S $gp,0*$SZREG($sp) | ||
677 | ___ | ||
678 | $code.=<<___; | ||
679 | .set reorder | ||
680 | li $minus4,-4 | ||
681 | and $at,$a3,$minus4 | ||
682 | $LD $t0,0($a1) | ||
683 | beqz $at,.L_bn_sub_words_tail | ||
684 | |||
685 | .L_bn_sub_words_loop: | ||
686 | $LD $ta0,0($a2) | ||
687 | subu $a3,4 | ||
688 | $LD $t1,$BNSZ($a1) | ||
689 | and $at,$a3,$minus4 | ||
690 | $LD $t2,2*$BNSZ($a1) | ||
691 | $PTR_ADD $a2,4*$BNSZ | ||
692 | $LD $t3,3*$BNSZ($a1) | ||
693 | $PTR_ADD $a0,4*$BNSZ | ||
694 | $LD $ta1,-3*$BNSZ($a2) | ||
695 | $PTR_ADD $a1,4*$BNSZ | ||
696 | $LD $ta2,-2*$BNSZ($a2) | ||
697 | $LD $ta3,-$BNSZ($a2) | ||
698 | sltu $t8,$t0,$ta0 | ||
699 | $SUBU $ta0,$t0,$ta0 | ||
700 | $SUBU $t0,$ta0,$v0 | ||
701 | sgtu $v0,$t0,$ta0 | ||
702 | $ST $t0,-4*$BNSZ($a0) | ||
703 | $ADDU $v0,$t8 | ||
704 | |||
705 | sltu $t9,$t1,$ta1 | ||
706 | $SUBU $ta1,$t1,$ta1 | ||
707 | $SUBU $t1,$ta1,$v0 | ||
708 | sgtu $v0,$t1,$ta1 | ||
709 | $ST $t1,-3*$BNSZ($a0) | ||
710 | $ADDU $v0,$t9 | ||
711 | |||
712 | |||
713 | sltu $t8,$t2,$ta2 | ||
714 | $SUBU $ta2,$t2,$ta2 | ||
715 | $SUBU $t2,$ta2,$v0 | ||
716 | sgtu $v0,$t2,$ta2 | ||
717 | $ST $t2,-2*$BNSZ($a0) | ||
718 | $ADDU $v0,$t8 | ||
719 | |||
720 | sltu $t9,$t3,$ta3 | ||
721 | $SUBU $ta3,$t3,$ta3 | ||
722 | $SUBU $t3,$ta3,$v0 | ||
723 | sgtu $v0,$t3,$ta3 | ||
724 | $ST $t3,-$BNSZ($a0) | ||
725 | $ADDU $v0,$t9 | ||
726 | |||
727 | .set noreorder | ||
728 | bgtzl $at,.L_bn_sub_words_loop | ||
729 | $LD $t0,0($a1) | ||
730 | |||
731 | beqz $a3,.L_bn_sub_words_return | ||
732 | nop | ||
733 | |||
734 | .L_bn_sub_words_tail: | ||
735 | .set reorder | ||
736 | $LD $t0,0($a1) | ||
737 | $LD $ta0,0($a2) | ||
738 | subu $a3,1 | ||
739 | sltu $t8,$t0,$ta0 | ||
740 | $SUBU $ta0,$t0,$ta0 | ||
741 | $SUBU $t0,$ta0,$v0 | ||
742 | sgtu $v0,$t0,$ta0 | ||
743 | $ST $t0,0($a0) | ||
744 | $ADDU $v0,$t8 | ||
745 | beqz $a3,.L_bn_sub_words_return | ||
746 | |||
747 | $LD $t1,$BNSZ($a1) | ||
748 | subu $a3,1 | ||
749 | $LD $ta1,$BNSZ($a2) | ||
750 | sltu $t9,$t1,$ta1 | ||
751 | $SUBU $ta1,$t1,$ta1 | ||
752 | $SUBU $t1,$ta1,$v0 | ||
753 | sgtu $v0,$t1,$ta1 | ||
754 | $ST $t1,$BNSZ($a0) | ||
755 | $ADDU $v0,$t9 | ||
756 | beqz $a3,.L_bn_sub_words_return | ||
757 | |||
758 | $LD $t2,2*$BNSZ($a1) | ||
759 | $LD $ta2,2*$BNSZ($a2) | ||
760 | sltu $t8,$t2,$ta2 | ||
761 | $SUBU $ta2,$t2,$ta2 | ||
762 | $SUBU $t2,$ta2,$v0 | ||
763 | sgtu $v0,$t2,$ta2 | ||
764 | $ST $t2,2*$BNSZ($a0) | ||
765 | $ADDU $v0,$t8 | ||
766 | |||
767 | .L_bn_sub_words_return: | ||
768 | .set noreorder | ||
769 | ___ | ||
770 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
771 | $REG_L $t3,4*$SZREG($sp) | ||
772 | $REG_L $t2,3*$SZREG($sp) | ||
773 | $REG_L $t1,2*$SZREG($sp) | ||
774 | $REG_L $t0,1*$SZREG($sp) | ||
775 | $REG_L $gp,0*$SZREG($sp) | ||
776 | $PTR_ADD $sp,6*$SZREG | ||
777 | ___ | ||
778 | $code.=<<___; | ||
779 | jr $ra | ||
780 | move $a0,$v0 | ||
781 | .end bn_sub_words_internal | ||
782 | |||
783 | .align 5 | ||
784 | .globl bn_div_3_words | ||
785 | .ent bn_div_3_words | ||
786 | bn_div_3_words: | ||
787 | .set noreorder | ||
788 | move $a3,$a0 # we know that bn_div_words does not | ||
789 | # touch $a3, $ta2, $ta3 and preserves $a2 | ||
790 | # so that we can save two arguments | ||
791 | # and return address in registers | ||
792 | # instead of stack:-) | ||
793 | |||
794 | $LD $a0,($a3) | ||
795 | move $ta2,$a1 | ||
796 | bne $a0,$a2,bn_div_3_words_internal | ||
797 | $LD $a1,-$BNSZ($a3) | ||
798 | li $v0,-1 | ||
799 | jr $ra | ||
800 | move $a0,$v0 | ||
801 | .end bn_div_3_words | ||
802 | |||
803 | .align 5 | ||
804 | .ent bn_div_3_words_internal | ||
805 | bn_div_3_words_internal: | ||
806 | ___ | ||
807 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
808 | .frame $sp,6*$SZREG,$ra | ||
809 | .mask 0x8000f008,-$SZREG | ||
810 | .set noreorder | ||
811 | $PTR_SUB $sp,6*$SZREG | ||
812 | $REG_S $ra,5*$SZREG($sp) | ||
813 | $REG_S $t3,4*$SZREG($sp) | ||
814 | $REG_S $t2,3*$SZREG($sp) | ||
815 | $REG_S $t1,2*$SZREG($sp) | ||
816 | $REG_S $t0,1*$SZREG($sp) | ||
817 | $REG_S $gp,0*$SZREG($sp) | ||
818 | ___ | ||
819 | $code.=<<___; | ||
820 | .set reorder | ||
821 | move $ta3,$ra | ||
822 | bal bn_div_words | ||
823 | move $ra,$ta3 | ||
824 | $MULTU $ta2,$v0 | ||
825 | $LD $t2,-2*$BNSZ($a3) | ||
826 | move $ta0,$zero | ||
827 | mfhi $t1 | ||
828 | mflo $t0 | ||
829 | sltu $t8,$t1,$a1 | ||
830 | .L_bn_div_3_words_inner_loop: | ||
831 | bnez $t8,.L_bn_div_3_words_inner_loop_done | ||
832 | sgeu $at,$t2,$t0 | ||
833 | seq $t9,$t1,$a1 | ||
834 | and $at,$t9 | ||
835 | sltu $t3,$t0,$ta2 | ||
836 | $ADDU $a1,$a2 | ||
837 | $SUBU $t1,$t3 | ||
838 | $SUBU $t0,$ta2 | ||
839 | sltu $t8,$t1,$a1 | ||
840 | sltu $ta0,$a1,$a2 | ||
841 | or $t8,$ta0 | ||
842 | .set noreorder | ||
843 | beqzl $at,.L_bn_div_3_words_inner_loop | ||
844 | $SUBU $v0,1 | ||
845 | .set reorder | ||
846 | .L_bn_div_3_words_inner_loop_done: | ||
847 | .set noreorder | ||
848 | ___ | ||
849 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
850 | $REG_L $t3,4*$SZREG($sp) | ||
851 | $REG_L $t2,3*$SZREG($sp) | ||
852 | $REG_L $t1,2*$SZREG($sp) | ||
853 | $REG_L $t0,1*$SZREG($sp) | ||
854 | $REG_L $gp,0*$SZREG($sp) | ||
855 | $PTR_ADD $sp,6*$SZREG | ||
856 | ___ | ||
857 | $code.=<<___; | ||
858 | jr $ra | ||
859 | move $a0,$v0 | ||
860 | .end bn_div_3_words_internal | ||
861 | |||
862 | .align 5 | ||
863 | .globl bn_div_words | ||
864 | .ent bn_div_words | ||
865 | bn_div_words: | ||
866 | .set noreorder | ||
867 | bnez $a2,bn_div_words_internal | ||
868 | li $v0,-1 # I would rather signal div-by-zero | ||
869 | # which can be done with 'break 7' | ||
870 | jr $ra | ||
871 | move $a0,$v0 | ||
872 | .end bn_div_words | ||
873 | |||
874 | .align 5 | ||
875 | .ent bn_div_words_internal | ||
876 | bn_div_words_internal: | ||
877 | ___ | ||
878 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
879 | .frame $sp,6*$SZREG,$ra | ||
880 | .mask 0x8000f008,-$SZREG | ||
881 | .set noreorder | ||
882 | $PTR_SUB $sp,6*$SZREG | ||
883 | $REG_S $ra,5*$SZREG($sp) | ||
884 | $REG_S $t3,4*$SZREG($sp) | ||
885 | $REG_S $t2,3*$SZREG($sp) | ||
886 | $REG_S $t1,2*$SZREG($sp) | ||
887 | $REG_S $t0,1*$SZREG($sp) | ||
888 | $REG_S $gp,0*$SZREG($sp) | ||
889 | ___ | ||
890 | $code.=<<___; | ||
891 | move $v1,$zero | ||
892 | bltz $a2,.L_bn_div_words_body | ||
893 | move $t9,$v1 | ||
894 | $SLL $a2,1 | ||
895 | bgtz $a2,.-4 | ||
896 | addu $t9,1 | ||
897 | |||
898 | .set reorder | ||
899 | negu $t1,$t9 | ||
900 | li $t2,-1 | ||
901 | $SLL $t2,$t1 | ||
902 | and $t2,$a0 | ||
903 | $SRL $at,$a1,$t1 | ||
904 | .set noreorder | ||
905 | bnezl $t2,.+8 | ||
906 | break 6 # signal overflow | ||
907 | .set reorder | ||
908 | $SLL $a0,$t9 | ||
909 | $SLL $a1,$t9 | ||
910 | or $a0,$at | ||
911 | ___ | ||
912 | # Register aliases for the bn_div_words body that follows: quotient | ||
913 | # digit, upper half of the running dividend ($a0), upper half of the divisor ($a2). | ||
914 | ($QT,$HH,$DH)=($ta0,$ta1,$v1); | ||
915 | $code.=<<___; | ||
916 | .L_bn_div_words_body: | ||
917 | $SRL $DH,$a2,4*$BNSZ # bits | ||
918 | sgeu $at,$a0,$a2 | ||
919 | .set noreorder | ||
920 | bnezl $at,.+8 | ||
921 | $SUBU $a0,$a2 | ||
922 | .set reorder | ||
923 | |||
924 | li $QT,-1 | ||
925 | $SRL $HH,$a0,4*$BNSZ # bits | ||
926 | $SRL $QT,4*$BNSZ # q=0xffffffff | ||
927 | beq $DH,$HH,.L_bn_div_words_skip_div1 | ||
928 | $DIVU $zero,$a0,$DH | ||
929 | mflo $QT | ||
930 | .L_bn_div_words_skip_div1: | ||
931 | $MULTU $a2,$QT | ||
932 | $SLL $t3,$a0,4*$BNSZ # bits | ||
933 | $SRL $at,$a1,4*$BNSZ # bits | ||
934 | or $t3,$at | ||
935 | mflo $t0 | ||
936 | mfhi $t1 | ||
937 | .L_bn_div_words_inner_loop1: | ||
938 | sltu $t2,$t3,$t0 | ||
939 | seq $t8,$HH,$t1 | ||
940 | sltu $at,$HH,$t1 | ||
941 | and $t2,$t8 | ||
942 | sltu $v0,$t0,$a2 | ||
943 | or $at,$t2 | ||
944 | .set noreorder | ||
945 | beqz $at,.L_bn_div_words_inner_loop1_done | ||
946 | $SUBU $t1,$v0 | ||
947 | $SUBU $t0,$a2 | ||
948 | b .L_bn_div_words_inner_loop1 | ||
949 | $SUBU $QT,1 | ||
950 | .set reorder | ||
951 | .L_bn_div_words_inner_loop1_done: | ||
952 | |||
953 | $SLL $a1,4*$BNSZ # bits | ||
954 | $SUBU $a0,$t3,$t0 | ||
955 | $SLL $v0,$QT,4*$BNSZ # bits | ||
956 | |||
957 | li $QT,-1 | ||
958 | $SRL $HH,$a0,4*$BNSZ # bits | ||
959 | $SRL $QT,4*$BNSZ # q=0xffffffff | ||
960 | beq $DH,$HH,.L_bn_div_words_skip_div2 | ||
961 | $DIVU $zero,$a0,$DH | ||
962 | mflo $QT | ||
963 | .L_bn_div_words_skip_div2: | ||
964 | $MULTU $a2,$QT | ||
965 | $SLL $t3,$a0,4*$BNSZ # bits | ||
966 | $SRL $at,$a1,4*$BNSZ # bits | ||
967 | or $t3,$at | ||
968 | mflo $t0 | ||
969 | mfhi $t1 | ||
970 | .L_bn_div_words_inner_loop2: | ||
971 | sltu $t2,$t3,$t0 | ||
972 | seq $t8,$HH,$t1 | ||
973 | sltu $at,$HH,$t1 | ||
974 | and $t2,$t8 | ||
975 | sltu $v1,$t0,$a2 | ||
976 | or $at,$t2 | ||
977 | .set noreorder | ||
978 | beqz $at,.L_bn_div_words_inner_loop2_done | ||
979 | $SUBU $t1,$v1 | ||
980 | $SUBU $t0,$a2 | ||
981 | b .L_bn_div_words_inner_loop2 | ||
982 | $SUBU $QT,1 | ||
983 | .set reorder | ||
984 | .L_bn_div_words_inner_loop2_done: | ||
985 | |||
986 | $SUBU $a0,$t3,$t0 | ||
987 | or $v0,$QT | ||
988 | $SRL $v1,$a0,$t9 # $v1 contains remainder if anybody wants it | ||
989 | $SRL $a2,$t9 # restore $a2 | ||
990 | |||
991 | .set noreorder | ||
992 | move $a1,$v1 | ||
993 | ___ | ||
994 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
995 | $REG_L $t3,4*$SZREG($sp) | ||
996 | $REG_L $t2,3*$SZREG($sp) | ||
997 | $REG_L $t1,2*$SZREG($sp) | ||
998 | $REG_L $t0,1*$SZREG($sp) | ||
999 | $REG_L $gp,0*$SZREG($sp) | ||
1000 | $PTR_ADD $sp,6*$SZREG | ||
1001 | ___ | ||
1002 | $code.=<<___; | ||
1003 | jr $ra | ||
1004 | move $a0,$v0 | ||
1005 | .end bn_div_words_internal | ||
1006 | ___ | ||
1007 | undef $_ for ($HH,$QT,$DH); # drop the bn_div_words aliases | ||
1008 | |||
1009 | # Register allocation for the comba routines: one register per word of | ||
1010 | # a[0..7] and b[0..7]. a[7] and b[7] take over the pointer registers | ||
1011 | # $a1 and $a2, which are no longer needed once those words are loaded. | ||
1012 | ($a_0,$a_1,$a_2,$a_3,$a_4,$a_5,$a_6,$a_7)=($t0,$t1,$t2,$t3,$s0,$s2,$s4,$a1); | ||
1013 | ($b_0,$b_1,$b_2,$b_3,$b_4,$b_5,$b_6,$b_7)=($ta0,$ta1,$ta2,$ta3,$s1,$s3,$s5,$a2); | ||
1014 | # $t_1/$t_2 receive mflo/mfhi of each product; $c_1..$c_3 are the column accumulators. | ||
1015 | ($t_1,$t_2,$c_1,$c_2,$c_3)=($t8,$t9,$v0,$v1,$a3); | ||
1016 | |||
1017 | $code.=<<___; | ||
1018 | |||
1019 | .align 5 | ||
1020 | .globl bn_mul_comba8 | ||
1021 | .ent bn_mul_comba8 | ||
1022 | bn_mul_comba8: | ||
1023 | .set noreorder | ||
1024 | ___ | ||
1025 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
1026 | .frame $sp,12*$SZREG,$ra | ||
1027 | .mask 0x803ff008,-$SZREG | ||
1028 | $PTR_SUB $sp,12*$SZREG | ||
1029 | $REG_S $ra,11*$SZREG($sp) | ||
1030 | $REG_S $s5,10*$SZREG($sp) | ||
1031 | $REG_S $s4,9*$SZREG($sp) | ||
1032 | $REG_S $s3,8*$SZREG($sp) | ||
1033 | $REG_S $s2,7*$SZREG($sp) | ||
1034 | $REG_S $s1,6*$SZREG($sp) | ||
1035 | $REG_S $s0,5*$SZREG($sp) | ||
1036 | $REG_S $t3,4*$SZREG($sp) | ||
1037 | $REG_S $t2,3*$SZREG($sp) | ||
1038 | $REG_S $t1,2*$SZREG($sp) | ||
1039 | $REG_S $t0,1*$SZREG($sp) | ||
1040 | $REG_S $gp,0*$SZREG($sp) | ||
1041 | ___ | ||
1042 | $code.=<<___ if ($flavour !~ /nubi/i); | ||
1043 | .frame $sp,6*$SZREG,$ra | ||
1044 | .mask 0x003f0000,-$SZREG | ||
1045 | $PTR_SUB $sp,6*$SZREG | ||
1046 | $REG_S $s5,5*$SZREG($sp) | ||
1047 | $REG_S $s4,4*$SZREG($sp) | ||
1048 | $REG_S $s3,3*$SZREG($sp) | ||
1049 | $REG_S $s2,2*$SZREG($sp) | ||
1050 | $REG_S $s1,1*$SZREG($sp) | ||
1051 | $REG_S $s0,0*$SZREG($sp) | ||
1052 | ___ | ||
# Body of bn_mul_comba8: r[0..15] = a[0..7] * b[0..7], with r at $a0, a at
# $a1 and b at $a2.
#
# Layout of the emitted code:
#   * All sixteen operand words are loaded up front, interleaved with the
#     first multiplies.  a[7] and b[7] are loaded last through $a1/$a2
#     because their aliases overwrite those pointers.
#   * The result is produced one column r[k] at a time.  Three registers
#     ($c_1,$c_2,$c_3) hold the running column sum and rotate roles after
#     every store.
#   * The code is software-pipelined: each "mflo/mfhi" pair collects the
#     product of the PREVIOUS $MULTU, and the next $MULTU is issued in the
#     middle of the add/carry sequence.  The trailing "mul_add_c(...)"
#     comment on a $MULTU therefore names the product being started there,
#     which is folded in by the following mflo/mfhi group.
#   * Carries are detected with sltu after each $ADDU.  On the first carry
#     check into a column's top word that word is written directly by
#     "sltu $c_x,..."; later checks go through $at and are added on.
#
# NOTE(review): the spelling "1ine" (digit one) in the assembly comment
# below looks deliberate -- presumably so that a C preprocessor pass over
# the generated file does not read "# line" as a directive.  Confirm before
# "correcting" it.  No comments are added inside the heredoc for the same
# reason: its text is emitted verbatim into the assembly output.
1053 | $code.=<<___; | ||
1054 | |||
1055 | .set reorder | ||
1056 | $LD $a_0,0($a1) # If compiled with -mips3 option on | ||
1057 | # R5000 box assembler barks on this | ||
1058 | # 1ine with "should not have mult/div | ||
1059 | # as last instruction in bb (R10K | ||
1060 | # bug)" warning. If anybody out there | ||
1061 | # has a clue about how to circumvent | ||
1062 | # this do send me a note. | ||
1063 | # <appro\@fy.chalmers.se> | ||
1064 | |||
1065 | $LD $b_0,0($a2) | ||
1066 | $LD $a_1,$BNSZ($a1) | ||
1067 | $LD $a_2,2*$BNSZ($a1) | ||
1068 | $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); | ||
1069 | $LD $a_3,3*$BNSZ($a1) | ||
1070 | $LD $b_1,$BNSZ($a2) | ||
1071 | $LD $b_2,2*$BNSZ($a2) | ||
1072 | $LD $b_3,3*$BNSZ($a2) | ||
1073 | mflo $c_1 | ||
1074 | mfhi $c_2 | ||
1075 | |||
1076 | $LD $a_4,4*$BNSZ($a1) | ||
1077 | $LD $a_5,5*$BNSZ($a1) | ||
1078 | $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); | ||
1079 | $LD $a_6,6*$BNSZ($a1) | ||
1080 | $LD $a_7,7*$BNSZ($a1) | ||
1081 | $LD $b_4,4*$BNSZ($a2) | ||
1082 | $LD $b_5,5*$BNSZ($a2) | ||
1083 | mflo $t_1 | ||
1084 | mfhi $t_2 | ||
1085 | $ADDU $c_2,$t_1 | ||
1086 | sltu $at,$c_2,$t_1 | ||
1087 | $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); | ||
1088 | $ADDU $c_3,$t_2,$at | ||
1089 | $LD $b_6,6*$BNSZ($a2) | ||
1090 | $LD $b_7,7*$BNSZ($a2) | ||
1091 | $ST $c_1,0($a0) # r[0]=c1; | ||
1092 | mflo $t_1 | ||
1093 | mfhi $t_2 | ||
1094 | $ADDU $c_2,$t_1 | ||
1095 | sltu $at,$c_2,$t_1 | ||
1096 | $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); | ||
1097 | $ADDU $t_2,$at | ||
1098 | $ADDU $c_3,$t_2 | ||
1099 | sltu $c_1,$c_3,$t_2 | ||
1100 | $ST $c_2,$BNSZ($a0) # r[1]=c2; | ||
1101 | |||
1102 | mflo $t_1 | ||
1103 | mfhi $t_2 | ||
1104 | $ADDU $c_3,$t_1 | ||
1105 | sltu $at,$c_3,$t_1 | ||
1106 | $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
1107 | $ADDU $t_2,$at | ||
1108 | $ADDU $c_1,$t_2 | ||
1109 | mflo $t_1 | ||
1110 | mfhi $t_2 | ||
1111 | $ADDU $c_3,$t_1 | ||
1112 | sltu $at,$c_3,$t_1 | ||
1113 | $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); | ||
1114 | $ADDU $t_2,$at | ||
1115 | $ADDU $c_1,$t_2 | ||
1116 | sltu $c_2,$c_1,$t_2 | ||
1117 | mflo $t_1 | ||
1118 | mfhi $t_2 | ||
1119 | $ADDU $c_3,$t_1 | ||
1120 | sltu $at,$c_3,$t_1 | ||
1121 | $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); | ||
1122 | $ADDU $t_2,$at | ||
1123 | $ADDU $c_1,$t_2 | ||
1124 | sltu $at,$c_1,$t_2 | ||
1125 | $ADDU $c_2,$at | ||
1126 | $ST $c_3,2*$BNSZ($a0) # r[2]=c3; | ||
1127 | |||
1128 | mflo $t_1 | ||
1129 | mfhi $t_2 | ||
1130 | $ADDU $c_1,$t_1 | ||
1131 | sltu $at,$c_1,$t_1 | ||
1132 | $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); | ||
1133 | $ADDU $t_2,$at | ||
1134 | $ADDU $c_2,$t_2 | ||
1135 | sltu $c_3,$c_2,$t_2 | ||
1136 | mflo $t_1 | ||
1137 | mfhi $t_2 | ||
1138 | $ADDU $c_1,$t_1 | ||
1139 | sltu $at,$c_1,$t_1 | ||
1140 | $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); | ||
1141 | $ADDU $t_2,$at | ||
1142 | $ADDU $c_2,$t_2 | ||
1143 | sltu $at,$c_2,$t_2 | ||
1144 | $ADDU $c_3,$at | ||
1145 | mflo $t_1 | ||
1146 | mfhi $t_2 | ||
1147 | $ADDU $c_1,$t_1 | ||
1148 | sltu $at,$c_1,$t_1 | ||
1149 | $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); | ||
1150 | $ADDU $t_2,$at | ||
1151 | $ADDU $c_2,$t_2 | ||
1152 | sltu $at,$c_2,$t_2 | ||
1153 | $ADDU $c_3,$at | ||
1154 | mflo $t_1 | ||
1155 | mfhi $t_2 | ||
1156 | $ADDU $c_1,$t_1 | ||
1157 | sltu $at,$c_1,$t_1 | ||
1158 | $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); | ||
1159 | $ADDU $t_2,$at | ||
1160 | $ADDU $c_2,$t_2 | ||
1161 | sltu $at,$c_2,$t_2 | ||
1162 | $ADDU $c_3,$at | ||
1163 | $ST $c_1,3*$BNSZ($a0) # r[3]=c1; | ||
1164 | |||
1165 | mflo $t_1 | ||
1166 | mfhi $t_2 | ||
1167 | $ADDU $c_2,$t_1 | ||
1168 | sltu $at,$c_2,$t_1 | ||
1169 | $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); | ||
1170 | $ADDU $t_2,$at | ||
1171 | $ADDU $c_3,$t_2 | ||
1172 | sltu $c_1,$c_3,$t_2 | ||
1173 | mflo $t_1 | ||
1174 | mfhi $t_2 | ||
1175 | $ADDU $c_2,$t_1 | ||
1176 | sltu $at,$c_2,$t_1 | ||
1177 | $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
1178 | $ADDU $t_2,$at | ||
1179 | $ADDU $c_3,$t_2 | ||
1180 | sltu $at,$c_3,$t_2 | ||
1181 | $ADDU $c_1,$at | ||
1182 | mflo $t_1 | ||
1183 | mfhi $t_2 | ||
1184 | $ADDU $c_2,$t_1 | ||
1185 | sltu $at,$c_2,$t_1 | ||
1186 | $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); | ||
1187 | $ADDU $t_2,$at | ||
1188 | $ADDU $c_3,$t_2 | ||
1189 | sltu $at,$c_3,$t_2 | ||
1190 | $ADDU $c_1,$at | ||
1191 | mflo $t_1 | ||
1192 | mfhi $t_2 | ||
1193 | $ADDU $c_2,$t_1 | ||
1194 | sltu $at,$c_2,$t_1 | ||
1195 | $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); | ||
1196 | $ADDU $t_2,$at | ||
1197 | $ADDU $c_3,$t_2 | ||
1198 | sltu $at,$c_3,$t_2 | ||
1199 | $ADDU $c_1,$at | ||
1200 | mflo $t_1 | ||
1201 | mfhi $t_2 | ||
1202 | $ADDU $c_2,$t_1 | ||
1203 | sltu $at,$c_2,$t_1 | ||
1204 | $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); | ||
1205 | $ADDU $t_2,$at | ||
1206 | $ADDU $c_3,$t_2 | ||
1207 | sltu $at,$c_3,$t_2 | ||
1208 | $ADDU $c_1,$at | ||
1209 | $ST $c_2,4*$BNSZ($a0) # r[4]=c2; | ||
1210 | |||
1211 | mflo $t_1 | ||
1212 | mfhi $t_2 | ||
1213 | $ADDU $c_3,$t_1 | ||
1214 | sltu $at,$c_3,$t_1 | ||
1215 | $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); | ||
1216 | $ADDU $t_2,$at | ||
1217 | $ADDU $c_1,$t_2 | ||
1218 | sltu $c_2,$c_1,$t_2 | ||
1219 | mflo $t_1 | ||
1220 | mfhi $t_2 | ||
1221 | $ADDU $c_3,$t_1 | ||
1222 | sltu $at,$c_3,$t_1 | ||
1223 | $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); | ||
1224 | $ADDU $t_2,$at | ||
1225 | $ADDU $c_1,$t_2 | ||
1226 | sltu $at,$c_1,$t_2 | ||
1227 | $ADDU $c_2,$at | ||
1228 | mflo $t_1 | ||
1229 | mfhi $t_2 | ||
1230 | $ADDU $c_3,$t_1 | ||
1231 | sltu $at,$c_3,$t_1 | ||
1232 | $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); | ||
1233 | $ADDU $t_2,$at | ||
1234 | $ADDU $c_1,$t_2 | ||
1235 | sltu $at,$c_1,$t_2 | ||
1236 | $ADDU $c_2,$at | ||
1237 | mflo $t_1 | ||
1238 | mfhi $t_2 | ||
1239 | $ADDU $c_3,$t_1 | ||
1240 | sltu $at,$c_3,$t_1 | ||
1241 | $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); | ||
1242 | $ADDU $t_2,$at | ||
1243 | $ADDU $c_1,$t_2 | ||
1244 | sltu $at,$c_1,$t_2 | ||
1245 | $ADDU $c_2,$at | ||
1246 | mflo $t_1 | ||
1247 | mfhi $t_2 | ||
1248 | $ADDU $c_3,$t_1 | ||
1249 | sltu $at,$c_3,$t_1 | ||
1250 | $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); | ||
1251 | $ADDU $t_2,$at | ||
1252 | $ADDU $c_1,$t_2 | ||
1253 | sltu $at,$c_1,$t_2 | ||
1254 | $ADDU $c_2,$at | ||
1255 | mflo $t_1 | ||
1256 | mfhi $t_2 | ||
1257 | $ADDU $c_3,$t_1 | ||
1258 | sltu $at,$c_3,$t_1 | ||
1259 | $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); | ||
1260 | $ADDU $t_2,$at | ||
1261 | $ADDU $c_1,$t_2 | ||
1262 | sltu $at,$c_1,$t_2 | ||
1263 | $ADDU $c_2,$at | ||
1264 | $ST $c_3,5*$BNSZ($a0) # r[5]=c3; | ||
1265 | |||
1266 | mflo $t_1 | ||
1267 | mfhi $t_2 | ||
1268 | $ADDU $c_1,$t_1 | ||
1269 | sltu $at,$c_1,$t_1 | ||
1270 | $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); | ||
1271 | $ADDU $t_2,$at | ||
1272 | $ADDU $c_2,$t_2 | ||
1273 | sltu $c_3,$c_2,$t_2 | ||
1274 | mflo $t_1 | ||
1275 | mfhi $t_2 | ||
1276 | $ADDU $c_1,$t_1 | ||
1277 | sltu $at,$c_1,$t_1 | ||
1278 | $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); | ||
1279 | $ADDU $t_2,$at | ||
1280 | $ADDU $c_2,$t_2 | ||
1281 | sltu $at,$c_2,$t_2 | ||
1282 | $ADDU $c_3,$at | ||
1283 | mflo $t_1 | ||
1284 | mfhi $t_2 | ||
1285 | $ADDU $c_1,$t_1 | ||
1286 | sltu $at,$c_1,$t_1 | ||
1287 | $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
1288 | $ADDU $t_2,$at | ||
1289 | $ADDU $c_2,$t_2 | ||
1290 | sltu $at,$c_2,$t_2 | ||
1291 | $ADDU $c_3,$at | ||
1292 | mflo $t_1 | ||
1293 | mfhi $t_2 | ||
1294 | $ADDU $c_1,$t_1 | ||
1295 | sltu $at,$c_1,$t_1 | ||
1296 | $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); | ||
1297 | $ADDU $t_2,$at | ||
1298 | $ADDU $c_2,$t_2 | ||
1299 | sltu $at,$c_2,$t_2 | ||
1300 | $ADDU $c_3,$at | ||
1301 | mflo $t_1 | ||
1302 | mfhi $t_2 | ||
1303 | $ADDU $c_1,$t_1 | ||
1304 | sltu $at,$c_1,$t_1 | ||
1305 | $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); | ||
1306 | $ADDU $t_2,$at | ||
1307 | $ADDU $c_2,$t_2 | ||
1308 | sltu $at,$c_2,$t_2 | ||
1309 | $ADDU $c_3,$at | ||
1310 | mflo $t_1 | ||
1311 | mfhi $t_2 | ||
1312 | $ADDU $c_1,$t_1 | ||
1313 | sltu $at,$c_1,$t_1 | ||
1314 | $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); | ||
1315 | $ADDU $t_2,$at | ||
1316 | $ADDU $c_2,$t_2 | ||
1317 | sltu $at,$c_2,$t_2 | ||
1318 | $ADDU $c_3,$at | ||
1319 | mflo $t_1 | ||
1320 | mfhi $t_2 | ||
1321 | $ADDU $c_1,$t_1 | ||
1322 | sltu $at,$c_1,$t_1 | ||
1323 | $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); | ||
1324 | $ADDU $t_2,$at | ||
1325 | $ADDU $c_2,$t_2 | ||
1326 | sltu $at,$c_2,$t_2 | ||
1327 | $ADDU $c_3,$at | ||
1328 | $ST $c_1,6*$BNSZ($a0) # r[6]=c1; | ||
1329 | |||
1330 | mflo $t_1 | ||
1331 | mfhi $t_2 | ||
1332 | $ADDU $c_2,$t_1 | ||
1333 | sltu $at,$c_2,$t_1 | ||
1334 | $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); | ||
1335 | $ADDU $t_2,$at | ||
1336 | $ADDU $c_3,$t_2 | ||
1337 | sltu $c_1,$c_3,$t_2 | ||
1338 | mflo $t_1 | ||
1339 | mfhi $t_2 | ||
1340 | $ADDU $c_2,$t_1 | ||
1341 | sltu $at,$c_2,$t_1 | ||
1342 | $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); | ||
1343 | $ADDU $t_2,$at | ||
1344 | $ADDU $c_3,$t_2 | ||
1345 | sltu $at,$c_3,$t_2 | ||
1346 | $ADDU $c_1,$at | ||
1347 | mflo $t_1 | ||
1348 | mfhi $t_2 | ||
1349 | $ADDU $c_2,$t_1 | ||
1350 | sltu $at,$c_2,$t_1 | ||
1351 | $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); | ||
1352 | $ADDU $t_2,$at | ||
1353 | $ADDU $c_3,$t_2 | ||
1354 | sltu $at,$c_3,$t_2 | ||
1355 | $ADDU $c_1,$at | ||
1356 | mflo $t_1 | ||
1357 | mfhi $t_2 | ||
1358 | $ADDU $c_2,$t_1 | ||
1359 | sltu $at,$c_2,$t_1 | ||
1360 | $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); | ||
1361 | $ADDU $t_2,$at | ||
1362 | $ADDU $c_3,$t_2 | ||
1363 | sltu $at,$c_3,$t_2 | ||
1364 | $ADDU $c_1,$at | ||
1365 | mflo $t_1 | ||
1366 | mfhi $t_2 | ||
1367 | $ADDU $c_2,$t_1 | ||
1368 | sltu $at,$c_2,$t_1 | ||
1369 | $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); | ||
1370 | $ADDU $t_2,$at | ||
1371 | $ADDU $c_3,$t_2 | ||
1372 | sltu $at,$c_3,$t_2 | ||
1373 | $ADDU $c_1,$at | ||
1374 | mflo $t_1 | ||
1375 | mfhi $t_2 | ||
1376 | $ADDU $c_2,$t_1 | ||
1377 | sltu $at,$c_2,$t_1 | ||
1378 | $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); | ||
1379 | $ADDU $t_2,$at | ||
1380 | $ADDU $c_3,$t_2 | ||
1381 | sltu $at,$c_3,$t_2 | ||
1382 | $ADDU $c_1,$at | ||
1383 | mflo $t_1 | ||
1384 | mfhi $t_2 | ||
1385 | $ADDU $c_2,$t_1 | ||
1386 | sltu $at,$c_2,$t_1 | ||
1387 | $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); | ||
1388 | $ADDU $t_2,$at | ||
1389 | $ADDU $c_3,$t_2 | ||
1390 | sltu $at,$c_3,$t_2 | ||
1391 | $ADDU $c_1,$at | ||
1392 | mflo $t_1 | ||
1393 | mfhi $t_2 | ||
1394 | $ADDU $c_2,$t_1 | ||
1395 | sltu $at,$c_2,$t_1 | ||
1396 | $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); | ||
1397 | $ADDU $t_2,$at | ||
1398 | $ADDU $c_3,$t_2 | ||
1399 | sltu $at,$c_3,$t_2 | ||
1400 | $ADDU $c_1,$at | ||
1401 | $ST $c_2,7*$BNSZ($a0) # r[7]=c2; | ||
1402 | |||
1403 | mflo $t_1 | ||
1404 | mfhi $t_2 | ||
1405 | $ADDU $c_3,$t_1 | ||
1406 | sltu $at,$c_3,$t_1 | ||
1407 | $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); | ||
1408 | $ADDU $t_2,$at | ||
1409 | $ADDU $c_1,$t_2 | ||
1410 | sltu $c_2,$c_1,$t_2 | ||
1411 | mflo $t_1 | ||
1412 | mfhi $t_2 | ||
1413 | $ADDU $c_3,$t_1 | ||
1414 | sltu $at,$c_3,$t_1 | ||
1415 | $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); | ||
1416 | $ADDU $t_2,$at | ||
1417 | $ADDU $c_1,$t_2 | ||
1418 | sltu $at,$c_1,$t_2 | ||
1419 | $ADDU $c_2,$at | ||
1420 | mflo $t_1 | ||
1421 | mfhi $t_2 | ||
1422 | $ADDU $c_3,$t_1 | ||
1423 | sltu $at,$c_3,$t_1 | ||
1424 | $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
1425 | $ADDU $t_2,$at | ||
1426 | $ADDU $c_1,$t_2 | ||
1427 | sltu $at,$c_1,$t_2 | ||
1428 | $ADDU $c_2,$at | ||
1429 | mflo $t_1 | ||
1430 | mfhi $t_2 | ||
1431 | $ADDU $c_3,$t_1 | ||
1432 | sltu $at,$c_3,$t_1 | ||
1433 | $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); | ||
1434 | $ADDU $t_2,$at | ||
1435 | $ADDU $c_1,$t_2 | ||
1436 | sltu $at,$c_1,$t_2 | ||
1437 | $ADDU $c_2,$at | ||
1438 | mflo $t_1 | ||
1439 | mfhi $t_2 | ||
1440 | $ADDU $c_3,$t_1 | ||
1441 | sltu $at,$c_3,$t_1 | ||
1442 | $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); | ||
1443 | $ADDU $t_2,$at | ||
1444 | $ADDU $c_1,$t_2 | ||
1445 | sltu $at,$c_1,$t_2 | ||
1446 | $ADDU $c_2,$at | ||
1447 | mflo $t_1 | ||
1448 | mfhi $t_2 | ||
1449 | $ADDU $c_3,$t_1 | ||
1450 | sltu $at,$c_3,$t_1 | ||
1451 | $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); | ||
1452 | $ADDU $t_2,$at | ||
1453 | $ADDU $c_1,$t_2 | ||
1454 | sltu $at,$c_1,$t_2 | ||
1455 | $ADDU $c_2,$at | ||
1456 | mflo $t_1 | ||
1457 | mfhi $t_2 | ||
1458 | $ADDU $c_3,$t_1 | ||
1459 | sltu $at,$c_3,$t_1 | ||
1460 | $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); | ||
1461 | $ADDU $t_2,$at | ||
1462 | $ADDU $c_1,$t_2 | ||
1463 | sltu $at,$c_1,$t_2 | ||
1464 | $ADDU $c_2,$at | ||
1465 | $ST $c_3,8*$BNSZ($a0) # r[8]=c3; | ||
1466 | |||
1467 | mflo $t_1 | ||
1468 | mfhi $t_2 | ||
1469 | $ADDU $c_1,$t_1 | ||
1470 | sltu $at,$c_1,$t_1 | ||
1471 | $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); | ||
1472 | $ADDU $t_2,$at | ||
1473 | $ADDU $c_2,$t_2 | ||
1474 | sltu $c_3,$c_2,$t_2 | ||
1475 | mflo $t_1 | ||
1476 | mfhi $t_2 | ||
1477 | $ADDU $c_1,$t_1 | ||
1478 | sltu $at,$c_1,$t_1 | ||
1479 | $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); | ||
1480 | $ADDU $t_2,$at | ||
1481 | $ADDU $c_2,$t_2 | ||
1482 | sltu $at,$c_2,$t_2 | ||
1483 | $ADDU $c_3,$at | ||
1484 | mflo $t_1 | ||
1485 | mfhi $t_2 | ||
1486 | $ADDU $c_1,$t_1 | ||
1487 | sltu $at,$c_1,$t_1 | ||
1488 | $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); | ||
1489 | $ADDU $t_2,$at | ||
1490 | $ADDU $c_2,$t_2 | ||
1491 | sltu $at,$c_2,$t_2 | ||
1492 | $ADDU $c_3,$at | ||
1493 | mflo $t_1 | ||
1494 | mfhi $t_2 | ||
1495 | $ADDU $c_1,$t_1 | ||
1496 | sltu $at,$c_1,$t_1 | ||
1497 | $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); | ||
1498 | $ADDU $t_2,$at | ||
1499 | $ADDU $c_2,$t_2 | ||
1500 | sltu $at,$c_2,$t_2 | ||
1501 | $ADDU $c_3,$at | ||
1502 | mflo $t_1 | ||
1503 | mfhi $t_2 | ||
1504 | $ADDU $c_1,$t_1 | ||
1505 | sltu $at,$c_1,$t_1 | ||
1506 | $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); | ||
1507 | $ADDU $t_2,$at | ||
1508 | $ADDU $c_2,$t_2 | ||
1509 | sltu $at,$c_2,$t_2 | ||
1510 | $ADDU $c_3,$at | ||
1511 | mflo $t_1 | ||
1512 | mfhi $t_2 | ||
1513 | $ADDU $c_1,$t_1 | ||
1514 | sltu $at,$c_1,$t_1 | ||
1515 | $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); | ||
1516 | $ADDU $t_2,$at | ||
1517 | $ADDU $c_2,$t_2 | ||
1518 | sltu $at,$c_2,$t_2 | ||
1519 | $ADDU $c_3,$at | ||
1520 | $ST $c_1,9*$BNSZ($a0) # r[9]=c1; | ||
1521 | |||
1522 | mflo $t_1 | ||
1523 | mfhi $t_2 | ||
1524 | $ADDU $c_2,$t_1 | ||
1525 | sltu $at,$c_2,$t_1 | ||
1526 | $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); | ||
1527 | $ADDU $t_2,$at | ||
1528 | $ADDU $c_3,$t_2 | ||
1529 | sltu $c_1,$c_3,$t_2 | ||
1530 | mflo $t_1 | ||
1531 | mfhi $t_2 | ||
1532 | $ADDU $c_2,$t_1 | ||
1533 | sltu $at,$c_2,$t_1 | ||
1534 | $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
1535 | $ADDU $t_2,$at | ||
1536 | $ADDU $c_3,$t_2 | ||
1537 | sltu $at,$c_3,$t_2 | ||
1538 | $ADDU $c_1,$at | ||
1539 | mflo $t_1 | ||
1540 | mfhi $t_2 | ||
1541 | $ADDU $c_2,$t_1 | ||
1542 | sltu $at,$c_2,$t_1 | ||
1543 | $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); | ||
1544 | $ADDU $t_2,$at | ||
1545 | $ADDU $c_3,$t_2 | ||
1546 | sltu $at,$c_3,$t_2 | ||
1547 | $ADDU $c_1,$at | ||
1548 | mflo $t_1 | ||
1549 | mfhi $t_2 | ||
1550 | $ADDU $c_2,$t_1 | ||
1551 | sltu $at,$c_2,$t_1 | ||
1552 | $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); | ||
1553 | $ADDU $t_2,$at | ||
1554 | $ADDU $c_3,$t_2 | ||
1555 | sltu $at,$c_3,$t_2 | ||
1556 | $ADDU $c_1,$at | ||
1557 | mflo $t_1 | ||
1558 | mfhi $t_2 | ||
1559 | $ADDU $c_2,$t_1 | ||
1560 | sltu $at,$c_2,$t_1 | ||
1561 | $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); | ||
1562 | $ADDU $t_2,$at | ||
1563 | $ADDU $c_3,$t_2 | ||
1564 | sltu $at,$c_3,$t_2 | ||
1565 | $ADDU $c_1,$at | ||
1566 | $ST $c_2,10*$BNSZ($a0) # r[10]=c2; | ||
1567 | |||
1568 | mflo $t_1 | ||
1569 | mfhi $t_2 | ||
1570 | $ADDU $c_3,$t_1 | ||
1571 | sltu $at,$c_3,$t_1 | ||
1572 | $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); | ||
1573 | $ADDU $t_2,$at | ||
1574 | $ADDU $c_1,$t_2 | ||
1575 | sltu $c_2,$c_1,$t_2 | ||
1576 | mflo $t_1 | ||
1577 | mfhi $t_2 | ||
1578 | $ADDU $c_3,$t_1 | ||
1579 | sltu $at,$c_3,$t_1 | ||
1580 | $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); | ||
1581 | $ADDU $t_2,$at | ||
1582 | $ADDU $c_1,$t_2 | ||
1583 | sltu $at,$c_1,$t_2 | ||
1584 | $ADDU $c_2,$at | ||
1585 | mflo $t_1 | ||
1586 | mfhi $t_2 | ||
1587 | $ADDU $c_3,$t_1 | ||
1588 | sltu $at,$c_3,$t_1 | ||
1589 | $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); | ||
1590 | $ADDU $t_2,$at | ||
1591 | $ADDU $c_1,$t_2 | ||
1592 | sltu $at,$c_1,$t_2 | ||
1593 | $ADDU $c_2,$at | ||
1594 | mflo $t_1 | ||
1595 | mfhi $t_2 | ||
1596 | $ADDU $c_3,$t_1 | ||
1597 | sltu $at,$c_3,$t_1 | ||
1598 | $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); | ||
1599 | $ADDU $t_2,$at | ||
1600 | $ADDU $c_1,$t_2 | ||
1601 | sltu $at,$c_1,$t_2 | ||
1602 | $ADDU $c_2,$at | ||
1603 | $ST $c_3,11*$BNSZ($a0) # r[11]=c3; | ||
1604 | |||
1605 | mflo $t_1 | ||
1606 | mfhi $t_2 | ||
1607 | $ADDU $c_1,$t_1 | ||
1608 | sltu $at,$c_1,$t_1 | ||
1609 | $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
1610 | $ADDU $t_2,$at | ||
1611 | $ADDU $c_2,$t_2 | ||
1612 | sltu $c_3,$c_2,$t_2 | ||
1613 | mflo $t_1 | ||
1614 | mfhi $t_2 | ||
1615 | $ADDU $c_1,$t_1 | ||
1616 | sltu $at,$c_1,$t_1 | ||
1617 | $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); | ||
1618 | $ADDU $t_2,$at | ||
1619 | $ADDU $c_2,$t_2 | ||
1620 | sltu $at,$c_2,$t_2 | ||
1621 | $ADDU $c_3,$at | ||
1622 | mflo $t_1 | ||
1623 | mfhi $t_2 | ||
1624 | $ADDU $c_1,$t_1 | ||
1625 | sltu $at,$c_1,$t_1 | ||
1626 | $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); | ||
1627 | $ADDU $t_2,$at | ||
1628 | $ADDU $c_2,$t_2 | ||
1629 | sltu $at,$c_2,$t_2 | ||
1630 | $ADDU $c_3,$at | ||
1631 | $ST $c_1,12*$BNSZ($a0) # r[12]=c1; | ||
1632 | |||
1633 | mflo $t_1 | ||
1634 | mfhi $t_2 | ||
1635 | $ADDU $c_2,$t_1 | ||
1636 | sltu $at,$c_2,$t_1 | ||
1637 | $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); | ||
1638 | $ADDU $t_2,$at | ||
1639 | $ADDU $c_3,$t_2 | ||
1640 | sltu $c_1,$c_3,$t_2 | ||
1641 | mflo $t_1 | ||
1642 | mfhi $t_2 | ||
1643 | $ADDU $c_2,$t_1 | ||
1644 | sltu $at,$c_2,$t_1 | ||
1645 | $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
1646 | $ADDU $t_2,$at | ||
1647 | $ADDU $c_3,$t_2 | ||
1648 | sltu $at,$c_3,$t_2 | ||
1649 | $ADDU $c_1,$at | ||
1650 | $ST $c_2,13*$BNSZ($a0) # r[13]=c2; | ||
1651 | |||
1652 | mflo $t_1 | ||
1653 | mfhi $t_2 | ||
1654 | $ADDU $c_3,$t_1 | ||
1655 | sltu $at,$c_3,$t_1 | ||
1656 | $ADDU $t_2,$at | ||
1657 | $ADDU $c_1,$t_2 | ||
1658 | $ST $c_3,14*$BNSZ($a0) # r[14]=c3; | ||
1659 | $ST $c_1,15*$BNSZ($a0) # r[15]=c1; | ||
1660 | |||
1661 | .set noreorder | ||
1662 | ___ | ||
# "nubi" flavour epilogue: reload $s5..$s0, $t3..$t0 and $gp from the slots
# the prologue stored them in, then return.  The body ends with
# ".set noreorder", so the $PTR_ADD that pops the 12-slot frame sits in the
# delay slot of "jr $ra".  $ra itself is not reloaded from slot 11.
1663 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
1664 | $REG_L $s5,10*$SZREG($sp) | ||
1665 | $REG_L $s4,9*$SZREG($sp) | ||
1666 | $REG_L $s3,8*$SZREG($sp) | ||
1667 | $REG_L $s2,7*$SZREG($sp) | ||
1668 | $REG_L $s1,6*$SZREG($sp) | ||
1669 | $REG_L $s0,5*$SZREG($sp) | ||
1670 | $REG_L $t3,4*$SZREG($sp) | ||
1671 | $REG_L $t2,3*$SZREG($sp) | ||
1672 | $REG_L $t1,2*$SZREG($sp) | ||
1673 | $REG_L $t0,1*$SZREG($sp) | ||
1674 | $REG_L $gp,0*$SZREG($sp) | ||
1675 | jr $ra | ||
1676 | $PTR_ADD $sp,12*$SZREG | ||
1677 | ___ | ||
# Every other flavour: reload $s5..$s0 and return, popping the 6-slot frame
# in the delay slot of "jr $ra".
1678 | $code.=<<___ if ($flavour !~ /nubi/i); | ||
1679 | $REG_L $s5,5*$SZREG($sp) | ||
1680 | $REG_L $s4,4*$SZREG($sp) | ||
1681 | $REG_L $s3,3*$SZREG($sp) | ||
1682 | $REG_L $s2,2*$SZREG($sp) | ||
1683 | $REG_L $s1,1*$SZREG($sp) | ||
1684 | $REG_L $s0,0*$SZREG($sp) | ||
1685 | jr $ra | ||
1686 | $PTR_ADD $sp,6*$SZREG | ||
1687 | ___ | ||
# Close bn_mul_comba8 (.end) and open bn_mul_comba4 in the same heredoc:
# 2**5-byte alignment, global symbol, .ent marker and label.
1688 | $code.=<<___; | ||
1689 | .end bn_mul_comba8 | ||
1690 | |||
1691 | .align 5 | ||
1692 | .globl bn_mul_comba4 | ||
1693 | .ent bn_mul_comba4 | ||
1694 | bn_mul_comba4: | ||
1695 | ___ | ||
# "nubi" flavour prologue: a 6-slot frame holding $ra (slot 5), $t3..$t0
# (slots 4..1) and $gp (slot 0).  No $s registers are saved here; the 4x4
# routine only touches a[0..3]/b[0..3].  Other flavours emit no prologue at
# all for this routine (note that ".set noreorder" is only emitted in this
# nubi branch).
1696 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
1697 | .frame $sp,6*$SZREG,$ra | ||
1698 | .mask 0x8000f008,-$SZREG | ||
1699 | .set noreorder | ||
1700 | $PTR_SUB $sp,6*$SZREG | ||
1701 | $REG_S $ra,5*$SZREG($sp) | ||
1702 | $REG_S $t3,4*$SZREG($sp) | ||
1703 | $REG_S $t2,3*$SZREG($sp) | ||
1704 | $REG_S $t1,2*$SZREG($sp) | ||
1705 | $REG_S $t0,1*$SZREG($sp) | ||
1706 | $REG_S $gp,0*$SZREG($sp) | ||
1707 | ___ | ||
# Body of bn_mul_comba4: r[0..7] = a[0..3] * b[0..3], with r at $a0, a at
# $a1 and b at $a2.
#
# Same scheme as the 8x8 routine, on a 4x4 operand:
#   * a[0..3] and b[0..3] are loaded up front around the first multiply.
#   * The result is produced one column r[k] at a time in the rotating
#     three-word accumulator $c_1,$c_2,$c_3; each column ends with a $ST.
#   * Each "mflo/mfhi" pair collects the product of the PREVIOUS $MULTU, so
#     the "mul_add_c(...)" comment on a $MULTU names the product that the
#     following mflo/mfhi group folds in.
#   * Carries are detected with sltu after each $ADDU and propagated either
#     directly into the next accumulator word or through $at.
# The heredoc text is emitted verbatim, so no comments are added inside it.
1708 | $code.=<<___; | ||
1709 | .set reorder | ||
1710 | $LD $a_0,0($a1) | ||
1711 | $LD $b_0,0($a2) | ||
1712 | $LD $a_1,$BNSZ($a1) | ||
1713 | $LD $a_2,2*$BNSZ($a1) | ||
1714 | $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); | ||
1715 | $LD $a_3,3*$BNSZ($a1) | ||
1716 | $LD $b_1,$BNSZ($a2) | ||
1717 | $LD $b_2,2*$BNSZ($a2) | ||
1718 | $LD $b_3,3*$BNSZ($a2) | ||
1719 | mflo $c_1 | ||
1720 | mfhi $c_2 | ||
1721 | $ST $c_1,0($a0) | ||
1722 | |||
1723 | $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); | ||
1724 | mflo $t_1 | ||
1725 | mfhi $t_2 | ||
1726 | $ADDU $c_2,$t_1 | ||
1727 | sltu $at,$c_2,$t_1 | ||
1728 | $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); | ||
1729 | $ADDU $c_3,$t_2,$at | ||
1730 | mflo $t_1 | ||
1731 | mfhi $t_2 | ||
1732 | $ADDU $c_2,$t_1 | ||
1733 | sltu $at,$c_2,$t_1 | ||
1734 | $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); | ||
1735 | $ADDU $t_2,$at | ||
1736 | $ADDU $c_3,$t_2 | ||
1737 | sltu $c_1,$c_3,$t_2 | ||
1738 | $ST $c_2,$BNSZ($a0) | ||
1739 | |||
1740 | mflo $t_1 | ||
1741 | mfhi $t_2 | ||
1742 | $ADDU $c_3,$t_1 | ||
1743 | sltu $at,$c_3,$t_1 | ||
1744 | $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
1745 | $ADDU $t_2,$at | ||
1746 | $ADDU $c_1,$t_2 | ||
1747 | mflo $t_1 | ||
1748 | mfhi $t_2 | ||
1749 | $ADDU $c_3,$t_1 | ||
1750 | sltu $at,$c_3,$t_1 | ||
1751 | $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); | ||
1752 | $ADDU $t_2,$at | ||
1753 | $ADDU $c_1,$t_2 | ||
1754 | sltu $c_2,$c_1,$t_2 | ||
1755 | mflo $t_1 | ||
1756 | mfhi $t_2 | ||
1757 | $ADDU $c_3,$t_1 | ||
1758 | sltu $at,$c_3,$t_1 | ||
1759 | $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); | ||
1760 | $ADDU $t_2,$at | ||
1761 | $ADDU $c_1,$t_2 | ||
1762 | sltu $at,$c_1,$t_2 | ||
1763 | $ADDU $c_2,$at | ||
1764 | $ST $c_3,2*$BNSZ($a0) | ||
1765 | |||
1766 | mflo $t_1 | ||
1767 | mfhi $t_2 | ||
1768 | $ADDU $c_1,$t_1 | ||
1769 | sltu $at,$c_1,$t_1 | ||
1770 | $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); | ||
1771 | $ADDU $t_2,$at | ||
1772 | $ADDU $c_2,$t_2 | ||
1773 | sltu $c_3,$c_2,$t_2 | ||
1774 | mflo $t_1 | ||
1775 | mfhi $t_2 | ||
1776 | $ADDU $c_1,$t_1 | ||
1777 | sltu $at,$c_1,$t_1 | ||
1778 | $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); | ||
1779 | $ADDU $t_2,$at | ||
1780 | $ADDU $c_2,$t_2 | ||
1781 | sltu $at,$c_2,$t_2 | ||
1782 | $ADDU $c_3,$at | ||
1783 | mflo $t_1 | ||
1784 | mfhi $t_2 | ||
1785 | $ADDU $c_1,$t_1 | ||
1786 | sltu $at,$c_1,$t_1 | ||
1787 | $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); | ||
1788 | $ADDU $t_2,$at | ||
1789 | $ADDU $c_2,$t_2 | ||
1790 | sltu $at,$c_2,$t_2 | ||
1791 | $ADDU $c_3,$at | ||
1792 | mflo $t_1 | ||
1793 | mfhi $t_2 | ||
1794 | $ADDU $c_1,$t_1 | ||
1795 | sltu $at,$c_1,$t_1 | ||
1796 | $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); | ||
1797 | $ADDU $t_2,$at | ||
1798 | $ADDU $c_2,$t_2 | ||
1799 | sltu $at,$c_2,$t_2 | ||
1800 | $ADDU $c_3,$at | ||
1801 | $ST $c_1,3*$BNSZ($a0) | ||
1802 | |||
1803 | mflo $t_1 | ||
1804 | mfhi $t_2 | ||
1805 | $ADDU $c_2,$t_1 | ||
1806 | sltu $at,$c_2,$t_1 | ||
1807 | $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
1808 | $ADDU $t_2,$at | ||
1809 | $ADDU $c_3,$t_2 | ||
1810 | sltu $c_1,$c_3,$t_2 | ||
1811 | mflo $t_1 | ||
1812 | mfhi $t_2 | ||
1813 | $ADDU $c_2,$t_1 | ||
1814 | sltu $at,$c_2,$t_1 | ||
1815 | $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); | ||
1816 | $ADDU $t_2,$at | ||
1817 | $ADDU $c_3,$t_2 | ||
1818 | sltu $at,$c_3,$t_2 | ||
1819 | $ADDU $c_1,$at | ||
1820 | mflo $t_1 | ||
1821 | mfhi $t_2 | ||
1822 | $ADDU $c_2,$t_1 | ||
1823 | sltu $at,$c_2,$t_1 | ||
1824 | $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); | ||
1825 | $ADDU $t_2,$at | ||
1826 | $ADDU $c_3,$t_2 | ||
1827 | sltu $at,$c_3,$t_2 | ||
1828 | $ADDU $c_1,$at | ||
1829 | $ST $c_2,4*$BNSZ($a0) | ||
1830 | |||
1831 | mflo $t_1 | ||
1832 | mfhi $t_2 | ||
1833 | $ADDU $c_3,$t_1 | ||
1834 | sltu $at,$c_3,$t_1 | ||
1835 | $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); | ||
1836 | $ADDU $t_2,$at | ||
1837 | $ADDU $c_1,$t_2 | ||
1838 | sltu $c_2,$c_1,$t_2 | ||
1839 | mflo $t_1 | ||
1840 | mfhi $t_2 | ||
1841 | $ADDU $c_3,$t_1 | ||
1842 | sltu $at,$c_3,$t_1 | ||
1843 | $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
1844 | $ADDU $t_2,$at | ||
1845 | $ADDU $c_1,$t_2 | ||
1846 | sltu $at,$c_1,$t_2 | ||
1847 | $ADDU $c_2,$at | ||
1848 | $ST $c_3,5*$BNSZ($a0) | ||
1849 | |||
1850 | mflo $t_1 | ||
1851 | mfhi $t_2 | ||
1852 | $ADDU $c_1,$t_1 | ||
1853 | sltu $at,$c_1,$t_1 | ||
1854 | $ADDU $t_2,$at | ||
1855 | $ADDU $c_2,$t_2 | ||
1856 | $ST $c_1,6*$BNSZ($a0) | ||
1857 | $ST $c_2,7*$BNSZ($a0) | ||
1858 | |||
1859 | .set noreorder | ||
1860 | ___ | ||
# "nubi" flavour epilogue: reload $t3..$t0 and $gp from the slots written by
# the prologue and pop the 6-slot frame.  $ra is not reloaded from slot 5.
1861 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
1862 | $REG_L $t3,4*$SZREG($sp) | ||
1863 | $REG_L $t2,3*$SZREG($sp) | ||
1864 | $REG_L $t1,2*$SZREG($sp) | ||
1865 | $REG_L $t0,1*$SZREG($sp) | ||
1866 | $REG_L $gp,0*$SZREG($sp) | ||
1867 | $PTR_ADD $sp,6*$SZREG | ||
1868 | ___ | ||
# Common return for every flavour.  The body ends with ".set noreorder", so
# the explicit nop fills the delay slot of "jr $ra".
1869 | $code.=<<___; | ||
1870 | jr $ra | ||
1871 | nop | ||
1872 | .end bn_mul_comba4 | ||
1873 | ___ | ||
1874 | |||
# Rebind the a[4..7] aliases onto the registers previously named b[0..3].
# The bn_sqr_comba8 code that follows loads all of a[0..7] through $a1 and
# multiplies a[] words by each other, so it has no separate b[] operand to
# keep in those registers.
# NOTE(review): this presumably is what lets bn_sqr_comba8 avoid $s0..$s5
# (its visible nubi prologue saves only $ra, $t0..$t3 and $gp) -- confirm
# against the full routine.
1875 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | ||
1876 | |||
1877 | $code.=<<___; | ||
1878 | |||
1879 | .align 5 | ||
1880 | .globl bn_sqr_comba8 | ||
1881 | .ent bn_sqr_comba8 | ||
1882 | bn_sqr_comba8: | ||
1883 | ___ | ||
1884 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
1885 | .frame $sp,6*$SZREG,$ra | ||
1886 | .mask 0x8000f008,-$SZREG | ||
1887 | .set noreorder | ||
1888 | $PTR_SUB $sp,6*$SZREG | ||
1889 | $REG_S $ra,5*$SZREG($sp) | ||
1890 | $REG_S $t3,4*$SZREG($sp) | ||
1891 | $REG_S $t2,3*$SZREG($sp) | ||
1892 | $REG_S $t1,2*$SZREG($sp) | ||
1893 | $REG_S $t0,1*$SZREG($sp) | ||
1894 | $REG_S $gp,0*$SZREG($sp) | ||
1895 | ___ | ||
1896 | $code.=<<___; | ||
1897 | .set reorder | ||
1898 | $LD $a_0,0($a1) | ||
1899 | $LD $a_1,$BNSZ($a1) | ||
1900 | $LD $a_2,2*$BNSZ($a1) | ||
1901 | $LD $a_3,3*$BNSZ($a1) | ||
1902 | |||
1903 | $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); | ||
1904 | $LD $a_4,4*$BNSZ($a1) | ||
1905 | $LD $a_5,5*$BNSZ($a1) | ||
1906 | $LD $a_6,6*$BNSZ($a1) | ||
1907 | $LD $a_7,7*$BNSZ($a1) | ||
1908 | mflo $c_1 | ||
1909 | mfhi $c_2 | ||
1910 | $ST $c_1,0($a0) | ||
1911 | |||
1912 | $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); | ||
1913 | mflo $t_1 | ||
1914 | mfhi $t_2 | ||
1915 | slt $c_1,$t_2,$zero | ||
1916 | $SLL $t_2,1 | ||
1917 | $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); | ||
1918 | slt $a2,$t_1,$zero | ||
1919 | $ADDU $t_2,$a2 | ||
1920 | $SLL $t_1,1 | ||
1921 | $ADDU $c_2,$t_1 | ||
1922 | sltu $at,$c_2,$t_1 | ||
1923 | $ADDU $c_3,$t_2,$at | ||
1924 | $ST $c_2,$BNSZ($a0) | ||
1925 | |||
1926 | mflo $t_1 | ||
1927 | mfhi $t_2 | ||
1928 | slt $c_2,$t_2,$zero | ||
1929 | $SLL $t_2,1 | ||
1930 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
1931 | slt $a2,$t_1,$zero | ||
1932 | $ADDU $t_2,$a2 | ||
1933 | $SLL $t_1,1 | ||
1934 | $ADDU $c_3,$t_1 | ||
1935 | sltu $at,$c_3,$t_1 | ||
1936 | $ADDU $t_2,$at | ||
1937 | $ADDU $c_1,$t_2 | ||
1938 | sltu $at,$c_1,$t_2 | ||
1939 | $ADDU $c_2,$at | ||
1940 | mflo $t_1 | ||
1941 | mfhi $t_2 | ||
1942 | $ADDU $c_3,$t_1 | ||
1943 | sltu $at,$c_3,$t_1 | ||
1944 | $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); | ||
1945 | $ADDU $t_2,$at | ||
1946 | $ADDU $c_1,$t_2 | ||
1947 | sltu $at,$c_1,$t_2 | ||
1948 | $ADDU $c_2,$at | ||
1949 | $ST $c_3,2*$BNSZ($a0) | ||
1950 | |||
1951 | mflo $t_1 | ||
1952 | mfhi $t_2 | ||
1953 | slt $c_3,$t_2,$zero | ||
1954 | $SLL $t_2,1 | ||
1955 | $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | ||
1956 | slt $a2,$t_1,$zero | ||
1957 | $ADDU $t_2,$a2 | ||
1958 | $SLL $t_1,1 | ||
1959 | $ADDU $c_1,$t_1 | ||
1960 | sltu $at,$c_1,$t_1 | ||
1961 | $ADDU $t_2,$at | ||
1962 | $ADDU $c_2,$t_2 | ||
1963 | sltu $at,$c_2,$t_2 | ||
1964 | $ADDU $c_3,$at | ||
1965 | mflo $t_1 | ||
1966 | mfhi $t_2 | ||
1967 | slt $at,$t_2,$zero | ||
1968 | $ADDU $c_3,$at | ||
1969 | $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
1970 | $SLL $t_2,1 | ||
1971 | slt $a2,$t_1,$zero | ||
1972 | $ADDU $t_2,$a2 | ||
1973 | $SLL $t_1,1 | ||
1974 | $ADDU $c_1,$t_1 | ||
1975 | sltu $at,$c_1,$t_1 | ||
1976 | $ADDU $t_2,$at | ||
1977 | $ADDU $c_2,$t_2 | ||
1978 | sltu $at,$c_2,$t_2 | ||
1979 | $ADDU $c_3,$at | ||
1980 | $ST $c_1,3*$BNSZ($a0) | ||
1981 | |||
1982 | mflo $t_1 | ||
1983 | mfhi $t_2 | ||
1984 | slt $c_1,$t_2,$zero | ||
1985 | $SLL $t_2,1 | ||
1986 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
1987 | slt $a2,$t_1,$zero | ||
1988 | $ADDU $t_2,$a2 | ||
1989 | $SLL $t_1,1 | ||
1990 | $ADDU $c_2,$t_1 | ||
1991 | sltu $at,$c_2,$t_1 | ||
1992 | $ADDU $t_2,$at | ||
1993 | $ADDU $c_3,$t_2 | ||
1994 | sltu $at,$c_3,$t_2 | ||
1995 | $ADDU $c_1,$at | ||
1996 | mflo $t_1 | ||
1997 | mfhi $t_2 | ||
1998 | slt $at,$t_2,$zero | ||
1999 | $ADDU $c_1,$at | ||
2000 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
2001 | $SLL $t_2,1 | ||
2002 | slt $a2,$t_1,$zero | ||
2003 | $ADDU $t_2,$a2 | ||
2004 | $SLL $t_1,1 | ||
2005 | $ADDU $c_2,$t_1 | ||
2006 | sltu $at,$c_2,$t_1 | ||
2007 | $ADDU $t_2,$at | ||
2008 | $ADDU $c_3,$t_2 | ||
2009 | sltu $at,$c_3,$t_2 | ||
2010 | $ADDU $c_1,$at | ||
2011 | mflo $t_1 | ||
2012 | mfhi $t_2 | ||
2013 | $ADDU $c_2,$t_1 | ||
2014 | sltu $at,$c_2,$t_1 | ||
2015 | $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); | ||
2016 | $ADDU $t_2,$at | ||
2017 | $ADDU $c_3,$t_2 | ||
2018 | sltu $at,$c_3,$t_2 | ||
2019 | $ADDU $c_1,$at | ||
2020 | $ST $c_2,4*$BNSZ($a0) | ||
2021 | |||
2022 | mflo $t_1 | ||
2023 | mfhi $t_2 | ||
2024 | slt $c_2,$t_2,$zero | ||
2025 | $SLL $t_2,1 | ||
2026 | $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | ||
2027 | slt $a2,$t_1,$zero | ||
2028 | $ADDU $t_2,$a2 | ||
2029 | $SLL $t_1,1 | ||
2030 | $ADDU $c_3,$t_1 | ||
2031 | sltu $at,$c_3,$t_1 | ||
2032 | $ADDU $t_2,$at | ||
2033 | $ADDU $c_1,$t_2 | ||
2034 | sltu $at,$c_1,$t_2 | ||
2035 | $ADDU $c_2,$at | ||
2036 | mflo $t_1 | ||
2037 | mfhi $t_2 | ||
2038 | slt $at,$t_2,$zero | ||
2039 | $ADDU $c_2,$at | ||
2040 | $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
2041 | $SLL $t_2,1 | ||
2042 | slt $a2,$t_1,$zero | ||
2043 | $ADDU $t_2,$a2 | ||
2044 | $SLL $t_1,1 | ||
2045 | $ADDU $c_3,$t_1 | ||
2046 | sltu $at,$c_3,$t_1 | ||
2047 | $ADDU $t_2,$at | ||
2048 | $ADDU $c_1,$t_2 | ||
2049 | sltu $at,$c_1,$t_2 | ||
2050 | $ADDU $c_2,$at | ||
2051 | mflo $t_1 | ||
2052 | mfhi $t_2 | ||
2053 | slt $at,$t_2,$zero | ||
2054 | $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
2055 | $ADDU $c_2,$at | ||
2056 | $SLL $t_2,1 | ||
2057 | slt $a2,$t_1,$zero | ||
2058 | $ADDU $t_2,$a2 | ||
2059 | $SLL $t_1,1 | ||
2060 | $ADDU $c_3,$t_1 | ||
2061 | sltu $at,$c_3,$t_1 | ||
2062 | $ADDU $t_2,$at | ||
2063 | $ADDU $c_1,$t_2 | ||
2064 | sltu $at,$c_1,$t_2 | ||
2065 | $ADDU $c_2,$at | ||
2066 | $ST $c_3,5*$BNSZ($a0) | ||
2067 | |||
2068 | mflo $t_1 | ||
2069 | mfhi $t_2 | ||
2070 | slt $c_3,$t_2,$zero | ||
2071 | $SLL $t_2,1 | ||
2072 | $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | ||
2073 | slt $a2,$t_1,$zero | ||
2074 | $ADDU $t_2,$a2 | ||
2075 | $SLL $t_1,1 | ||
2076 | $ADDU $c_1,$t_1 | ||
2077 | sltu $at,$c_1,$t_1 | ||
2078 | $ADDU $t_2,$at | ||
2079 | $ADDU $c_2,$t_2 | ||
2080 | sltu $at,$c_2,$t_2 | ||
2081 | $ADDU $c_3,$at | ||
2082 | mflo $t_1 | ||
2083 | mfhi $t_2 | ||
2084 | slt $at,$t_2,$zero | ||
2085 | $ADDU $c_3,$at | ||
2086 | $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
2087 | $SLL $t_2,1 | ||
2088 | slt $a2,$t_1,$zero | ||
2089 | $ADDU $t_2,$a2 | ||
2090 | $SLL $t_1,1 | ||
2091 | $ADDU $c_1,$t_1 | ||
2092 | sltu $at,$c_1,$t_1 | ||
2093 | $ADDU $t_2,$at | ||
2094 | $ADDU $c_2,$t_2 | ||
2095 | sltu $at,$c_2,$t_2 | ||
2096 | $ADDU $c_3,$at | ||
2097 | mflo $t_1 | ||
2098 | mfhi $t_2 | ||
2099 | slt $at,$t_2,$zero | ||
2100 | $ADDU $c_3,$at | ||
2101 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
2102 | $SLL $t_2,1 | ||
2103 | slt $a2,$t_1,$zero | ||
2104 | $ADDU $t_2,$a2 | ||
2105 | $SLL $t_1,1 | ||
2106 | $ADDU $c_1,$t_1 | ||
2107 | sltu $at,$c_1,$t_1 | ||
2108 | $ADDU $t_2,$at | ||
2109 | $ADDU $c_2,$t_2 | ||
2110 | sltu $at,$c_2,$t_2 | ||
2111 | $ADDU $c_3,$at | ||
2112 | mflo $t_1 | ||
2113 | mfhi $t_2 | ||
2114 | $ADDU $c_1,$t_1 | ||
2115 | sltu $at,$c_1,$t_1 | ||
2116 | $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); | ||
2117 | $ADDU $t_2,$at | ||
2118 | $ADDU $c_2,$t_2 | ||
2119 | sltu $at,$c_2,$t_2 | ||
2120 | $ADDU $c_3,$at | ||
2121 | $ST $c_1,6*$BNSZ($a0) | ||
2122 | |||
2123 | mflo $t_1 | ||
2124 | mfhi $t_2 | ||
2125 | slt $c_1,$t_2,$zero | ||
2126 | $SLL $t_2,1 | ||
2127 | $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | ||
2128 | slt $a2,$t_1,$zero | ||
2129 | $ADDU $t_2,$a2 | ||
2130 | $SLL $t_1,1 | ||
2131 | $ADDU $c_2,$t_1 | ||
2132 | sltu $at,$c_2,$t_1 | ||
2133 | $ADDU $t_2,$at | ||
2134 | $ADDU $c_3,$t_2 | ||
2135 | sltu $at,$c_3,$t_2 | ||
2136 | $ADDU $c_1,$at | ||
2137 | mflo $t_1 | ||
2138 | mfhi $t_2 | ||
2139 | slt $at,$t_2,$zero | ||
2140 | $ADDU $c_1,$at | ||
2141 | $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
2142 | $SLL $t_2,1 | ||
2143 | slt $a2,$t_1,$zero | ||
2144 | $ADDU $t_2,$a2 | ||
2145 | $SLL $t_1,1 | ||
2146 | $ADDU $c_2,$t_1 | ||
2147 | sltu $at,$c_2,$t_1 | ||
2148 | $ADDU $t_2,$at | ||
2149 | $ADDU $c_3,$t_2 | ||
2150 | sltu $at,$c_3,$t_2 | ||
2151 | $ADDU $c_1,$at | ||
2152 | mflo $t_1 | ||
2153 | mfhi $t_2 | ||
2154 | slt $at,$t_2,$zero | ||
2155 | $ADDU $c_1,$at | ||
2156 | $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
2157 | $SLL $t_2,1 | ||
2158 | slt $a2,$t_1,$zero | ||
2159 | $ADDU $t_2,$a2 | ||
2160 | $SLL $t_1,1 | ||
2161 | $ADDU $c_2,$t_1 | ||
2162 | sltu $at,$c_2,$t_1 | ||
2163 | $ADDU $t_2,$at | ||
2164 | $ADDU $c_3,$t_2 | ||
2165 | sltu $at,$c_3,$t_2 | ||
2166 | $ADDU $c_1,$at | ||
2167 | mflo $t_1 | ||
2168 | mfhi $t_2 | ||
2169 | slt $at,$t_2,$zero | ||
2170 | $ADDU $c_1,$at | ||
2171 | $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
2172 | $SLL $t_2,1 | ||
2173 | slt $a2,$t_1,$zero | ||
2174 | $ADDU $t_2,$a2 | ||
2175 | $SLL $t_1,1 | ||
2176 | $ADDU $c_2,$t_1 | ||
2177 | sltu $at,$c_2,$t_1 | ||
2178 | $ADDU $t_2,$at | ||
2179 | $ADDU $c_3,$t_2 | ||
2180 | sltu $at,$c_3,$t_2 | ||
2181 | $ADDU $c_1,$at | ||
2182 | $ST $c_2,7*$BNSZ($a0) | ||
2183 | |||
2184 | mflo $t_1 | ||
2185 | mfhi $t_2 | ||
2186 | slt $c_2,$t_2,$zero | ||
2187 | $SLL $t_2,1 | ||
2188 | $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | ||
2189 | slt $a2,$t_1,$zero | ||
2190 | $ADDU $t_2,$a2 | ||
2191 | $SLL $t_1,1 | ||
2192 | $ADDU $c_3,$t_1 | ||
2193 | sltu $at,$c_3,$t_1 | ||
2194 | $ADDU $t_2,$at | ||
2195 | $ADDU $c_1,$t_2 | ||
2196 | sltu $at,$c_1,$t_2 | ||
2197 | $ADDU $c_2,$at | ||
2198 | mflo $t_1 | ||
2199 | mfhi $t_2 | ||
2200 | slt $at,$t_2,$zero | ||
2201 | $ADDU $c_2,$at | ||
2202 | $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
2203 | $SLL $t_2,1 | ||
2204 | slt $a2,$t_1,$zero | ||
2205 | $ADDU $t_2,$a2 | ||
2206 | $SLL $t_1,1 | ||
2207 | $ADDU $c_3,$t_1 | ||
2208 | sltu $at,$c_3,$t_1 | ||
2209 | $ADDU $t_2,$at | ||
2210 | $ADDU $c_1,$t_2 | ||
2211 | sltu $at,$c_1,$t_2 | ||
2212 | $ADDU $c_2,$at | ||
2213 | mflo $t_1 | ||
2214 | mfhi $t_2 | ||
2215 | slt $at,$t_2,$zero | ||
2216 | $ADDU $c_2,$at | ||
2217 | $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
2218 | $SLL $t_2,1 | ||
2219 | slt $a2,$t_1,$zero | ||
2220 | $ADDU $t_2,$a2 | ||
2221 | $SLL $t_1,1 | ||
2222 | $ADDU $c_3,$t_1 | ||
2223 | sltu $at,$c_3,$t_1 | ||
2224 | $ADDU $t_2,$at | ||
2225 | $ADDU $c_1,$t_2 | ||
2226 | sltu $at,$c_1,$t_2 | ||
2227 | $ADDU $c_2,$at | ||
2228 | mflo $t_1 | ||
2229 | mfhi $t_2 | ||
2230 | $ADDU $c_3,$t_1 | ||
2231 | sltu $at,$c_3,$t_1 | ||
2232 | $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); | ||
2233 | $ADDU $t_2,$at | ||
2234 | $ADDU $c_1,$t_2 | ||
2235 | sltu $at,$c_1,$t_2 | ||
2236 | $ADDU $c_2,$at | ||
2237 | $ST $c_3,8*$BNSZ($a0) | ||
2238 | |||
2239 | mflo $t_1 | ||
2240 | mfhi $t_2 | ||
2241 | slt $c_3,$t_2,$zero | ||
2242 | $SLL $t_2,1 | ||
2243 | $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | ||
2244 | slt $a2,$t_1,$zero | ||
2245 | $ADDU $t_2,$a2 | ||
2246 | $SLL $t_1,1 | ||
2247 | $ADDU $c_1,$t_1 | ||
2248 | sltu $at,$c_1,$t_1 | ||
2249 | $ADDU $t_2,$at | ||
2250 | $ADDU $c_2,$t_2 | ||
2251 | sltu $at,$c_2,$t_2 | ||
2252 | $ADDU $c_3,$at | ||
2253 | mflo $t_1 | ||
2254 | mfhi $t_2 | ||
2255 | slt $at,$t_2,$zero | ||
2256 | $ADDU $c_3,$at | ||
2257 | $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
2258 | $SLL $t_2,1 | ||
2259 | slt $a2,$t_1,$zero | ||
2260 | $ADDU $t_2,$a2 | ||
2261 | $SLL $t_1,1 | ||
2262 | $ADDU $c_1,$t_1 | ||
2263 | sltu $at,$c_1,$t_1 | ||
2264 | $ADDU $t_2,$at | ||
2265 | $ADDU $c_2,$t_2 | ||
2266 | sltu $at,$c_2,$t_2 | ||
2267 | $ADDU $c_3,$at | ||
2268 | mflo $t_1 | ||
2269 | mfhi $t_2 | ||
2270 | slt $at,$t_2,$zero | ||
2271 | $ADDU $c_3,$at | ||
2272 | $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
2273 | $SLL $t_2,1 | ||
2274 | slt $a2,$t_1,$zero | ||
2275 | $ADDU $t_2,$a2 | ||
2276 | $SLL $t_1,1 | ||
2277 | $ADDU $c_1,$t_1 | ||
2278 | sltu $at,$c_1,$t_1 | ||
2279 | $ADDU $t_2,$at | ||
2280 | $ADDU $c_2,$t_2 | ||
2281 | sltu $at,$c_2,$t_2 | ||
2282 | $ADDU $c_3,$at | ||
2283 | $ST $c_1,9*$BNSZ($a0) | ||
2284 | |||
2285 | mflo $t_1 | ||
2286 | mfhi $t_2 | ||
2287 | slt $c_1,$t_2,$zero | ||
2288 | $SLL $t_2,1 | ||
2289 | $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | ||
2290 | slt $a2,$t_1,$zero | ||
2291 | $ADDU $t_2,$a2 | ||
2292 | $SLL $t_1,1 | ||
2293 | $ADDU $c_2,$t_1 | ||
2294 | sltu $at,$c_2,$t_1 | ||
2295 | $ADDU $t_2,$at | ||
2296 | $ADDU $c_3,$t_2 | ||
2297 | sltu $at,$c_3,$t_2 | ||
2298 | $ADDU $c_1,$at | ||
2299 | mflo $t_1 | ||
2300 | mfhi $t_2 | ||
2301 | slt $at,$t_2,$zero | ||
2302 | $ADDU $c_1,$at | ||
2303 | $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
2304 | $SLL $t_2,1 | ||
2305 | slt $a2,$t_1,$zero | ||
2306 | $ADDU $t_2,$a2 | ||
2307 | $SLL $t_1,1 | ||
2308 | $ADDU $c_2,$t_1 | ||
2309 | sltu $at,$c_2,$t_1 | ||
2310 | $ADDU $t_2,$at | ||
2311 | $ADDU $c_3,$t_2 | ||
2312 | sltu $at,$c_3,$t_2 | ||
2313 | $ADDU $c_1,$at | ||
2314 | mflo $t_1 | ||
2315 | mfhi $t_2 | ||
2316 | $ADDU $c_2,$t_1 | ||
2317 | sltu $at,$c_2,$t_1 | ||
2318 | $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); | ||
2319 | $ADDU $t_2,$at | ||
2320 | $ADDU $c_3,$t_2 | ||
2321 | sltu $at,$c_3,$t_2 | ||
2322 | $ADDU $c_1,$at | ||
2323 | $ST $c_2,10*$BNSZ($a0) | ||
2324 | |||
2325 | mflo $t_1 | ||
2326 | mfhi $t_2 | ||
2327 | slt $c_2,$t_2,$zero | ||
2328 | $SLL $t_2,1 | ||
2329 | $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | ||
2330 | slt $a2,$t_1,$zero | ||
2331 | $ADDU $t_2,$a2 | ||
2332 | $SLL $t_1,1 | ||
2333 | $ADDU $c_3,$t_1 | ||
2334 | sltu $at,$c_3,$t_1 | ||
2335 | $ADDU $t_2,$at | ||
2336 | $ADDU $c_1,$t_2 | ||
2337 | sltu $at,$c_1,$t_2 | ||
2338 | $ADDU $c_2,$at | ||
2339 | mflo $t_1 | ||
2340 | mfhi $t_2 | ||
2341 | slt $at,$t_2,$zero | ||
2342 | $ADDU $c_2,$at | ||
2343 | $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
2344 | $SLL $t_2,1 | ||
2345 | slt $a2,$t_1,$zero | ||
2346 | $ADDU $t_2,$a2 | ||
2347 | $SLL $t_1,1 | ||
2348 | $ADDU $c_3,$t_1 | ||
2349 | sltu $at,$c_3,$t_1 | ||
2350 | $ADDU $t_2,$at | ||
2351 | $ADDU $c_1,$t_2 | ||
2352 | sltu $at,$c_1,$t_2 | ||
2353 | $ADDU $c_2,$at | ||
2354 | $ST $c_3,11*$BNSZ($a0) | ||
2355 | |||
2356 | mflo $t_1 | ||
2357 | mfhi $t_2 | ||
2358 | slt $c_3,$t_2,$zero | ||
2359 | $SLL $t_2,1 | ||
2360 | $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
2361 | slt $a2,$t_1,$zero | ||
2362 | $ADDU $t_2,$a2 | ||
2363 | $SLL $t_1,1 | ||
2364 | $ADDU $c_1,$t_1 | ||
2365 | sltu $at,$c_1,$t_1 | ||
2366 | $ADDU $t_2,$at | ||
2367 | $ADDU $c_2,$t_2 | ||
2368 | sltu $at,$c_2,$t_2 | ||
2369 | $ADDU $c_3,$at | ||
2370 | mflo $t_1 | ||
2371 | mfhi $t_2 | ||
2372 | $ADDU $c_1,$t_1 | ||
2373 | sltu $at,$c_1,$t_1 | ||
2374 | $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); | ||
2375 | $ADDU $t_2,$at | ||
2376 | $ADDU $c_2,$t_2 | ||
2377 | sltu $at,$c_2,$t_2 | ||
2378 | $ADDU $c_3,$at | ||
2379 | $ST $c_1,12*$BNSZ($a0) | ||
2380 | |||
2381 | mflo $t_1 | ||
2382 | mfhi $t_2 | ||
2383 | slt $c_1,$t_2,$zero | ||
2384 | $SLL $t_2,1 | ||
2385 | $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
2386 | slt $a2,$t_1,$zero | ||
2387 | $ADDU $t_2,$a2 | ||
2388 | $SLL $t_1,1 | ||
2389 | $ADDU $c_2,$t_1 | ||
2390 | sltu $at,$c_2,$t_1 | ||
2391 | $ADDU $t_2,$at | ||
2392 | $ADDU $c_3,$t_2 | ||
2393 | sltu $at,$c_3,$t_2 | ||
2394 | $ADDU $c_1,$at | ||
2395 | $ST $c_2,13*$BNSZ($a0) | ||
2396 | |||
2397 | mflo $t_1 | ||
2398 | mfhi $t_2 | ||
2399 | $ADDU $c_3,$t_1 | ||
2400 | sltu $at,$c_3,$t_1 | ||
2401 | $ADDU $t_2,$at | ||
2402 | $ADDU $c_1,$t_2 | ||
2403 | $ST $c_3,14*$BNSZ($a0) | ||
2404 | $ST $c_1,15*$BNSZ($a0) | ||
2405 | |||
2406 | .set noreorder | ||
2407 | ___ | ||
2408 | $code.=<<___ if ($flavour =~ /nubi/i); | ||
2409 | $REG_L $t3,4*$SZREG($sp) | ||
2410 | $REG_L $t2,3*$SZREG($sp) | ||
2411 | $REG_L $t1,2*$SZREG($sp) | ||
2412 | $REG_L $t0,1*$SZREG($sp) | ||
2413 | $REG_L $gp,0*$SZREG($sp) | ||
2414 | $PTR_ADD $sp,6*$SZREG | ||
2415 | ___ | ||
2416 | $code.=<<___; | ||
2417 | jr $ra | ||
2418 | nop | ||
2419 | .end bn_sqr_comba8 | ||
2420 | |||
.align	5
.globl	bn_sqr_comba4
.ent	bn_sqr_comba4
bn_sqr_comba4:
___
# bn_sqr_comba4: square a 4-word number.
#
# The four input words are loaded from the pointer in a1 and the eight
# result words are stored through the pointer in a0.  The result is built
# one output column at a time in a rotating three-word accumulator
# (c_1,c_2,c_3); the trailing mul_add_c/mul_add_c2 notes on each MULTU
# name the step whose product that multiplication feeds (the multiply is
# issued early, its result is consumed by the following mflo/mfhi).
#
# Cross products a[i]*a[j] (i != j) are doubled with shifts before being
# accumulated.  After doubling, the high word in t_2 can be all ones, so
# the carry out of the low-word addition must NOT be added into t_2: that
# sum can wrap to zero and silently drop a carry into the top accumulator
# word (the bn_sqr_comba carry bug published as CVE-2014-3570).  Each
# doubled step therefore adds the low-word carry to the middle accumulator
# word directly and records the carry-out of that addition on its own.
# Plain squares a[i]*a[i] keep the short form: the high word of a square
# is at most 2^w-2, so adding one carry bit to it cannot wrap.
$code.=<<___ if ($flavour =~ /nubi/i);
	.frame	$sp,6*$SZREG,$ra
	.mask	0x8000f008,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,6*$SZREG
	$REG_S	$ra,5*$SZREG($sp)
	$REG_S	$t3,4*$SZREG($sp)
	$REG_S	$t2,3*$SZREG($sp)
	$REG_S	$t1,2*$SZREG($sp)
	$REG_S	$t0,1*$SZREG($sp)
	$REG_S	$gp,0*$SZREG($sp)
___
$code.=<<___;
	.set	reorder
	$LD	$a_0,0($a1)
	$LD	$a_1,$BNSZ($a1)
	$MULTU	$a_0,$a_0		# mul_add_c(a[0],b[0],c1,c2,c3);
	$LD	$a_2,2*$BNSZ($a1)
	$LD	$a_3,3*$BNSZ($a1)
	mflo	$c_1
	mfhi	$c_2
	$ST	$c_1,0($a0)

	$MULTU	$a_0,$a_1		# mul_add_c2(a[0],b[1],c2,c3,c1);
	mflo	$t_1
	mfhi	$t_2
	slt	$c_1,$t_2,$zero		# bit shifted out of the high word
	$SLL	$t_2,1
	$MULTU	$a_2,$a_0		# mul_add_c2(a[2],b[0],c3,c1,c2);
	slt	$a2,$t_1,$zero		# bit moving from low into high word
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$ADDU	$c_3,$t_2,$at
	sltu	$at,$c_3,$t_2		# doubled high word plus carry may wrap
	$ADDU	$c_1,$at
	$ST	$c_2,$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_2,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_1,$a_1		# mul_add_c(a[1],b[1],c3,c1,c2);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_3,$t_1
	sltu	$at,$c_3,$t_1
	$ADDU	$c_1,$at		# low-word carry goes to the accumulator
	sltu	$at,$c_1,$at		# ... and its own carry-out is kept
	$ADDU	$c_2,$at
	$ADDU	$c_1,$t_2
	sltu	$at,$c_1,$t_2
	$ADDU	$c_2,$at
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_3,$t_1
	sltu	$at,$c_3,$t_1
	$MULTU	$a_0,$a_3		# mul_add_c2(a[0],b[3],c1,c2,c3);
	$ADDU	$t_2,$at		# safe: high word of a square cannot wrap
	$ADDU	$c_1,$t_2
	sltu	$at,$c_1,$t_2
	$ADDU	$c_2,$at
	$ST	$c_3,2*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_3,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_1,$a_2		# mul_add_c2(a[1],b[2],c1,c2,c3);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_1,$t_1
	sltu	$at,$c_1,$t_1
	$ADDU	$c_2,$at		# low-word carry goes to the accumulator
	sltu	$at,$c_2,$at		# ... and its own carry-out is kept
	$ADDU	$c_3,$at
	$ADDU	$c_2,$t_2
	sltu	$at,$c_2,$t_2
	$ADDU	$c_3,$at
	mflo	$t_1
	mfhi	$t_2
	slt	$at,$t_2,$zero
	$ADDU	$c_3,$at
	$MULTU	$a_3,$a_1		# mul_add_c2(a[3],b[1],c2,c3,c1);
	$SLL	$t_2,1
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_1,$t_1
	sltu	$at,$c_1,$t_1
	$ADDU	$c_2,$at		# low-word carry goes to the accumulator
	sltu	$at,$c_2,$at		# ... and its own carry-out is kept
	$ADDU	$c_3,$at
	$ADDU	$c_2,$t_2
	sltu	$at,$c_2,$t_2
	$ADDU	$c_3,$at
	$ST	$c_1,3*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_1,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_2,$a_2		# mul_add_c(a[2],b[2],c2,c3,c1);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$ADDU	$c_3,$at		# low-word carry goes to the accumulator
	sltu	$at,$c_3,$at		# ... and its own carry-out is kept
	$ADDU	$c_1,$at
	$ADDU	$c_3,$t_2
	sltu	$at,$c_3,$t_2
	$ADDU	$c_1,$at
	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_2,$t_1
	sltu	$at,$c_2,$t_1
	$MULTU	$a_2,$a_3		# mul_add_c2(a[2],b[3],c3,c1,c2);
	$ADDU	$t_2,$at		# safe: high word of a square cannot wrap
	$ADDU	$c_3,$t_2
	sltu	$at,$c_3,$t_2
	$ADDU	$c_1,$at
	$ST	$c_2,4*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	slt	$c_2,$t_2,$zero
	$SLL	$t_2,1
	$MULTU	$a_3,$a_3		# mul_add_c(a[3],b[3],c1,c2,c3);
	slt	$a2,$t_1,$zero
	$ADDU	$t_2,$a2
	$SLL	$t_1,1
	$ADDU	$c_3,$t_1
	sltu	$at,$c_3,$t_1
	$ADDU	$c_1,$at		# low-word carry goes to the accumulator
	sltu	$at,$c_1,$at		# ... and its own carry-out is kept
	$ADDU	$c_2,$at
	$ADDU	$c_1,$t_2
	sltu	$at,$c_1,$t_2
	$ADDU	$c_2,$at
	$ST	$c_3,5*$BNSZ($a0)

	mflo	$t_1
	mfhi	$t_2
	$ADDU	$c_1,$t_1
	sltu	$at,$c_1,$t_1
	$ADDU	$t_2,$at		# safe: high word of a square cannot wrap
	$ADDU	$c_2,$t_2
	$ST	$c_1,6*$BNSZ($a0)
	$ST	$c_2,7*$BNSZ($a0)

	.set	noreorder
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$t3,4*$SZREG($sp)
	$REG_L	$t2,3*$SZREG($sp)
	$REG_L	$t1,2*$SZREG($sp)
	$REG_L	$t0,1*$SZREG($sp)
	$REG_L	$gp,0*$SZREG($sp)
	$PTR_ADD $sp,6*$SZREG
___
$code.=<<___;
	jr	$ra
	nop
.end	bn_sqr_comba4
2583 | ___ | ||
# Emit the accumulated assembly on STDOUT.  Output is buffered, so write
# failures (full disk, closed pipe) may only be reported when the handle is
# closed; abort in that case rather than leave a silently truncated file
# for the assembler to consume.
print $code;
close STDOUT or die "error closing STDOUT: $!";