summary refs log tree commit diff
path: root/src/lib/libcrypto/bn/asm/ppc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm/ppc.pl')
-rw-r--r-- src/lib/libcrypto/bn/asm/ppc.pl 233
1 files changed, 68 insertions, 165 deletions
diff --git a/src/lib/libcrypto/bn/asm/ppc.pl b/src/lib/libcrypto/bn/asm/ppc.pl
index 08e0053473..37c65d3511 100644
--- a/src/lib/libcrypto/bn/asm/ppc.pl
+++ b/src/lib/libcrypto/bn/asm/ppc.pl
@@ -100,9 +100,9 @@
100# me a note at schari@us.ibm.com 100# me a note at schari@us.ibm.com
101# 101#
102 102
103$opf = shift; 103$flavour = shift;
104 104
105if ($opf =~ /32\.s/) { 105if ($flavour =~ /32/) {
106 $BITS= 32; 106 $BITS= 32;
107 $BNSZ= $BITS/8; 107 $BNSZ= $BITS/8;
108 $ISA= "\"ppc\""; 108 $ISA= "\"ppc\"";
@@ -125,7 +125,7 @@ if ($opf =~ /32\.s/) {
125 $INSR= "insrwi"; # insert right 125 $INSR= "insrwi"; # insert right
126 $ROTL= "rotlwi"; # rotate left by immediate 126 $ROTL= "rotlwi"; # rotate left by immediate
127 $TR= "tw"; # conditional trap 127 $TR= "tw"; # conditional trap
128} elsif ($opf =~ /64\.s/) { 128} elsif ($flavour =~ /64/) {
129 $BITS= 64; 129 $BITS= 64;
130 $BNSZ= $BITS/8; 130 $BNSZ= $BITS/8;
131 $ISA= "\"ppc64\""; 131 $ISA= "\"ppc64\"";
@@ -149,93 +149,16 @@ if ($opf =~ /32\.s/) {
149 $INSR= "insrdi"; # insert right 149 $INSR= "insrdi"; # insert right
150 $ROTL= "rotldi"; # rotate left by immediate 150 $ROTL= "rotldi"; # rotate left by immediate
151 $TR= "td"; # conditional trap 151 $TR= "td"; # conditional trap
152} else { die "nonsense $opf"; } 152} else { die "nonsense $flavour"; }
153 153
154( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!"; 154$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
155( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
156( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
157die "can't locate ppc-xlate.pl";
155 158
156# function entry points from the AIX code 159open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
157#
158# There are other, more elegant, ways to handle this. We (IBM) chose
159# this approach as it plays well with scripts we run to 'namespace'
160# OpenSSL, i.e. we add a prefix to all the public symbols so we can
161# co-exist in the same process with other implementations of OpenSSL.
162# 'cleverer' ways of doing these substitutions tend to hide data we
163# need to be obvious.
164#
165my @items = ("bn_sqr_comba4",
166 "bn_sqr_comba8",
167 "bn_mul_comba4",
168 "bn_mul_comba8",
169 "bn_sub_words",
170 "bn_add_words",
171 "bn_div_words",
172 "bn_sqr_words",
173 "bn_mul_words",
174 "bn_mul_add_words");
175 160
176if ($opf =~ /linux/) { do_linux(); } 161$data=<<EOF;
177elsif ($opf =~ /aix/) { do_aix(); }
178elsif ($opf =~ /osx/) { do_osx(); }
179else { do_bsd(); }
180
181sub do_linux {
182 $d=&data();
183
184 if ($BITS==64) {
185 foreach $t (@items) {
186 $d =~ s/\.$t:/\
187\t.section\t".opd","aw"\
188\t.align\t3\
189\t.globl\t$t\
190$t:\
191\t.quad\t.$t,.TOC.\@tocbase,0\
192\t.size\t$t,24\
193\t.previous\n\
194\t.type\t.$t,\@function\
195\t.globl\t.$t\
196.$t:/g;
197 }
198 }
199 else {
200 foreach $t (@items) {
201 $d=~s/\.$t/$t/g;
202 }
203 }
204 # hide internal labels to avoid pollution of name table...
205 $d=~s/Lppcasm_/.Lppcasm_/gm;
206 print $d;
207}
208
209sub do_aix {
210 # AIX assembler is smart enough to please the linker without
211 # making us do something special...
212 print &data();
213}
214
215# MacOSX 32 bit
216sub do_osx {
217 $d=&data();
218 # Change the bn symbol prefix from '.' to '_'
219 foreach $t (@items) {
220 $d=~s/\.$t/_$t/g;
221 }
222 # Change .machine to something OS X asm will accept
223 $d=~s/\.machine.*/.text/g;
224 $d=~s/\#/;/g; # change comment from '#' to ';'
225 print $d;
226}
227
228# BSD (Untested)
229sub do_bsd {
230 $d=&data();
231 foreach $t (@items) {
232 $d=~s/\.$t/_$t/g;
233 }
234 print $d;
235}
236
237sub data {
238 local($data)=<<EOF;
239#-------------------------------------------------------------------- 162#--------------------------------------------------------------------
240# 163#
241# 164#
@@ -297,33 +220,20 @@ sub data {
297# 220#
298# Defines to be used in the assembly code. 221# Defines to be used in the assembly code.
299# 222#
300.set r0,0 # we use it as storage for value of 0 223#.set r0,0 # we use it as storage for value of 0
301.set SP,1 # preserved 224#.set SP,1 # preserved
302.set RTOC,2 # preserved 225#.set RTOC,2 # preserved
303.set r3,3 # 1st argument/return value 226#.set r3,3 # 1st argument/return value
304.set r4,4 # 2nd argument/volatile register 227#.set r4,4 # 2nd argument/volatile register
305.set r5,5 # 3rd argument/volatile register 228#.set r5,5 # 3rd argument/volatile register
306.set r6,6 # ... 229#.set r6,6 # ...
307.set r7,7 230#.set r7,7
308.set r8,8 231#.set r8,8
309.set r9,9 232#.set r9,9
310.set r10,10 233#.set r10,10
311.set r11,11 234#.set r11,11
312.set r12,12 235#.set r12,12
313.set r13,13 # not used, nor any other "below" it... 236#.set r13,13 # not used, nor any other "below" it...
314
315.set BO_IF_NOT,4
316.set BO_IF,12
317.set BO_dCTR_NZERO,16
318.set BO_dCTR_ZERO,18
319.set BO_ALWAYS,20
320.set CR0_LT,0;
321.set CR0_GT,1;
322.set CR0_EQ,2
323.set CR1_FX,4;
324.set CR1_FEX,5;
325.set CR1_VX,6
326.set LR,8
327 237
328# Declare function names to be global 238# Declare function names to be global
329# NOTE: For gcc these names MUST be changed to remove 239# NOTE: For gcc these names MUST be changed to remove
@@ -344,7 +254,7 @@ sub data {
344 254
345# .text section 255# .text section
346 256
347 .machine $ISA 257 .machine "any"
348 258
349# 259#
350# NOTE: The following label name should be changed to 260# NOTE: The following label name should be changed to
@@ -478,7 +388,7 @@ sub data {
478 388
479 $ST r9,`6*$BNSZ`(r3) #r[6]=c1 389 $ST r9,`6*$BNSZ`(r3) #r[6]=c1
480 $ST r10,`7*$BNSZ`(r3) #r[7]=c2 390 $ST r10,`7*$BNSZ`(r3) #r[7]=c2
481 bclr BO_ALWAYS,CR0_LT 391 blr
482 .long 0x00000000 392 .long 0x00000000
483 393
484# 394#
@@ -903,7 +813,7 @@ sub data {
903 $ST r9, `15*$BNSZ`(r3) #r[15]=c1; 813 $ST r9, `15*$BNSZ`(r3) #r[15]=c1;
904 814
905 815
906 bclr BO_ALWAYS,CR0_LT 816 blr
907 817
908 .long 0x00000000 818 .long 0x00000000
909 819
@@ -1055,7 +965,7 @@ sub data {
1055 965
1056 $ST r10,`6*$BNSZ`(r3) #r[6]=c1 966 $ST r10,`6*$BNSZ`(r3) #r[6]=c1
1057 $ST r11,`7*$BNSZ`(r3) #r[7]=c2 967 $ST r11,`7*$BNSZ`(r3) #r[7]=c2
1058 bclr BO_ALWAYS,CR0_LT 968 blr
1059 .long 0x00000000 969 .long 0x00000000
1060 970
1061# 971#
@@ -1591,7 +1501,7 @@ sub data {
1591 adde r10,r10,r9 1501 adde r10,r10,r9
1592 $ST r12,`14*$BNSZ`(r3) #r[14]=c3; 1502 $ST r12,`14*$BNSZ`(r3) #r[14]=c3;
1593 $ST r10,`15*$BNSZ`(r3) #r[15]=c1; 1503 $ST r10,`15*$BNSZ`(r3) #r[15]=c1;
1594 bclr BO_ALWAYS,CR0_LT 1504 blr
1595 .long 0x00000000 1505 .long 0x00000000
1596 1506
1597# 1507#
@@ -1623,7 +1533,7 @@ sub data {
1623 subfc. r7,r0,r6 # If r6 is 0 then result is 0. 1533 subfc. r7,r0,r6 # If r6 is 0 then result is 0.
1624 # if r6 > 0 then result !=0 1534 # if r6 > 0 then result !=0
1625 # In either case carry bit is set. 1535 # In either case carry bit is set.
1626 bc BO_IF,CR0_EQ,Lppcasm_sub_adios 1536 beq Lppcasm_sub_adios
1627 addi r4,r4,-$BNSZ 1537 addi r4,r4,-$BNSZ
1628 addi r3,r3,-$BNSZ 1538 addi r3,r3,-$BNSZ
1629 addi r5,r5,-$BNSZ 1539 addi r5,r5,-$BNSZ
@@ -1635,11 +1545,11 @@ Lppcasm_sub_mainloop:
1635 # if carry = 1 this is r7-r8. Else it 1545 # if carry = 1 this is r7-r8. Else it
1636 # is r7-r8 -1 as we need. 1546 # is r7-r8 -1 as we need.
1637 $STU r6,$BNSZ(r3) 1547 $STU r6,$BNSZ(r3)
1638 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop 1548 bdnz- Lppcasm_sub_mainloop
1639Lppcasm_sub_adios: 1549Lppcasm_sub_adios:
1640 subfze r3,r0 # if carry bit is set then r3 = 0 else -1 1550 subfze r3,r0 # if carry bit is set then r3 = 0 else -1
1641 andi. r3,r3,1 # keep only last bit. 1551 andi. r3,r3,1 # keep only last bit.
1642 bclr BO_ALWAYS,CR0_LT 1552 blr
1643 .long 0x00000000 1553 .long 0x00000000
1644 1554
1645 1555
@@ -1670,7 +1580,7 @@ Lppcasm_sub_adios:
1670# check for r6 = 0. Is this needed? 1580# check for r6 = 0. Is this needed?
1671# 1581#
1672 addic. r6,r6,0 #test r6 and clear carry bit. 1582 addic. r6,r6,0 #test r6 and clear carry bit.
1673 bc BO_IF,CR0_EQ,Lppcasm_add_adios 1583 beq Lppcasm_add_adios
1674 addi r4,r4,-$BNSZ 1584 addi r4,r4,-$BNSZ
1675 addi r3,r3,-$BNSZ 1585 addi r3,r3,-$BNSZ
1676 addi r5,r5,-$BNSZ 1586 addi r5,r5,-$BNSZ
@@ -1680,10 +1590,10 @@ Lppcasm_add_mainloop:
1680 $LDU r8,$BNSZ(r5) 1590 $LDU r8,$BNSZ(r5)
1681 adde r8,r7,r8 1591 adde r8,r7,r8
1682 $STU r8,$BNSZ(r3) 1592 $STU r8,$BNSZ(r3)
1683 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop 1593 bdnz- Lppcasm_add_mainloop
1684Lppcasm_add_adios: 1594Lppcasm_add_adios:
1685 addze r3,r0 #return carry bit. 1595 addze r3,r0 #return carry bit.
1686 bclr BO_ALWAYS,CR0_LT 1596 blr
1687 .long 0x00000000 1597 .long 0x00000000
1688 1598
1689# 1599#
@@ -1707,24 +1617,24 @@ Lppcasm_add_adios:
1707# r5 = d 1617# r5 = d
1708 1618
1709 $UCMPI 0,r5,0 # compare r5 and 0 1619 $UCMPI 0,r5,0 # compare r5 and 0
1710 bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0 1620 bne Lppcasm_div1 # proceed if d!=0
1711 li r3,-1 # d=0 return -1 1621 li r3,-1 # d=0 return -1
1712 bclr BO_ALWAYS,CR0_LT 1622 blr
1713Lppcasm_div1: 1623Lppcasm_div1:
1714 xor r0,r0,r0 #r0=0 1624 xor r0,r0,r0 #r0=0
1715 li r8,$BITS 1625 li r8,$BITS
1716 $CNTLZ. r7,r5 #r7 = num leading 0s in d. 1626 $CNTLZ. r7,r5 #r7 = num leading 0s in d.
1717 bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros 1627 beq Lppcasm_div2 #proceed if no leading zeros
1718 subf r8,r7,r8 #r8 = BN_num_bits_word(d) 1628 subf r8,r7,r8 #r8 = BN_num_bits_word(d)
1719 $SHR. r9,r3,r8 #are there any bits above r8'th? 1629 $SHR. r9,r3,r8 #are there any bits above r8'th?
1720 $TR 16,r9,r0 #if there are any, signal to dump core... 1630 $TR 16,r9,r0 #if there are any, signal to dump core...
1721Lppcasm_div2: 1631Lppcasm_div2:
1722 $UCMP 0,r3,r5 #h>=d? 1632 $UCMP 0,r3,r5 #h>=d?
1723 bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not 1633 blt Lppcasm_div3 #goto Lppcasm_div3 if not
1724 subf r3,r5,r3 #h-=d ; 1634 subf r3,r5,r3 #h-=d ;
1725Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i 1635Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i
1726 cmpi 0,0,r7,0 # is (i == 0)? 1636 cmpi 0,0,r7,0 # is (i == 0)?
1727 bc BO_IF,CR0_EQ,Lppcasm_div4 1637 beq Lppcasm_div4
1728 $SHL r3,r3,r7 # h = (h<< i) 1638 $SHL r3,r3,r7 # h = (h<< i)
1729 $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i) 1639 $SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
1730 $SHL r5,r5,r7 # d<<=i 1640 $SHL r5,r5,r7 # d<<=i
@@ -1741,7 +1651,7 @@ Lppcasm_divouterloop:
1741 $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4 1651 $SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
1742 # compute here for innerloop. 1652 # compute here for innerloop.
1743 $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh 1653 $UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
1744 bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not 1654 bne Lppcasm_div5 # goto Lppcasm_div5 if not
1745 1655
1746 li r8,-1 1656 li r8,-1
1747 $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l 1657 $CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
@@ -1762,9 +1672,9 @@ Lppcasm_divinnerloop:
1762 # the following 2 instructions do that 1672 # the following 2 instructions do that
1763 $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4) 1673 $SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
1764 or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4) 1674 or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
1765 $UCMP 1,r6,r7 # compare (tl <= r7) 1675 $UCMP cr1,r6,r7 # compare (tl <= r7)
1766 bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit 1676 bne Lppcasm_divinnerexit
1767 bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit 1677 ble cr1,Lppcasm_divinnerexit
1768 addi r8,r8,-1 #q-- 1678 addi r8,r8,-1 #q--
1769 subf r12,r9,r12 #th -=dh 1679 subf r12,r9,r12 #th -=dh
1770 $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop. 1680 $CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
@@ -1773,14 +1683,14 @@ Lppcasm_divinnerloop:
1773Lppcasm_divinnerexit: 1683Lppcasm_divinnerexit:
1774 $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4) 1684 $SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
1775 $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h; 1685 $SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
1776 $UCMP 1,r4,r11 # compare l and tl 1686 $UCMP cr1,r4,r11 # compare l and tl
1777 add r12,r12,r10 # th+=t 1687 add r12,r12,r10 # th+=t
1778 bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7 1688 bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
1779 addi r12,r12,1 # th++ 1689 addi r12,r12,1 # th++
1780Lppcasm_div7: 1690Lppcasm_div7:
1781 subf r11,r11,r4 #r11=l-tl 1691 subf r11,r11,r4 #r11=l-tl
1782 $UCMP 1,r3,r12 #compare h and th 1692 $UCMP cr1,r3,r12 #compare h and th
1783 bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8 1693 bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
1784 addi r8,r8,-1 # q-- 1694 addi r8,r8,-1 # q--
1785 add r3,r5,r3 # h+=d 1695 add r3,r5,r3 # h+=d
1786Lppcasm_div8: 1696Lppcasm_div8:
@@ -1791,12 +1701,12 @@ Lppcasm_div8:
1791 # the following 2 instructions will do this. 1701 # the following 2 instructions will do this.
1792 $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2. 1702 $INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
1793 $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3 1703 $ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
1794 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ; 1704 bdz Lppcasm_div9 #if (count==0) break ;
1795 $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4 1705 $SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
1796 b Lppcasm_divouterloop 1706 b Lppcasm_divouterloop
1797Lppcasm_div9: 1707Lppcasm_div9:
1798 or r3,r8,r0 1708 or r3,r8,r0
1799 bclr BO_ALWAYS,CR0_LT 1709 blr
1800 .long 0x00000000 1710 .long 0x00000000
1801 1711
1802# 1712#
@@ -1822,7 +1732,7 @@ Lppcasm_div9:
1822# No unrolling done here. Not performance critical. 1732# No unrolling done here. Not performance critical.
1823 1733
1824 addic. r5,r5,0 #test r5. 1734 addic. r5,r5,0 #test r5.
1825 bc BO_IF,CR0_EQ,Lppcasm_sqr_adios 1735 beq Lppcasm_sqr_adios
1826 addi r4,r4,-$BNSZ 1736 addi r4,r4,-$BNSZ
1827 addi r3,r3,-$BNSZ 1737 addi r3,r3,-$BNSZ
1828 mtctr r5 1738 mtctr r5
@@ -1833,9 +1743,9 @@ Lppcasm_sqr_mainloop:
1833 $UMULH r8,r6,r6 1743 $UMULH r8,r6,r6
1834 $STU r7,$BNSZ(r3) 1744 $STU r7,$BNSZ(r3)
1835 $STU r8,$BNSZ(r3) 1745 $STU r8,$BNSZ(r3)
1836 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop 1746 bdnz- Lppcasm_sqr_mainloop
1837Lppcasm_sqr_adios: 1747Lppcasm_sqr_adios:
1838 bclr BO_ALWAYS,CR0_LT 1748 blr
1839 .long 0x00000000 1749 .long 0x00000000
1840 1750
1841 1751
@@ -1858,7 +1768,7 @@ Lppcasm_sqr_adios:
1858 xor r0,r0,r0 1768 xor r0,r0,r0
1859 xor r12,r12,r12 # used for carry 1769 xor r12,r12,r12 # used for carry
1860 rlwinm. r7,r5,30,2,31 # num >> 2 1770 rlwinm. r7,r5,30,2,31 # num >> 2
1861 bc BO_IF,CR0_EQ,Lppcasm_mw_REM 1771 beq Lppcasm_mw_REM
1862 mtctr r7 1772 mtctr r7
1863Lppcasm_mw_LOOP: 1773Lppcasm_mw_LOOP:
1864 #mul(rp[0],ap[0],w,c1); 1774 #mul(rp[0],ap[0],w,c1);
@@ -1896,11 +1806,11 @@ Lppcasm_mw_LOOP:
1896 1806
1897 addi r3,r3,`4*$BNSZ` 1807 addi r3,r3,`4*$BNSZ`
1898 addi r4,r4,`4*$BNSZ` 1808 addi r4,r4,`4*$BNSZ`
1899 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP 1809 bdnz- Lppcasm_mw_LOOP
1900 1810
1901Lppcasm_mw_REM: 1811Lppcasm_mw_REM:
1902 andi. r5,r5,0x3 1812 andi. r5,r5,0x3
1903 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER 1813 beq Lppcasm_mw_OVER
1904 #mul(rp[0],ap[0],w,c1); 1814 #mul(rp[0],ap[0],w,c1);
1905 $LD r8,`0*$BNSZ`(r4) 1815 $LD r8,`0*$BNSZ`(r4)
1906 $UMULL r9,r6,r8 1816 $UMULL r9,r6,r8
@@ -1912,7 +1822,7 @@ Lppcasm_mw_REM:
1912 1822
1913 addi r5,r5,-1 1823 addi r5,r5,-1
1914 cmpli 0,0,r5,0 1824 cmpli 0,0,r5,0
1915 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER 1825 beq Lppcasm_mw_OVER
1916 1826
1917 1827
1918 #mul(rp[1],ap[1],w,c1); 1828 #mul(rp[1],ap[1],w,c1);
@@ -1926,7 +1836,7 @@ Lppcasm_mw_REM:
1926 1836
1927 addi r5,r5,-1 1837 addi r5,r5,-1
1928 cmpli 0,0,r5,0 1838 cmpli 0,0,r5,0
1929 bc BO_IF,CR0_EQ,Lppcasm_mw_OVER 1839 beq Lppcasm_mw_OVER
1930 1840
1931 #mul_add(rp[2],ap[2],w,c1); 1841 #mul_add(rp[2],ap[2],w,c1);
1932 $LD r8,`2*$BNSZ`(r4) 1842 $LD r8,`2*$BNSZ`(r4)
@@ -1939,7 +1849,7 @@ Lppcasm_mw_REM:
1939 1849
1940Lppcasm_mw_OVER: 1850Lppcasm_mw_OVER:
1941 addi r3,r12,0 1851 addi r3,r12,0
1942 bclr BO_ALWAYS,CR0_LT 1852 blr
1943 .long 0x00000000 1853 .long 0x00000000
1944 1854
1945# 1855#
@@ -1964,7 +1874,7 @@ Lppcasm_mw_OVER:
1964 xor r0,r0,r0 #r0 = 0 1874 xor r0,r0,r0 #r0 = 0
1965 xor r12,r12,r12 #r12 = 0 . used for carry 1875 xor r12,r12,r12 #r12 = 0 . used for carry
1966 rlwinm. r7,r5,30,2,31 # num >> 2 1876 rlwinm. r7,r5,30,2,31 # num >> 2
1967 bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) goto Lppcasm_maw_leftover 1877 beq Lppcasm_maw_leftover # if (num < 4) goto Lppcasm_maw_leftover
1968 mtctr r7 1878 mtctr r7
1969Lppcasm_maw_mainloop: 1879Lppcasm_maw_mainloop:
1970 #mul_add(rp[0],ap[0],w,c1); 1880 #mul_add(rp[0],ap[0],w,c1);
@@ -2017,11 +1927,11 @@ Lppcasm_maw_mainloop:
2017 $ST r11,`3*$BNSZ`(r3) 1927 $ST r11,`3*$BNSZ`(r3)
2018 addi r3,r3,`4*$BNSZ` 1928 addi r3,r3,`4*$BNSZ`
2019 addi r4,r4,`4*$BNSZ` 1929 addi r4,r4,`4*$BNSZ`
2020 bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop 1930 bdnz- Lppcasm_maw_mainloop
2021 1931
2022Lppcasm_maw_leftover: 1932Lppcasm_maw_leftover:
2023 andi. r5,r5,0x3 1933 andi. r5,r5,0x3
2024 bc BO_IF,CR0_EQ,Lppcasm_maw_adios 1934 beq Lppcasm_maw_adios
2025 addi r3,r3,-$BNSZ 1935 addi r3,r3,-$BNSZ
2026 addi r4,r4,-$BNSZ 1936 addi r4,r4,-$BNSZ
2027 #mul_add(rp[0],ap[0],w,c1); 1937 #mul_add(rp[0],ap[0],w,c1);
@@ -2036,7 +1946,7 @@ Lppcasm_maw_leftover:
2036 addze r12,r10 1946 addze r12,r10
2037 $ST r9,0(r3) 1947 $ST r9,0(r3)
2038 1948
2039 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios 1949 bdz Lppcasm_maw_adios
2040 #mul_add(rp[1],ap[1],w,c1); 1950 #mul_add(rp[1],ap[1],w,c1);
2041 $LDU r8,$BNSZ(r4) 1951 $LDU r8,$BNSZ(r4)
2042 $UMULL r9,r6,r8 1952 $UMULL r9,r6,r8
@@ -2048,7 +1958,7 @@ Lppcasm_maw_leftover:
2048 addze r12,r10 1958 addze r12,r10
2049 $ST r9,0(r3) 1959 $ST r9,0(r3)
2050 1960
2051 bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios 1961 bdz Lppcasm_maw_adios
2052 #mul_add(rp[2],ap[2],w,c1); 1962 #mul_add(rp[2],ap[2],w,c1);
2053 $LDU r8,$BNSZ(r4) 1963 $LDU r8,$BNSZ(r4)
2054 $UMULL r9,r6,r8 1964 $UMULL r9,r6,r8
@@ -2062,17 +1972,10 @@ Lppcasm_maw_leftover:
2062 1972
2063Lppcasm_maw_adios: 1973Lppcasm_maw_adios:
2064 addi r3,r12,0 1974 addi r3,r12,0
2065 bclr BO_ALWAYS,CR0_LT 1975 blr
2066 .long 0x00000000 1976 .long 0x00000000
2067 .align 4 1977 .align 4
2068EOF 1978EOF
2069 $data =~ s/\`([^\`]*)\`/eval $1/gem; 1979$data =~ s/\`([^\`]*)\`/eval $1/gem;
2070 1980print $data;
2071 # if some assembler chokes on some simplified mnemonic, 1981close STDOUT;
2072 # this is the spot to fix it up, e.g.:
2073 # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
2074 $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
2075 # assembler X doesn't accept li, load immediate value
2076 #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
2077 return($data);
2078}