diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib/libcrypto/bn/asm/armv4-mont.pl | 204 |
1 files changed, 0 insertions, 204 deletions
diff --git a/src/lib/libcrypto/bn/asm/armv4-mont.pl b/src/lib/libcrypto/bn/asm/armv4-mont.pl deleted file mode 100644 index f78a8b5f0f..0000000000 --- a/src/lib/libcrypto/bn/asm/armv4-mont.pl +++ /dev/null | |||
@@ -1,204 +0,0 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | |||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. The module is, however, dual licensed under OpenSSL and | ||
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
7 | # details see http://www.openssl.org/~appro/cryptogams/. | ||
8 | # ==================================================================== | ||
9 | |||
10 | # January 2007. | ||
11 | |||
12 | # Montgomery multiplication for ARMv4. | ||
13 | # | ||
14 | # Performance improvement naturally varies among CPU implementations | ||
15 | # and compilers. The code was observed to provide +65-35% improvement | ||
16 | # [depending on key length, less for longer keys] on ARM920T, and | ||
17 | # +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code | ||
18 | # base and compiler generated code with in-lined umull and even umlal | ||
19 | # instructions. The latter means that this code didn't really have an | ||
20 | # "advantage" of utilizing some "secret" instruction. | ||
21 | # | ||
22 | # The code is interoperable with Thumb ISA and is rather compact, less | ||
23 | # than 1/2KB. Windows CE port would be trivial, as it's exclusively | ||
24 | # about decorations, ABI and instruction syntax are identical. | ||
25 | |||
# Select the output file from the command line: arguments are consumed
# one at a time until one looks like a plain "name.ext" file name (or the
# argument list runs out, leaving $output false).
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when a file name was actually found.  Re-opening
# STDOUT with an empty name would fail *and* leave STDOUT closed, silently
# discarding the generated code; without a name we keep writing to the
# inherited STDOUT instead.  The three-argument form keeps the name from
# being interpreted as a mode, and a failed open is fatal rather than
# ignored.
if (defined $output && length $output) {
    open STDOUT, '>', $output
        or die "can't open $output for writing: $!";
}
28 | |||
# Register assignments used by the generated code.  Each variable holds an
# ARM register name that is interpolated into the assembly text.
$num = "r0";                # starts as num argument, but holds &tp[num-1]
$ap  = "r1";
$bp  = $bi = $rp = "r2";    # three names for the same register
$np  = "r3";
$tp  = "r4";
$aj  = "r5";
$nj  = "r6";
$tj  = "r7";
$n0  = "r8";
# r9 is not used: reserved by ELF as platform specific, e.g. TLS pointer
$alo = "r10";               # sl, gcc uses it to keep @GOT
$ahi = "r11";               # fp
$nlo = "r12";               # ip
# r13 is stack pointer
$nhi = "r14";               # lr
# r15 is program counter

# Argument block layout, written as "base,#offset" operands relative to
# &tp[num-1], a.k.a. $num.  ap and np get no slot here because they
# permanently reside in r1 and r3.
$_rp  = join(",", $num, "#12*4");
$_bp  = join(",", $num, "#13*4");
$_n0  = join(",", $num, "#14*4");
$_num = $_bpend = join(",", $num, "#15*4");   # one slot, two names
53 | |||
# Assembly source for bn_mul_mont, held in $code as one interpolating
# heredoc.  Every $name inside it expands to the register name or the
# "base,#offset" operand string assigned earlier in this script; the text
# after each "@" is an assembler comment and is emitted verbatim.
#
# Outline of the generated routine, following its labels:
#   entry    - pushes {r0,r2}, loads num from the stack and returns 0 via
#              .Labrt when num < 2; otherwise saves r4-r12,lr and reserves
#              4*num+4 bytes of stack for tp[].
#   .L1st    - first pass over ap[]/np[] using bp[0], filling tp[].
#   .Louter  - per-bp[i] setup; .Linner accumulates ap[j]*bp[i] and
#              np[j]*n0 into tp[].  Repeats until bp reaches &bp[num].
#   .Lsub    - stores tp[j]-np[j] into rp[], keeping the borrow.
#   .Lcopy   - copies from tp or rp (selected by the borrow) into rp while
#              overwriting tp[].
#   exit     - releases the stack, restores registers, returns 1.
#
# The heredoc body is program output: do not reformat or re-comment it
# without checking the resulting assembly.
54 | $code=<<___; | ||
55 | .text | ||
56 | |||
57 | .global bn_mul_mont | ||
58 | .type bn_mul_mont,%function | ||
59 | |||
60 | .align 2 | ||
61 | bn_mul_mont: | ||
62 | stmdb sp!,{r0,r2} @ sp points at argument block | ||
63 | ldr $num,[sp,#3*4] @ load num | ||
64 | cmp $num,#2 | ||
65 | movlt r0,#0 | ||
66 | addlt sp,sp,#2*4 | ||
67 | blt .Labrt | ||
68 | |||
69 | stmdb sp!,{r4-r12,lr} @ save 10 registers | ||
70 | |||
71 | mov $num,$num,lsl#2 @ rescale $num for byte count | ||
72 | sub sp,sp,$num @ alloca(4*num) | ||
73 | sub sp,sp,#4 @ +extra dword | ||
74 | sub $num,$num,#4 @ "num=num-1" | ||
75 | add $tp,$bp,$num @ &bp[num-1] | ||
76 | |||
77 | add $num,sp,$num @ $num to point at &tp[num-1] | ||
78 | ldr $n0,[$_n0] @ &n0 | ||
79 | ldr $bi,[$bp] @ bp[0] | ||
80 | ldr $aj,[$ap],#4 @ ap[0],ap++ | ||
81 | ldr $nj,[$np],#4 @ np[0],np++ | ||
82 | ldr $n0,[$n0] @ *n0 | ||
83 | str $tp,[$_bpend] @ save &bp[num] | ||
84 | |||
85 | umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0] | ||
86 | str $n0,[$_n0] @ save n0 value | ||
87 | mul $n0,$alo,$n0 @ "tp[0]"*n0 | ||
88 | mov $nlo,#0 | ||
89 | umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]" | ||
90 | mov $tp,sp | ||
91 | |||
92 | .L1st: | ||
93 | ldr $aj,[$ap],#4 @ ap[j],ap++ | ||
94 | mov $alo,$ahi | ||
95 | ldr $nj,[$np],#4 @ np[j],np++ | ||
96 | mov $ahi,#0 | ||
97 | umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0] | ||
98 | mov $nhi,#0 | ||
99 | umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 | ||
100 | adds $nlo,$nlo,$alo | ||
101 | str $nlo,[$tp],#4 @ tp[j-1]=,tp++ | ||
102 | adc $nlo,$nhi,#0 | ||
103 | cmp $tp,$num | ||
104 | bne .L1st | ||
105 | |||
106 | adds $nlo,$nlo,$ahi | ||
107 | ldr $tp,[$_bp] @ restore bp | ||
108 | mov $nhi,#0 | ||
109 | ldr $n0,[$_n0] @ restore n0 | ||
110 | adc $nhi,$nhi,#0 | ||
111 | str $nlo,[$num] @ tp[num-1]= | ||
112 | str $nhi,[$num,#4] @ tp[num]= | ||
113 | |||
114 | .Louter: | ||
115 | sub $tj,$num,sp @ "original" $num-1 value | ||
116 | sub $ap,$ap,$tj @ "rewind" ap to &ap[1] | ||
117 | ldr $bi,[$tp,#4]! @ *(++bp) | ||
118 | sub $np,$np,$tj @ "rewind" np to &np[1] | ||
119 | ldr $aj,[$ap,#-4] @ ap[0] | ||
120 | ldr $alo,[sp] @ tp[0] | ||
121 | ldr $nj,[$np,#-4] @ np[0] | ||
122 | ldr $tj,[sp,#4] @ tp[1] | ||
123 | |||
124 | mov $ahi,#0 | ||
125 | umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0] | ||
126 | str $tp,[$_bp] @ save bp | ||
127 | mul $n0,$alo,$n0 | ||
128 | mov $nlo,#0 | ||
129 | umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]" | ||
130 | mov $tp,sp | ||
131 | |||
132 | .Linner: | ||
133 | ldr $aj,[$ap],#4 @ ap[j],ap++ | ||
134 | adds $alo,$ahi,$tj @ +=tp[j] | ||
135 | ldr $nj,[$np],#4 @ np[j],np++ | ||
136 | mov $ahi,#0 | ||
137 | umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i] | ||
138 | mov $nhi,#0 | ||
139 | umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0 | ||
140 | adc $ahi,$ahi,#0 | ||
141 | ldr $tj,[$tp,#8] @ tp[j+1] | ||
142 | adds $nlo,$nlo,$alo | ||
143 | str $nlo,[$tp],#4 @ tp[j-1]=,tp++ | ||
144 | adc $nlo,$nhi,#0 | ||
145 | cmp $tp,$num | ||
146 | bne .Linner | ||
147 | |||
148 | adds $nlo,$nlo,$ahi | ||
149 | mov $nhi,#0 | ||
150 | ldr $tp,[$_bp] @ restore bp | ||
151 | adc $nhi,$nhi,#0 | ||
152 | ldr $n0,[$_n0] @ restore n0 | ||
153 | adds $nlo,$nlo,$tj | ||
154 | ldr $tj,[$_bpend] @ restore &bp[num] | ||
155 | adc $nhi,$nhi,#0 | ||
156 | str $nlo,[$num] @ tp[num-1]= | ||
157 | str $nhi,[$num,#4] @ tp[num]= | ||
158 | |||
159 | cmp $tp,$tj | ||
160 | bne .Louter | ||
161 | |||
162 | ldr $rp,[$_rp] @ pull rp | ||
163 | add $num,$num,#4 @ $num to point at &tp[num] | ||
164 | sub $aj,$num,sp @ "original" num value | ||
165 | mov $tp,sp @ "rewind" $tp | ||
166 | mov $ap,$tp @ "borrow" $ap | ||
167 | sub $np,$np,$aj @ "rewind" $np to &np[0] | ||
168 | |||
169 | subs $tj,$tj,$tj @ "clear" carry flag | ||
170 | .Lsub: ldr $tj,[$tp],#4 | ||
171 | ldr $nj,[$np],#4 | ||
172 | sbcs $tj,$tj,$nj @ tp[j]-np[j] | ||
173 | str $tj,[$rp],#4 @ rp[j]= | ||
174 | teq $tp,$num @ preserve carry | ||
175 | bne .Lsub | ||
176 | sbcs $nhi,$nhi,#0 @ upmost carry | ||
177 | mov $tp,sp @ "rewind" $tp | ||
178 | sub $rp,$rp,$aj @ "rewind" $rp | ||
179 | |||
180 | and $ap,$tp,$nhi | ||
181 | bic $np,$rp,$nhi | ||
182 | orr $ap,$ap,$np @ ap=borrow?tp:rp | ||
183 | |||
184 | .Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh | ||
185 | str sp,[$tp],#4 @ zap tp | ||
186 | str $tj,[$rp],#4 | ||
187 | cmp $tp,$num | ||
188 | bne .Lcopy | ||
189 | |||
190 | add sp,$num,#4 @ skip over tp[num+1] | ||
191 | ldmia sp!,{r4-r12,lr} @ restore registers | ||
192 | add sp,sp,#2*4 @ skip over {r0,r2} | ||
193 | mov r0,#1 | ||
194 | .Labrt: tst lr,#1 | ||
195 | moveq pc,lr @ be binary compatible with V4, yet | ||
196 | bx lr @ interoperable with Thumb ISA:-) | ||
197 | .size bn_mul_mont,.-bn_mul_mont | ||
198 | .asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" | ||
199 | .align 2 | ||
200 | ___ | ||
201 | |||
# Replace every "bx lr" mnemonic with a literal .word holding the value
# 0xe12fff1e, to make it possible to compile the output with -march=armv4.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;

# Emit the assembly.  STDOUT may be a regular file at this point, and
# buffered write errors (full disk, I/O error) may only surface at close,
# so both calls are checked: a truncated output file must not look like a
# successful run to the build.
print $code
    or die "error writing generated code: $!";
close STDOUT
    or die "error closing STDOUT: $!";