diff options
Diffstat (limited to 'src/lib/libcrypto/bn/asm/alpha.s')
-rw-r--r-- | src/lib/libcrypto/bn/asm/alpha.s | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/src/lib/libcrypto/bn/asm/alpha.s b/src/lib/libcrypto/bn/asm/alpha.s new file mode 100644 index 0000000000..1d17b1d619 --- /dev/null +++ b/src/lib/libcrypto/bn/asm/alpha.s | |||
@@ -0,0 +1,344 @@ | |||
1 | # DEC Alpha assembler | ||
2 | # The bn_div64 is actually gcc output but the other parts are hand done. | ||
3 | # Thanks to tzeruch@ceddec.com for sending me the gcc output for | ||
4 | # bn_div64. | ||
5 | .file 1 "bn_mulw.c" | ||
6 | .set noat | ||
7 | gcc2_compiled.: | ||
8 | __gnu_compiled_c: | ||
9 | .text | ||
10 | .align 3 | ||
# bn_mul_add_words: for each 64-bit word, compute a[i]*w, add it to r[i]
# plus the running carry, store the low 64 bits back to r[i] and carry the
# high part forward. The final carry is left in $0.
# Registers as used below: $16 = r (read and written), $17 = a (read),
# $18 = num, $19 = w, $0 = carry c, $25 = lookahead counter (num-2).
# Presumably called from C as bn_mul_add_words(r, a, num, w) -- confirm
# against the prototype used by the callers.
# NOTE(review): when num <= 0 the first blt still branches to $42, so one
# word is read from a and written to r; callers need num >= 1.
11 | .globl bn_mul_add_words | ||
12 | .ent bn_mul_add_words | ||
13 | bn_mul_add_words: | ||
14 | bn_mul_add_words..ng: | ||
15 | .frame $30,0,$26,0 | ||
16 | .prologue 0 | ||
17 | subq $18,2,$25 # $25 = num-2 (negative when fewer than 2 words) | ||
# c = 0
18 | bis $31,$31,$0 | ||
# fewer than two words: go straight to the single-word tail
19 | blt $25,$42 | ||
20 | .align 5 | ||
# Main loop: two words per iteration.
21 | $142: | ||
22 | subq $18,2,$18 # num-=2 (words left after this pair) | ||
23 | subq $25,2,$25 # $25-=2 (stays equal to num-2) | ||
24 | |||
25 | ldq $1,0($17) # a[0] | ||
26 | ldq $2,8($17) # a[1] | ||
27 | |||
28 | mulq $19,$1,$3 # a[0]*w low part r3 | ||
29 | umulh $19,$1,$1 # a[0]*w high part r1 | ||
30 | mulq $19,$2,$4 # a[1]*w low part r4 | ||
31 | umulh $19,$2,$2 # a[1]*w high part r2 | ||
32 | |||
33 | ldq $22,0($16) # r[0] r22 | ||
34 | ldq $23,8($16) # r[1] r23 | ||
35 | |||
36 | addq $3,$22,$3 # a0 low part + r[0] | ||
37 | addq $4,$23,$4 # a1 low part + r[1] | ||
38 | cmpult $3,$22,$5 # overflow? (sum < addend means carry out) | ||
39 | cmpult $4,$23,$6 # overflow? (sum < addend means carry out) | ||
40 | addq $5,$1,$1 # high part + overflow | ||
41 | addq $6,$2,$2 # high part + overflow | ||
42 | |||
43 | addq $3,$0,$3 # add c | ||
44 | cmpult $3,$0,$5 # overflow? | ||
# r[0] = low word
45 | stq $3,0($16) | ||
46 | addq $5,$1,$0 # c=high part + overflow | ||
47 | |||
48 | addq $4,$0,$4 # add c | ||
49 | cmpult $4,$0,$5 # overflow? | ||
# r[1] = low word
50 | stq $4,8($16) | ||
51 | addq $5,$2,$0 # c=high part + overflow | ||
52 | |||
# no words left: return with c in $0
53 | ble $18,$43 | ||
54 | |||
# advance r and a by two words
55 | addq $16,16,$16 | ||
56 | addq $17,16,$17 | ||
# exactly one word left: finish in the tail
57 | blt $25,$42 | ||
58 | |||
59 | br $31,$142 | ||
# Single-word tail.
60 | $42: | ||
61 | ldq $1,0($17) # a[0] | ||
62 | umulh $19,$1,$3 # a[0]*w high part | ||
63 | mulq $19,$1,$1 # a[0]*w low part | ||
64 | ldq $2,0($16) # r[0] | ||
65 | addq $1,$2,$1 # low part + r[0] | ||
66 | cmpult $1,$2,$4 # overflow? | ||
67 | addq $4,$3,$3 # high part + overflow | ||
68 | addq $1,$0,$1 # add c | ||
69 | cmpult $1,$0,$4 # overflow? | ||
70 | addq $4,$3,$0 # c=high part + overflow | ||
# r[0] = low word
71 | stq $1,0($16) | ||
72 | |||
73 | .align 4 | ||
# Return; carry is in $0.
74 | $43: | ||
75 | ret $31,($26),1 | ||
76 | .end bn_mul_add_words | ||
77 | .align 3 | ||
# bn_mul_words: for each 64-bit word, compute a[i]*w plus the running
# carry, store the low 64 bits to r[i] and carry the high part forward.
# Unlike bn_mul_add_words, r is only written, never read. The final carry
# is left in $0.
# Registers as used below: $16 = r (written), $17 = a (read), $18 = num,
# $19 = w, $0 = carry c, $25 = lookahead counter (num-2).
# Presumably called from C as bn_mul_words(r, a, num, w) -- confirm
# against the prototype used by the callers.
# NOTE(review): when num <= 0 the first blt still branches to $242, so one
# word is read from a and written to r; callers need num >= 1.
78 | .globl bn_mul_words | ||
79 | .ent bn_mul_words | ||
80 | bn_mul_words: | ||
81 | bn_mul_words..ng: | ||
82 | .frame $30,0,$26,0 | ||
83 | .prologue 0 | ||
84 | subq $18,2,$25 # $25 = num-2 (negative when fewer than 2 words) | ||
# c = 0
85 | bis $31,$31,$0 | ||
# fewer than two words: go straight to the single-word tail
86 | blt $25,$242 | ||
87 | .align 5 | ||
# Main loop: two words per iteration.
88 | $342: | ||
89 | subq $18,2,$18 # num-=2 (words left after this pair) | ||
90 | subq $25,2,$25 # $25-=2 (stays equal to num-2) | ||
91 | |||
92 | ldq $1,0($17) # a[0] | ||
93 | ldq $2,8($17) # a[1] | ||
94 | |||
95 | mulq $19,$1,$3 # a[0]*w low part r3 | ||
96 | umulh $19,$1,$1 # a[0]*w high part r1 | ||
97 | mulq $19,$2,$4 # a[1]*w low part r4 | ||
98 | umulh $19,$2,$2 # a[1]*w high part r2 | ||
99 | |||
100 | addq $3,$0,$3 # add c | ||
101 | cmpult $3,$0,$5 # overflow? (sum < addend means carry out) | ||
# r[0] = low word
102 | stq $3,0($16) | ||
103 | addq $5,$1,$0 # c=high part + overflow | ||
104 | |||
105 | addq $4,$0,$4 # add c | ||
106 | cmpult $4,$0,$5 # overflow? | ||
# r[1] = low word
107 | stq $4,8($16) | ||
108 | addq $5,$2,$0 # c=high part + overflow | ||
109 | |||
# no words left: return with c in $0
110 | ble $18,$243 | ||
111 | |||
# advance r and a by two words
112 | addq $16,16,$16 | ||
113 | addq $17,16,$17 | ||
# exactly one word left: finish in the tail
114 | blt $25,$242 | ||
115 | |||
116 | br $31,$342 | ||
# Single-word tail.
117 | $242: | ||
118 | ldq $1,0($17) # a[0] | ||
119 | umulh $19,$1,$3 # a[0]*w high part | ||
120 | mulq $19,$1,$1 # a[0]*w low part | ||
121 | addq $1,$0,$1 # add c | ||
122 | cmpult $1,$0,$4 # overflow? | ||
123 | addq $4,$3,$0 # c=high part + overflow | ||
# r[0] = low word
124 | stq $1,0($16) | ||
# Return; carry is in $0.
125 | $243: | ||
126 | ret $31,($26),1 | ||
127 | .end bn_mul_words | ||
128 | .align 3 | ||
# bn_sqr_words: for each 64-bit input word a[i], store the full 128-bit
# square as two words: r[2*i] = low half, r[2*i+1] = high half. No carry
# is propagated between words and $0 is never written by this routine.
# Registers as used below: $16 = r (written, advances 32 bytes per pair),
# $17 = a (read, advances 16 bytes per pair), $18 = num,
# $25 = lookahead counter (num-2).
# Presumably called from C as bn_sqr_words(r, a, num) -- confirm against
# the prototype used by the callers.
# NOTE(review): when num <= 0 the first blt still branches to $442, so one
# word is read from a and two words are written to r; callers need num >= 1.
129 | .globl bn_sqr_words | ||
130 | .ent bn_sqr_words | ||
131 | bn_sqr_words: | ||
132 | bn_sqr_words..ng: | ||
133 | .frame $30,0,$26,0 | ||
134 | .prologue 0 | ||
135 | |||
136 | subq $18,2,$25 # $25 = num-2 (negative when fewer than 2 words) | ||
# fewer than two words: go straight to the single-word tail
137 | blt $25,$442 | ||
138 | .align 5 | ||
# Main loop: two input words (four output words) per iteration.
139 | $542: | ||
140 | subq $18,2,$18 # num-=2 (words left after this pair) | ||
141 | subq $25,2,$25 # $25-=2 (stays equal to num-2) | ||
142 | |||
143 | ldq $1,0($17) # a[0] | ||
144 | ldq $4,8($17) # a[1] | ||
145 | |||
146 | mulq $1,$1,$2 # a[0]*a[0] low part r2 | ||
147 | umulh $1,$1,$3 # a[0]*a[0] high part r3 | ||
148 | mulq $4,$4,$5 # a[1]*a[1] low part r5 | ||
149 | umulh $4,$4,$6 # a[1]*a[1] high part r6 | ||
150 | |||
151 | stq $2,0($16) # r[0] | ||
152 | stq $3,8($16) # r[1] | ||
153 | stq $5,16($16) # r[2] | ||
154 | stq $6,24($16) # r[3] | ||
155 | |||
# no words left: return
156 | ble $18,$443 | ||
157 | |||
# advance r by four words and a by two words
158 | addq $16,32,$16 | ||
159 | addq $17,16,$17 | ||
# exactly one word left: finish in the tail
160 | blt $25,$442 | ||
161 | br $31,$542 | ||
162 | |||
# Single-word tail.
163 | $442: | ||
164 | ldq $1,0($17) # a[0] | ||
165 | mulq $1,$1,$2 # a[0]*a[0] low part r2 | ||
166 | umulh $1,$1,$3 # a[0]*a[0] high part r3 | ||
167 | stq $2,0($16) # r[0] | ||
168 | stq $3,8($16) # r[1] | ||
169 | |||
170 | .align 4 | ||
171 | $443: | ||
172 | ret $31,($26),1 | ||
173 | .end bn_sqr_words | ||
174 | |||
175 | .align 3 | ||
# bn_add_words: word-by-word addition with carry, r[i] = a[i] + b[i] + carry,
# one 64-bit word per loop iteration. The final carry is returned in $0.
# Registers as used below: $16 = r (written), $17 = a (read), $18 = b (read),
# $19 = word count, $8 = running carry, $7 = carry out of a[i]+b[i].
# A count <= 0 skips the loop entirely and returns 0.
# Presumably called from C as bn_add_words(r, a, b, n) -- confirm against
# the prototype used by the callers.
176 | .globl bn_add_words | ||
177 | .ent bn_add_words | ||
178 | bn_add_words: | ||
179 | bn_add_words..ng: | ||
180 | .frame $30,0,$26,0 | ||
181 | .prologue 0 | ||
182 | |||
183 | bis $31,$31,$8 # carry = 0 | ||
# nothing to do when the count is <= 0
184 | ble $19,$900 | ||
185 | $901: | ||
186 | ldq $0,0($17) # a[0] | ||
187 | ldq $1,0($18) # b[0] | ||
188 | |||
189 | addq $0,$1,$3 # c=a+b; | ||
190 | addq $17,8,$17 # a++ | ||
191 | |||
192 | cmpult $3,$1,$7 # did we overflow? (a+b < b means carry out) | ||
193 | addq $18,8,$18 # b++ | ||
194 | |||
195 | addq $8,$3,$3 # c+=carry | ||
196 | |||
197 | cmpult $3,$8,$8 # did we overflow? (c < carry means carry out) | ||
198 | stq $3,($16) # r[0]=c | ||
199 | |||
200 | addq $7,$8,$8 # add into overflow (new carry = sum of both carry-outs) | ||
201 | subq $19,1,$19 # loop-- | ||
202 | |||
203 | addq $16,8,$16 # r++ | ||
# repeat while words remain
204 | bgt $19,$901 | ||
205 | $900: | ||
206 | bis $8,$8,$0 # return carry | ||
207 | ret $31,($26),1 | ||
208 | .end bn_add_words | ||
209 | |||
210 | # | ||
211 | # What follows was taken directly from the C compiler with a few | ||
212 | # hacks to redo the labels. | ||
213 | # | ||
214 | .text | ||
215 | .align 3 | ||
# bn_div64: compiler-generated division routine. The three incoming
# argument registers are copied to $9, $10 and $11; from how they are used
# below, $9:$10 act as the high and low halves of a two-word dividend and
# $11 as the divisor (presumably the C arguments h, l, d -- confirm against
# the C source this was compiled from). The quotient is assembled from two
# 32-bit digits and returned in $0.
# Special cases visible in the code: a zero divisor returns -1; if the
# divisor's bit length n (from BN_num_bits_word) is not 64 and the high
# word is greater than 1<<n, abort is called.
# Working registers: $12 = digits still to compute (starts at 2),
# $13 = quotient accumulated so far, $27 = current quotient digit q,
# $5 / $6 = high / low 32 bits of the normalised divisor.
216 | .globl bn_div64 | ||
217 | .ent bn_div64 | ||
218 | bn_div64: | ||
# set up the global pointer from the procedure value in $27
219 | ldgp $29,0($27) | ||
220 | bn_div64..ng: | ||
# allocate a 48-byte frame and save $26 and $9..$13
221 | lda $30,-48($30) | ||
222 | .frame $30,48,$26,0 | ||
223 | stq $26,0($30) | ||
224 | stq $9,8($30) | ||
225 | stq $10,16($30) | ||
226 | stq $11,24($30) | ||
227 | stq $12,32($30) | ||
228 | stq $13,40($30) | ||
229 | .mask 0x4003e00,-48 | ||
230 | .prologue 1 | ||
# $9 = first arg (high word), $10 = second arg (low word), $11 = third arg (divisor)
231 | bis $16,$16,$9 | ||
232 | bis $17,$17,$10 | ||
233 | bis $18,$18,$11 | ||
# $13 = 0 (accumulated quotient), $12 = 2 (digit counter)
234 | bis $31,$31,$13 | ||
235 | bis $31,2,$12 | ||
# divisor == 0: return -1
236 | bne $11,$119 | ||
237 | lda $0,-1 | ||
238 | br $31,$136 | ||
239 | .align 4 | ||
240 | $119: | ||
# $0 = BN_num_bits_word(divisor)
241 | bis $11,$11,$16 | ||
242 | jsr $26,BN_num_bits_word | ||
243 | ldgp $29,0($26) | ||
# skip the range check when the result is 64
244 | subq $0,64,$1 | ||
245 | beq $1,$120 | ||
# otherwise continue only if high word <= (1 << bits); else abort
246 | bis $31,1,$1 | ||
247 | sll $1,$0,$1 | ||
248 | cmpule $9,$1,$1 | ||
249 | bne $1,$120 | ||
250 | # lda $16,_IO_stderr_ | ||
251 | # lda $17,$C32 | ||
252 | # bis $0,$0,$18 | ||
253 | # jsr $26,fprintf | ||
254 | # ldgp $29,0($26) | ||
255 | jsr $26,abort | ||
256 | ldgp $29,0($26) | ||
257 | .align 4 | ||
258 | $120: | ||
# $0 = 64 - bits (sign-extended 32-bit value): the normalising shift count.
# If high word >= divisor, subtract the divisor from the high word first.
259 | bis $31,64,$3 | ||
260 | cmpult $9,$11,$2 | ||
261 | subq $3,$0,$1 | ||
262 | addl $1,$31,$0 | ||
263 | subq $9,$11,$1 | ||
264 | cmoveq $2,$1,$9 | ||
# shift count 0: nothing to normalise
265 | beq $0,$122 | ||
# Shift divisor and the two-word dividend left by the shift count:
# divisor <<= s; high = (high << s) | (low >> (64 - s)); low <<= s
266 | zapnot $0,15,$2 | ||
267 | subq $3,$0,$1 | ||
268 | sll $11,$2,$11 | ||
269 | sll $9,$2,$3 | ||
270 | srl $10,$1,$1 | ||
271 | sll $10,$2,$10 | ||
272 | bis $3,$1,$9 | ||
273 | $122: | ||
# $5 = high 32 bits of divisor, $6 = low 32 bits, $7 = all ones
274 | srl $11,32,$5 | ||
275 | zapnot $11,15,$6 | ||
276 | lda $7,-1 | ||
277 | .align 5 | ||
# Top of the per-digit loop (entered twice, controlled by $12).
278 | $123: | ||
# if the top 32 bits of the high word equal $5, estimate q = 0xffffffff
279 | srl $9,32,$1 | ||
280 | subq $1,$5,$1 | ||
281 | bne $1,$126 | ||
282 | zapnot $7,15,$27 | ||
283 | br $31,$127 | ||
284 | .align 4 | ||
285 | $126: | ||
# otherwise estimate q = high word / $5 (unsigned)
286 | bis $9,$9,$24 | ||
287 | bis $5,$5,$25 | ||
288 | divqu $24,$25,$27 | ||
289 | $127: | ||
# $4 = top 32 bits of the low word
290 | srl $10,32,$4 | ||
291 | .align 5 | ||
# Correct the estimate: decrement q until either the remainder
# t = high - q*$5 has bits above 32 set, or $6*q <= (t << 32) + $4.
292 | $128: | ||
293 | mulq $27,$5,$1 | ||
294 | subq $9,$1,$3 | ||
295 | zapnot $3,240,$1 | ||
296 | bne $1,$129 | ||
297 | mulq $6,$27,$2 | ||
298 | sll $3,32,$1 | ||
299 | addq $1,$4,$1 | ||
300 | cmpule $2,$1,$2 | ||
301 | bne $2,$129 | ||
302 | subq $27,1,$27 | ||
303 | br $31,$128 | ||
304 | .align 4 | ||
305 | $129: | ||
# tl = q*$6, th = q*$5 + (tl >> 32); tl <<= 32.
# low -= tl, with the borrow added into th; th is kept in $4.
306 | mulq $27,$6,$1 | ||
307 | mulq $27,$5,$4 | ||
308 | srl $1,32,$3 | ||
309 | sll $1,32,$1 | ||
310 | addq $4,$3,$4 | ||
311 | cmpult $10,$1,$2 | ||
312 | subq $10,$1,$10 | ||
313 | addq $2,$4,$2 | ||
314 | cmpult $9,$2,$1 | ||
315 | bis $2,$2,$4 | ||
# if high < th, add the divisor back to high and decrement q
316 | beq $1,$134 | ||
317 | addq $9,$11,$9 | ||
318 | subq $27,1,$27 | ||
319 | $134: | ||
# one digit done: decrement the counter, high -= th; exit after the last digit
320 | subl $12,1,$12 | ||
321 | subq $9,$4,$9 | ||
322 | beq $12,$124 | ||
# keep q as the upper 32 bits of the result and shift the remainder
# left by 32: high = (high << 32) | (low >> 32); low <<= 32
323 | sll $27,32,$13 | ||
324 | sll $9,32,$2 | ||
325 | srl $10,32,$1 | ||
326 | sll $10,32,$10 | ||
327 | bis $2,$1,$9 | ||
328 | br $31,$123 | ||
329 | .align 4 | ||
330 | $124: | ||
# result = (first digit << 32) | second digit
331 | bis $13,$27,$0 | ||
332 | $136: | ||
# restore saved registers, release the frame and return
333 | ldq $26,0($30) | ||
334 | ldq $9,8($30) | ||
335 | ldq $10,16($30) | ||
336 | ldq $11,24($30) | ||
337 | ldq $12,32($30) | ||
338 | ldq $13,40($30) | ||
339 | addq $30,48,$30 | ||
340 | ret $31,($26),1 | ||
341 | .end bn_div64 | ||
342 | .ident "GCC: (GNU) 2.7.2.1" | ||
343 | |||
344 | |||