diff options
Diffstat (limited to 'networking/tls_pstm_sqr_comba.c')
-rw-r--r-- | networking/tls_pstm_sqr_comba.c | 1107 |
1 files changed, 1107 insertions, 0 deletions
diff --git a/networking/tls_pstm_sqr_comba.c b/networking/tls_pstm_sqr_comba.c new file mode 100644 index 000000000..98186d31f --- /dev/null +++ b/networking/tls_pstm_sqr_comba.c | |||
@@ -0,0 +1,1107 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2017 Denys Vlasenko | ||
3 | * | ||
4 | * Licensed under GPLv2, see file LICENSE in this source tree. | ||
5 | */ | ||
6 | #include "tls.h" | ||
7 | |||
8 | /** | ||
9 | * @file pstm_sqr_comba.c | ||
10 | * @version 33ef80f (HEAD, tag: MATRIXSSL-3-7-2-OPEN, tag: MATRIXSSL-3-7-2-COMM, origin/master, origin/HEAD, master) | ||
11 | * | ||
12 | * Multiprecision Squaring with Comba technique. | ||
13 | */ | ||
14 | /* | ||
15 | * Copyright (c) 2013-2015 INSIDE Secure Corporation | ||
16 | * Copyright (c) PeerSec Networks, 2002-2011 | ||
17 | * All Rights Reserved | ||
18 | * | ||
19 | * The latest version of this code is available at http://www.matrixssl.org | ||
20 | * | ||
21 | * This software is open source; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This General Public License does NOT permit incorporating this software | ||
27 | * into proprietary programs. If you are unable to comply with the GPL, a | ||
28 | * commercial license for this software may be purchased from INSIDE at | ||
29 | * http://www.insidesecure.com/eng/Company/Locations | ||
30 | * | ||
31 | * This program is distributed in WITHOUT ANY WARRANTY; without even the | ||
32 | * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
33 | * See the GNU General Public License for more details. | ||
34 | * | ||
35 | * You should have received a copy of the GNU General Public License | ||
36 | * along with this program; if not, write to the Free Software | ||
37 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
38 | * http://www.gnu.org/copyleft/gpl.html | ||
39 | */ | ||
40 | /******************************************************************************/ | ||
41 | |||
42 | ///bbox | ||
43 | //#include "../cryptoApi.h" | ||
44 | #ifndef DISABLE_PSTM | ||
45 | |||
46 | /******************************************************************************/ | ||
47 | #if defined(PSTM_X86) | ||
48 | /* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */ | ||
49 | #if !defined(__GNUC__) || !defined(__i386__) | ||
50 | #error "PSTM_X86 option requires GCC and 32 bit mode x86 processor" | ||
51 | #endif | ||
52 | //#pragma message ("Using 32 bit x86 Assembly Optimizations") | ||
53 | |||
/*
 * Every macro in this branch expands to code that reads and writes the
 * variables c0, c1, c2 (and sc0, sc1, sc2 for the SQRADDSC / SQRADDAC /
 * SQRADDDB group); the expanding function has to declare them.
 */
/* Expands to nothing in this branch. */
54 | #define COMBA_START | ||
55 | |||
/* Zero the three-digit column accumulator c2:c1:c0. */
56 | #define CLEAR_CARRY \ | ||
57 | c0 = c1 = c2 = 0; | ||
58 | |||
/* Store the low accumulator digit c0 into x. */
59 | #define COMBA_STORE(x) \ | ||
60 | x = c0; | ||
61 | |||
/* Store the middle accumulator digit c1 into x. */
62 | #define COMBA_STORE2(x) \ | ||
63 | x = c1; | ||
64 | |||
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
65 | #define CARRY_FORWARD \ | ||
66 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
67 | |||
/* Expands to nothing in this branch. */
68 | #define COMBA_FINI | ||
69 | |||
/*
 * c2:c1:c0 += i * i.
 * Only i is passed to the asm statement; j is not referenced.
 * eax and edx are used as scratch and are listed as clobbers.
 */
70 | #define SQRADD(i, j) \ | ||
71 | asm( \ | ||
72 | "movl %6,%%eax \n\t" \ | ||
73 | "mull %%eax \n\t" \ | ||
74 | "addl %%eax,%0 \n\t" \ | ||
75 | "adcl %%edx,%1 \n\t" \ | ||
76 | "adcl $0,%2 \n\t" \ | ||
77 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","%cc"); | ||
78 | |||
/*
 * c2:c1:c0 += 2 * (i * j).
 * The product is computed once and the add/adc/adc sequence is run twice.
 */
79 | #define SQRADD2(i, j) \ | ||
80 | asm( \ | ||
81 | "movl %6,%%eax \n\t" \ | ||
82 | "mull %7 \n\t" \ | ||
83 | "addl %%eax,%0 \n\t" \ | ||
84 | "adcl %%edx,%1 \n\t" \ | ||
85 | "adcl $0,%2 \n\t" \ | ||
86 | "addl %%eax,%0 \n\t" \ | ||
87 | "adcl %%edx,%1 \n\t" \ | ||
88 | "adcl $0,%2 \n\t" \ | ||
89 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","%cc"); | ||
90 | |||
/* Start the side accumulator: sc1:sc0 = i * j, sc2 = 0. */
91 | #define SQRADDSC(i, j) \ | ||
92 | asm( \ | ||
93 | "movl %6,%%eax \n\t" \ | ||
94 | "mull %7 \n\t" \ | ||
95 | "movl %%eax,%0 \n\t" \ | ||
96 | "movl %%edx,%1 \n\t" \ | ||
97 | "xorl %2,%2 \n\t" \ | ||
98 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); | ||
99 | |||
/* sc2:sc1:sc0 += i * j. */
100 | #define SQRADDAC(i, j) \ | ||
101 | asm( \ | ||
102 | "movl %6,%%eax \n\t" \ | ||
103 | "mull %7 \n\t" \ | ||
104 | "addl %%eax,%0 \n\t" \ | ||
105 | "adcl %%edx,%1 \n\t" \ | ||
106 | "adcl $0,%2 \n\t" \ | ||
107 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","%cc"); | ||
108 | |||
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the side accumulator is added twice). */
109 | #define SQRADDDB \ | ||
110 | asm( \ | ||
111 | "addl %6,%0 \n\t" \ | ||
112 | "adcl %7,%1 \n\t" \ | ||
113 | "adcl %8,%2 \n\t" \ | ||
114 | "addl %6,%0 \n\t" \ | ||
115 | "adcl %7,%1 \n\t" \ | ||
116 | "adcl %8,%2 \n\t" \ | ||
117 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "%cc"); | ||
118 | |||
119 | /******************************************************************************/ | ||
120 | #elif defined(PSTM_X86_64) | ||
121 | /* x86-64 optimized */ | ||
122 | #if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT) | ||
123 | #error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor" | ||
124 | #endif | ||
125 | //#pragma message ("Using 64 bit x86_64 Assembly Optimizations") | ||
126 | |||
/*
 * Every macro in this branch expands to code that reads and writes the
 * variables c0, c1, c2 (and sc0, sc1, sc2 for the SQRADDSC / SQRADDAC /
 * SQRADDDB group); the expanding function has to declare them.
 */
/* Expands to nothing in this branch. */
127 | #define COMBA_START | ||
128 | |||
/* Zero the three-digit column accumulator c2:c1:c0. */
129 | #define CLEAR_CARRY \ | ||
130 | c0 = c1 = c2 = 0; | ||
131 | |||
/* Store the low accumulator digit c0 into x. */
132 | #define COMBA_STORE(x) \ | ||
133 | x = c0; | ||
134 | |||
/* Store the middle accumulator digit c1 into x. */
135 | #define COMBA_STORE2(x) \ | ||
136 | x = c1; | ||
137 | |||
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
138 | #define CARRY_FORWARD \ | ||
139 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
140 | |||
/* Expands to nothing in this branch. */
141 | #define COMBA_FINI | ||
142 | |||
/*
 * c2:c1:c0 += i * i.
 * Only i is passed to the asm statement; j is not referenced.
 * rax and rdx are used as scratch and are listed as clobbers.
 */
143 | #define SQRADD(i, j) \ | ||
144 | asm( \ | ||
145 | "movq %6,%%rax \n\t" \ | ||
146 | "mulq %%rax \n\t" \ | ||
147 | "addq %%rax,%0 \n\t" \ | ||
148 | "adcq %%rdx,%1 \n\t" \ | ||
149 | "adcq $0,%2 \n\t" \ | ||
150 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc"); | ||
151 | |||
/*
 * c2:c1:c0 += 2 * (i * j).
 * The product is computed once and the add/adc/adc sequence is run twice.
 */
152 | #define SQRADD2(i, j) \ | ||
153 | asm( \ | ||
154 | "movq %6,%%rax \n\t" \ | ||
155 | "mulq %7 \n\t" \ | ||
156 | "addq %%rax,%0 \n\t" \ | ||
157 | "adcq %%rdx,%1 \n\t" \ | ||
158 | "adcq $0,%2 \n\t" \ | ||
159 | "addq %%rax,%0 \n\t" \ | ||
160 | "adcq %%rdx,%1 \n\t" \ | ||
161 | "adcq $0,%2 \n\t" \ | ||
162 | :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc"); | ||
163 | |||
/* Start the side accumulator: sc1:sc0 = i * j, sc2 = 0. */
164 | #define SQRADDSC(i, j) \ | ||
165 | asm( \ | ||
166 | "movq %6,%%rax \n\t" \ | ||
167 | "mulq %7 \n\t" \ | ||
168 | "movq %%rax,%0 \n\t" \ | ||
169 | "movq %%rdx,%1 \n\t" \ | ||
170 | "xorq %2,%2 \n\t" \ | ||
171 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc"); | ||
172 | |||
/* sc2:sc1:sc0 += i * j. */
173 | #define SQRADDAC(i, j) \ | ||
174 | asm( \ | ||
175 | "movq %6,%%rax \n\t" \ | ||
176 | "mulq %7 \n\t" \ | ||
177 | "addq %%rax,%0 \n\t" \ | ||
178 | "adcq %%rdx,%1 \n\t" \ | ||
179 | "adcq $0,%2 \n\t" \ | ||
180 | :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc"); | ||
181 | |||
/* c2:c1:c0 += 2 * sc2:sc1:sc0 (the side accumulator is added twice). */
182 | #define SQRADDDB \ | ||
183 | asm( \ | ||
184 | "addq %6,%0 \n\t" \ | ||
185 | "adcq %7,%1 \n\t" \ | ||
186 | "adcq %8,%2 \n\t" \ | ||
187 | "addq %6,%0 \n\t" \ | ||
188 | "adcq %7,%1 \n\t" \ | ||
189 | "adcq %8,%2 \n\t" \ | ||
190 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); | ||
191 | |||
192 | /******************************************************************************/ | ||
193 | #elif defined(PSTM_ARM) | ||
194 | /* ARM code */ | ||
195 | //#pragma message ("Using 32 bit ARM Assembly Optimizations") | ||
196 | |||
/*
 * Every macro in this branch expands to code that reads and writes the
 * variables c0, c1, c2 (and sc0, sc1, sc2 for the SQRADDSC / SQRADDAC /
 * SQRADDDB group); the expanding function has to declare them.
 * Where a product is needed as an intermediate, r0/r1 hold it and are
 * listed as clobbers.
 */
/* Expands to nothing in this branch. */
197 | #define COMBA_START | ||
198 | |||
/* Zero the three-digit column accumulator c2:c1:c0. */
199 | #define CLEAR_CARRY \ | ||
200 | c0 = c1 = c2 = 0; | ||
201 | |||
/* Store the low accumulator digit c0 into x. */
202 | #define COMBA_STORE(x) \ | ||
203 | x = c0; | ||
204 | |||
/* Store the middle accumulator digit c1 into x. */
205 | #define COMBA_STORE2(x) \ | ||
206 | x = c1; | ||
207 | |||
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
208 | #define CARRY_FORWARD \ | ||
209 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
210 | |||
/* Expands to nothing in this branch. */
211 | #define COMBA_FINI | ||
212 | |||
213 | /* c2:c1:c0 += i * i; only i is passed to the asm, j is not referenced */ | ||
214 | #define SQRADD(i, j) \ | ||
215 | asm( \ | ||
216 | " UMULL r0,r1,%6,%6 \n\t" \ | ||
217 | " ADDS %0,%0,r0 \n\t" \ | ||
218 | " ADCS %1,%1,r1 \n\t" \ | ||
219 | " ADC %2,%2,#0 \n\t" \ | ||
220 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "%cc"); | ||
221 | |||
222 | /* c2:c1:c0 += 2 * (i * j): the r1:r0 product is added to the accumulator twice */ | ||
223 | #define SQRADD2(i, j) \ | ||
224 | asm( \ | ||
225 | " UMULL r0,r1,%6,%7 \n\t" \ | ||
226 | " ADDS %0,%0,r0 \n\t" \ | ||
227 | " ADCS %1,%1,r1 \n\t" \ | ||
228 | " ADC %2,%2,#0 \n\t" \ | ||
229 | " ADDS %0,%0,r0 \n\t" \ | ||
230 | " ADCS %1,%1,r1 \n\t" \ | ||
231 | " ADC %2,%2,#0 \n\t" \ | ||
232 | :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "%cc"); | ||
233 | |||
/*
 * Start the side accumulator: sc1:sc0 = i * j, sc2 = 0.
 * The product is written straight into the output operands, and sc2 is
 * cleared by subtracting it from itself.
 */
234 | #define SQRADDSC(i, j) \ | ||
235 | asm( \ | ||
236 | " UMULL %0,%1,%6,%7 \n\t" \ | ||
237 | " SUB %2,%2,%2 \n\t" \ | ||
238 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "%cc"); | ||
239 | |||
/* sc2:sc1:sc0 += i * j. */
240 | #define SQRADDAC(i, j) \ | ||
241 | asm( \ | ||
242 | " UMULL r0,r1,%6,%7 \n\t" \ | ||
243 | " ADDS %0,%0,r0 \n\t" \ | ||
244 | " ADCS %1,%1,r1 \n\t" \ | ||
245 | " ADC %2,%2,#0 \n\t" \ | ||
246 | :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "%cc"); | ||
247 | |||
/*
 * c2:c1:c0 += 2 * sc2:sc1:sc0 (the side accumulator is added twice).
 * Operand order differs from the other macros here: %3..%5 are
 * sc0..sc2 and the tied c0..c2 inputs come last.
 */
248 | #define SQRADDDB \ | ||
249 | asm( \ | ||
250 | " ADDS %0,%0,%3 \n\t" \ | ||
251 | " ADCS %1,%1,%4 \n\t" \ | ||
252 | " ADC %2,%2,%5 \n\t" \ | ||
253 | " ADDS %0,%0,%3 \n\t" \ | ||
254 | " ADCS %1,%1,%4 \n\t" \ | ||
255 | " ADC %2,%2,%5 \n\t" \ | ||
256 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "%cc"); | ||
257 | |||
258 | /******************************************************************************/ | ||
259 | #elif defined(PSTM_MIPS) | ||
260 | /* MIPS32 */ | ||
261 | //#pragma message ("Using 32 bit MIPS Assembly Optimizations") | ||
262 | |||
/*
 * Every macro in this branch expands to code that reads and writes the
 * variables c0, c1, c2 (and sc0, sc1, sc2 for the SQRADDSC / SQRADDAC /
 * SQRADDDB group); the expanding function has to declare them.
 * The carry out of each addu is recomputed with sltu (sum < addend) and
 * then added into the next higher digit.
 * NOTE(review): multu writes HI/LO, but neither appears in any clobber
 * list below - confirm whether the targeted toolchains need them listed.
 */
/* Expands to nothing in this branch. */
263 | #define COMBA_START | ||
264 | |||
/* Zero the three-digit column accumulator c2:c1:c0. */
265 | #define CLEAR_CARRY \ | ||
266 | c0 = c1 = c2 = 0; | ||
267 | |||
/* Store the low accumulator digit c0 into x. */
268 | #define COMBA_STORE(x) \ | ||
269 | x = c0; | ||
270 | |||
/* Store the middle accumulator digit c1 into x. */
271 | #define COMBA_STORE2(x) \ | ||
272 | x = c1; | ||
273 | |||
/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
274 | #define CARRY_FORWARD \ | ||
275 | do { c0 = c1; c1 = c2; c2 = 0; } while (0); | ||
276 | |||
/* Expands to nothing in this branch. */
277 | #define COMBA_FINI | ||
278 | |||
279 | /* c2:c1:c0 += i * i; only i is passed to the asm, j is not referenced */ | ||
280 | #define SQRADD(i, j) \ | ||
281 | asm( \ | ||
282 | " multu %6,%6 \n\t" \ | ||
283 | " mflo $12 \n\t" \ | ||
284 | " mfhi $13 \n\t" \ | ||
285 | " addu %0,%0,$12 \n\t" \ | ||
286 | " sltu $12,%0,$12 \n\t" \ | ||
287 | " addu %1,%1,$13 \n\t" \ | ||
288 | " sltu $13,%1,$13 \n\t" \ | ||
289 | " addu %1,%1,$12 \n\t" \ | ||
290 | " sltu $12,%1,$12 \n\t" \ | ||
291 | " addu %2,%2,$13 \n\t" \ | ||
292 | " addu %2,%2,$12 \n\t" \ | ||
293 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13"); | ||
294 | |||
295 | /* c2:c1:c0 += 2 * (i * j): the $13:$12 product is kept intact and added twice, carries go through $14/$15 */ | ||
296 | #define SQRADD2(i, j) \ | ||
297 | asm( \ | ||
298 | " multu %6,%7 \n\t" \ | ||
299 | " mflo $12 \n\t" \ | ||
300 | " mfhi $13 \n\t" \ | ||
301 | \ | ||
302 | " addu %0,%0,$12 \n\t" \ | ||
303 | " sltu $14,%0,$12 \n\t" \ | ||
304 | " addu %1,%1,$13 \n\t" \ | ||
305 | " sltu $15,%1,$13 \n\t" \ | ||
306 | " addu %1,%1,$14 \n\t" \ | ||
307 | " sltu $14,%1,$14 \n\t" \ | ||
308 | " addu %2,%2,$15 \n\t" \ | ||
309 | " addu %2,%2,$14 \n\t" \ | ||
310 | \ | ||
311 | " addu %0,%0,$12 \n\t" \ | ||
312 | " sltu $14,%0,$12 \n\t" \ | ||
313 | " addu %1,%1,$13 \n\t" \ | ||
314 | " sltu $15,%1,$13 \n\t" \ | ||
315 | " addu %1,%1,$14 \n\t" \ | ||
316 | " sltu $14,%1,$14 \n\t" \ | ||
317 | " addu %2,%2,$15 \n\t" \ | ||
318 | " addu %2,%2,$14 \n\t" \ | ||
319 | :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15"); | ||
320 | |||
/*
 * Start the side accumulator: sc1:sc0 = i * j, sc2 = 0.
 * LO/HI are moved straight into the output operands; sc2 is cleared by
 * xor-ing it with itself.
 */
321 | #define SQRADDSC(i, j) \ | ||
322 | asm( \ | ||
323 | " multu %6,%7 \n\t" \ | ||
324 | " mflo %0 \n\t" \ | ||
325 | " mfhi %1 \n\t" \ | ||
326 | " xor %2,%2,%2 \n\t" \ | ||
327 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "%cc"); | ||
328 | |||
/*
 * sc2:sc1:sc0 += i * j.
 * NOTE(review): "$14" is listed as a clobber but the template only
 * touches $12 and $13 - looks harmless, confirm before removing.
 */
329 | #define SQRADDAC(i, j) \ | ||
330 | asm( \ | ||
331 | " multu %6,%7 \n\t" \ | ||
332 | " mflo $12 \n\t" \ | ||
333 | " mfhi $13 \n\t" \ | ||
334 | " addu %0,%0,$12 \n\t" \ | ||
335 | " sltu $12,%0,$12 \n\t" \ | ||
336 | " addu %1,%1,$13 \n\t" \ | ||
337 | " sltu $13,%1,$13 \n\t" \ | ||
338 | " addu %1,%1,$12 \n\t" \ | ||
339 | " sltu $12,%1,$12 \n\t" \ | ||
340 | " addu %2,%2,$13 \n\t" \ | ||
341 | " addu %2,%2,$12 \n\t" \ | ||
342 | :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14"); | ||
343 | |||
/*
 * c2:c1:c0 += 2 * sc2:sc1:sc0 (the same nine-instruction add is run twice).
 * Operand order: %3..%5 are sc0..sc2, the tied c0..c2 inputs come last.
 * $10/$11 carry the two possible carries into c2.
 */
344 | #define SQRADDDB \ | ||
345 | asm( \ | ||
346 | " addu %0,%0,%3 \n\t" \ | ||
347 | " sltu $10,%0,%3 \n\t" \ | ||
348 | " addu %1,%1,$10 \n\t" \ | ||
349 | " sltu $10,%1,$10 \n\t" \ | ||
350 | " addu %1,%1,%4 \n\t" \ | ||
351 | " sltu $11,%1,%4 \n\t" \ | ||
352 | " addu %2,%2,$10 \n\t" \ | ||
353 | " addu %2,%2,$11 \n\t" \ | ||
354 | " addu %2,%2,%5 \n\t" \ | ||
355 | \ | ||
356 | " addu %0,%0,%3 \n\t" \ | ||
357 | " sltu $10,%0,%3 \n\t" \ | ||
358 | " addu %1,%1,$10 \n\t" \ | ||
359 | " sltu $10,%1,$10 \n\t" \ | ||
360 | " addu %1,%1,%4 \n\t" \ | ||
361 | " sltu $11,%1,%4 \n\t" \ | ||
362 | " addu %2,%2,$10 \n\t" \ | ||
363 | " addu %2,%2,$11 \n\t" \ | ||
364 | " addu %2,%2,%5 \n\t" \ | ||
365 | :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11"); | ||
366 | |||
367 | #else | ||
368 | /******************************************************************************/ | ||
#define PSTM_ISO
/* ISO C portable code */

/*
 * Portable versions of the Comba column primitives.
 *
 * Every macro expands to code that reads and writes variables of the
 * expanding function: the column accumulator c0, c1, c2, the side
 * accumulator sc0, sc1, sc2, and (SQRADD2 only) a pstm_word scratch
 * variable named tt.  Functions that use these macros declare tt under
 * #ifdef PSTM_ISO.
 *
 * The i / j arguments are parenthesized before being widened to
 * pstm_word, so an expression argument is evaluated in its own type first,
 * the same way the assembly variants receive an already-evaluated operand.
 *
 * The expansions end in ';' (as they always have); call sites add their
 * own ';' as well, which yields a harmless empty statement.
 */

/* Expands to nothing in this branch. */
#define COMBA_START

/* Zero the three-digit column accumulator c2:c1:c0. */
#define CLEAR_CARRY \
	c0 = c1 = c2 = 0;

/* Store the low accumulator digit c0 into x. */
#define COMBA_STORE(x) \
	x = c0;

/* Store the middle accumulator digit c1 into x. */
#define COMBA_STORE2(x) \
	x = c1;

/* Shift the accumulator down one digit: c0 <- c1, c1 <- c2, c2 <- 0. */
#define CARRY_FORWARD \
	do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Expands to nothing in this branch. */
#define COMBA_FINI

/* c2:c1:c0 += i * j (callers pass the same digit for both to get a square) */
#define SQRADD(i, j) \
	do { pstm_word t; \
	t = c0 + ((pstm_word)(i)) * ((pstm_word)(j)); c0 = (pstm_digit)t; \
	t = c1 + (t >> DIGIT_BIT); \
	c1 = (pstm_digit)t; c2 += (pstm_digit)(t >> DIGIT_BIT); \
	} while (0);


/* c2:c1:c0 += 2 * (i * j): the product t is added to the accumulator twice */
#define SQRADD2(i, j) \
	do { pstm_word t; \
	t = ((pstm_word)(i)) * ((pstm_word)(j)); \
	tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \
	tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
	c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \
	tt = (pstm_word)c0 + t; c0 = (pstm_digit)tt; \
	tt = (pstm_word)c1 + (tt >> DIGIT_BIT); \
	c1 = (pstm_digit)tt; c2 += (pstm_digit)(tt >> DIGIT_BIT); \
	} while (0);

/* Start the side accumulator: sc1:sc0 = i * j, sc2 = 0 */
#define SQRADDSC(i, j) \
	do { pstm_word t; \
	t = ((pstm_word)(i)) * ((pstm_word)(j)); \
	sc0 = (pstm_digit)t; sc1 = (pstm_digit)(t >> DIGIT_BIT); sc2 = 0; \
	} while (0);

/* sc2:sc1:sc0 += i * j */
#define SQRADDAC(i, j) \
	do { pstm_word t; \
	t = ((pstm_word)sc0) + ((pstm_word)(i)) * ((pstm_word)(j)); \
	sc0 = (pstm_digit)t; \
	t = ((pstm_word)sc1) + (t >> DIGIT_BIT); sc1 = (pstm_digit)t; \
	sc2 += (pstm_digit)(t >> DIGIT_BIT); \
	} while (0);

/* c2:c1:c0 += 2 * sc2:sc1:sc0 */
#define SQRADDDB \
	do { pstm_word t; \
	t = ((pstm_word)sc0) + ((pstm_word)sc0) + ((pstm_word)c0); \
	c0 = (pstm_digit)t; \
	t = ((pstm_word)sc1) + ((pstm_word)sc1) + c1 + (t >> DIGIT_BIT); \
	c1 = (pstm_digit)t; \
	c2 = c2 + sc2 + sc2 + (pstm_digit)(t >> DIGIT_BIT); \
	} while (0);
431 | |||
432 | #endif /* ISO_C */ | ||
433 | |||
434 | /******************************************************************************/ | ||
435 | /* | ||
436 | Non-unrolled comba squarer | ||
437 | */ | ||
438 | ///bbox: pool unused | ||
439 | #define pstm_sqr_comba_gen(pool, A, B, paD, paDlen) \ | ||
440 | pstm_sqr_comba_gen( A, B, paD, paDlen) | ||
441 | static int32 pstm_sqr_comba_gen(psPool_t *pool, pstm_int *A, pstm_int *B, | ||
442 | pstm_digit *paD, uint32 paDlen) | ||
443 | { | ||
444 | int16 paDfail, pa; | ||
445 | int32 ix, iz; | ||
446 | pstm_digit c0, c1, c2, *dst; | ||
447 | #ifdef PSTM_ISO | ||
448 | pstm_word tt; | ||
449 | #endif | ||
450 | |||
451 | paDfail = 0; | ||
452 | /* get size of output and trim */ | ||
453 | pa = A->used + A->used; | ||
454 | |||
455 | /* number of output digits to produce */ | ||
456 | COMBA_START; | ||
457 | CLEAR_CARRY; | ||
458 | /* | ||
459 | If b is not large enough grow it and continue | ||
460 | */ | ||
461 | if (B->alloc < pa) { | ||
462 | if (pstm_grow(B, pa) != PSTM_OKAY) { | ||
463 | return PS_MEM_FAIL; | ||
464 | } | ||
465 | } | ||
466 | if (paD != NULL) { | ||
467 | if (paDlen < (sizeof(pstm_digit) * pa)) { | ||
468 | paDfail = 1; /* have a paD, but it's not big enough */ | ||
469 | dst = xzalloc(sizeof(pstm_digit) * pa); | ||
470 | } else { | ||
471 | dst = paD; | ||
472 | memset(dst, 0x0, paDlen); | ||
473 | } | ||
474 | } else { | ||
475 | dst = xzalloc(sizeof(pstm_digit) * pa); | ||
476 | } | ||
477 | |||
478 | for (ix = 0; ix < pa; ix++) { | ||
479 | int32 tx, ty, iy; | ||
480 | pstm_digit *tmpy, *tmpx; | ||
481 | |||
482 | /* get offsets into the two bignums */ | ||
483 | ty = min(A->used-1, ix); | ||
484 | tx = ix - ty; | ||
485 | |||
486 | /* setup temp aliases */ | ||
487 | tmpx = A->dp + tx; | ||
488 | tmpy = A->dp + ty; | ||
489 | |||
490 | /* | ||
491 | This is the number of times the loop will iterate, | ||
492 | while (tx++ < a->used && ty-- >= 0) { ... } | ||
493 | */ | ||
494 | iy = min(A->used-tx, ty+1); | ||
495 | |||
496 | /* | ||
497 | now for squaring tx can never equal ty. We halve the distance since | ||
498 | they approach at a rate of 2x and we have to round because odd cases | ||
499 | need to be executed | ||
500 | */ | ||
501 | iy = min(iy, (ty-tx+1)>>1); | ||
502 | |||
503 | /* forward carries */ | ||
504 | CARRY_FORWARD; | ||
505 | |||
506 | /* execute loop */ | ||
507 | for (iz = 0; iz < iy; iz++) { | ||
508 | SQRADD2(*tmpx++, *tmpy--); | ||
509 | } | ||
510 | |||
511 | /* even columns have the square term in them */ | ||
512 | if ((ix&1) == 0) { | ||
513 | SQRADD(A->dp[ix>>1], A->dp[ix>>1]); | ||
514 | } | ||
515 | |||
516 | /* store it */ | ||
517 | COMBA_STORE(dst[ix]); | ||
518 | } | ||
519 | |||
520 | COMBA_FINI; | ||
521 | /* | ||
522 | setup dest | ||
523 | */ | ||
524 | iz = B->used; | ||
525 | B->used = pa; | ||
526 | { | ||
527 | pstm_digit *tmpc; | ||
528 | tmpc = B->dp; | ||
529 | for (ix = 0; ix < pa; ix++) { | ||
530 | *tmpc++ = dst[ix]; | ||
531 | } | ||
532 | /* clear unused digits (that existed in the old copy of c) */ | ||
533 | for (; ix < iz; ix++) { | ||
534 | *tmpc++ = 0; | ||
535 | } | ||
536 | } | ||
537 | pstm_clamp(B); | ||
538 | |||
539 | if ((paD == NULL) || paDfail == 1) { | ||
540 | psFree(dst, pool); | ||
541 | } | ||
542 | return PS_SUCCESS; | ||
543 | } | ||
544 | |||
545 | /******************************************************************************/ | ||
546 | /* | ||
547 | Unrolled Comba loop for 1024 bit keys | ||
548 | */ | ||
549 | #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS | ||
/*
 * Fully unrolled Comba squaring for a 16-digit operand: B = A * A.
 *
 * Digits a[0]..a[15] of A->dp are read unconditionally; A->used is not
 * checked here, so the caller is presumably expected to dispatch to this
 * function only for 16-digit inputs (dispatch code is not visible here).
 * The 32 result digits are built in the local array b[] and copied to
 * B->dp at the end; B is grown to 32 digits first if B->alloc is smaller.
 *
 * Column scheme: columns with at most two cross products (outputs 1-4 and
 * 26-29) add each doubled product directly with SQRADD2.  Columns 5-25 sum
 * their cross products once in the side accumulator sc2:sc1:sc0
 * (SQRADDSC starts it, SQRADDAC adds to it) and then add it twice with
 * SQRADDDB.  Even columns additionally add the square term with SQRADD.
 *
 * Returns PSTM_OKAY, or PS_MEM_FAIL when B could not be grown.
 */
550 | static int32 pstm_sqr_comba16(pstm_int *A, pstm_int *B) | ||
551 | { | ||
552 | pstm_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; | ||
553 | #ifdef PSTM_ISO | ||
554 | pstm_word tt; | ||
555 | #endif | ||
556 | |||
557 | if (B->alloc < 32) { | ||
558 | if (pstm_grow(B, 32) != PSTM_OKAY) { | ||
559 | return PS_MEM_FAIL; | ||
560 | } | ||
561 | } | ||
562 | a = A->dp; | ||
563 | sc0 = sc1 = sc2 = 0; | ||
564 | |||
565 | COMBA_START; | ||
566 | |||
567 | /* clear carries */ | ||
568 | CLEAR_CARRY; | ||
569 | |||
570 | /* output 0 */ | ||
571 | SQRADD(a[0],a[0]); | ||
572 | COMBA_STORE(b[0]); | ||
573 | |||
574 | /* output 1 */ | ||
575 | CARRY_FORWARD; | ||
576 | SQRADD2(a[0], a[1]); | ||
577 | COMBA_STORE(b[1]); | ||
578 | |||
579 | /* output 2 */ | ||
580 | CARRY_FORWARD; | ||
581 | SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); | ||
582 | COMBA_STORE(b[2]); | ||
583 | |||
584 | /* output 3 */ | ||
585 | CARRY_FORWARD; | ||
586 | SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); | ||
587 | COMBA_STORE(b[3]); | ||
588 | |||
589 | /* output 4 */ | ||
590 | CARRY_FORWARD; | ||
591 | SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); | ||
592 | COMBA_STORE(b[4]); | ||
593 | |||
594 | /* output 5 */ | ||
595 | CARRY_FORWARD; | ||
596 | SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; | ||
597 | COMBA_STORE(b[5]); | ||
598 | |||
599 | /* output 6 */ | ||
600 | CARRY_FORWARD; | ||
601 | SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); | ||
602 | COMBA_STORE(b[6]); | ||
603 | |||
604 | /* output 7 */ | ||
605 | CARRY_FORWARD; | ||
606 | SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; | ||
607 | COMBA_STORE(b[7]); | ||
608 | |||
609 | /* output 8 */ | ||
610 | CARRY_FORWARD; | ||
611 | SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); | ||
612 | COMBA_STORE(b[8]); | ||
613 | |||
614 | /* output 9 */ | ||
615 | CARRY_FORWARD; | ||
616 | SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; | ||
617 | COMBA_STORE(b[9]); | ||
618 | |||
619 | /* output 10 */ | ||
620 | CARRY_FORWARD; | ||
621 | SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); | ||
622 | COMBA_STORE(b[10]); | ||
623 | |||
624 | /* output 11 */ | ||
625 | CARRY_FORWARD; | ||
626 | SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; | ||
627 | COMBA_STORE(b[11]); | ||
628 | |||
629 | /* output 12 */ | ||
630 | CARRY_FORWARD; | ||
631 | SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); | ||
632 | COMBA_STORE(b[12]); | ||
633 | |||
634 | /* output 13 */ | ||
635 | CARRY_FORWARD; | ||
636 | SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; | ||
637 | COMBA_STORE(b[13]); | ||
638 | |||
639 | /* output 14 */ | ||
640 | CARRY_FORWARD; | ||
641 | SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); | ||
642 | COMBA_STORE(b[14]); | ||
643 | |||
644 | /* output 15 */ | ||
645 | CARRY_FORWARD; | ||
646 | SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; | ||
647 | COMBA_STORE(b[15]); | ||
648 | |||
649 | /* output 16 */ | ||
650 | CARRY_FORWARD; | ||
651 | SQRADDSC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); | ||
652 | COMBA_STORE(b[16]); | ||
653 | |||
654 | /* output 17 */ | ||
655 | CARRY_FORWARD; | ||
656 | SQRADDSC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; | ||
657 | COMBA_STORE(b[17]); | ||
658 | |||
659 | /* output 18 */ | ||
660 | CARRY_FORWARD; | ||
661 | SQRADDSC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); | ||
662 | COMBA_STORE(b[18]); | ||
663 | |||
664 | /* output 19 */ | ||
665 | CARRY_FORWARD; | ||
666 | SQRADDSC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; | ||
667 | COMBA_STORE(b[19]); | ||
668 | |||
669 | /* output 20 */ | ||
670 | CARRY_FORWARD; | ||
671 | SQRADDSC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); | ||
672 | COMBA_STORE(b[20]); | ||
673 | |||
674 | /* output 21 */ | ||
675 | CARRY_FORWARD; | ||
676 | SQRADDSC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; | ||
677 | COMBA_STORE(b[21]); | ||
678 | |||
679 | /* output 22 */ | ||
680 | CARRY_FORWARD; | ||
681 | SQRADDSC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); | ||
682 | COMBA_STORE(b[22]); | ||
683 | |||
684 | /* output 23 */ | ||
685 | CARRY_FORWARD; | ||
686 | SQRADDSC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; | ||
687 | COMBA_STORE(b[23]); | ||
688 | |||
689 | /* output 24 */ | ||
690 | CARRY_FORWARD; | ||
691 | SQRADDSC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); | ||
692 | COMBA_STORE(b[24]); | ||
693 | |||
694 | /* output 25 */ | ||
695 | CARRY_FORWARD; | ||
696 | SQRADDSC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; | ||
697 | COMBA_STORE(b[25]); | ||
698 | |||
699 | /* output 26 */ | ||
700 | CARRY_FORWARD; | ||
701 | SQRADD2(a[11], a[15]); SQRADD2(a[12], a[14]); SQRADD(a[13], a[13]); | ||
702 | COMBA_STORE(b[26]); | ||
703 | |||
704 | /* output 27 */ | ||
705 | CARRY_FORWARD; | ||
706 | SQRADD2(a[12], a[15]); SQRADD2(a[13], a[14]); | ||
707 | COMBA_STORE(b[27]); | ||
708 | |||
709 | /* output 28 */ | ||
710 | CARRY_FORWARD; | ||
711 | SQRADD2(a[13], a[15]); SQRADD(a[14], a[14]); | ||
712 | COMBA_STORE(b[28]); | ||
713 | |||
714 | /* output 29 */ | ||
715 | CARRY_FORWARD; | ||
716 | SQRADD2(a[14], a[15]); | ||
717 | COMBA_STORE(b[29]); | ||
718 | |||
719 | /* output 30 */ | ||
720 | CARRY_FORWARD; | ||
721 | SQRADD(a[15], a[15]); | ||
722 | COMBA_STORE(b[30]); | ||
/* the carry left in c1 after the last column is the top digit */
723 | COMBA_STORE2(b[31]); | ||
724 | COMBA_FINI; | ||
725 | |||
/* publish the result: 32 digits, non-negative, then clamp */
726 | B->used = 32; | ||
727 | B->sign = PSTM_ZPOS; | ||
728 | memcpy(B->dp, b, 32 * sizeof(pstm_digit)); | ||
729 | pstm_clamp(B); | ||
730 | return PSTM_OKAY; | ||
731 | } | ||
732 | #endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */ | ||
733 | |||
734 | |||
735 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
736 | static int32 pstm_sqr_comba32(pstm_int *A, pstm_int *B) | ||
737 | { | ||
738 | pstm_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; | ||
739 | #ifdef PSTM_ISO | ||
740 | pstm_word tt; | ||
741 | #endif | ||
742 | |||
743 | if (B->alloc < 64) { | ||
744 | if (pstm_grow(B, 64) != PSTM_OKAY) { | ||
745 | return PS_MEM_FAIL; | ||
746 | } | ||
747 | } | ||
748 | sc0 = sc1 = sc2 = 0; | ||
749 | a = A->dp; | ||
750 | COMBA_START; | ||
751 | |||
752 | /* clear carries */ | ||
753 | CLEAR_CARRY; | ||
754 | |||
755 | /* output 0 */ | ||
756 | SQRADD(a[0],a[0]); | ||
757 | COMBA_STORE(b[0]); | ||
758 | |||
759 | /* output 1 */ | ||
760 | CARRY_FORWARD; | ||
761 | SQRADD2(a[0], a[1]); | ||
762 | COMBA_STORE(b[1]); | ||
763 | |||
764 | /* output 2 */ | ||
765 | CARRY_FORWARD; | ||
766 | SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]); | ||
767 | COMBA_STORE(b[2]); | ||
768 | |||
769 | /* output 3 */ | ||
770 | CARRY_FORWARD; | ||
771 | SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]); | ||
772 | COMBA_STORE(b[3]); | ||
773 | |||
774 | /* output 4 */ | ||
775 | CARRY_FORWARD; | ||
776 | SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]); | ||
777 | COMBA_STORE(b[4]); | ||
778 | |||
779 | /* output 5 */ | ||
780 | CARRY_FORWARD; | ||
781 | SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]); SQRADDDB; | ||
782 | COMBA_STORE(b[5]); | ||
783 | |||
784 | /* output 6 */ | ||
785 | CARRY_FORWARD; | ||
786 | SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]); SQRADDDB; SQRADD(a[3], a[3]); | ||
787 | COMBA_STORE(b[6]); | ||
788 | |||
789 | /* output 7 */ | ||
790 | CARRY_FORWARD; | ||
791 | SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]); SQRADDDB; | ||
792 | COMBA_STORE(b[7]); | ||
793 | |||
794 | /* output 8 */ | ||
795 | CARRY_FORWARD; | ||
796 | SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]); SQRADDDB; SQRADD(a[4], a[4]); | ||
797 | COMBA_STORE(b[8]); | ||
798 | |||
799 | /* output 9 */ | ||
800 | CARRY_FORWARD; | ||
801 | SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]); SQRADDAC(a[4], a[5]); SQRADDDB; | ||
802 | COMBA_STORE(b[9]); | ||
803 | |||
804 | /* output 10 */ | ||
805 | CARRY_FORWARD; | ||
806 | SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]); SQRADDAC(a[4], a[6]); SQRADDDB; SQRADD(a[5], a[5]); | ||
807 | COMBA_STORE(b[10]); | ||
808 | |||
809 | /* output 11 */ | ||
810 | CARRY_FORWARD; | ||
811 | SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]); SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]); SQRADDDB; | ||
812 | COMBA_STORE(b[11]); | ||
813 | |||
814 | /* output 12 */ | ||
815 | CARRY_FORWARD; | ||
816 | SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]); SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]); SQRADDDB; SQRADD(a[6], a[6]); | ||
817 | COMBA_STORE(b[12]); | ||
818 | |||
819 | /* output 13 */ | ||
820 | CARRY_FORWARD; | ||
821 | SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]); SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]); SQRADDDB; | ||
822 | COMBA_STORE(b[13]); | ||
823 | |||
824 | /* output 14 */ | ||
825 | CARRY_FORWARD; | ||
826 | SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]); SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]); SQRADDDB; SQRADD(a[7], a[7]); | ||
827 | COMBA_STORE(b[14]); | ||
828 | |||
829 | /* output 15 */ | ||
830 | CARRY_FORWARD; | ||
831 | SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]); SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]); SQRADDDB; | ||
832 | COMBA_STORE(b[15]); | ||
833 | |||
834 | /* output 16 */ | ||
835 | CARRY_FORWARD; | ||
836 | SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]); SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]); SQRADDDB; SQRADD(a[8], a[8]); | ||
837 | COMBA_STORE(b[16]); | ||
838 | |||
839 | /* output 17 */ | ||
840 | CARRY_FORWARD; | ||
841 | SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]); SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]); SQRADDAC(a[8], a[9]); SQRADDDB; | ||
842 | COMBA_STORE(b[17]); | ||
843 | |||
844 | /* output 18 */ | ||
845 | CARRY_FORWARD; | ||
846 | SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]); SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]); SQRADDAC(a[8], a[10]); SQRADDDB; SQRADD(a[9], a[9]); | ||
847 | COMBA_STORE(b[18]); | ||
848 | |||
849 | /* output 19 */ | ||
850 | CARRY_FORWARD; | ||
851 | SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]); SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]); SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]); SQRADDDB; | ||
852 | COMBA_STORE(b[19]); | ||
853 | |||
854 | /* output 20 */ | ||
855 | CARRY_FORWARD; | ||
856 | SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]); SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]); SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]); SQRADDDB; SQRADD(a[10], a[10]); | ||
857 | COMBA_STORE(b[20]); | ||
858 | |||
859 | /* output 21 */ | ||
860 | CARRY_FORWARD; | ||
861 | SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]); SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]); SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]); SQRADDDB; | ||
862 | COMBA_STORE(b[21]); | ||
863 | |||
864 | /* output 22 */ | ||
865 | CARRY_FORWARD; | ||
866 | SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]); SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]); SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]); SQRADDDB; SQRADD(a[11], a[11]); | ||
867 | COMBA_STORE(b[22]); | ||
868 | |||
869 | /* output 23 */ | ||
870 | CARRY_FORWARD; | ||
871 | SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]); SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]); SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]); SQRADDDB; | ||
872 | COMBA_STORE(b[23]); | ||
873 | |||
874 | /* output 24 */ | ||
875 | CARRY_FORWARD; | ||
876 | SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]); SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]); SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]); SQRADDDB; SQRADD(a[12], a[12]); | ||
877 | COMBA_STORE(b[24]); | ||
878 | |||
879 | /* output 25 */ | ||
880 | CARRY_FORWARD; | ||
881 | SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]); SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]); SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]); SQRADDAC(a[12], a[13]); SQRADDDB; | ||
882 | COMBA_STORE(b[25]); | ||
883 | |||
884 | /* output 26 */ | ||
885 | CARRY_FORWARD; | ||
886 | SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]); SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]); SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]); SQRADDAC(a[12], a[14]); SQRADDDB; SQRADD(a[13], a[13]); | ||
887 | COMBA_STORE(b[26]); | ||
888 | |||
889 | /* output 27 */ | ||
890 | CARRY_FORWARD; | ||
891 | SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]); SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]); SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]); SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]); SQRADDDB; | ||
892 | COMBA_STORE(b[27]); | ||
893 | |||
894 | /* output 28 */ | ||
895 | CARRY_FORWARD; | ||
896 | SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]); SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]); SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]); SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]); SQRADDDB; SQRADD(a[14], a[14]); | ||
897 | COMBA_STORE(b[28]); | ||
898 | |||
899 | /* output 29 */ | ||
900 | CARRY_FORWARD; | ||
901 | SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]); SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]); SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]); SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]); SQRADDDB; | ||
902 | COMBA_STORE(b[29]); | ||
903 | |||
904 | /* output 30 */ | ||
905 | CARRY_FORWARD; | ||
906 | SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]); SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]); SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]); SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]); SQRADDDB; SQRADD(a[15], a[15]); | ||
907 | COMBA_STORE(b[30]); | ||
908 | |||
909 | /* output 31 */ | ||
910 | CARRY_FORWARD; | ||
911 | SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]); SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]); SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]); SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]); SQRADDDB; | ||
912 | COMBA_STORE(b[31]); | ||
913 | |||
914 | /* output 32 */ | ||
915 | CARRY_FORWARD; | ||
916 | SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]); SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]); SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]); SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]); SQRADDDB; SQRADD(a[16], a[16]); | ||
917 | COMBA_STORE(b[32]); | ||
918 | |||
919 | /* output 33 */ | ||
920 | CARRY_FORWARD; | ||
921 | SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]); SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]); SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]); SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]); SQRADDDB; | ||
922 | COMBA_STORE(b[33]); | ||
923 | |||
924 | /* output 34 */ | ||
925 | CARRY_FORWARD; | ||
926 | SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]); SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]); SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]); SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]); SQRADDDB; SQRADD(a[17], a[17]); | ||
927 | COMBA_STORE(b[34]); | ||
928 | |||
929 | /* output 35 */ | ||
930 | CARRY_FORWARD; | ||
931 | SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]); SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]); SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]); SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]); SQRADDDB; | ||
932 | COMBA_STORE(b[35]); | ||
933 | |||
934 | /* output 36 */ | ||
935 | CARRY_FORWARD; | ||
936 | SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]); SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]); SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]); SQRADDAC(a[17], a[19]); SQRADDDB; SQRADD(a[18], a[18]); | ||
937 | COMBA_STORE(b[36]); | ||
938 | |||
939 | /* output 37 */ | ||
940 | CARRY_FORWARD; | ||
941 | SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]); SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]); SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]); SQRADDAC(a[18], a[19]); SQRADDDB; | ||
942 | COMBA_STORE(b[37]); | ||
943 | |||
944 | /* output 38 */ | ||
945 | CARRY_FORWARD; | ||
946 | SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]); SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]); SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]); SQRADDDB; SQRADD(a[19], a[19]); | ||
947 | COMBA_STORE(b[38]); | ||
948 | |||
949 | /* output 39 */ | ||
950 | CARRY_FORWARD; | ||
951 | SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]); SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]); SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]); SQRADDDB; | ||
952 | COMBA_STORE(b[39]); | ||
953 | |||
954 | /* output 40 */ | ||
955 | CARRY_FORWARD; | ||
956 | SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]); SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]); SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]); SQRADDDB; SQRADD(a[20], a[20]); | ||
957 | COMBA_STORE(b[40]); | ||
958 | |||
959 | /* output 41 */ | ||
960 | CARRY_FORWARD; | ||
961 | SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]); SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]); SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]); SQRADDDB; | ||
962 | COMBA_STORE(b[41]); | ||
963 | |||
964 | /* output 42 */ | ||
965 | CARRY_FORWARD; | ||
966 | SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]); SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]); SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]); SQRADDDB; SQRADD(a[21], a[21]); | ||
967 | COMBA_STORE(b[42]); | ||
968 | |||
969 | /* output 43 */ | ||
970 | CARRY_FORWARD; | ||
971 | SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]); SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]); SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]); SQRADDDB; | ||
972 | COMBA_STORE(b[43]); | ||
973 | |||
974 | /* output 44 */ | ||
975 | CARRY_FORWARD; | ||
976 | SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]); SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]); SQRADDAC(a[21], a[23]); SQRADDDB; SQRADD(a[22], a[22]); | ||
977 | COMBA_STORE(b[44]); | ||
978 | |||
979 | /* output 45 */ | ||
980 | CARRY_FORWARD; | ||
981 | SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]); SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]); SQRADDAC(a[22], a[23]); SQRADDDB; | ||
982 | COMBA_STORE(b[45]); | ||
983 | |||
984 | /* output 46 */ | ||
985 | CARRY_FORWARD; | ||
986 | SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]); SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]); SQRADDDB; SQRADD(a[23], a[23]); | ||
987 | COMBA_STORE(b[46]); | ||
988 | |||
989 | /* output 47 */ | ||
990 | CARRY_FORWARD; | ||
991 | SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]); SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]); SQRADDDB; | ||
992 | COMBA_STORE(b[47]); | ||
993 | |||
994 | /* output 48 */ | ||
995 | CARRY_FORWARD; | ||
996 | SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]); SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]); SQRADDDB; SQRADD(a[24], a[24]); | ||
997 | COMBA_STORE(b[48]); | ||
998 | |||
999 | /* output 49 */ | ||
1000 | CARRY_FORWARD; | ||
1001 | SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]); SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]); SQRADDDB; | ||
1002 | COMBA_STORE(b[49]); | ||
1003 | |||
1004 | /* output 50 */ | ||
1005 | CARRY_FORWARD; | ||
1006 | SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]); SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]); SQRADDDB; SQRADD(a[25], a[25]); | ||
1007 | COMBA_STORE(b[50]); | ||
1008 | |||
1009 | /* output 51 */ | ||
1010 | CARRY_FORWARD; | ||
1011 | SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]); SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]); SQRADDDB; | ||
1012 | COMBA_STORE(b[51]); | ||
1013 | |||
1014 | /* output 52 */ | ||
1015 | CARRY_FORWARD; | ||
1016 | SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]); SQRADDAC(a[25], a[27]); SQRADDDB; SQRADD(a[26], a[26]); | ||
1017 | COMBA_STORE(b[52]); | ||
1018 | |||
1019 | /* output 53 */ | ||
1020 | CARRY_FORWARD; | ||
1021 | SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]); SQRADDAC(a[26], a[27]); SQRADDDB; | ||
1022 | COMBA_STORE(b[53]); | ||
1023 | |||
1024 | /* output 54 */ | ||
1025 | CARRY_FORWARD; | ||
1026 | SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]); SQRADDDB; SQRADD(a[27], a[27]); | ||
1027 | COMBA_STORE(b[54]); | ||
1028 | |||
1029 | /* output 55 */ | ||
1030 | CARRY_FORWARD; | ||
1031 | SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]); SQRADDDB; | ||
1032 | COMBA_STORE(b[55]); | ||
1033 | |||
1034 | /* output 56 */ | ||
1035 | CARRY_FORWARD; | ||
1036 | SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]); SQRADDDB; SQRADD(a[28], a[28]); | ||
1037 | COMBA_STORE(b[56]); | ||
1038 | |||
1039 | /* output 57 */ | ||
1040 | CARRY_FORWARD; | ||
1041 | SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]); SQRADDDB; | ||
1042 | COMBA_STORE(b[57]); | ||
1043 | |||
1044 | /* output 58 */ | ||
1045 | CARRY_FORWARD; | ||
1046 | SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]); | ||
1047 | COMBA_STORE(b[58]); | ||
1048 | |||
1049 | /* output 59 */ | ||
1050 | CARRY_FORWARD; | ||
1051 | SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]); | ||
1052 | COMBA_STORE(b[59]); | ||
1053 | |||
1054 | /* output 60 */ | ||
1055 | CARRY_FORWARD; | ||
1056 | SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]); | ||
1057 | COMBA_STORE(b[60]); | ||
1058 | |||
1059 | /* output 61 */ | ||
1060 | CARRY_FORWARD; | ||
1061 | SQRADD2(a[30], a[31]); | ||
1062 | COMBA_STORE(b[61]); | ||
1063 | |||
1064 | /* output 62 */ | ||
1065 | CARRY_FORWARD; | ||
1066 | SQRADD(a[31], a[31]); | ||
1067 | COMBA_STORE(b[62]); | ||
1068 | COMBA_STORE2(b[63]); | ||
1069 | COMBA_FINI; | ||
1070 | |||
1071 | B->used = 64; | ||
1072 | B->sign = PSTM_ZPOS; | ||
1073 | memcpy(B->dp, b, 64 * sizeof(pstm_digit)); | ||
1074 | pstm_clamp(B); | ||
1075 | return PSTM_OKAY; | ||
1076 | } | ||
1077 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
1078 | |||
1079 | /******************************************************************************/ | ||
1080 | /* | ||
1081 | */ | ||
1082 | int32 pstm_sqr_comba(psPool_t *pool, pstm_int *A, pstm_int *B, pstm_digit *paD, | ||
1083 | uint32 paDlen) | ||
1084 | { | ||
1085 | #ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS | ||
1086 | if (A->used == 16) { | ||
1087 | return pstm_sqr_comba16(A, B); | ||
1088 | } else { | ||
1089 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
1090 | if (A->used == 32) { | ||
1091 | return pstm_sqr_comba32(A, B); | ||
1092 | } | ||
1093 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
1094 | return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); | ||
1095 | } | ||
1096 | #else | ||
1097 | #ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS | ||
1098 | if (A->used == 32) { | ||
1099 | return pstm_sqr_comba32(A, B); | ||
1100 | } | ||
1101 | #endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ | ||
1102 | return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); | ||
1103 | #endif | ||
1104 | } | ||
1105 | |||
1106 | #endif /* DISABLE_PSTM */ | ||
1107 | /******************************************************************************/ | ||