diff options
author | djm <> | 2008-09-06 12:15:56 +0000 |
---|---|---|
committer | djm <> | 2008-09-06 12:15:56 +0000 |
commit | 12867252827c8efaa8ddd1fa3b3d6e321e2bcdef (patch) | |
tree | b7a1f167ae5aeff4cfd8a18b598b68fe98a066fd /src/lib/libcrypto/des | |
parent | f519f07de9bfb123f2b32aa3965e6f73c8364b80 (diff) | |
parent | 5a3c0a05c7f2c5d3c584b7c8d6aec836dd724c80 (diff) | |
download | openbsd-12867252827c8efaa8ddd1fa3b3d6e321e2bcdef.tar.gz openbsd-12867252827c8efaa8ddd1fa3b3d6e321e2bcdef.tar.bz2 openbsd-12867252827c8efaa8ddd1fa3b3d6e321e2bcdef.zip |
This commit was generated by cvs2git to track changes on a CVS vendor
branch.
Diffstat (limited to 'src/lib/libcrypto/des')
-rw-r--r-- | src/lib/libcrypto/des/asm/des_enc.m4 | 1980 |
1 files changed, 1980 insertions, 0 deletions
diff --git a/src/lib/libcrypto/des/asm/des_enc.m4 b/src/lib/libcrypto/des/asm/des_enc.m4 new file mode 100644 index 0000000000..f5b1928f99 --- /dev/null +++ b/src/lib/libcrypto/des/asm/des_enc.m4 | |||
@@ -0,0 +1,1980 @@ | |||
1 | ! des_enc.m4 | ||
2 | ! des_enc.S (generated from des_enc.m4) | ||
3 | ! | ||
4 | ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file. | ||
5 | ! | ||
6 | ! Version 1.0. 32-bit version. | ||
7 | ! | ||
8 | ! June 8, 2000. | ||
9 | ! | ||
10 | ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation | ||
11 | ! by Andy Polyakov. | ||
12 | ! | ||
13 | ! January 1, 2003. | ||
14 | ! | ||
15 | ! Assembler version: Copyright Svend Olaf Mikkelsen. | ||
16 | ! | ||
17 | ! Original C code: Copyright Eric A. Young. | ||
18 | ! | ||
19 | ! This code can be freely used by LibDES/SSLeay/OpenSSL users. | ||
20 | ! | ||
21 | ! The LibDES/SSLeay/OpenSSL copyright notices must be respected. | ||
22 | ! | ||
23 | ! This version can be redistributed. | ||
24 | ! | ||
25 | ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S | ||
26 | ! | ||
27 | ! Global registers 1 to 5 are used. This is the same as done by the | ||
28 | ! cc compiler. The UltraSPARC load/store little endian feature is used. | ||
29 | ! | ||
30 | ! Instruction grouping often refers to one CPU cycle. | ||
31 | ! | ||
32 | ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S | ||
33 | ! | ||
34 | ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S | ||
35 | ! | ||
36 | ! Performance improvement according to './apps/openssl speed des' | ||
37 | ! | ||
38 | ! 32-bit build: | ||
39 | ! 23% faster than cc-5.2 -xarch=v8plus -xO5 | ||
40 | ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 | ||
41 | ! 64-bit build: | ||
42 | ! 50% faster than cc-5.2 -xarch=v9 -xO5 | ||
43 | ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 | ||
44 | ! | ||
45 | |||
46 | .ident "des_enc.m4 2.1" | ||
47 | |||
48 | #if defined(__SUNPRO_C) && defined(__sparcv9) | |
49 | # define ABI64 /* They've said -xarch=v9 at command line */ | |
50 | #elif defined(__GNUC__) && defined(__arch64__) | |
51 | # define ABI64 /* They've said -m64 at command line */ | |
52 | #endif | |
53 | | |
54 | #ifdef ABI64 /* 64-bit ABI: pointer loads/stores become ldx/stx */ | |
55 | .register %g2,#scratch | |
56 | .register %g3,#scratch | |
57 | # define FRAME -192 | |
58 | # define BIAS 2047 | |
59 | # define LDPTR ldx /* pointer-sized load */ | |
60 | # define STPTR stx /* pointer-sized store */ | |
61 | # define ARG0 128 | |
62 | # define ARGSZ 8 | |
63 | # ifndef OPENSSL_SYSNAME_ULTRASPARC /* the 64-bit build always enables the UltraSPARC paths */ | |
64 | # define OPENSSL_SYSNAME_ULTRASPARC | |
65 | # endif | |
66 | #else /* 32-bit ABI: pointer loads/stores are plain ld/st */ | |
67 | # define FRAME -96 | |
68 | # define BIAS 0 | |
69 | # define LDPTR ld /* pointer-sized load */ | |
70 | # define STPTR st /* pointer-sized store */ | |
71 | # define ARG0 68 | |
72 | # define ARGSZ 4 | |
73 | #endif | |
74 | | |
75 | #define LOOPS 7 /* NOTE(review): not referenced by the macros in view, which load their loop counter from [out2+280] */ | |
76 | | |
77 | #define global0 %g0 | |
78 | #define global1 %g1 | |
79 | #define global2 %g2 | |
80 | #define global3 %g3 | |
81 | #define global4 %g4 | |
82 | #define global5 %g5 | |
83 | | |
84 | #define local0 %l0 | |
85 | #define local1 %l1 | |
86 | #define local2 %l2 | |
87 | #define local3 %l3 | |
88 | #define local4 %l4 | |
89 | #define local5 %l5 | |
90 | #define local7 %l6 /* note: local7 maps to %l6 ... */ | |
91 | #define local6 %l7 /* ... and local6 maps to %l7 (the numbers are crossed) */ | |
92 | | |
93 | #define in0 %i0 | |
94 | #define in1 %i1 | |
95 | #define in2 %i2 | |
96 | #define in3 %i3 | |
97 | #define in4 %i4 | |
98 | #define in5 %i5 | |
99 | #define in6 %i6 | |
100 | #define in7 %i7 | |
101 | | |
102 | #define out0 %o0 | |
103 | #define out1 %o1 | |
104 | #define out2 %o2 | |
105 | #define out3 %o3 | |
106 | #define out4 %o4 | |
107 | #define out5 %o5 | |
108 | #define out6 %o6 | |
109 | #define out7 %o7 | |
110 | | |
111 | #define stub stb /* stub expands to the stb byte store */ | |
112 | |||
113 | changequote({,}) | ||
114 | |||
115 | |||
116 | ! Macro definitions: | ||
117 | |||
118 | |||
119 | ! {ip_macro} | |
120 | ! | |
121 | ! The logic used in initial and final permutations is the same as in | |
122 | ! the C code. The permutations are done with a clever shift, xor, and | |
123 | ! and-mask technique (srl / xor / and / sll / xor per swap step). | |
124 | ! | |
125 | ! The macro also sets address sbox 2 to 5 in global 2 to 5, address | |
126 | ! sbox 6 in local6, and address sbox 8 in out3, as offsets from global1. | |
127 | ! global1 (address sbox 1) must already be set: the sethi/or here are nops. | |
128 | ! Rotates the halves 3 left to bring the sbox bits in convenient positions. | |
129 | ! | |
130 | ! Loads key first round from address in parameter 5 to out0, out1. | |
131 | ! | |
132 | ! After the original LibDES initial permutation, the resulting left | |
133 | ! is in the variable initially used for right and vice versa. The macro | |
134 | ! implements the possibility to keep the halves in the original registers. | |
135 | ! | |
136 | ! parameter 1 left | |
137 | ! parameter 2 right | |
138 | ! parameter 3 result left (modify in first round) | |
139 | ! parameter 4 result right (use in first round) | |
140 | ! parameter 5 key address | |
141 | ! parameter 6 1/2 for include encryption/decryption (call .des_enc.1/.des_dec.1) | |
142 | ! parameter 7 1 for move in1 to in3 (any other value emits a nop instead) | |
143 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | |
144 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | |
145 | | |
146 | define(ip_macro, { | |
147 | | |
148 | ! {ip_macro} | |
149 | ! $1 $2 $4 $3 $5 $6 $7 $8 $9 | |
150 | | |
151 | ld [out2+256], local1 | |
152 | srl $2, 4, local4 | |
153 | | |
154 | xor local4, $1, local4 | |
155 | ifelse($7,1,{mov in1, in3},{nop}) | |
156 | | |
157 | ld [out2+260], local2 | |
158 | and local4, local1, local4 | |
159 | ifelse($8,1,{mov in3, in4},{}) | |
160 | ifelse($8,2,{mov in4, in3},{}) | |
161 | | |
162 | ld [out2+280], out4 ! loop counter | |
163 | sll local4, 4, local1 | |
164 | xor $1, local4, $1 | |
165 | | |
166 | ld [out2+264], local3 | |
167 | srl $1, 16, local4 | |
168 | xor $2, local1, $2 | |
169 | | |
170 | ifelse($9,1,{LDPTR KS3, in4},{}) | |
171 | xor local4, $2, local4 | |
172 | nop !sethi %hi(DES_SPtrans), global1 ! sbox addr | |
173 | | |
174 | ifelse($9,1,{LDPTR KS2, in3},{}) | |
175 | and local4, local2, local4 | |
176 | nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr | |
177 | | |
178 | sll local4, 16, local1 | |
179 | xor $2, local4, $2 | |
180 | | |
181 | srl $2, 2, local4 | |
182 | xor $1, local1, $1 | |
183 | | |
184 | sethi %hi(16711680), local5 | |
185 | xor local4, $1, local4 | |
186 | | |
187 | and local4, local3, local4 | |
188 | or local5, 255, local5 | |
189 | | |
190 | sll local4, 2, local2 | |
191 | xor $1, local4, $1 | |
192 | | |
193 | srl $1, 8, local4 | |
194 | xor $2, local2, $2 | |
195 | | |
196 | xor local4, $2, local4 | |
197 | add global1, 768, global4 | |
198 | | |
199 | and local4, local5, local4 | |
200 | add global1, 1024, global5 | |
201 | | |
202 | ld [out2+272], local7 | |
203 | sll local4, 8, local1 | |
204 | xor $2, local4, $2 | |
205 | | |
206 | srl $2, 1, local4 | |
207 | xor $1, local1, $1 | |
208 | | |
209 | ld [$5], out0 ! key 7531 | |
210 | xor local4, $1, local4 | |
211 | add global1, 256, global2 | |
212 | | |
213 | ld [$5+4], out1 ! key 8642 | |
214 | and local4, local7, local4 | |
215 | add global1, 512, global3 | |
216 | | |
217 | sll local4, 1, local1 | |
218 | xor $1, local4, $1 | |
219 | | |
220 | sll $1, 3, local3 | |
221 | xor $2, local1, $2 | |
222 | | |
223 | sll $2, 3, local2 | |
224 | add global1, 1280, local6 ! address sbox 6 | |
225 | | |
226 | srl $1, 29, local4 | |
227 | add global1, 1792, out3 ! address sbox 8 | |
228 | | |
229 | srl $2, 29, local1 | |
230 | or local4, local3, $4 | |
231 | | |
232 | or local2, local1, $3 | |
233 | | |
234 | ifelse($6, 1, { | |
235 | | |
236 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | |
237 | or local2, local1, $3 | |
238 | xor $4, out0, local1 | |
239 | | |
240 | call .des_enc.1 | |
241 | and local1, 252, local1 | |
242 | | |
243 | },{}) | |
244 | | |
245 | ifelse($6, 2, { | |
246 | | |
247 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | |
248 | or local2, local1, $3 | |
249 | xor $4, out0, local1 | |
250 | | |
251 | call .des_dec.1 | |
252 | and local1, 252, local1 | |
253 | | |
254 | },{}) | |
255 | }) | |
256 | |||
257 | |||
258 | ! {rounds_macro} | |
259 | ! | |
260 | ! The logic used in the DES rounds is the same as in the C code, | |
261 | ! except that calculations for sbox 1 and sbox 5 begin before | |
262 | ! the previous round is finished. | |
263 | ! | |
264 | ! In each round one half (work) is modified based on key and the | |
265 | ! other half (use). | |
266 | ! | |
267 | ! In this version we do two rounds in a loop repeated 7 times | |
268 | ! and two rounds separately. | |
269 | ! | |
270 | ! One half has the bits for the sboxes in the following positions: | |
271 | ! | |
272 | ! 777777xx555555xx333333xx111111xx | |
273 | ! | |
274 | ! 88xx666666xx444444xx222222xx8888 | |
275 | ! | |
276 | ! The bits for each sbox are xor-ed with the key bits for that box. | |
277 | ! The above xx bits are cleared, and the result used for lookup in | |
278 | ! the sbox table. Each sbox entry contains the 4 output bits permuted | |
279 | ! into 32 bits according to the P permutation. | |
280 | ! | |
281 | ! In the description of DES, left and right are switched after | |
282 | ! each round, except after last round. In this code the original | |
283 | ! left and right are kept in the same register in all rounds, meaning | |
284 | ! that after the 16 rounds the result for right is in the register | |
285 | ! originally used for left. | |
286 | ! | |
287 | ! parameter 1 first work (left in first round) | |
288 | ! parameter 2 first use (right in first round) | |
289 | ! parameter 3 enc/dec 1/-1 | |
290 | ! parameter 4 loop label | |
291 | ! parameter 5 key address register | |
292 | ! parameter 6 optional address for key next encryption/decryption | |
293 | ! parameter 7 not empty for include retl (the final xor is then its delay slot) | |
294 | ! | |
295 | ! also compares in2 to 8 (the condition codes are still set when the macro ends) | |
296 | | |
297 | define(rounds_macro, { | |
298 | | |
299 | ! {rounds_macro} | |
300 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
301 | | |
302 | xor $2, out0, local1 | |
303 | | |
304 | ld [out2+284], local5 ! 0x0000FC00 | |
305 | ba $4 | |
306 | and local1, 252, local1 | |
307 | | |
308 | .align 32 | |
309 | | |
310 | $4: | |
311 | ! local6 is address sbox 6 | |
312 | ! out3 is address sbox 8 | |
313 | ! out4 is loop counter | |
314 | | |
315 | ld [global1+local1], local1 | |
316 | xor $2, out1, out1 ! 8642 | |
317 | xor $2, out0, out0 ! 7531 | |
318 | fmovs %f0, %f0 ! floating point self-move (fmovs, not fxor) used for alignment | |
319 | | |
320 | srl out1, 4, local0 ! rotate 4 right | |
321 | and out0, local5, local3 ! 3 | |
322 | fmovs %f0, %f0 | |
323 | | |
324 | ld [$5+$3*8], local7 ! key 7531 next round | |
325 | srl local3, 8, local3 ! 3 | |
326 | and local0, 252, local2 ! 2 | |
327 | fmovs %f0, %f0 | |
328 | | |
329 | ld [global3+local3],local3 ! 3 | |
330 | sll out1, 28, out1 ! rotate | |
331 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | |
332 | | |
333 | ld [global2+local2], local2 ! 2 | |
334 | srl out0, 24, local1 ! 7 | |
335 | or out1, local0, out1 ! rotate | |
336 | | |
337 | ldub [out2+local1], local1 ! 7 (and 0xFC) | |
338 | srl out1, 24, local0 ! 8 | |
339 | and out1, local5, local4 ! 4 | |
340 | | |
341 | ldub [out2+local0], local0 ! 8 (and 0xFC) | |
342 | srl local4, 8, local4 ! 4 | |
343 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | |
344 | | |
345 | ld [global4+local4],local4 ! 4 | |
346 | srl out1, 16, local2 ! 6 | |
347 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | |
348 | | |
349 | ld [out3+local0],local0 ! 8 | |
350 | and local2, 252, local2 ! 6 | |
351 | add global1, 1536, local5 ! address sbox 7 | |
352 | | |
353 | ld [local6+local2], local2 ! 6 | |
354 | srl out0, 16, local3 ! 5 | |
355 | xor $1, local4, $1 ! 4 finished | |
356 | | |
357 | ld [local5+local1],local1 ! 7 | |
358 | and local3, 252, local3 ! 5 | |
359 | xor $1, local0, $1 ! 8 finished | |
360 | | |
361 | ld [global5+local3],local3 ! 5 | |
362 | xor $1, local2, $1 ! 6 finished | |
363 | subcc out4, 1, out4 | |
364 | | |
365 | ld [$5+$3*8+4], out0 ! key 8642 next round | |
366 | xor $1, local7, local2 ! sbox 5 next round | |
367 | xor $1, local1, $1 ! 7 finished | |
368 | | |
369 | srl local2, 16, local2 ! sbox 5 next round | |
370 | xor $1, local3, $1 ! 5 finished | |
371 | | |
372 | ld [$5+$3*16+4], out1 ! key 8642 next round again | |
373 | and local2, 252, local2 ! sbox5 next round | |
374 | ! next round | |
375 | xor $1, local7, local7 ! 7531 | |
376 | | |
377 | ld [global5+local2], local2 ! 5 | |
378 | srl local7, 24, local3 ! 7 | |
379 | xor $1, out0, out0 ! 8642 | |
380 | | |
381 | ldub [out2+local3], local3 ! 7 (and 0xFC) | |
382 | srl out0, 4, local0 ! rotate 4 right | |
383 | and local7, 252, local1 ! 1 | |
384 | | |
385 | sll out0, 28, out0 ! rotate | |
386 | xor $2, local2, $2 ! 5 finished local2 used | |
387 | | |
388 | srl local0, 8, local4 ! 4 | |
389 | and local0, 252, local2 ! 2 | |
390 | ld [local5+local3], local3 ! 7 | |
391 | | |
392 | srl local0, 16, local5 ! 6 | |
393 | or out0, local0, out0 ! rotate | |
394 | ld [global2+local2], local2 ! 2 | |
395 | | |
396 | srl out0, 24, local0 | |
397 | ld [$5+$3*16], out0 ! key 7531 next round | |
398 | and local4, 252, local4 ! 4 | |
399 | | |
400 | and local5, 252, local5 ! 6 | |
401 | ld [global4+local4], local4 ! 4 | |
402 | xor $2, local3, $2 ! 7 finished local3 used | |
403 | | |
404 | and local0, 252, local0 ! 8 | |
405 | ld [local6+local5], local5 ! 6 | |
406 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | |
407 | | |
408 | srl local7, 8, local2 ! 3 start | |
409 | ld [out3+local0], local0 ! 8 | |
410 | xor $2, local4, $2 ! 4 finished | |
411 | | |
412 | and local2, 252, local2 ! 3 | |
413 | ld [global1+local1], local1 ! 1 | |
414 | xor $2, local5, $2 ! 6 finished local5 used | |
415 | | |
416 | ld [global3+local2], local2 ! 3 | |
417 | xor $2, local0, $2 ! 8 finished | |
418 | add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two rounds of 8 key bytes) | |
419 | | |
420 | ld [out2+284], local5 ! 0x0000FC00 | |
421 | xor $2, out0, local4 ! sbox 1 next round | |
422 | xor $2, local1, $2 ! 1 finished | |
423 | | |
424 | xor $2, local2, $2 ! 3 finished | |
425 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | |
426 | bne,pt %icc, $4 | |
427 | #else | |
428 | bne $4 | |
429 | #endif | |
430 | and local4, 252, local1 ! sbox 1 next round | |
431 | | |
432 | ! two rounds more: | |
433 | | |
434 | ld [global1+local1], local1 | |
435 | xor $2, out1, out1 | |
436 | xor $2, out0, out0 | |
437 | | |
438 | srl out1, 4, local0 ! rotate | |
439 | and out0, local5, local3 | |
440 | | |
441 | ld [$5+$3*8], local7 ! key 7531 | |
442 | srl local3, 8, local3 | |
443 | and local0, 252, local2 | |
444 | | |
445 | ld [global3+local3],local3 | |
446 | sll out1, 28, out1 ! rotate | |
447 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | |
448 | | |
449 | ld [global2+local2], local2 | |
450 | srl out0, 24, local1 | |
451 | or out1, local0, out1 ! rotate | |
452 | | |
453 | ldub [out2+local1], local1 | |
454 | srl out1, 24, local0 | |
455 | and out1, local5, local4 | |
456 | | |
457 | ldub [out2+local0], local0 | |
458 | srl local4, 8, local4 | |
459 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | |
460 | | |
461 | ld [global4+local4],local4 | |
462 | srl out1, 16, local2 | |
463 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | |
464 | | |
465 | ld [out3+local0],local0 | |
466 | and local2, 252, local2 | |
467 | add global1, 1536, local5 ! address sbox 7 | |
468 | | |
469 | ld [local6+local2], local2 | |
470 | srl out0, 16, local3 | |
471 | xor $1, local4, $1 ! 4 finished | |
472 | | |
473 | ld [local5+local1],local1 | |
474 | and local3, 252, local3 | |
475 | xor $1, local0, $1 | |
476 | | |
477 | ld [global5+local3],local3 | |
478 | xor $1, local2, $1 ! 6 finished | |
479 | cmp in2, 8 | |
480 | | |
481 | ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter | |
482 | xor $1, local7, local2 ! sbox 5 next round | |
483 | xor $1, local1, $1 ! 7 finished | |
484 | | |
485 | ld [$5+$3*8+4], out0 | |
486 | srl local2, 16, local2 ! sbox 5 next round | |
487 | xor $1, local3, $1 ! 5 finished | |
488 | | |
489 | and local2, 252, local2 | |
490 | ! next round (two rounds more) | |
491 | xor $1, local7, local7 ! 7531 | |
492 | | |
493 | ld [global5+local2], local2 | |
494 | srl local7, 24, local3 | |
495 | xor $1, out0, out0 ! 8642 | |
496 | | |
497 | ldub [out2+local3], local3 | |
498 | srl out0, 4, local0 ! rotate | |
499 | and local7, 252, local1 | |
500 | | |
501 | sll out0, 28, out0 ! rotate | |
502 | xor $2, local2, $2 ! 5 finished local2 used | |
503 | | |
504 | srl local0, 8, local4 | |
505 | and local0, 252, local2 | |
506 | ld [local5+local3], local3 | |
507 | | |
508 | srl local0, 16, local5 | |
509 | or out0, local0, out0 ! rotate | |
510 | ld [global2+local2], local2 | |
511 | | |
512 | srl out0, 24, local0 | |
513 | ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption | |
514 | and local4, 252, local4 | |
515 | | |
516 | and local5, 252, local5 | |
517 | ld [global4+local4], local4 | |
518 | xor $2, local3, $2 ! 7 finished local3 used | |
519 | | |
520 | and local0, 252, local0 | |
521 | ld [local6+local5], local5 | |
522 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | |
523 | | |
524 | srl local7, 8, local2 ! 3 start | |
525 | ld [out3+local0], local0 | |
526 | xor $2, local4, $2 | |
527 | | |
528 | and local2, 252, local2 | |
529 | ld [global1+local1], local1 | |
530 | xor $2, local5, $2 ! 6 finished local5 used | |
531 | | |
532 | ld [global3+local2], local2 | |
533 | srl $1, 3, local3 | |
534 | xor $2, local0, $2 | |
535 | | |
536 | ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption | |
537 | sll $1, 29, local4 | |
538 | xor $2, local1, $2 | |
539 | | |
540 | ifelse($7,{}, {}, {retl}) | |
541 | xor $2, local2, $2 | |
542 | }) | |
543 | |||
544 | |||
545 | ! {fp_macro} | |
546 | ! Final permutation for one block; the masks are built with sethi/or. | |
547 | ! parameter 1 right (original left) | |
548 | ! parameter 2 left (original right) | |
549 | ! parameter 3 1 for optional store to [in0] | |
550 | ! parameter 4 1 for load input/output address to local5/7 | |
551 | ! | |
552 | ! The final permutation logic switches the halves, meaning that | |
553 | ! left and right end up in the registers originally used. | |
554 | | |
555 | define(fp_macro, { | |
556 | | |
557 | ! {fp_macro} | |
558 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
559 | | |
560 | ! initially undo the rotate 3 left done after initial permutation | |
561 | ! original left is received shifted 3 right and 29 left in local3/4 | |
562 | | |
563 | sll $2, 29, local1 | |
564 | or local3, local4, $1 | |
565 | | |
566 | srl $2, 3, $2 | |
567 | sethi %hi(0x55555555), local2 | |
568 | | |
569 | or $2, local1, $2 | |
570 | or local2, %lo(0x55555555), local2 | |
571 | | |
572 | srl $2, 1, local3 | |
573 | sethi %hi(0x00ff00ff), local1 | |
574 | xor local3, $1, local3 | |
575 | or local1, %lo(0x00ff00ff), local1 | |
576 | and local3, local2, local3 | |
577 | sethi %hi(0x33333333), local4 | |
578 | sll local3, 1, local2 | |
579 | | |
580 | xor $1, local3, $1 | |
581 | | |
582 | srl $1, 8, local3 | |
583 | xor $2, local2, $2 | |
584 | xor local3, $2, local3 | |
585 | or local4, %lo(0x33333333), local4 | |
586 | and local3, local1, local3 | |
587 | sethi %hi(0x0000ffff), local1 | |
588 | sll local3, 8, local2 | |
589 | | |
590 | xor $2, local3, $2 | |
591 | | |
592 | srl $2, 2, local3 | |
593 | xor $1, local2, $1 | |
594 | xor local3, $1, local3 | |
595 | or local1, %lo(0x0000ffff), local1 | |
596 | and local3, local4, local3 | |
597 | sethi %hi(0x0f0f0f0f), local4 | |
598 | sll local3, 2, local2 | |
599 | | |
600 | ifelse($4,1, {LDPTR INPUT, local5}) | |
601 | xor $1, local3, $1 | |
602 | | |
603 | ifelse($4,1, {LDPTR OUTPUT, local7}) | |
604 | srl $1, 16, local3 | |
605 | xor $2, local2, $2 | |
606 | xor local3, $2, local3 | |
607 | or local4, %lo(0x0f0f0f0f), local4 | |
608 | and local3, local1, local3 | |
609 | sll local3, 16, local2 | |
610 | | |
611 | xor $2, local3, local1 | |
612 | | |
613 | srl local1, 4, local3 | |
614 | xor $1, local2, $1 | |
615 | xor local3, $1, local3 | |
616 | and local3, local4, local3 | |
617 | sll local3, 4, local2 | |
618 | | |
619 | xor $1, local3, $1 | |
620 | | |
621 | ! optional store: | |
622 | | |
623 | ifelse($3,1, {st $1, [in0]}) | |
624 | | |
625 | xor local1, local2, $2 | |
626 | | |
627 | ifelse($3,1, {st $2, [in0+4]}) | |
628 | | |
629 | }) | |
630 | |||
631 | |||
632 | ! {fp_ip_macro} | |
633 | ! | |
634 | ! Does initial permutation for next block mixed with | |
635 | ! final permutation for current block. | |
636 | ! | |
637 | ! parameter 1 original left | |
638 | ! parameter 2 original right (result register; the rotated input is read from out5) | |
639 | ! parameter 3 left ip | |
640 | ! parameter 4 right ip | |
641 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | |
642 | ! 2: mov in4 to in3 | |
643 | ! | |
644 | ! also adds -8 to length in2 and loads loop counter to out4 | |
645 | | |
646 | define(fp_ip_macro, { | |
647 | | |
648 | ! {fp_ip_macro} | |
649 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
650 | | |
651 | define({temp1},{out4}) | |
652 | define({temp2},{local3}) | |
653 | | |
654 | define({ip1},{local1}) | |
655 | define({ip2},{local2}) | |
656 | define({ip4},{local4}) | |
657 | define({ip5},{local5}) | |
658 | | |
659 | ! $1 in local3, local4 | |
660 | | |
661 | ld [out2+256], ip1 | |
662 | sll out5, 29, temp1 | |
663 | or local3, local4, $1 | |
664 | | |
665 | srl out5, 3, $2 | |
666 | ifelse($5,2,{mov in4, in3}) | |
667 | | |
668 | ld [out2+272], ip5 | |
669 | srl $4, 4, local0 | |
670 | or $2, temp1, $2 | |
671 | | |
672 | srl $2, 1, temp1 | |
673 | xor temp1, $1, temp1 | |
674 | | |
675 | and temp1, ip5, temp1 | |
676 | xor local0, $3, local0 | |
677 | | |
678 | sll temp1, 1, temp2 | |
679 | xor $1, temp1, $1 | |
680 | | |
681 | and local0, ip1, local0 | |
682 | add in2, -8, in2 | |
683 | | |
684 | sll local0, 4, local7 | |
685 | xor $3, local0, $3 | |
686 | | |
687 | ld [out2+268], ip4 | |
688 | srl $1, 8, temp1 | |
689 | xor $2, temp2, $2 | |
690 | ld [out2+260], ip2 | |
691 | srl $3, 16, local0 | |
692 | xor $4, local7, $4 | |
693 | xor temp1, $2, temp1 | |
694 | xor local0, $4, local0 | |
695 | and temp1, ip4, temp1 | |
696 | and local0, ip2, local0 | |
697 | sll temp1, 8, temp2 | |
698 | xor $2, temp1, $2 | |
699 | sll local0, 16, local7 | |
700 | xor $4, local0, $4 | |
701 | | |
702 | srl $2, 2, temp1 | |
703 | xor $1, temp2, $1 | |
704 | | |
705 | ld [out2+264], temp2 ! ip3 | |
706 | srl $4, 2, local0 | |
707 | xor $3, local7, $3 | |
708 | xor temp1, $1, temp1 | |
709 | xor local0, $3, local0 | |
710 | and temp1, temp2, temp1 | |
711 | and local0, temp2, local0 | |
712 | sll temp1, 2, temp2 | |
713 | xor $1, temp1, $1 | |
714 | sll local0, 2, local7 | |
715 | xor $3, local0, $3 | |
716 | | |
717 | srl $1, 16, temp1 | |
718 | xor $2, temp2, $2 | |
719 | srl $3, 8, local0 | |
720 | xor $4, local7, $4 | |
721 | xor temp1, $2, temp1 | |
722 | xor local0, $4, local0 | |
723 | and temp1, ip2, temp1 | |
724 | and local0, ip4, local0 | |
725 | sll temp1, 16, temp2 | |
726 | xor $2, temp1, local4 | |
727 | sll local0, 8, local7 | |
728 | xor $4, local0, $4 | |
729 | | |
730 | srl $4, 1, local0 | |
731 | xor $3, local7, $3 | |
732 | | |
733 | srl local4, 4, temp1 | |
734 | xor local0, $3, local0 | |
735 | | |
736 | xor $1, temp2, $1 | |
737 | and local0, ip5, local0 | |
738 | | |
739 | sll local0, 1, local7 | |
740 | xor temp1, $1, temp1 | |
741 | | |
742 | xor $3, local0, $3 | |
743 | xor $4, local7, $4 | |
744 | | |
745 | sll $3, 3, local5 | |
746 | and temp1, ip1, temp1 | |
747 | | |
748 | sll temp1, 4, temp2 | |
749 | xor $1, temp1, $1 | |
750 | | |
751 | ifelse($5,1,{LDPTR KS2, in4}) | |
752 | sll $4, 3, local2 | |
753 | xor local4, temp2, $2 | |
754 | | |
755 | ! reload out4 since it was used as a temporary: | |
756 | | |
757 | ld [out2+280], out4 ! loop counter | |
758 | | |
759 | srl $3, 29, local0 | |
760 | ifelse($5,1,{add in4, 120, in4}) | |
761 | | |
762 | ifelse($5,1,{LDPTR KS1, in3}) | |
763 | srl $4, 29, local7 | |
764 | | |
765 | or local0, local5, $4 | |
766 | or local2, local7, $3 | |
767 | | |
768 | }) | |
769 | |||
770 | |||
771 | |||
772 | ! {load_little_endian} | |
773 | ! Loads 8 bytes little endian: two lda word loads (ASI 0x88) when the address is 4-byte aligned and OPENSSL_SYSNAME_ULTRASPARC is set, else byte loads. | |
774 | ! parameter 1 address | |
775 | ! parameter 2 destination left | |
776 | ! parameter 3 destination right | |
777 | ! parameter 4 temporary | |
778 | ! parameter 5 label (the byte path; the same name with an a suffix marks the end) | |
779 | | |
780 | define(load_little_endian, { | |
781 | | |
782 | ! {load_little_endian} | |
783 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
784 | | |
785 | ! first in memory to rightmost in register | |
786 | | |
787 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | |
788 | andcc $1, 3, global0 | |
789 | bne,pn %icc, $5 | |
790 | nop | |
791 | | |
792 | lda [$1] 0x88, $2 | |
793 | add $1, 4, $4 | |
794 | | |
795 | ba,pt %icc, $5a | |
796 | lda [$4] 0x88, $3 | |
797 | #endif | |
798 | | |
799 | $5: | |
800 | ldub [$1+3], $2 | |
801 | | |
802 | ldub [$1+2], $4 | |
803 | sll $2, 8, $2 | |
804 | or $2, $4, $2 | |
805 | | |
806 | ldub [$1+1], $4 | |
807 | sll $2, 8, $2 | |
808 | or $2, $4, $2 | |
809 | | |
810 | ldub [$1+0], $4 | |
811 | sll $2, 8, $2 | |
812 | or $2, $4, $2 | |
813 | | |
814 | | |
815 | ldub [$1+3+4], $3 | |
816 | | |
817 | ldub [$1+2+4], $4 | |
818 | sll $3, 8, $3 | |
819 | or $3, $4, $3 | |
820 | | |
821 | ldub [$1+1+4], $4 | |
822 | sll $3, 8, $3 | |
823 | or $3, $4, $3 | |
824 | | |
825 | ldub [$1+0+4], $4 | |
826 | sll $3, 8, $3 | |
827 | or $3, $4, $3 | |
828 | $5a: | |
829 | | |
830 | }) | |
831 | |||
832 | |||
833 | ! {load_little_endian_inc} | |
834 | ! Same 8-byte little endian load as the non-incrementing variant, but the address register itself is advanced. | |
835 | ! parameter 1 address | |
836 | ! parameter 2 destination left | |
837 | ! parameter 3 destination right | |
838 | ! parameter 4 temporary | |
839 | ! parameter 5 label (the byte path; the same name with an a suffix marks the end) | |
840 | ! | |
841 | ! adds 8 to address (on both the word path and the byte path) | |
842 | | |
843 | define(load_little_endian_inc, { | |
844 | | |
845 | ! {load_little_endian_inc} | |
846 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
847 | | |
848 | ! first in memory to rightmost in register | |
849 | | |
850 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | |
851 | andcc $1, 3, global0 | |
852 | bne,pn %icc, $5 | |
853 | nop | |
854 | | |
855 | lda [$1] 0x88, $2 | |
856 | add $1, 4, $1 | |
857 | | |
858 | lda [$1] 0x88, $3 | |
859 | ba,pt %icc, $5a | |
860 | add $1, 4, $1 | |
861 | #endif | |
862 | | |
863 | $5: | |
864 | ldub [$1+3], $2 | |
865 | | |
866 | ldub [$1+2], $4 | |
867 | sll $2, 8, $2 | |
868 | or $2, $4, $2 | |
869 | | |
870 | ldub [$1+1], $4 | |
871 | sll $2, 8, $2 | |
872 | or $2, $4, $2 | |
873 | | |
874 | ldub [$1+0], $4 | |
875 | sll $2, 8, $2 | |
876 | or $2, $4, $2 | |
877 | | |
878 | ldub [$1+3+4], $3 | |
879 | add $1, 8, $1 | |
880 | | |
881 | ldub [$1+2+4-8], $4 | |
882 | sll $3, 8, $3 | |
883 | or $3, $4, $3 | |
884 | | |
885 | ldub [$1+1+4-8], $4 | |
886 | sll $3, 8, $3 | |
887 | or $3, $4, $3 | |
888 | | |
889 | ldub [$1+0+4-8], $4 | |
890 | sll $3, 8, $3 | |
891 | or $3, $4, $3 | |
892 | $5a: | |
893 | | |
894 | }) | |
895 | |||
896 | |||
897 | ! {load_n_bytes} | |
898 | ! Dispatches through a pc-relative jump table; the call .+8 overwrites %o7. | |
899 | ! Loads 1 to 7 bytes little endian | |
900 | ! Remaining bytes are zeroed. | |
901 | ! Bytes 0 to 3 go to the right register, bytes 4 to 6 to the left register. | |
902 | ! parameter 1 address | |
903 | ! parameter 2 length (1 to 7; table entry 0 is 0, which jumps back to the start label) | |
904 | ! parameter 3 destination register left | |
905 | ! parameter 4 destination register right | |
906 | ! parameter 5 temp | |
907 | ! parameter 6 temp2 | |
908 | ! parameter 7 label | |
909 | ! parameter 8 return label | |
910 | | |
911 | define(load_n_bytes, { | |
912 | | |
913 | ! {load_n_bytes} | |
914 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | |
915 | | |
916 | $7.0: call .+8 | |
917 | sll $2, 2, $6 | |
918 | | |
919 | add %o7,$7.jmp.table-$7.0,$5 | |
920 | | |
921 | add $5, $6, $5 | |
922 | mov 0, $4 | |
923 | | |
924 | ld [$5], $5 | |
925 | | |
926 | jmp %o7+$5 | |
927 | mov 0, $3 | |
928 | | |
929 | $7.7: | |
930 | ldub [$1+6], $5 | |
931 | sll $5, 16, $5 | |
932 | or $3, $5, $3 | |
933 | $7.6: | |
934 | ldub [$1+5], $5 | |
935 | sll $5, 8, $5 | |
936 | or $3, $5, $3 | |
937 | $7.5: | |
938 | ldub [$1+4], $5 | |
939 | or $3, $5, $3 | |
940 | $7.4: | |
941 | ldub [$1+3], $5 | |
942 | sll $5, 24, $5 | |
943 | or $4, $5, $4 | |
944 | $7.3: | |
945 | ldub [$1+2], $5 | |
946 | sll $5, 16, $5 | |
947 | or $4, $5, $4 | |
948 | $7.2: | |
949 | ldub [$1+1], $5 | |
950 | sll $5, 8, $5 | |
951 | or $4, $5, $4 | |
952 | $7.1: | |
953 | ldub [$1+0], $5 | |
954 | ba $8 | |
955 | or $4, $5, $4 | |
956 | | |
957 | .align 4 | |
958 | | |
959 | $7.jmp.table: | |
960 | .word 0 | |
961 | .word $7.1-$7.0 | |
962 | .word $7.2-$7.0 | |
963 | .word $7.3-$7.0 | |
964 | .word $7.4-$7.0 | |
965 | .word $7.5-$7.0 | |
966 | .word $7.6-$7.0 | |
967 | .word $7.7-$7.0 | |
968 | }) | |
969 | |||
970 | |||
971 | ! {store_little_endian} | |
972 | ! Stores 8 bytes little endian: two sta word stores (ASI 0x88) when the address is 4-byte aligned and OPENSSL_SYSNAME_ULTRASPARC is set, else byte stores. | |
973 | ! parameter 1 address | |
974 | ! parameter 2 source left | |
975 | ! parameter 3 source right | |
976 | ! parameter 4 temporary (parameter 5, used in the body, is the label) | |
977 | | |
978 | define(store_little_endian, { | |
979 | | |
980 | ! {store_little_endian} | |
981 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
982 | | |
983 | ! rightmost in register to first in memory | |
984 | | |
985 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | |
986 | andcc $1, 3, global0 | |
987 | bne,pn %icc, $5 | |
988 | nop | |
989 | | |
990 | sta $2, [$1] 0x88 | |
991 | add $1, 4, $4 | |
992 | | |
993 | ba,pt %icc, $5a | |
994 | sta $3, [$4] 0x88 | |
995 | #endif | |
996 | | |
997 | $5: | |
998 | and $2, 255, $4 | |
999 | stub $4, [$1+0] | |
1000 | | |
1001 | srl $2, 8, $4 | |
1002 | and $4, 255, $4 | |
1003 | stub $4, [$1+1] | |
1004 | | |
1005 | srl $2, 16, $4 | |
1006 | and $4, 255, $4 | |
1007 | stub $4, [$1+2] | |
1008 | | |
1009 | srl $2, 24, $4 | |
1010 | stub $4, [$1+3] | |
1011 | | |
1012 | | |
1013 | and $3, 255, $4 | |
1014 | stub $4, [$1+0+4] | |
1015 | | |
1016 | srl $3, 8, $4 | |
1017 | and $4, 255, $4 | |
1018 | stub $4, [$1+1+4] | |
1019 | | |
1020 | srl $3, 16, $4 | |
1021 | and $4, 255, $4 | |
1022 | stub $4, [$1+2+4] | |
1023 | | |
1024 | srl $3, 24, $4 | |
1025 | stub $4, [$1+3+4] | |
1026 | | |
1027 | $5a: | |
1028 | | |
1029 | }) | |
1030 | |||
1031 | |||
1032 | ! {store_n_bytes} | |
1033 | ! Dispatches through a pc-relative jump table; the call .+8 overwrites %o7. | |
1034 | ! Stores 1 to 7 bytes little endian | |
1035 | ! Bytes 0 to 3 come from the right register, bytes 4 to 6 from the left register. | |
1036 | ! parameter 1 address | |
1037 | ! parameter 2 length (1 to 7; table entry 0 is 0, which jumps back to the start label) | |
1038 | ! parameter 3 source register left | |
1039 | ! parameter 4 source register right | |
1040 | ! parameter 5 temp | |
1041 | ! parameter 6 temp2 | |
1042 | ! parameter 7 label | |
1043 | ! parameter 8 return label | |
1044 | | |
1045 | define(store_n_bytes, { | |
1046 | | |
1047 | ! {store_n_bytes} | |
1048 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | |
1049 | | |
1050 | $7.0: call .+8 | |
1051 | sll $2, 2, $6 | |
1052 | | |
1053 | add %o7,$7.jmp.table-$7.0,$5 | |
1054 | | |
1055 | add $5, $6, $5 | |
1056 | | |
1057 | ld [$5], $5 | |
1058 | | |
1059 | jmp %o7+$5 | |
1060 | nop | |
1061 | | |
1062 | $7.7: | |
1063 | srl $3, 16, $5 | |
1064 | and $5, 0xff, $5 | |
1065 | stub $5, [$1+6] | |
1066 | $7.6: | |
1067 | srl $3, 8, $5 | |
1068 | and $5, 0xff, $5 | |
1069 | stub $5, [$1+5] | |
1070 | $7.5: | |
1071 | and $3, 0xff, $5 | |
1072 | stub $5, [$1+4] | |
1073 | $7.4: | |
1074 | srl $4, 24, $5 | |
1075 | stub $5, [$1+3] | |
1076 | $7.3: | |
1077 | srl $4, 16, $5 | |
1078 | and $5, 0xff, $5 | |
1079 | stub $5, [$1+2] | |
1080 | $7.2: | |
1081 | srl $4, 8, $5 | |
1082 | and $5, 0xff, $5 | |
1083 | stub $5, [$1+1] | |
1084 | $7.1: | |
1085 | and $4, 0xff, $5 | |
1086 | | |
1087 | | |
1088 | ba $8 | |
1089 | stub $5, [$1] | |
1090 | | |
1091 | .align 4 | |
1092 | | |
1093 | $7.jmp.table: | |
1094 | | |
1095 | .word 0 | |
1096 | .word $7.1-$7.0 | |
1097 | .word $7.2-$7.0 | |
1098 | .word $7.3-$7.0 | |
1099 | .word $7.4-$7.0 | |
1100 | .word $7.5-$7.0 | |
1101 | .word $7.6-$7.0 | |
1102 | .word $7.7-$7.0 | |
1103 | }) | |
1104 | |||
1105 | |||
1106 | define(testvalue,{1}) | ||
1107 | ! The definition above is the constant that the test helper below spreads. | ||
1108 | define(register_init, { | ||
1109 | |||
1110 | ! For test purposes: | ||
1111 | ! load the test constant into local0 and copy it from there | ||
1112 | sethi %hi(testvalue), local0 | ||
1113 | or local0, %lo(testvalue), local0 | ||
1114 | ! up to eight arguments may name extra registers; empty ones are skipped | ||
1115 | ifelse($1,{},{}, {mov local0, $1}) | ||
1116 | ifelse($2,{},{}, {mov local0, $2}) | ||
1117 | ifelse($3,{},{}, {mov local0, $3}) | ||
1118 | ifelse($4,{},{}, {mov local0, $4}) | ||
1119 | ifelse($5,{},{}, {mov local0, $5}) | ||
1120 | ifelse($6,{},{}, {mov local0, $6}) | ||
1121 | ifelse($7,{},{}, {mov local0, $7}) | ||
1122 | ifelse($8,{},{}, {mov local0, $8}) | ||
1123 | ! the fixed set of registers below always receives the constant | ||
1124 | mov local0, local1 | ||
1125 | mov local0, local2 | ||
1126 | mov local0, local3 | ||
1127 | mov local0, local4 | ||
1128 | mov local0, local5 | ||
1129 | mov local0, local7 | ||
1130 | mov local0, local6 | ||
1131 | mov local0, out0 | ||
1132 | mov local0, out1 | ||
1133 | mov local0, out2 | ||
1134 | mov local0, out3 | ||
1135 | mov local0, out4 | ||
1136 | mov local0, out5 | ||
1137 | mov local0, global1 | ||
1138 | mov local0, global2 | ||
1139 | mov local0, global3 | ||
1140 | mov local0, global4 | ||
1141 | mov local0, global5 | ||
1142 | |||
1143 | }) | ||
1144 | |||
1145 | .section ".text" | ||
1146 | |||
1147 | .align 32 | ||
1148 | ! Internal encryption rounds, reached with call from the entry points below. | ||
1149 | .des_enc: | ||
1150 | ! the callers keep the two data halves in in5 and out5 | ||
1151 | ! key address in3 | ||
1152 | ! loads key next encryption/decryption first round from [in4] | ||
1153 | ! NOTE(review): the retl argument presumably ends the expansion with a leaf return - confirm | ||
1154 | rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) | ||
1155 | |||
1155 | |||
1156 | |||
1157 | .align 32 | ||
1158 | ! Internal decryption rounds, reached with call like the encryption rounds. | ||
1159 | .des_dec: | ||
1160 | |||
1161 | ! implemented with out5 as first parameter to avoid | ||
1162 | ! register exchange in ede modes | ||
1163 | ! NOTE(review): the -1 argument presumably walks the key schedule backwards - confirm | ||
1164 | ! key address in4 | ||
1165 | ! loads key next encryption/decryption first round from [in3] | ||
1166 | |||
1167 | rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) | ||
1168 | |||
1169 | |||
1170 | |||
1171 | ! void DES_encrypt1(data, ks, enc) | ||
1172 | ! ******************************* | ||
1173 | ! Entry point for one block: data arrives in in0, ks in in1 and enc in in2. | ||
1174 | .align 32 | ||
1175 | .global DES_encrypt1 | ||
1176 | .type DES_encrypt1,#function | ||
1177 | |||
1178 | DES_encrypt1: | ||
1179 | |||
1180 | save %sp, FRAME, %sp | ||
1181 | ! the delay slot hands .PIC.me.up its distance from this call in out0 | ||
1182 | call .PIC.me.up | ||
1183 | mov .PIC.me.up-(.-4),out0 | ||
1184 | |||
1185 | ld [in0], in5 ! left | ||
1186 | cmp in2, 0 ! enc | ||
1187 | ! enc equal to 0 branches to decryption; the right half loads in the delay slot | ||
1188 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1189 | be,pn %icc, .encrypt.dec ! enc/dec | ||
1190 | #else | ||
1191 | be .encrypt.dec | ||
1192 | #endif | ||
1193 | ld [in0+4], out5 ! right | ||
1194 | |||
1195 | ! parameter 6 1/2 for include encryption/decryption | ||
1196 | ! parameter 7 1 for move in1 to in3 | ||
1197 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
1198 | |||
1199 | ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) | ||
1200 | |||
1201 | rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used | ||
1202 | |||
1203 | fp_macro(in5, out5, 1) ! 1 for store to [in0] | ||
1204 | |||
1205 | ret | ||
1206 | restore | ||
1207 | |||
1208 | .encrypt.dec: | ||
1209 | |||
1210 | add in1, 120, in3 ! use last subkey for first round | ||
1211 | |||
1212 | ! parameter 6 1/2 for include encryption/decryption | ||
1213 | ! parameter 7 1 for move in1 to in3 | ||
1214 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
1215 | |||
1216 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4 | ||
1217 | ! the decryption rounds are part of the expansion above (parameter 6 is 2) | ||
1218 | fp_macro(out5, in5, 1) ! 1 for store to [in0] | ||
1219 | |||
1220 | ret | ||
1221 | restore | ||
1222 | |||
1223 | .DES_encrypt1.end: | ||
1224 | .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1 | ||
1225 | |||
1226 | |||
1227 | ! void DES_encrypt2(data, ks, enc) | ||
1228 | !********************************* | ||
1229 | |||
1230 | ! encrypts/decrypts without initial/final permutation | ||
1231 | |||
1232 | .align 32 | ||
1233 | .global DES_encrypt2 | ||
1234 | .type DES_encrypt2,#function | ||
1235 | |||
1236 | DES_encrypt2: | ||
1237 | |||
1238 | save %sp, FRAME, %sp | ||
1239 | |||
1240 | call .PIC.me.up | ||
1241 | mov .PIC.me.up-(.-4),out0 | ||
1242 | |||
1243 | ! Set sbox address 1 to 6 and rotate halves 3 left | ||
1244 | ! Errors caught by destest? Yes. Still? *NO* | ||
1245 | ! global1 already points at sbox 1 after the call above | ||
1246 | !sethi %hi(DES_SPtrans), global1 ! address sbox 1 | ||
1247 | |||
1248 | !or global1, %lo(DES_SPtrans), global1 ! sbox 1 | ||
1249 | |||
1250 | add global1, 256, global2 ! sbox 2 | ||
1251 | add global1, 512, global3 ! sbox 3 | ||
1252 | |||
1253 | ld [in0], out5 ! right | ||
1254 | add global1, 768, global4 ! sbox 4 | ||
1255 | add global1, 1024, global5 ! sbox 5 | ||
1256 | |||
1257 | ld [in0+4], in5 ! left | ||
1258 | add global1, 1280, local6 ! sbox 6 | ||
1259 | add global1, 1792, out3 ! sbox 8 | ||
1260 | ! sbox 7 gets no register here; sbox 8 goes to out3 | ||
1261 | ! rotate | ||
1262 | |||
1263 | sll in5, 3, local5 | ||
1264 | mov in1, in3 ! key address to in3 | ||
1265 | |||
1266 | sll out5, 3, local7 | ||
1267 | srl in5, 29, in5 | ||
1268 | |||
1269 | srl out5, 29, out5 | ||
1270 | add in5, local5, in5 | ||
1271 | |||
1272 | add out5, local7, out5 | ||
1273 | cmp in2, 0 | ||
1274 | |||
1275 | ! we use our own stackframe | ||
1276 | ! in0 is reused as a temporary after the rounds so the data pointer is saved | ||
1277 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1278 | be,pn %icc, .encrypt2.dec ! decryption | ||
1279 | #else | ||
1280 | be .encrypt2.dec | ||
1281 | #endif | ||
1282 | STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] | ||
1283 | |||
1284 | ld [in3], out0 ! key 7531 first round | ||
1285 | mov LOOPS, out4 ! loop counter | ||
1286 | |||
1287 | ld [in3+4], out1 ! key 8642 first round | ||
1288 | sethi %hi(0x0000FC00), local5 | ||
1289 | |||
1290 | call .des_enc | ||
1291 | mov in3, in4 | ||
1292 | ! undo the rotate by 3, reload the data pointer and store both halves | ||
1293 | ! rotate | ||
1294 | sll in5, 29, in0 | ||
1295 | srl in5, 3, in5 | ||
1296 | sll out5, 29, in1 | ||
1297 | add in5, in0, in5 | ||
1298 | srl out5, 3, out5 | ||
1299 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 | ||
1300 | add out5, in1, out5 | ||
1301 | st in5, [in0] | ||
1302 | st out5, [in0+4] | ||
1303 | |||
1304 | ret | ||
1305 | restore | ||
1306 | |||
1307 | |||
1308 | .encrypt2.dec: | ||
1309 | |||
1310 | add in3, 120, in4 | ||
1311 | |||
1312 | ld [in4], out0 ! key 7531 first round | ||
1313 | mov LOOPS, out4 ! loop counter | ||
1314 | |||
1315 | ld [in4+4], out1 ! key 8642 first round | ||
1316 | sethi %hi(0x0000FC00), local5 | ||
1317 | |||
1318 | mov in5, local1 ! left expected in out5 | ||
1319 | mov out5, in5 | ||
1320 | |||
1321 | call .des_dec | ||
1322 | mov local1, out5 | ||
1323 | |||
1324 | .encrypt2.finish: | ||
1325 | |||
1326 | ! rotate | ||
1327 | sll in5, 29, in0 | ||
1328 | srl in5, 3, in5 | ||
1329 | sll out5, 29, in1 | ||
1330 | add in5, in0, in5 | ||
1331 | srl out5, 3, out5 | ||
1332 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 | ||
1333 | add out5, in1, out5 | ||
1334 | st out5, [in0] | ||
1335 | st in5, [in0+4] | ||
1336 | |||
1337 | ret | ||
1338 | restore | ||
1339 | |||
1340 | .DES_encrypt2.end: | ||
1341 | .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2 | ||
1342 | |||
1343 | |||
1344 | ! void DES_encrypt3(data, ks1, ks2, ks3) | ||
1345 | ! ************************************** | ||
1346 | ! Entry point: data in in0, ks1 in in1, ks2 in in2, ks3 in in3. | ||
1347 | .align 32 | ||
1348 | .global DES_encrypt3 | ||
1349 | .type DES_encrypt3,#function | ||
1350 | |||
1351 | DES_encrypt3: | ||
1352 | |||
1353 | save %sp, FRAME, %sp | ||
1354 | |||
1355 | call .PIC.me.up | ||
1356 | mov .PIC.me.up-(.-4),out0 | ||
1357 | ! ks2 plus 120 is prepared in in4 for the decryption step; ks3 is parked in in2 | ||
1358 | ld [in0], in5 ! left | ||
1359 | add in2, 120, in4 ! ks2 | ||
1360 | |||
1361 | ld [in0+4], out5 ! right | ||
1362 | mov in3, in2 ! save ks3 | ||
1363 | |||
1364 | ! parameter 6 1/2 for include encryption/decryption | ||
1365 | ! parameter 7 1 for mov in1 to in3 | ||
1366 | ! parameter 8 1 for mov in3 to in4 | ||
1367 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
1368 | |||
1369 | ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) | ||
1370 | ! per the list above the 1 in position 6 makes the ks1 encryption part of the expansion | ||
1371 | call .des_dec | ||
1372 | mov in2, in3 ! preload ks3 | ||
1373 | |||
1374 | call .des_enc | ||
1375 | nop | ||
1376 | |||
1377 | fp_macro(in5, out5, 1) | ||
1378 | |||
1379 | ret | ||
1380 | restore | ||
1381 | |||
1382 | .DES_encrypt3.end: | ||
1383 | .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3 | ||
1384 | |||
1385 | |||
1386 | ! void DES_decrypt3(data, ks1, ks2, ks3) | ||
1387 | ! ************************************** | ||
1388 | ! Entry point: data in in0, ks1 in in1, ks2 in in2, ks3 in in3. | ||
1389 | .align 32 | ||
1390 | .global DES_decrypt3 | ||
1391 | .type DES_decrypt3,#function | ||
1392 | |||
1393 | DES_decrypt3: | ||
1394 | |||
1395 | save %sp, FRAME, %sp | ||
1396 | |||
1397 | call .PIC.me.up | ||
1398 | mov .PIC.me.up-(.-4),out0 | ||
1399 | ! ks3 plus 120 goes to in4 for the first decryption; ks2 goes to in3 | ||
1400 | ld [in0], in5 ! left | ||
1401 | add in3, 120, in4 ! ks3 | ||
1402 | |||
1403 | ld [in0+4], out5 ! right | ||
1404 | mov in2, in3 ! ks2 | ||
1405 | |||
1406 | ! parameter 6 1/2 for include encryption/decryption | ||
1407 | ! parameter 7 1 for mov in1 to in3 | ||
1408 | ! parameter 8 1 for mov in3 to in4 | ||
1409 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
1410 | |||
1411 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) | ||
1412 | ! per the list above the 2 in position 6 makes the ks3 decryption part of the expansion | ||
1413 | call .des_enc | ||
1414 | add in1, 120, in4 ! preload ks1 | ||
1415 | |||
1416 | call .des_dec | ||
1417 | nop | ||
1418 | |||
1419 | fp_macro(out5, in5, 1) | ||
1420 | |||
1421 | ret | ||
1422 | restore | ||
1423 | |||
1424 | .DES_decrypt3.end: | ||
1425 | .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 | ||
1426 | |||
1427 | .align 256 | ||
1428 | .type .des_and,#object | ||
1429 | .size .des_and,284 | ||
1430 | ! NOTE(review): the size above is 284 but the data below runs through offset 287 - confirm | ||
1431 | .des_and: | ||
1432 | |||
1433 | ! This table is used for AND 0xFC when it is known that register | ||
1434 | ! bits 8-31 are zero. Makes it possible to do three arithmetic | ||
1435 | ! operations in one cycle. | ||
1436 | ! Each of the 256 bytes holds its own offset with the two low bits cleared. | ||
1437 | .byte 0, 0, 0, 0, 4, 4, 4, 4 | ||
1438 | .byte 8, 8, 8, 8, 12, 12, 12, 12 | ||
1439 | .byte 16, 16, 16, 16, 20, 20, 20, 20 | ||
1440 | .byte 24, 24, 24, 24, 28, 28, 28, 28 | ||
1441 | .byte 32, 32, 32, 32, 36, 36, 36, 36 | ||
1442 | .byte 40, 40, 40, 40, 44, 44, 44, 44 | ||
1443 | .byte 48, 48, 48, 48, 52, 52, 52, 52 | ||
1444 | .byte 56, 56, 56, 56, 60, 60, 60, 60 | ||
1445 | .byte 64, 64, 64, 64, 68, 68, 68, 68 | ||
1446 | .byte 72, 72, 72, 72, 76, 76, 76, 76 | ||
1447 | .byte 80, 80, 80, 80, 84, 84, 84, 84 | ||
1448 | .byte 88, 88, 88, 88, 92, 92, 92, 92 | ||
1449 | .byte 96, 96, 96, 96, 100, 100, 100, 100 | ||
1450 | .byte 104, 104, 104, 104, 108, 108, 108, 108 | ||
1451 | .byte 112, 112, 112, 112, 116, 116, 116, 116 | ||
1452 | .byte 120, 120, 120, 120, 124, 124, 124, 124 | ||
1453 | .byte 128, 128, 128, 128, 132, 132, 132, 132 | ||
1454 | .byte 136, 136, 136, 136, 140, 140, 140, 140 | ||
1455 | .byte 144, 144, 144, 144, 148, 148, 148, 148 | ||
1456 | .byte 152, 152, 152, 152, 156, 156, 156, 156 | ||
1457 | .byte 160, 160, 160, 160, 164, 164, 164, 164 | ||
1458 | .byte 168, 168, 168, 168, 172, 172, 172, 172 | ||
1459 | .byte 176, 176, 176, 176, 180, 180, 180, 180 | ||
1460 | .byte 184, 184, 184, 184, 188, 188, 188, 188 | ||
1461 | .byte 192, 192, 192, 192, 196, 196, 196, 196 | ||
1462 | .byte 200, 200, 200, 200, 204, 204, 204, 204 | ||
1463 | .byte 208, 208, 208, 208, 212, 212, 212, 212 | ||
1464 | .byte 216, 216, 216, 216, 220, 220, 220, 220 | ||
1465 | .byte 224, 224, 224, 224, 228, 228, 228, 228 | ||
1466 | .byte 232, 232, 232, 232, 236, 236, 236, 236 | ||
1467 | .byte 240, 240, 240, 240, 244, 244, 244, 244 | ||
1468 | .byte 248, 248, 248, 248, 252, 252, 252, 252 | ||
1469 | |||
1470 | ! 5 numbers for initial/final permutation | ||
1471 | |||
1472 | .word 0x0f0f0f0f ! offset 256 | ||
1473 | .word 0x0000ffff ! 260 | ||
1474 | .word 0x33333333 ! 264 | ||
1475 | .word 0x00ff00ff ! 268 | ||
1476 | .word 0x55555555 ! 272 | ||
1477 | |||
1478 | .word 0 ! 276 | ||
1479 | .word LOOPS ! 280 | ||
1480 | .word 0x0000FC00 ! 284 | ||
1481 | .PIC.DES_SPtrans: | ||
1482 | .word %r_disp32(DES_SPtrans) | ||
1483 | |||
1484 | ! input: out0 offset between .PIC.me.up and caller | ||
1485 | ! output: out0 pointer to .PIC.me.up | ||
1486 | ! out2 pointer to .des_and | ||
1487 | ! global1 pointer to DES_SPtrans | ||
1488 | .align 32 | ||
1489 | .PIC.me.up: | ||
1490 | add out0,%o7,out0 ! pointer to .PIC.me.up | ||
1491 | #if 1 | ||
1492 | ld [out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1 ! word stored at .PIC.DES_SPtrans | ||
1493 | add global1,(.PIC.DES_SPtrans-.PIC.me.up),global1 ! plus offset of that word from .PIC.me.up | ||
1494 | add global1,out0,global1 ! plus address of .PIC.me.up | ||
1495 | #else | ||
1496 | # ifdef OPENSSL_PIC | ||
1497 | ! In case anybody wonders why this code is same for both ABI. | ||
1498 | ! To start with it is not. Do note LDPTR below. But of course | ||
1499 | ! you must be wondering why the rest of it does not contain | ||
1500 | ! things like %hh, %hm and %lm. Well, those are needed only | ||
1501 | ! if OpenSSL library *itself* will become larger than 4GB, | ||
1502 | ! which is not going to happen any time soon. | ||
1503 | sethi %hi(DES_SPtrans),global1 | ||
1504 | or global1,%lo(DES_SPtrans),global1 | ||
1505 | sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 | ||
1506 | add global1,out0,global1 | ||
1507 | add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 | ||
1508 | LDPTR [out2+global1],global1 | ||
1509 | # elif 0 | ||
1510 | setn DES_SPtrans,out2,global1 ! synthetic instruction ! | ||
1511 | # elif defined(ABI64) | ||
1512 | sethi %hh(DES_SPtrans),out2 | ||
1513 | or out2,%hm(DES_SPtrans),out2 | ||
1514 | sethi %lm(DES_SPtrans),global1 | ||
1515 | or global1,%lo(DES_SPtrans),global1 | ||
1516 | sllx out2,32,out2 | ||
1517 | or out2,global1,global1 | ||
1518 | # else | ||
1519 | sethi %hi(DES_SPtrans),global1 | ||
1520 | or global1,%lo(DES_SPtrans),global1 | ||
1521 | # endif | ||
1522 | #endif | ||
1523 | retl | ||
1524 | add out0,.des_and-.PIC.me.up,out2 ! delay slot: pointer to .des_and | ||
1525 | |||
1526 | ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) | ||
1527 | ! ***************************************************************** | ||
1528 | |||
1529 | ! The six arguments arrive in in0 to in5 in the order listed above. | ||
1530 | .align 32 | ||
1531 | .global DES_ncbc_encrypt | ||
1532 | .type DES_ncbc_encrypt,#function | ||
1533 | |||
1534 | DES_ncbc_encrypt: | ||
1535 | |||
1536 | save %sp, FRAME, %sp | ||
1537 | |||
1538 | define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) | ||
1539 | define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) | ||
1540 | define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | ||
1541 | |||
1542 | call .PIC.me.up | ||
1543 | mov .PIC.me.up-(.-4),out0 | ||
1544 | ! enc equal to 0 selects decryption; the ivec pointer is saved in the delay slot | ||
1545 | cmp in5, 0 ! enc | ||
1546 | |||
1547 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1548 | be,pn %icc, .ncbc.dec | ||
1549 | #else | ||
1550 | be .ncbc.dec | ||
1551 | #endif | ||
1552 | STPTR in4, IVEC | ||
1553 | |||
1554 | ! addr left right temp label | ||
1555 | load_little_endian(in4, in5, out5, local3, .LLE1) ! iv | ||
1556 | |||
1557 | addcc in2, -8, in2 ! bytes missing when first block done | ||
1558 | |||
1559 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1560 | bl,pn %icc, .ncbc.enc.seven.or.less | ||
1561 | #else | ||
1562 | bl .ncbc.enc.seven.or.less | ||
1563 | #endif | ||
1564 | mov in3, in4 ! schedule | ||
1565 | |||
1566 | .ncbc.enc.next.block: | ||
1567 | |||
1568 | load_little_endian(in0, out4, global4, local3, .LLE2) ! block | ||
1569 | |||
1570 | .ncbc.enc.next.block_1: | ||
1571 | |||
1572 | xor in5, out4, in5 ! iv xor | ||
1573 | xor out5, global4, out5 ! iv xor | ||
1574 | |||
1575 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
1576 | ip_macro(in5, out5, in5, out5, in3, 0, 0, 2) | ||
1577 | |||
1578 | .ncbc.enc.next.block_2: | ||
1579 | |||
1580 | !// call .des_enc ! compares in2 to 8 | ||
1581 | ! rounds inlined for alignment purposes | ||
1582 | |||
1583 | add global1, 768, global4 ! address sbox 4 since register used below | ||
1584 | |||
1585 | rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3 | ||
1586 | |||
1587 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1588 | bl,pn %icc, .ncbc.enc.next.block_fp | ||
1589 | #else | ||
1590 | bl .ncbc.enc.next.block_fp | ||
1591 | #endif | ||
1592 | add in0, 8, in0 ! input address | ||
1593 | |||
1594 | ! If 8 or more bytes are to be encrypted after this block, | ||
1595 | ! we combine final permutation for this block with initial | ||
1596 | ! permutation for next block. Load next block: | ||
1597 | |||
1598 | load_little_endian(in0, global3, global4, local5, .LLE12) | ||
1599 | |||
1600 | ! parameter 1 original left | ||
1601 | ! parameter 2 original right | ||
1602 | ! parameter 3 left ip | ||
1603 | ! parameter 4 right ip | ||
1604 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
1605 | ! 2: mov in4 to in3 | ||
1606 | ! | ||
1607 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
1608 | |||
1609 | fp_ip_macro(out0, out1, global3, global4, 2) | ||
1610 | |||
1611 | store_little_endian(in1, out0, out1, local3, .SLE10) ! block | ||
1612 | |||
1613 | ld [in3], out0 ! key 7531 first round next block | ||
1614 | mov in5, local1 | ||
1615 | xor global3, out5, in5 ! iv xor next block | ||
1616 | |||
1617 | ld [in3+4], out1 ! key 8642 | ||
1618 | add global1, 512, global3 ! address sbox 3 since register used | ||
1619 | xor global4, local1, out5 ! iv xor next block | ||
1620 | |||
1621 | ba .ncbc.enc.next.block_2 | ||
1622 | add in1, 8, in1 ! output address | ||
1623 | |||
1624 | .ncbc.enc.next.block_fp: | ||
1625 | |||
1626 | fp_macro(in5, out5) | ||
1627 | |||
1628 | store_little_endian(in1, in5, out5, local3, .SLE1) ! block | ||
1629 | |||
1630 | addcc in2, -8, in2 ! bytes missing when next block done | ||
1631 | |||
1632 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1633 | bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 | ||
1634 | #else | ||
1635 | bpos .ncbc.enc.next.block | ||
1636 | #endif | ||
1637 | add in1, 8, in1 | ||
1638 | |||
1639 | .ncbc.enc.seven.or.less: | ||
1640 | |||
1641 | cmp in2, -8 | ||
1642 | |||
1643 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1644 | ble,pt %icc, .ncbc.enc.finish | ||
1645 | #else | ||
1646 | ble .ncbc.enc.finish | ||
1647 | #endif | ||
1648 | nop | ||
1649 | ! in2 holds the remaining bytes minus 8 here so -8 means no bytes are left | ||
1650 | add in2, 8, local1 ! bytes to load | ||
1651 | |||
1652 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | ||
1653 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) | ||
1654 | |||
1655 | ! Loads 1 to 7 bytes little endian to global4, out4 | ||
1656 | |||
1657 | ! the partial block then continues at .ncbc.enc.next.block_1 through the ret label | ||
1658 | .ncbc.enc.finish: | ||
1659 | |||
1660 | LDPTR IVEC, local4 | ||
1661 | store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec | ||
1662 | |||
1663 | ret | ||
1664 | restore | ||
1665 | |||
1666 | |||
1667 | .ncbc.dec: | ||
1668 | |||
1669 | STPTR in0, INPUT | ||
1670 | cmp in2, 0 ! length | ||
1671 | add in3, 120, in3 | ||
1672 | |||
1673 | LDPTR IVEC, local7 ! ivec | ||
1674 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1675 | ble,pn %icc, .ncbc.dec.finish | ||
1676 | #else | ||
1677 | ble .ncbc.dec.finish | ||
1678 | #endif | ||
1679 | mov in3, in4 ! schedule | ||
1680 | |||
1681 | STPTR in1, OUTPUT | ||
1682 | mov in0, local5 ! input | ||
1683 | ! both addresses are saved to the frame so in0 and in1 can now hold the iv | ||
1684 | load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec | ||
1685 | |||
1686 | .ncbc.dec.next.block: | ||
1687 | |||
1688 | load_little_endian(local5, in5, out5, local3, .LLE4) ! block | ||
1689 | |||
1690 | ! parameter 6 1/2 for include encryption/decryption | ||
1691 | ! parameter 7 1 for mov in1 to in3 | ||
1692 | ! parameter 8 1 for mov in3 to in4 | ||
1693 | |||
1694 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4 | ||
1695 | |||
1696 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 | ||
1697 | |||
1698 | ! in2 is bytes left to be stored | ||
1699 | ! in2 is compared to 8 in the rounds | ||
1700 | |||
1701 | xor out5, in0, out4 ! iv xor | ||
1702 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1703 | bl,pn %icc, .ncbc.dec.seven.or.less | ||
1704 | #else | ||
1705 | bl .ncbc.dec.seven.or.less | ||
1706 | #endif | ||
1707 | xor in5, in1, global4 ! iv xor | ||
1708 | |||
1709 | ! Load ivec next block now, since input and output address might be the same. | ||
1710 | |||
1711 | load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv | ||
1712 | |||
1713 | store_little_endian(local7, out4, global4, local3, .SLE3) | ||
1714 | |||
1715 | STPTR local5, INPUT | ||
1716 | add local7, 8, local7 | ||
1717 | addcc in2, -8, in2 | ||
1718 | |||
1719 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1720 | bg,pt %icc, .ncbc.dec.next.block | ||
1721 | #else | ||
1722 | bg .ncbc.dec.next.block | ||
1723 | #endif | ||
1724 | STPTR local7, OUTPUT | ||
1725 | |||
1726 | |||
1727 | .ncbc.dec.store.iv: | ||
1728 | |||
1729 | LDPTR IVEC, local4 ! ivec | ||
1730 | store_little_endian(local4, in0, in1, local5, .SLE4) | ||
1731 | |||
1732 | .ncbc.dec.finish: | ||
1733 | |||
1734 | ret | ||
1735 | restore | ||
1736 | |||
1737 | .ncbc.dec.seven.or.less: | ||
1738 | |||
1739 | load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec | ||
1740 | |||
1741 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) | ||
1742 | ! the partial store above leaves through .ncbc.dec.store.iv | ||
1743 | |||
1744 | .DES_ncbc_encrypt.end: | ||
1745 | .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt | ||
1746 | |||
1747 | |||
1748 | ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc) | ||
1749 | ! ************************************************************************** | ||
1750 | |||
1751 | ! The first six arguments arrive in in0 to in5 in the order listed above. | ||
1752 | .align 32 | ||
1753 | .global DES_ede3_cbc_encrypt | ||
1754 | .type DES_ede3_cbc_encrypt,#function | ||
1755 | |||
1756 | DES_ede3_cbc_encrypt: | ||
1757 | |||
1758 | save %sp, FRAME, %sp | ||
1759 | |||
1760 | define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) | ||
1761 | define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | ||
1762 | define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) | ||
1763 | |||
1764 | call .PIC.me.up | ||
1765 | mov .PIC.me.up-(.-4),out0 | ||
1766 | ! arguments 7 and 8 (ivec and enc) are loaded through %fp | ||
1767 | LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc | ||
1768 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
1769 | cmp local3, 0 ! enc | ||
1770 | |||
1771 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1772 | be,pn %icc, .ede3.dec | ||
1773 | #else | ||
1774 | be .ede3.dec | ||
1775 | #endif | ||
1776 | STPTR in4, KS2 | ||
1777 | |||
1778 | STPTR in5, KS3 | ||
1779 | |||
1780 | load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec | ||
1781 | |||
1782 | addcc in2, -8, in2 ! bytes missing after next block | ||
1783 | |||
1784 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1785 | bl,pn %icc, .ede3.enc.seven.or.less | ||
1786 | #else | ||
1787 | bl .ede3.enc.seven.or.less | ||
1788 | #endif | ||
1789 | STPTR in3, KS1 | ||
1790 | |||
1791 | .ede3.enc.next.block: | ||
1792 | |||
1793 | load_little_endian(in0, out4, global4, local3, .LLE7) | ||
1794 | |||
1795 | .ede3.enc.next.block_1: | ||
1796 | |||
1797 | LDPTR KS2, in4 | ||
1798 | xor in5, out4, in5 ! iv xor | ||
1799 | xor out5, global4, out5 ! iv xor | ||
1800 | |||
1801 | LDPTR KS1, in3 | ||
1802 | add in4, 120, in4 ! for decryption we use last subkey first | ||
1803 | nop | ||
1804 | |||
1805 | ip_macro(in5, out5, in5, out5, in3) | ||
1806 | |||
1807 | .ede3.enc.next.block_2: | ||
1808 | |||
1809 | call .des_enc ! ks1 in3 | ||
1810 | nop | ||
1811 | |||
1812 | call .des_dec ! ks2 in4 | ||
1813 | LDPTR KS3, in3 | ||
1814 | |||
1815 | call .des_enc ! ks3 in3 compares in2 to 8 | ||
1816 | nop | ||
1817 | |||
1818 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1819 | bl,pn %icc, .ede3.enc.next.block_fp | ||
1820 | #else | ||
1821 | bl .ede3.enc.next.block_fp | ||
1822 | #endif | ||
1823 | add in0, 8, in0 | ||
1824 | |||
1825 | ! If 8 or more bytes are to be encrypted after this block, | ||
1826 | ! we combine final permutation for this block with initial | ||
1827 | ! permutation for next block. Load next block: | ||
1828 | |||
1829 | load_little_endian(in0, global3, global4, local5, .LLE11) | ||
1830 | |||
1831 | ! parameter 1 original left | ||
1832 | ! parameter 2 original right | ||
1833 | ! parameter 3 left ip | ||
1834 | ! parameter 4 right ip | ||
1835 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
1836 | ! 2: mov in4 to in3 | ||
1837 | ! | ||
1838 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
1839 | |||
1840 | fp_ip_macro(out0, out1, global3, global4, 1) | ||
1841 | |||
1842 | store_little_endian(in1, out0, out1, local3, .SLE9) ! block | ||
1843 | |||
1844 | mov in5, local1 | ||
1845 | xor global3, out5, in5 ! iv xor next block | ||
1846 | |||
1847 | ld [in3], out0 ! key 7531 | ||
1848 | add global1, 512, global3 ! address sbox 3 | ||
1849 | xor global4, local1, out5 ! iv xor next block | ||
1850 | |||
1851 | ld [in3+4], out1 ! key 8642 | ||
1852 | add global1, 768, global4 ! address sbox 4 | ||
1853 | ba .ede3.enc.next.block_2 | ||
1854 | add in1, 8, in1 | ||
1855 | |||
1856 | .ede3.enc.next.block_fp: | ||
1857 | |||
1858 | fp_macro(in5, out5) | ||
1859 | |||
1860 | store_little_endian(in1, in5, out5, local3, .SLE5) ! block | ||
1861 | |||
1862 | addcc in2, -8, in2 ! bytes missing when next block done | ||
1863 | |||
1864 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1865 | bpos,pt %icc, .ede3.enc.next.block | ||
1866 | #else | ||
1867 | bpos .ede3.enc.next.block | ||
1868 | #endif | ||
1869 | add in1, 8, in1 | ||
1870 | |||
1871 | .ede3.enc.seven.or.less: | ||
1872 | |||
1873 | cmp in2, -8 | ||
1874 | |||
1875 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1876 | ble,pt %icc, .ede3.enc.finish | ||
1877 | #else | ||
1878 | ble .ede3.enc.finish | ||
1879 | #endif | ||
1880 | nop | ||
1881 | ! in2 holds the remaining bytes minus 8 here so -8 means no bytes are left | ||
1882 | add in2, 8, local1 ! bytes to load | ||
1883 | |||
1884 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | ||
1885 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) | ||
1886 | |||
1887 | .ede3.enc.finish: | ||
1888 | |||
1889 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
1890 | store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec | ||
1891 | |||
1892 | ret | ||
1893 | restore | ||
1894 | |||
1895 | .ede3.dec: | ||
1896 | |||
1897 | STPTR in0, INPUT | ||
1898 | add in5, 120, in5 | ||
1899 | ! ks1 and ks3 are the decryption schedules on this path so 120 is added to both | ||
1900 | STPTR in1, OUTPUT | ||
1901 | mov in0, local5 | ||
1902 | add in3, 120, in3 | ||
1903 | |||
1904 | STPTR in3, KS1 | ||
1905 | cmp in2, 0 | ||
1906 | |||
1907 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1908 | ble %icc, .ede3.dec.finish | ||
1909 | #else | ||
1910 | ble .ede3.dec.finish | ||
1911 | #endif | ||
1912 | STPTR in5, KS3 | ||
1913 | |||
1914 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv | ||
1915 | load_little_endian(local7, in0, in1, local3, .LLE8) | ||
1916 | |||
1917 | .ede3.dec.next.block: | ||
1918 | |||
1919 | load_little_endian(local5, in5, out5, local3, .LLE9) | ||
1920 | |||
1921 | ! parameter 6 1/2 for include encryption/decryption | ||
1922 | ! parameter 7 1 for mov in1 to in3 | ||
1923 | ! parameter 8 1 for mov in3 to in4 | ||
1924 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
1925 | |||
1926 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 | ||
1927 | |||
1928 | call .des_enc ! ks2 in3 | ||
1929 | LDPTR KS1, in4 | ||
1930 | |||
1931 | call .des_dec ! ks1 in4 | ||
1932 | nop | ||
1933 | |||
1934 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 | ||
1935 | |||
1936 | ! in2 is bytes left to be stored | ||
1937 | ! in2 is compared to 8 in the rounds | ||
1938 | |||
1939 | xor out5, in0, out4 | ||
1940 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1941 | bl,pn %icc, .ede3.dec.seven.or.less | ||
1942 | #else | ||
1943 | bl .ede3.dec.seven.or.less | ||
1944 | #endif | ||
1945 | xor in5, in1, global4 | ||
1946 | |||
1947 | load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block | ||
1948 | |||
1949 | store_little_endian(local7, out4, global4, local3, .SLE7) ! block | ||
1950 | |||
1951 | STPTR local5, INPUT | ||
1952 | addcc in2, -8, in2 | ||
1953 | add local7, 8, local7 | ||
1954 | |||
1955 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
1956 | bg,pt %icc, .ede3.dec.next.block | ||
1957 | #else | ||
1958 | bg .ede3.dec.next.block | ||
1959 | #endif | ||
1960 | STPTR local7, OUTPUT | ||
1961 | |||
1962 | .ede3.dec.store.iv: | ||
1963 | |||
1964 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
1965 | store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec | ||
1966 | |||
1967 | .ede3.dec.finish: | ||
1968 | |||
1969 | ret | ||
1970 | restore | ||
1971 | |||
1972 | .ede3.dec.seven.or.less: | ||
1973 | |||
1974 | load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv | ||
1975 | |||
1976 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) | ||
1977 | ! the partial store above leaves through .ede3.dec.store.iv | ||
1978 | |||
1979 | .DES_ede3_cbc_encrypt.end: | ||
1980 | .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt | ||