summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/des
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/des')
-rw-r--r--src/lib/libcrypto/des/asm/des_enc.m41980
1 files changed, 1980 insertions, 0 deletions
diff --git a/src/lib/libcrypto/des/asm/des_enc.m4 b/src/lib/libcrypto/des/asm/des_enc.m4
new file mode 100644
index 0000000000..f5b1928f99
--- /dev/null
+++ b/src/lib/libcrypto/des/asm/des_enc.m4
@@ -0,0 +1,1980 @@
1! des_enc.m4
2! des_enc.S (generated from des_enc.m4)
3!
4! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file.
5!
6! Version 1.0. 32-bit version.
7!
8! June 8, 2000.
9!
10! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation
11! by Andy Polyakov.
12!
13! January 1, 2003.
14!
15! Assembler version: Copyright Svend Olaf Mikkelsen.
16!
17! Original C code: Copyright Eric A. Young.
18!
19! This code can be freely used by LibDES/SSLeay/OpenSSL users.
20!
21! The LibDES/SSLeay/OpenSSL copyright notices must be respected.
22!
23! This version can be redistributed.
24!
25! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
26!
27! Global registers 1 to 5 are used. This is the same as done by the
28! cc compiler. The UltraSPARC load/store little endian feature is used.
29!
30! Instruction grouping often refers to one CPU cycle.
31!
32! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
33!
34! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
35!
36! Performance improvement according to './apps/openssl speed des'
37!
38! 32-bit build:
39! 23% faster than cc-5.2 -xarch=v8plus -xO5
40! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
41! 64-bit build:
42! 50% faster than cc-5.2 -xarch=v9 -xO5
43! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
44!
45
46.ident "des_enc.m4 2.1"
47
! ABI selection: ABI64 is set when the compiler reports a 64-bit SPARC
! target. The two branches below pick the frame size given to save, the
! stack bias, the pointer-sized load/store opcodes and the offset/size of
! the stack slots addressed as [%sp+BIAS+ARG0+n*ARGSZ].
48#if defined(__SUNPRO_C) && defined(__sparcv9)
49# define ABI64 /* They've said -xarch=v9 at command line */
50#elif defined(__GNUC__) && defined(__arch64__)
51# define ABI64 /* They've said -m64 at command line */
52#endif
53
54#ifdef ABI64
55 .register %g2,#scratch
56 .register %g3,#scratch
57# define FRAME -192 /* operand of save %sp, FRAME, %sp */
58# define BIAS 2047 /* added to %sp in stack slot addresses */
59# define LDPTR ldx /* pointer-sized load */
60# define STPTR stx /* pointer-sized store */
61# define ARG0 128 /* offset of the first stack slot used */
62# define ARGSZ 8 /* size of one stack slot */
63# ifndef OPENSSL_SYSNAME_ULTRASPARC
64# define OPENSSL_SYSNAME_ULTRASPARC /* 64-bit build always takes the UltraSPARC paths */
65# endif
66#else
67# define FRAME -96 /* operand of save %sp, FRAME, %sp */
68# define BIAS 0 /* no stack bias in the 32-bit build */
69# define LDPTR ld /* pointer-sized load */
70# define STPTR st /* pointer-sized store */
71# define ARG0 68 /* offset of the first stack slot used */
72# define ARGSZ 4 /* size of one stack slot */
73#endif
74
75#define LOOPS 7 /* passes through the two-round loop; two more rounds follow it */
76
! Readable aliases for the SPARC global, local, in and out registers.
77#define global0 %g0
78#define global1 %g1
79#define global2 %g2
80#define global3 %g3
81#define global4 %g4
82#define global5 %g5
83
! NOTE(review): local6 and local7 are cross-mapped below (local7 is %l6 and
! local6 is %l7); this looks deliberate - confirm before renumbering.
84#define local0 %l0
85#define local1 %l1
86#define local2 %l2
87#define local3 %l3
88#define local4 %l4
89#define local5 %l5
90#define local7 %l6
91#define local6 %l7
92
93#define in0 %i0
94#define in1 %i1
95#define in2 %i2
96#define in3 %i3
97#define in4 %i4
98#define in5 %i5
99#define in6 %i6
100#define in7 %i7
101
102#define out0 %o0
103#define out1 %o1
104#define out2 %o2
105#define out3 %o3
106#define out4 %o4
107#define out5 %o5
108#define out6 %o6
109#define out7 %o7
110
111#define stub stb /* alias: the store macros write single bytes with stub */
112
! m4 quoting switches to braces on the next line, so braces in the text
! that follows act as m4 quotes.
113changequote({,})
114
115
116! Macro definitions:
117
118
119! {ip_macro}
120!
121! The logic used in initial and final permutations is the same as in
122! the C code. The permutations are done with a clever shift, xor, and
123! technique.
124!
125! The macro also loads address sbox 1 to 5 to global 1 to 5, address
126! sbox 6 to local6, and address sbox 8 to out3.
127!
128! Rotates the halves 3 left to bring the sbox bits in convenient positions.
129!
130! Loads key first round from address in parameter 5 to out0, out1.
131!
132! After the original LibDES initial permutation, the resulting left
133! is in the variable initially used for right and vice versa. The macro
134! implements the possibility to keep the halves in the original registers.
135!
136! parameter 1 left
137! parameter 2 right
138! parameter 3 result left (modify in first round)
139! parameter 4 result right (use in first round)
140! parameter 5 key address
141! parameter 6 1/2 for include encryption/decryption
142! parameter 7 1 for move in1 to in3
143! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
144! parameter 9 1 for load ks3 and ks2 to in4 and in3
145
146define(ip_macro, {
147
148! {ip_macro}
149! $1 $2 $4 $3 $5 $6 $7 $8 $9
! Initial permutation of the halves given as parameters 1 and 2.
! The mask words are read from out2+256 and up (offsets 256 to 272 match
! the five permutation constants listed after the .des_and table; out2 is
! expected to hold that table address, which is set up outside this code).
! Afterwards both halves are rotated left by 3, the sbox base registers
! are derived from global1, and the first round key is loaded from the
! address in parameter 5 to out0 and out1.
! Instructions are interleaved for issue grouping; keep their order.
150
151 ld [out2+256], local1 ! mask at table offset 256
152 srl $2, 4, local4
153
154 xor local4, $1, local4
155 ifelse($7,1,{mov in1, in3},{nop})
156
157 ld [out2+260], local2 ! mask at table offset 260
158 and local4, local1, local4
159 ifelse($8,1,{mov in3, in4},{})
160 ifelse($8,2,{mov in4, in3},{})
161
162 ld [out2+280], out4 ! loop counter
163 sll local4, 4, local1
164 xor $1, local4, $1
165
166 ld [out2+264], local3 ! mask at table offset 264
167 srl $1, 16, local4
168 xor $2, local1, $2
169
170 ifelse($9,1,{LDPTR KS3, in4},{})
171 xor local4, $2, local4
172 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
173
174 ifelse($9,1,{LDPTR KS2, in3},{})
175 and local4, local2, local4
176 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
177
178 sll local4, 16, local1
179 xor $2, local4, $2
180
181 srl $2, 2, local4
182 xor $1, local1, $1
183
184 sethi %hi(16711680), local5 ! 16711680 is 0x00ff0000
185 xor local4, $1, local4
186
187 and local4, local3, local4
188 or local5, 255, local5 ! local5 is now 0x00ff00ff
189
190 sll local4, 2, local2
191 xor $1, local4, $1
192
193 srl $1, 8, local4
194 xor $2, local2, $2
195
196 xor local4, $2, local4
197 add global1, 768, global4 ! address sbox 4
198
199 and local4, local5, local4
200 add global1, 1024, global5 ! address sbox 5
201
202 ld [out2+272], local7 ! mask at table offset 272
203 sll local4, 8, local1
204 xor $2, local4, $2
205
206 srl $2, 1, local4
207 xor $1, local1, $1
208
209 ld [$5], out0 ! key 7531
210 xor local4, $1, local4
211 add global1, 256, global2 ! address sbox 2
212
213 ld [$5+4], out1 ! key 8642
214 and local4, local7, local4
215 add global1, 512, global3 ! address sbox 3
216
217 sll local4, 1, local1
218 xor $1, local4, $1
219
220 sll $1, 3, local3
221 xor $2, local1, $2
222
223 sll $2, 3, local2
224 add global1, 1280, local6 ! address sbox 6
225
226 srl $1, 29, local4
227 add global1, 1792, out3 ! address sbox 8
228
229 srl $2, 29, local1
230 or local4, local3, $4 ! parameter 1 rotated left 3
231
232 or local2, local1, $3 ! parameter 2 rotated left 3
233
234 ifelse($6, 1, {
235
236 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
237 or local2, local1, $3
238 xor $4, out0, local1
239
240 call .des_enc.1
241 and local1, 252, local1 ! delay slot: sbox 1 offset for first round
242
243 },{})
244
245 ifelse($6, 2, {
246
247 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
248 or local2, local1, $3
249 xor $4, out0, local1
250
251 call .des_dec.1
252 and local1, 252, local1 ! delay slot: sbox 1 offset for first round
253
254 },{})
255})
256
257
258! {rounds_macro}
259!
260! The logic used in the DES rounds is the same as in the C code,
261! except that calculations for sbox 1 and sbox 5 begin before
262! the previous round is finished.
263!
264! In each round one half (work) is modified based on key and the
265! other half (use).
266!
267! In this version we do two rounds in a loop repeated 7 times
268! and two rounds separately.
269!
270! One half has the bits for the sboxes in the following positions:
271!
272! 777777xx555555xx333333xx111111xx
273!
274! 88xx666666xx444444xx222222xx8888
275!
276! The bits for each sbox are xor-ed with the key bits for that box.
277! The above xx bits are cleared, and the result used for lookup in
278! the sbox table. Each sbox entry contains the 4 output bits permuted
279! into 32 bits according to the P permutation.
280!
281! In the description of DES, left and right are switched after
282! each round, except after last round. In this code the original
283! left and right are kept in the same register in all rounds, meaning
284! that after the 16 rounds the result for right is in the register
285! originally used for left.
286!
287! parameter 1 first work (left in first round)
288! parameter 2 first use (right in first round)
289! parameter 3 enc/dec 1/-1
290! parameter 4 loop label
291! parameter 5 key address register
292! parameter 6 optional address for key next encryption/decryption
293! parameter 7 not empty for include retl
294!
295! also compares in2 to 8
296
297define(rounds_macro, {
298
299! {rounds_macro}
300! $1 $2 $3 $4 $5 $6 $7 $8 $9
! The DES rounds: a loop of two rounds that repeats while the counter in
! out4 has not reached zero, followed by two more rounds after the loop.
! On entry the first round key is expected in out0 and out1, the sbox
! addresses in global1 to global5, local6 and out3, and out2 is used as
! base of the byte lookup table and of the words at offsets 280 and 284.
! The key pointer in parameter 5 is advanced inside the loop.
! At the end parameter 1 is left split in local3 (shifted right 3) and
! local4 (shifted left 29) for the final permutation code.
! When parameter 7 is given the code ends with retl and the last xor is
! executed in its delay slot.
! Instructions are interleaved for issue grouping; keep their order.
301
302 xor $2, out0, local1
303
304 ld [out2+284], local5 ! 0x0000FC00
305 ba $4
306 and local1, 252, local1 ! delay slot: sbox 1 offset for first round
307
308 .align 32
309
310$4:
311 ! local6 is address sbox 6
312 ! out3 is address sbox 8
313 ! out4 is loop counter
314
315 ld [global1+local1], local1
316 xor $2, out1, out1 ! 8642
317 xor $2, out0, out0 ! 7531
318 fmovs %f0, %f0 ! register-to-itself fmovs used for alignment
319
320 srl out1, 4, local0 ! rotate 4 right
321 and out0, local5, local3 ! 3
322 fmovs %f0, %f0
323
324 ld [$5+$3*8], local7 ! key 7531 next round
325 srl local3, 8, local3 ! 3
326 and local0, 252, local2 ! 2
327 fmovs %f0, %f0
328
329 ld [global3+local3],local3 ! 3
330 sll out1, 28, out1 ! rotate
331 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
332
333 ld [global2+local2], local2 ! 2
334 srl out0, 24, local1 ! 7
335 or out1, local0, out1 ! rotate
336
337 ldub [out2+local1], local1 ! 7 (and 0xFC)
338 srl out1, 24, local0 ! 8
339 and out1, local5, local4 ! 4
340
341 ldub [out2+local0], local0 ! 8 (and 0xFC)
342 srl local4, 8, local4 ! 4
343 xor $1, local2, $1 ! 2 finished local2 now sbox 6
344
345 ld [global4+local4],local4 ! 4
346 srl out1, 16, local2 ! 6
347 xor $1, local3, $1 ! 3 finished local3 now sbox 5
348
349 ld [out3+local0],local0 ! 8
350 and local2, 252, local2 ! 6
351 add global1, 1536, local5 ! address sbox 7
352
353 ld [local6+local2], local2 ! 6
354 srl out0, 16, local3 ! 5
355 xor $1, local4, $1 ! 4 finished
356
357 ld [local5+local1],local1 ! 7
358 and local3, 252, local3 ! 5
359 xor $1, local0, $1 ! 8 finished
360
361 ld [global5+local3],local3 ! 5
362 xor $1, local2, $1 ! 6 finished
363 subcc out4, 1, out4 ! count down; tested by the bne at the loop end
364
365 ld [$5+$3*8+4], out0 ! key 8642 next round
366 xor $1, local7, local2 ! sbox 5 next round
367 xor $1, local1, $1 ! 7 finished
368
369 srl local2, 16, local2 ! sbox 5 next round
370 xor $1, local3, $1 ! 5 finished
371
372 ld [$5+$3*16+4], out1 ! key 8642 next round again
373 and local2, 252, local2 ! sbox5 next round
374! next round
375 xor $1, local7, local7 ! 7531
376
377 ld [global5+local2], local2 ! 5
378 srl local7, 24, local3 ! 7
379 xor $1, out0, out0 ! 8642
380
381 ldub [out2+local3], local3 ! 7 (and 0xFC)
382 srl out0, 4, local0 ! rotate 4 right
383 and local7, 252, local1 ! 1
384
385 sll out0, 28, out0 ! rotate
386 xor $2, local2, $2 ! 5 finished local2 used
387
388 srl local0, 8, local4 ! 4
389 and local0, 252, local2 ! 2
390 ld [local5+local3], local3 ! 7
391
392 srl local0, 16, local5 ! 6
393 or out0, local0, out0 ! rotate
394 ld [global2+local2], local2 ! 2
395
396 srl out0, 24, local0
397 ld [$5+$3*16], out0 ! key 7531 next round
398 and local4, 252, local4 ! 4
399
400 and local5, 252, local5 ! 6
401 ld [global4+local4], local4 ! 4
402 xor $2, local3, $2 ! 7 finished local3 used
403
404 and local0, 252, local0 ! 8
405 ld [local6+local5], local5 ! 6
406 xor $2, local2, $2 ! 2 finished local2 now sbox 3
407
408 srl local7, 8, local2 ! 3 start
409 ld [out3+local0], local0 ! 8
410 xor $2, local4, $2 ! 4 finished
411
412 and local2, 252, local2 ! 3
413 ld [global1+local1], local1 ! 1
414 xor $2, local5, $2 ! 6 finished local5 used
415
416 ld [global3+local2], local2 ! 3
417 xor $2, local0, $2 ! 8 finished
418 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two rounds of 8 bytes)
419
420 ld [out2+284], local5 ! 0x0000FC00
421 xor $2, out0, local4 ! sbox 1 next round
422 xor $2, local1, $2 ! 1 finished
423
424 xor $2, local2, $2 ! 3 finished
425#ifdef OPENSSL_SYSNAME_ULTRASPARC
426 bne,pt %icc, $4
427#else
428 bne $4
429#endif
430 and local4, 252, local1 ! sbox 1 next round
431
432! two rounds more:
433
434 ld [global1+local1], local1
435 xor $2, out1, out1
436 xor $2, out0, out0
437
438 srl out1, 4, local0 ! rotate
439 and out0, local5, local3
440
441 ld [$5+$3*8], local7 ! key 7531
442 srl local3, 8, local3
443 and local0, 252, local2
444
445 ld [global3+local3],local3
446 sll out1, 28, out1 ! rotate
447 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
448
449 ld [global2+local2], local2
450 srl out0, 24, local1
451 or out1, local0, out1 ! rotate
452
453 ldub [out2+local1], local1
454 srl out1, 24, local0
455 and out1, local5, local4
456
457 ldub [out2+local0], local0
458 srl local4, 8, local4
459 xor $1, local2, $1 ! 2 finished local2 now sbox 6
460
461 ld [global4+local4],local4
462 srl out1, 16, local2
463 xor $1, local3, $1 ! 3 finished local3 now sbox 5
464
465 ld [out3+local0],local0
466 and local2, 252, local2
467 add global1, 1536, local5 ! address sbox 7
468
469 ld [local6+local2], local2
470 srl out0, 16, local3
471 xor $1, local4, $1 ! 4 finished
472
473 ld [local5+local1],local1
474 and local3, 252, local3
475 xor $1, local0, $1
476
477 ld [global5+local3],local3
478 xor $1, local2, $1 ! 6 finished
479 cmp in2, 8 ! flags are left for the code that follows the rounds
480
481 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
482 xor $1, local7, local2 ! sbox 5 next round
483 xor $1, local1, $1 ! 7 finished
484
485 ld [$5+$3*8+4], out0
486 srl local2, 16, local2 ! sbox 5 next round
487 xor $1, local3, $1 ! 5 finished
488
489 and local2, 252, local2
490! next round (two rounds more)
491 xor $1, local7, local7 ! 7531
492
493 ld [global5+local2], local2
494 srl local7, 24, local3
495 xor $1, out0, out0 ! 8642
496
497 ldub [out2+local3], local3
498 srl out0, 4, local0 ! rotate
499 and local7, 252, local1
500
501 sll out0, 28, out0 ! rotate
502 xor $2, local2, $2 ! 5 finished local2 used
503
504 srl local0, 8, local4
505 and local0, 252, local2
506 ld [local5+local3], local3
507
508 srl local0, 16, local5
509 or out0, local0, out0 ! rotate
510 ld [global2+local2], local2
511
512 srl out0, 24, local0
513 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
514 and local4, 252, local4
515
516 and local5, 252, local5
517 ld [global4+local4], local4
518 xor $2, local3, $2 ! 7 finished local3 used
519
520 and local0, 252, local0
521 ld [local6+local5], local5
522 xor $2, local2, $2 ! 2 finished local2 now sbox 3
523
524 srl local7, 8, local2 ! 3 start
525 ld [out3+local0], local0
526 xor $2, local4, $2
527
528 and local2, 252, local2
529 ld [global1+local1], local1
530 xor $2, local5, $2 ! 6 finished local5 used
531
532 ld [global3+local2], local2
533 srl $1, 3, local3 ! first part of undoing the rotate left 3
534 xor $2, local0, $2
535
536 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
537 sll $1, 29, local4 ! second part, combined later with local3
538 xor $2, local1, $2
539
540 ifelse($7,{}, {}, {retl})
541 xor $2, local2, $2 ! delay slot of retl when parameter 7 is given
542})
543
544
545! {fp_macro}
546!
547! parameter 1 right (original left)
548! parameter 2 left (original right)
549! parameter 3 1 for optional store to [in0]
550! parameter 4 1 for load input/output address to local5/7
551!
552! The final permutation logic switches the halves, meaning that
553! left and right end up in the registers originally used.
554
555define(fp_macro, {
556
557! {fp_macro}
558! $1 $2 $3 $4 $5 $6 $7 $8 $9
! Final permutation. Parameter 1 is rebuilt from local3 and local4 as
! left there by the rounds code, parameter 2 is rotated right by 3 here.
! The five masks 0x55555555, 0x00ff00ff, 0x33333333, 0x0000ffff and
! 0x0f0f0f0f are built with sethi/or pairs instead of being loaded.
! With parameter 3 equal to 1 both result words are stored at in0.
! With parameter 4 equal to 1 INPUT and OUTPUT are loaded to local5 and
! local7 in between.
559
560 ! initially undo the rotate 3 left done after initial permutation
561 ! original left is received shifted 3 right and 29 left in local3/4
562
563 sll $2, 29, local1
564 or local3, local4, $1
565
566 srl $2, 3, $2
567 sethi %hi(0x55555555), local2
568
569 or $2, local1, $2
570 or local2, %lo(0x55555555), local2
571
572 srl $2, 1, local3
573 sethi %hi(0x00ff00ff), local1
574 xor local3, $1, local3
575 or local1, %lo(0x00ff00ff), local1
576 and local3, local2, local3
577 sethi %hi(0x33333333), local4
578 sll local3, 1, local2
579
580 xor $1, local3, $1
581
582 srl $1, 8, local3
583 xor $2, local2, $2
584 xor local3, $2, local3
585 or local4, %lo(0x33333333), local4
586 and local3, local1, local3
587 sethi %hi(0x0000ffff), local1
588 sll local3, 8, local2
589
590 xor $2, local3, $2
591
592 srl $2, 2, local3
593 xor $1, local2, $1
594 xor local3, $1, local3
595 or local1, %lo(0x0000ffff), local1
596 and local3, local4, local3
597 sethi %hi(0x0f0f0f0f), local4
598 sll local3, 2, local2
599
600 ifelse($4,1, {LDPTR INPUT, local5})
601 xor $1, local3, $1
602
603 ifelse($4,1, {LDPTR OUTPUT, local7})
604 srl $1, 16, local3
605 xor $2, local2, $2
606 xor local3, $2, local3
607 or local4, %lo(0x0f0f0f0f), local4
608 and local3, local1, local3
609 sll local3, 16, local2
610
611 xor $2, local3, local1 ! parameter 2 continues in local1 from here
612
613 srl local1, 4, local3
614 xor $1, local2, $1
615 xor local3, $1, local3
616 and local3, local4, local3
617 sll local3, 4, local2
618
619 xor $1, local3, $1
620
621 ! optional store:
622
623 ifelse($3,1, {st $1, [in0]})
624
625 xor local1, local2, $2 ! final value of parameter 2
626
627 ifelse($3,1, {st $2, [in0+4]})
628
629})
630
631
632! {fp_ip_macro}
633!
634! Does initial permutation for next block mixed with
635! final permutation for current block.
636!
637! parameter 1 original left
638! parameter 2 original right
639! parameter 3 left ip
640! parameter 4 right ip
641! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
642! 2: mov in4 to in3
643!
644! also adds -8 to length in2 and loads loop counter to out4
645
646define(fp_ip_macro, {
647
648! {fp_ip_macro}
649! $1 $2 $3 $4 $5 $6 $7 $8 $9
! Final permutation of the current block (parameters 1 and 2) interleaved
! with the initial permutation of the next block (parameters 3 and 4).
! Parameter 1 is rebuilt from local3 and local4; parameter 2 is taken
! from out5 and rotated right by 3. The mask words are read from out2 at
! offsets 256 to 272. out4 serves as a scratch register and is reloaded
! with the loop counter from out2+280 near the end.
! The helper names set up just below are m4 aliases for registers.
650
651 define({temp1},{out4})
652 define({temp2},{local3})
653
654 define({ip1},{local1})
655 define({ip2},{local2})
656 define({ip4},{local4})
657 define({ip5},{local5})
658
659 ! $1 in local3, local4
660
661 ld [out2+256], ip1
662 sll out5, 29, temp1
663 or local3, local4, $1
664
665 srl out5, 3, $2
666 ifelse($5,2,{mov in4, in3})
667
668 ld [out2+272], ip5
669 srl $4, 4, local0
670 or $2, temp1, $2
671
672 srl $2, 1, temp1
673 xor temp1, $1, temp1
674
675 and temp1, ip5, temp1
676 xor local0, $3, local0
677
678 sll temp1, 1, temp2
679 xor $1, temp1, $1
680
681 and local0, ip1, local0
682 add in2, -8, in2 ! eight bytes less remain
683
684 sll local0, 4, local7
685 xor $3, local0, $3
686
687 ld [out2+268], ip4
688 srl $1, 8, temp1
689 xor $2, temp2, $2
690 ld [out2+260], ip2
691 srl $3, 16, local0
692 xor $4, local7, $4
693 xor temp1, $2, temp1
694 xor local0, $4, local0
695 and temp1, ip4, temp1
696 and local0, ip2, local0
697 sll temp1, 8, temp2
698 xor $2, temp1, $2
699 sll local0, 16, local7
700 xor $4, local0, $4
701
702 srl $2, 2, temp1
703 xor $1, temp2, $1
704
705 ld [out2+264], temp2 ! ip3
706 srl $4, 2, local0
707 xor $3, local7, $3
708 xor temp1, $1, temp1
709 xor local0, $3, local0
710 and temp1, temp2, temp1
711 and local0, temp2, local0
712 sll temp1, 2, temp2
713 xor $1, temp1, $1
714 sll local0, 2, local7
715 xor $3, local0, $3
716
717 srl $1, 16, temp1
718 xor $2, temp2, $2
719 srl $3, 8, local0
720 xor $4, local7, $4
721 xor temp1, $2, temp1
722 xor local0, $4, local0
723 and temp1, ip2, temp1
724 and local0, ip4, local0
725 sll temp1, 16, temp2
726 xor $2, temp1, local4
727 sll local0, 8, local7
728 xor $4, local0, $4
729
730 srl $4, 1, local0
731 xor $3, local7, $3
732
733 srl local4, 4, temp1
734 xor local0, $3, local0
735
736 xor $1, temp2, $1
737 and local0, ip5, local0
738
739 sll local0, 1, local7
740 xor temp1, $1, temp1
741
742 xor $3, local0, $3
743 xor $4, local7, $4
744
745 sll $3, 3, local5
746 and temp1, ip1, temp1
747
748 sll temp1, 4, temp2
749 xor $1, temp1, $1
750
751 ifelse($5,1,{LDPTR KS2, in4})
752 sll $4, 3, local2
753 xor local4, temp2, $2
754
755 ! reload since used as temporary:
756
757 ld [out2+280], out4 ! loop counter
758
759 srl $3, 29, local0
760 ifelse($5,1,{add in4, 120, in4})
761
762 ifelse($5,1,{LDPTR KS1, in3})
763 srl $4, 29, local7
764
765 or local0, local5, $4
766 or local2, local7, $3
767
768})
769
770
771
772! {load_little_endian}
773!
774! parameter 1 address
775! parameter 2 destination left
776! parameter 3 destination right
777! parameter 4 temporary
778! parameter 5 label
779
780define(load_little_endian, {
781
782! {load_little_endian}
783! $1 $2 $3 $4 $5 $6 $7 $8 $9
! Loads two 32-bit words from the address in parameter 1 so that the byte
! at the lowest address ends up in the least significant register bits.
! On UltraSPARC builds an address that is a multiple of 4 takes the short
! path with two lda instructions using ASI 0x88 (presumably the little
! endian load feature mentioned in the file header); every other case
! assembles each word from four ldub byte loads.
! Parameter 5 names the byte-wise path; the same name with an a appended
! marks the common exit.
784
785 ! first in memory to rightmost in register
786
787#ifdef OPENSSL_SYSNAME_ULTRASPARC
788 andcc $1, 3, global0 ! test the two low address bits only
789 bne,pn %icc, $5
790 nop
791
792 lda [$1] 0x88, $2
793 add $1, 4, $4
794
795 ba,pt %icc, $5a
796 lda [$4] 0x88, $3 ! delay slot: second word
797#endif
798
799$5:
800 ldub [$1+3], $2
801
802 ldub [$1+2], $4
803 sll $2, 8, $2
804 or $2, $4, $2
805
806 ldub [$1+1], $4
807 sll $2, 8, $2
808 or $2, $4, $2
809
810 ldub [$1+0], $4
811 sll $2, 8, $2
812 or $2, $4, $2
813
814
815 ldub [$1+3+4], $3
816
817 ldub [$1+2+4], $4
818 sll $3, 8, $3
819 or $3, $4, $3
820
821 ldub [$1+1+4], $4
822 sll $3, 8, $3
823 or $3, $4, $3
824
825 ldub [$1+0+4], $4
826 sll $3, 8, $3
827 or $3, $4, $3
828$5a:
829
830})
831
832
833! {load_little_endian_inc}
834!
835! parameter 1 address
836! parameter 2 destination left
837! parameter 3 destination right
838! parameter 4 temporary
839! parameter 5 label
840!
841! adds 8 to address
842
843define(load_little_endian_inc, {
844
845! {load_little_endian_inc}
846! $1 $2 $3 $4 $5 $6 $7 $8 $9
! Same two-word load as the non-incrementing variant, but the address
! register in parameter 1 is advanced by 8 on both paths.
! On UltraSPARC builds an address that is a multiple of 4 is read with
! two lda instructions using ASI 0x88; otherwise each word is assembled
! from four ldub byte loads.
847
848 ! first in memory to rightmost in register
849
850#ifdef OPENSSL_SYSNAME_ULTRASPARC
851 andcc $1, 3, global0 ! test the two low address bits only
852 bne,pn %icc, $5
853 nop
854
855 lda [$1] 0x88, $2
856 add $1, 4, $1
857
858 lda [$1] 0x88, $3
859 ba,pt %icc, $5a
860 add $1, 4, $1 ! delay slot: address now advanced by 8
861#endif
862
863$5:
864 ldub [$1+3], $2
865
866 ldub [$1+2], $4
867 sll $2, 8, $2
868 or $2, $4, $2
869
870 ldub [$1+1], $4
871 sll $2, 8, $2
872 or $2, $4, $2
873
874 ldub [$1+0], $4
875 sll $2, 8, $2
876 or $2, $4, $2
877
878 ldub [$1+3+4], $3
879 add $1, 8, $1 ! advance early; the offsets below subtract 8 again
880
881 ldub [$1+2+4-8], $4
882 sll $3, 8, $3
883 or $3, $4, $3
884
885 ldub [$1+1+4-8], $4
886 sll $3, 8, $3
887 or $3, $4, $3
888
889 ldub [$1+0+4-8], $4
890 sll $3, 8, $3
891 or $3, $4, $3
892$5a:
893
894})
895
896
897! {load_n_bytes}
898!
899! Loads 1 to 7 bytes little endian
900! Remaining bytes are zeroed.
901!
902! parameter 1 address
903! parameter 2 length
904! parameter 3 destination register left
905! parameter 4 destination register right
906! parameter 5 temp
907! parameter 6 temp2
908! parameter 7 label
909! parameter 8 return label
910
911define(load_n_bytes, {
912
913! {load_n_bytes}
914! $1 $2 $5 $6 $7 $8 $7 $8 $9
! Both destination registers are cleared, then the code jumps through the
! table at the end to the entry for the given byte count and falls through
! the remaining entries, so each entry adds one byte.
! The call to .+8 only serves to get the address of the first label into
! %o7; table entries are offsets relative to that label.
! Note that %o7 is overwritten here.
915
916$7.0: call .+8
917 sll $2, 2, $6 ! delay slot: byte count times 4 is the table offset
918
919 add %o7,$7.jmp.table-$7.0,$5
920
921 add $5, $6, $5
922 mov 0, $4
923
924 ld [$5], $5
925
926 jmp %o7+$5
927 mov 0, $3 ! delay slot
928
929$7.7:
930 ldub [$1+6], $5
931 sll $5, 16, $5
932 or $3, $5, $3
933$7.6:
934 ldub [$1+5], $5
935 sll $5, 8, $5
936 or $3, $5, $3
937$7.5:
938 ldub [$1+4], $5
939 or $3, $5, $3
940$7.4:
941 ldub [$1+3], $5
942 sll $5, 24, $5
943 or $4, $5, $4
944$7.3:
945 ldub [$1+2], $5
946 sll $5, 16, $5
947 or $4, $5, $4
948$7.2:
949 ldub [$1+1], $5
950 sll $5, 8, $5
951 or $4, $5, $4
952$7.1:
953 ldub [$1+0], $5
954 ba $8
955 or $4, $5, $4 ! delay slot: last byte merged before leaving
956
957 .align 4
958
959$7.jmp.table:
! NOTE(review): the entry for byte count 0 is offset 0, which is the call
! instruction itself; a count of 0 looks unsupported - confirm that callers
! never pass it.
960 .word 0
961 .word $7.1-$7.0
962 .word $7.2-$7.0
963 .word $7.3-$7.0
964 .word $7.4-$7.0
965 .word $7.5-$7.0
966 .word $7.6-$7.0
967 .word $7.7-$7.0
968})
969
970
971! {store_little_endian}
972!
973! parameter 1 address
974! parameter 2 source left
975! parameter 3 source right
976! parameter 4 temporary
! parameter 5 label
977
978define(store_little_endian, {
979
980! {store_little_endian}
981! $1 $2 $3 $4 $5 $6 $7 $8 $9
! Stores two 32-bit registers at the address in parameter 1 so that the
! least significant register byte goes to the lowest address.
! On UltraSPARC builds an address that is a multiple of 4 is written with
! two sta instructions using ASI 0x88; otherwise the bytes are written
! one at a time.
! Parameter 5 is used as label for the byte-wise path; the same name with
! an a appended marks the common exit.
982
983 ! rightmost in register to first in memory
984
985#ifdef OPENSSL_SYSNAME_ULTRASPARC
986 andcc $1, 3, global0 ! test the two low address bits only
987 bne,pn %icc, $5
988 nop
989
990 sta $2, [$1] 0x88
991 add $1, 4, $4
992
993 ba,pt %icc, $5a
994 sta $3, [$4] 0x88 ! delay slot: second word
995#endif
996
997$5:
998 and $2, 255, $4
999 stub $4, [$1+0]
1000
1001 srl $2, 8, $4
1002 and $4, 255, $4
1003 stub $4, [$1+1]
1004
1005 srl $2, 16, $4
1006 and $4, 255, $4
1007 stub $4, [$1+2]
1008
1009 srl $2, 24, $4
1010 stub $4, [$1+3]
1011
1012
1013 and $3, 255, $4
1014 stub $4, [$1+0+4]
1015
1016 srl $3, 8, $4
1017 and $4, 255, $4
1018 stub $4, [$1+1+4]
1019
1020 srl $3, 16, $4
1021 and $4, 255, $4
1022 stub $4, [$1+2+4]
1023
1024 srl $3, 24, $4
1025 stub $4, [$1+3+4]
1026
1027$5a:
1028
1029})
1030
1031
1032! {store_n_bytes}
1033!
1034! Stores 1 to 7 bytes little endian
1035!
1036! parameter 1 address
1037! parameter 2 length
1038! parameter 3 source register left
1039! parameter 4 source register right
1040! parameter 5 temp
1041! parameter 6 temp2
1042! parameter 7 label
1043! parameter 8 return label
1044
1045define(store_n_bytes, {
1046
1047! {store_n_bytes}
1048! $1 $2 $5 $6 $7 $8 $7 $8 $9
! Jumps through the table at the end to the entry for the given byte
! count and falls through the remaining entries, so each entry writes
! one byte.
! The call to .+8 only serves to get the address of the first label into
! %o7; table entries are offsets relative to that label.
! Note that %o7 is overwritten here.
1049
1050$7.0: call .+8
1051 sll $2, 2, $6 ! delay slot: byte count times 4 is the table offset
1052
1053 add %o7,$7.jmp.table-$7.0,$5
1054
1055 add $5, $6, $5
1056
1057 ld [$5], $5
1058
1059 jmp %o7+$5
1060 nop
1061
1062$7.7:
1063 srl $3, 16, $5
1064 and $5, 0xff, $5
1065 stub $5, [$1+6]
1066$7.6:
1067 srl $3, 8, $5
1068 and $5, 0xff, $5
1069 stub $5, [$1+5]
1070$7.5:
1071 and $3, 0xff, $5
1072 stub $5, [$1+4]
1073$7.4:
1074 srl $4, 24, $5
1075 stub $5, [$1+3]
1076$7.3:
1077 srl $4, 16, $5
1078 and $5, 0xff, $5
1079 stub $5, [$1+2]
1080$7.2:
1081 srl $4, 8, $5
1082 and $5, 0xff, $5
1083 stub $5, [$1+1]
1084$7.1:
1085 and $4, 0xff, $5
1086
1087
1088 ba $8
1089 stub $5, [$1] ! delay slot: last byte written before leaving
1090
1091 .align 4
1092
1093$7.jmp.table:
1094
! NOTE(review): the entry for byte count 0 is offset 0, which is the call
! instruction itself; a count of 0 looks unsupported - confirm that callers
! never pass it.
1095 .word 0
1096 .word $7.1-$7.0
1097 .word $7.2-$7.0
1098 .word $7.3-$7.0
1099 .word $7.4-$7.0
1100 .word $7.5-$7.0
1101 .word $7.6-$7.0
1102 .word $7.7-$7.0
1103})
1104
1105
1106define(testvalue,{1})
1107
1108define(register_init, {
1109
1110! For test purposes:
! Fills the working registers with one known value: local0 gets the test
! constant set up just above, then it is copied to every non-empty
! parameter (up to eight) and to the fixed list of local, out and global
! registers below. No use of this helper is visible in this part of the
! file.
1111
1112 sethi %hi(testvalue), local0
1113 or local0, %lo(testvalue), local0
1114
1115 ifelse($1,{},{}, {mov local0, $1})
1116 ifelse($2,{},{}, {mov local0, $2})
1117 ifelse($3,{},{}, {mov local0, $3})
1118 ifelse($4,{},{}, {mov local0, $4})
1119 ifelse($5,{},{}, {mov local0, $5})
1120 ifelse($6,{},{}, {mov local0, $6})
1121 ifelse($7,{},{}, {mov local0, $7})
1122 ifelse($8,{},{}, {mov local0, $8})
1123
1124 mov local0, local1
1125 mov local0, local2
1126 mov local0, local3
1127 mov local0, local4
1128 mov local0, local5
1129 mov local0, local7
1130 mov local0, local6
1131 mov local0, out0
1132 mov local0, out1
1133 mov local0, out2
1134 mov local0, out3
1135 mov local0, out4
1136 mov local0, out5
1137 mov local0, global1
1138 mov local0, global2
1139 mov local0, global3
1140 mov local0, global4
1141 mov local0, global5
1142
1143})
1144
1145.section ".text"
1146
1147 .align 32
1148
1149.des_enc:
1150
! Internal subroutine with the encryption rounds (key step +1).
! It is entered with call and leaves with retl, and it has no save of its
! own, so it works directly on the registers of the calling function.
! Works on in5 and out5; the first round key must already be in out0/out1.
! The inner label .des_enc.1 is also a call target of the permutation code.
1151 ! key address in3
1152 ! loads key next encryption/decryption first round from [in4]
1153
1154 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1155
1156
1157 .align 32
1158
1159.des_dec:
1160
! Internal subroutine with the decryption rounds (key step -1, so the key
! address has to point at the last subkey on entry).
! It is entered with call and leaves with retl, and it has no save of its
! own, so it works directly on the registers of the calling function.
! The inner label .des_dec.1 is also a call target of the permutation code.
1161 ! implemented with out5 as first parameter to avoid
1162 ! register exchange in ede modes
1163
1164 ! key address in4
1165 ! loads key next encryption/decryption first round from [in3]
1166
1167 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1168
1169
1170
1171! void DES_encrypt1(data, ks, enc)
1172! *******************************
1173
1174 .align 32
1175 .global DES_encrypt1
1176 .type DES_encrypt1,#function
1177
1178DES_encrypt1:
1179
! In: in0 = data (two 32-bit words, updated in place), in1 = ks,
! in2 = enc (zero selects decryption).
! Encryption runs the permutations and the rounds inline; decryption
! calls into .des_dec and then does the final permutation here.
1180 save %sp, FRAME, %sp
1181
! .PIC.me.up is outside this part of the file; presumably it sets up the
! table addresses in global1 and out2 that the code below relies on.
1182 call .PIC.me.up
1183 mov .PIC.me.up-(.-4),out0
1184
1185 ld [in0], in5 ! left
1186 cmp in2, 0 ! enc
1187
1188#ifdef OPENSSL_SYSNAME_ULTRASPARC
1189 be,pn %icc, .encrypt.dec ! enc/dec
1190#else
1191 be .encrypt.dec
1192#endif
1193 ld [in0+4], out5 ! right
1194
1195 ! parameter 6 1/2 for include encryption/decryption
1196 ! parameter 7 1 for move in1 to in3
1197 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1198
1199 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1200
1201 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1202
1203 fp_macro(in5, out5, 1) ! 1 for store to [in0]
1204
1205 ret
1206 restore
1207
1208.encrypt.dec:
1209
1210 add in1, 120, in3 ! use last subkey for first round
1211
1212 ! parameter 6 1/2 for include encryption/decryption
1213 ! parameter 7 1 for move in1 to in3
1214 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1215
1216 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1217
1218 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1219
1220 ret
1221 restore
1222
1223.DES_encrypt1.end:
1224 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1225
1226
1227! void DES_encrypt2(data, ks, enc)
1228!*********************************
1229
1230 ! encrypts/decrypts without initial/final permutation
1231
1232 .align 32
1233 .global DES_encrypt2
1234 .type DES_encrypt2,#function
1235
1236DES_encrypt2:
1237
! In: in0 = data (two 32-bit words, updated in place), in1 = ks,
! in2 = enc (zero selects decryption).
! Only the rotate by 3 expected by the rounds code is applied before the
! rounds and undone afterwards; no initial or final permutation is done.
1238 save %sp, FRAME, %sp
1239
! .PIC.me.up is outside this part of the file; presumably it sets up the
! table addresses in global1 and out2 that the code below relies on.
1240 call .PIC.me.up
1241 mov .PIC.me.up-(.-4),out0
1242
1243 ! Set sbox address 1 to 6 and rotate halfs 3 left
1244 ! Errors caught by destest? Yes. Still? *NO*
1245
1246 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
1247
1248 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
1249
1250 add global1, 256, global2 ! sbox 2
1251 add global1, 512, global3 ! sbox 3
1252
1253 ld [in0], out5 ! right
1254 add global1, 768, global4 ! sbox 4
1255 add global1, 1024, global5 ! sbox 5
1256
1257 ld [in0+4], in5 ! left
1258 add global1, 1280, local6 ! sbox 6
1259 add global1, 1792, out3 ! sbox 8
1260
1261 ! rotate
1262
1263 sll in5, 3, local5
1264 mov in1, in3 ! key address to in3
1265
1266 sll out5, 3, local7
1267 srl in5, 29, in5
1268
1269 srl out5, 29, out5
1270 add in5, local5, in5
1271
1272 add out5, local7, out5
1273 cmp in2, 0
1274
1275 ! we use our own stackframe
1276
1277#ifdef OPENSSL_SYSNAME_ULTRASPARC
1278 be,pn %icc, .encrypt2.dec ! decryption
1279#else
1280 be .encrypt2.dec
1281#endif
1282 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: keep data pointer, in0 is reused as scratch below
1283
1284 ld [in3], out0 ! key 7531 first round
1285 mov LOOPS, out4 ! loop counter
1286
1287 ld [in3+4], out1 ! key 8642 first round
1288 sethi %hi(0x0000FC00), local5
1289
1290 call .des_enc
1291 mov in3, in4 ! delay slot
1292
1293 ! rotate
1294 sll in5, 29, in0
1295 srl in5, 3, in5
1296 sll out5, 29, in1
1297 add in5, in0, in5
1298 srl out5, 3, out5
1299 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! data pointer back from the stack slot
1300 add out5, in1, out5
1301 st in5, [in0]
1302 st out5, [in0+4]
1303
1304 ret
1305 restore
1306
1307
1308.encrypt2.dec:
1309
1310 add in3, 120, in4 ! last subkey is used first when decrypting
1311
1312 ld [in4], out0 ! key 7531 first round
1313 mov LOOPS, out4 ! loop counter
1314
1315 ld [in4+4], out1 ! key 8642 first round
1316 sethi %hi(0x0000FC00), local5
1317
1318 mov in5, local1 ! left expected in out5
1319 mov out5, in5
1320
1321 call .des_dec
1322 mov local1, out5 ! delay slot: completes the exchange of in5 and out5
1323
1324.encrypt2.finish:
1325
1326 ! rotate
1327 sll in5, 29, in0
1328 srl in5, 3, in5
1329 sll out5, 29, in1
1330 add in5, in0, in5
1331 srl out5, 3, out5
1332 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! data pointer back from the stack slot
1333 add out5, in1, out5
1334 st out5, [in0]
1335 st in5, [in0+4]
1336
1337 ret
1338 restore
1339
1340.DES_encrypt2.end:
1341 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1342
1343
1344! void DES_encrypt3(data, ks1, ks2, ks3)
1345! **************************************
1346
1347 .align 32
1348 .global DES_encrypt3
1349 .type DES_encrypt3,#function
1350
1351DES_encrypt3:
1352
! In: in0 = data (two 32-bit words, updated in place), in1 = ks1,
! in2 = ks2, in3 = ks3.
! Sequence: initial permutation, encrypt with ks1, decrypt with ks2,
! encrypt with ks3, final permutation and store to in0.
1353 save %sp, FRAME, %sp
1354
! .PIC.me.up is outside this part of the file; presumably it sets up the
! table addresses in global1 and out2 that the code below relies on.
1355 call .PIC.me.up
1356 mov .PIC.me.up-(.-4),out0
1357
1358 ld [in0], in5 ! left
1359 add in2, 120, in4 ! ks2
1360
1361 ld [in0+4], out5 ! right
1362 mov in3, in2 ! save ks3
1363
1364 ! parameter 6 1/2 for include encryption/decryption
1365 ! parameter 7 1 for mov in1 to in3
1366 ! parameter 8 1 for mov in3 to in4
1367 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1368
1369 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1370
1371 call .des_dec
1372 mov in2, in3 ! preload ks3
1373
1374 call .des_enc
1375 nop
1376
1377 fp_macro(in5, out5, 1)
1378
1379 ret
1380 restore
1381
1382.DES_encrypt3.end:
1383 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1384
1385
1386! void DES_decrypt3(data, ks1, ks2, ks3)
1387! **************************************
1388
1389 .align 32
1390 .global DES_decrypt3
1391 .type DES_decrypt3,#function
1392
1393DES_decrypt3:
1394
! In: in0 = data (two 32-bit words, updated in place), in1 = ks1,
! in2 = ks2, in3 = ks3.
! Sequence: initial permutation, decrypt with ks3, encrypt with ks2,
! decrypt with ks1, final permutation and store to in0.
1395 save %sp, FRAME, %sp
1396
! .PIC.me.up is outside this part of the file; presumably it sets up the
! table addresses in global1 and out2 that the code below relies on.
1397 call .PIC.me.up
1398 mov .PIC.me.up-(.-4),out0
1399
1400 ld [in0], in5 ! left
1401 add in3, 120, in4 ! ks3
1402
1403 ld [in0+4], out5 ! right
1404 mov in2, in3 ! ks2
1405
1406 ! parameter 6 1/2 for include encryption/decryption
1407 ! parameter 7 1 for mov in1 to in3
1408 ! parameter 8 1 for mov in3 to in4
1409 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1410
1411 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1412
1413 call .des_enc
1414 add in1, 120, in4 ! preload ks1
1415
1416 call .des_dec
1417 nop
1418
1419 fp_macro(out5, in5, 1)
1420
1421 ret
1422 restore
1423
1424.DES_decrypt3.end:
1425 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1426
! Constant data addressed relative to .des_and (callers receive its
! address in out2 from .PIC.me.up).
! Layout: offsets 0-255 hold the byte table, offsets 256-287 hold eight
! 32-bit words. The declared object size is therefore 288 bytes, since
! the last word starts at offset 284 and is four bytes long.
1427 .align 256
1428 .type .des_and,#object
1429 .size .des_and,288
1430
1431.des_and:
1432
1433! This table is used for AND 0xFC when it is known that register
1434! bits 8-31 are zero. Makes it possible to do three arithmetic
1435! operations in one cycle.
! Entry i holds the value of i AND 0xFC.
1436
1437 .byte 0, 0, 0, 0, 4, 4, 4, 4
1438 .byte 8, 8, 8, 8, 12, 12, 12, 12
1439 .byte 16, 16, 16, 16, 20, 20, 20, 20
1440 .byte 24, 24, 24, 24, 28, 28, 28, 28
1441 .byte 32, 32, 32, 32, 36, 36, 36, 36
1442 .byte 40, 40, 40, 40, 44, 44, 44, 44
1443 .byte 48, 48, 48, 48, 52, 52, 52, 52
1444 .byte 56, 56, 56, 56, 60, 60, 60, 60
1445 .byte 64, 64, 64, 64, 68, 68, 68, 68
1446 .byte 72, 72, 72, 72, 76, 76, 76, 76
1447 .byte 80, 80, 80, 80, 84, 84, 84, 84
1448 .byte 88, 88, 88, 88, 92, 92, 92, 92
1449 .byte 96, 96, 96, 96, 100, 100, 100, 100
1450 .byte 104, 104, 104, 104, 108, 108, 108, 108
1451 .byte 112, 112, 112, 112, 116, 116, 116, 116
1452 .byte 120, 120, 120, 120, 124, 124, 124, 124
1453 .byte 128, 128, 128, 128, 132, 132, 132, 132
1454 .byte 136, 136, 136, 136, 140, 140, 140, 140
1455 .byte 144, 144, 144, 144, 148, 148, 148, 148
1456 .byte 152, 152, 152, 152, 156, 156, 156, 156
1457 .byte 160, 160, 160, 160, 164, 164, 164, 164
1458 .byte 168, 168, 168, 168, 172, 172, 172, 172
1459 .byte 176, 176, 176, 176, 180, 180, 180, 180
1460 .byte 184, 184, 184, 184, 188, 188, 188, 188
1461 .byte 192, 192, 192, 192, 196, 196, 196, 196
1462 .byte 200, 200, 200, 200, 204, 204, 204, 204
1463 .byte 208, 208, 208, 208, 212, 212, 212, 212
1464 .byte 216, 216, 216, 216, 220, 220, 220, 220
1465 .byte 224, 224, 224, 224, 228, 228, 228, 228
1466 .byte 232, 232, 232, 232, 236, 236, 236, 236
1467 .byte 240, 240, 240, 240, 244, 244, 244, 244
1468 .byte 248, 248, 248, 248, 252, 252, 252, 252
1469
1470 ! 5 numbers for initial/final permutation
1471
1472 .word 0x0f0f0f0f ! offset 256
1473 .word 0x0000ffff ! 260
1474 .word 0x33333333 ! 264
1475 .word 0x00ff00ff ! 268
1476 .word 0x55555555 ! 272
1477
! Three further constants. Their consumers are outside this section.
1478 .word 0 ! 276
1479 .word LOOPS ! 280
1480 .word 0x0000FC00 ! 284, last word, ends at offset 287
! One word holding the 32-bit displacement from this word to
! DES_SPtrans. .PIC.me.up adds the run-time address of this word to it
! to obtain the address of DES_SPtrans without absolute addressing.
1481.PIC.DES_SPtrans:
1482 .word %r_disp32(DES_SPtrans)
1483
! .PIC.me.up: position-independent address setup shared by the DES
! entry points. Callers invoke it with call and put the mov that sets
! out0 in the delay slot.
1484! input: out0 offset between .PIC.me.up and caller
1485! output: out0 pointer to .PIC.me.up
1486! out2 pointer to .des_and
1487! global1 pointer to DES_SPtrans
! It returns with retl and does not open a register window of its own.
1488 .align 32
1489.PIC.me.up:
1490 add out0,%o7,out0 ! pointer to .PIC.me.up
! The first branch below is always selected. The alternatives after the
! else directive are compiled out.
1491#if 1
1492 ld [out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1 ! stored displacement to DES_SPtrans
1493 add global1,(.PIC.DES_SPtrans-.PIC.me.up),global1 ! plus offset of the word from .PIC.me.up
1494 add global1,out0,global1 ! plus address of .PIC.me.up gives address of DES_SPtrans
1495#else
1496# ifdef OPENSSL_PIC
1497 ! In case anybody wonders why this code is same for both ABI.
1498 ! To start with it is not. Do note LDPTR below. But of course
1499 ! you must be wondering why the rest of it does not contain
1500 ! things like %hh, %hm and %lm. Well, those are needed only
1501 ! if OpenSSL library *itself* will become larger than 4GB,
1502 ! which is not going to happen any time soon.
1503 sethi %hi(DES_SPtrans),global1
1504 or global1,%lo(DES_SPtrans),global1
1505 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1506 add global1,out0,global1
1507 add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2
1508 LDPTR [out2+global1],global1
1509# elif 0
1510 setn DES_SPtrans,out2,global1 ! synthetic instruction !
1511# elif defined(ABI64)
1512 sethi %hh(DES_SPtrans),out2
1513 or out2,%hm(DES_SPtrans),out2
1514 sethi %lm(DES_SPtrans),global1
1515 or global1,%lo(DES_SPtrans),global1
1516 sllx out2,32,out2
1517 or out2,global1,global1
1518# else
1519 sethi %hi(DES_SPtrans),global1
1520 or global1,%lo(DES_SPtrans),global1
1521# endif
1522#endif
1523 retl
1524 add out0,.des_and-.PIC.me.up,out2 ! delay slot: out2 = address of .des_and
1525
1526! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1527! *****************************************************************
!
! Single DES in CBC mode.
!
! Incoming arguments: in0 = input, in1 = output, in2 = length in bytes,
! in3 = schedule, in4 = ivec, in5 = enc.
! enc equal to zero selects the decryption path at .ncbc.dec. Any other
! value selects the encryption path that follows the entry code.
!
! The iv is read from ivec at the start and the new iv is stored back
! to ivec before returning, except that the decryption path returns
! without touching ivec when length is not positive.
! The ivec pointer is kept in a stack slot because in4 and in5 are
! reused as working registers.
1528
1529
1530 .align 32
1531 .global DES_ncbc_encrypt
1532 .type DES_ncbc_encrypt,#function
1533
1534DES_ncbc_encrypt:
1535
1536 save %sp, FRAME, %sp
1537
! Names for three stack slots used as spill space.
1538 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1539 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1540 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1541
1542 call .PIC.me.up ! returns out2 = .des_and, global1 = DES_SPtrans
1543 mov .PIC.me.up-(.-4),out0
1544
1545 cmp in5, 0 ! enc
1546
! enc == 0 goes to the decryption path. The store in the delay slot
! saves the ivec pointer for both paths.
1547#ifdef OPENSSL_SYSNAME_ULTRASPARC
1548 be,pn %icc, .ncbc.dec
1549#else
1550 be .ncbc.dec
1551#endif
1552 STPTR in4, IVEC
1553
1554 ! addr left right temp label
1555 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1556
1557 addcc in2, -8, in2 ! bytes missing when first block done
1558
! Negative result: fewer than 8 bytes in total, skip the full-block loop.
1559#ifdef OPENSSL_SYSNAME_ULTRASPARC
1560 bl,pn %icc, .ncbc.enc.seven.or.less
1561#else
1562 bl .ncbc.enc.seven.or.less
1563#endif
1564 mov in3, in4 ! schedule
1565
1566.ncbc.enc.next.block:
1567
1568 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1569
1570.ncbc.enc.next.block_1:
1571
1572 xor in5, out4, in5 ! iv xor
1573 xor out5, global4, out5 ! iv xor
1574
1575 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1576 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1577
1578.ncbc.enc.next.block_2:
1579
1580!// call .des_enc ! compares in2 to 8
1581! rounds inlined for alignment purposes
1582
1583 add global1, 768, global4 ! address sbox 4 since register used below
1584
1585 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1586
! Uses the comparison of in2 with 8 made in the rounds: taken when
! fewer than 8 bytes follow this block.
1587#ifdef OPENSSL_SYSNAME_ULTRASPARC
1588 bl,pn %icc, .ncbc.enc.next.block_fp
1589#else
1590 bl .ncbc.enc.next.block_fp
1591#endif
1592 add in0, 8, in0 ! input address
1593
1594 ! If 8 or more bytes are to be encrypted after this block,
1595 ! we combine final permutation for this block with initial
1596 ! permutation for next block. Load next block:
1597
1598 load_little_endian(in0, global3, global4, local5, .LLE12)
1599
1600 ! parameter 1 original left
1601 ! parameter 2 original right
1602 ! parameter 3 left ip
1603 ! parameter 4 right ip
1604 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1605 ! 2: mov in4 to in3
1606 !
1607 ! also adds -8 to length in2 and loads loop counter to out4
1608
1609 fp_ip_macro(out0, out1, global3, global4, 2)
1610
1611 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1612
1613 ld [in3], out0 ! key 7531 first round next block
1614 mov in5, local1
1615 xor global3, out5, in5 ! iv xor next block
1616
1617 ld [in3+4], out1 ! key 8642
1618 add global1, 512, global3 ! address sbox 3 since register used
1619 xor global4, local1, out5 ! iv xor next block
1620
1621 ba .ncbc.enc.next.block_2
1622 add in1, 8, in1 ! output address
1623
1624.ncbc.enc.next.block_fp:
1625
1626 fp_macro(in5, out5)
1627
1628 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1629
1630 addcc in2, -8, in2 ! bytes missing when next block done
1631
1632#ifdef OPENSSL_SYSNAME_ULTRASPARC
1633 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0
1634#else
1635 bpos .ncbc.enc.next.block
1636#endif
1637 add in1, 8, in1
1638
1639.ncbc.enc.seven.or.less:
1640
! Here in2 holds the remaining byte count minus 8, so a value of -8
! or less means no input is left.
1641 cmp in2, -8
1642
1643#ifdef OPENSSL_SYSNAME_ULTRASPARC
1644 ble,pt %icc, .ncbc.enc.finish
1645#else
1646 ble .ncbc.enc.finish
1647#endif
1648 nop
1649
1650 add in2, 8, local1 ! bytes to load
1651
1652 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1653 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1654
1655 ! Loads 1 to 7 bytes little endian to global4, out4
! The return label given above is .ncbc.enc.next.block_1, so the partial
! block presumably continues through the same path as a full block.
1656
1657
1658.ncbc.enc.finish:
1659
! Store the last chaining value held in in5 and out5 back to ivec.
1660 LDPTR IVEC, local4
1661 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1662
1663 ret
1664 restore
1665
1666
! Decryption path. The input and output pointers live in stack slots
! and in local5 and local7. in0 and in1 are reused to hold the iv.
1667.ncbc.dec:
1668
1669 STPTR in0, INPUT
1670 cmp in2, 0 ! length
1671 add in3, 120, in3 ! decryption starts 120 bytes into the schedule
1672
1673 LDPTR IVEC, local7 ! ivec
! Uses the flags from the cmp above: length <= 0 returns immediately.
1674#ifdef OPENSSL_SYSNAME_ULTRASPARC
1675 ble,pn %icc, .ncbc.dec.finish
1676#else
1677 ble .ncbc.dec.finish
1678#endif
1679 mov in3, in4 ! schedule
1680
1681 STPTR in1, OUTPUT
1682 mov in0, local5 ! input
1683
1684 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1685
1686.ncbc.dec.next.block:
1687
1688 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1689
1690 ! parameter 6 1/2 for include encryption/decryption
1691 ! parameter 7 1 for mov in1 to in3
1692 ! parameter 8 1 for mov in3 to in4
1693
1694 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1695
1696 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1697
1698 ! in2 is bytes left to be stored
1699 ! in2 is compared to 8 in the rounds
1700
1701 xor out5, in0, out4 ! iv xor
1702#ifdef OPENSSL_SYSNAME_ULTRASPARC
1703 bl,pn %icc, .ncbc.dec.seven.or.less
1704#else
1705 bl .ncbc.dec.seven.or.less
1706#endif
1707 xor in5, in1, global4 ! iv xor
1708
1709 ! Load ivec next block now, since input and output address might be the same.
1710
1711 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1712
1713 store_little_endian(local7, out4, global4, local3, .SLE3)
1714
1715 STPTR local5, INPUT
1716 add local7, 8, local7
1717 addcc in2, -8, in2
1718
! Loop while bytes remain. The delay slot saves the advanced output pointer.
1719#ifdef OPENSSL_SYSNAME_ULTRASPARC
1720 bg,pt %icc, .ncbc.dec.next.block
1721#else
1722 bg .ncbc.dec.next.block
1723#endif
1724 STPTR local7, OUTPUT
1725
1726
1727.ncbc.dec.store.iv:
1728
1729 LDPTR IVEC, local4 ! ivec
1730 store_little_endian(local4, in0, in1, local5, .SLE4)
1731
1732.ncbc.dec.finish:
1733
1734 ret
1735 restore
1736
! Final partial block: fetch the next iv from the input first, then
! store only in2 bytes of the result and continue at .ncbc.dec.store.iv.
1737.ncbc.dec.seven.or.less:
1738
1739 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1740
1741 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1742
1743
1744.DES_ncbc_encrypt.end:
1745 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1746
1747
1748! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1749! **************************************************************************
!
! Triple DES in CBC mode. Encryption runs encrypt ks1, decrypt ks2,
! encrypt ks3 on each block. Decryption runs decrypt ks3, encrypt ks2,
! decrypt ks1.
!
! Incoming arguments: in0 = input, in1 = output, in2 = length in bytes,
! in3 = ks1, in4 = ks2, in5 = ks3. ivec and enc are the seventh and
! eighth arguments and are read from the caller frame through %fp.
! enc equal to zero selects the decryption path at .ede3.dec.
!
! The three key schedule pointers are kept in stack slots and reloaded
! as needed, because in3, in4 and in5 are reused as working registers.
! The new iv is stored back to ivec before returning, except that the
! decryption path returns without touching ivec when length is not
! positive.
1750
1751
1752 .align 32
1753 .global DES_ede3_cbc_encrypt
1754 .type DES_ede3_cbc_encrypt,#function
1755
1756DES_ede3_cbc_encrypt:
1757
1758 save %sp, FRAME, %sp
1759
! Names for the stack slots that hold the three key schedule pointers.
1760 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1761 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1762 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1763
1764 call .PIC.me.up ! returns out2 = .des_and, global1 = DES_SPtrans
1765 mov .PIC.me.up-(.-4),out0
1766
1767 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1768 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1769 cmp local3, 0 ! enc
1770
! enc == 0 goes to the decryption path. The store in the delay slot
! saves the ks2 pointer for both paths.
1771#ifdef OPENSSL_SYSNAME_ULTRASPARC
1772 be,pn %icc, .ede3.dec
1773#else
1774 be .ede3.dec
1775#endif
1776 STPTR in4, KS2
1777
1778 STPTR in5, KS3
1779
1780 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1781
1782 addcc in2, -8, in2 ! bytes missing after next block
1783
! Negative result: fewer than 8 bytes in total, skip the full-block
! loop. The delay slot saves the ks1 pointer in either case.
1784#ifdef OPENSSL_SYSNAME_ULTRASPARC
1785 bl,pn %icc, .ede3.enc.seven.or.less
1786#else
1787 bl .ede3.enc.seven.or.less
1788#endif
1789 STPTR in3, KS1
1790
1791.ede3.enc.next.block:
1792
1793 load_little_endian(in0, out4, global4, local3, .LLE7)
1794
1795.ede3.enc.next.block_1:
1796
1797 LDPTR KS2, in4
1798 xor in5, out4, in5 ! iv xor
1799 xor out5, global4, out5 ! iv xor
1800
1801 LDPTR KS1, in3
1802 add in4, 120, in4 ! for decryption we use last subkey first
1803 nop
1804
1805 ip_macro(in5, out5, in5, out5, in3)
1806
1807.ede3.enc.next.block_2:
1808
1809 call .des_enc ! ks1 in3
1810 nop
1811
1812 call .des_dec ! ks2 in4
1813 LDPTR KS3, in3
1814
1815 call .des_enc ! ks3 in3 compares in2 to 8
1816 nop
1817
! Uses the comparison of in2 with 8 made in the last pass: taken when
! fewer than 8 bytes follow this block.
1818#ifdef OPENSSL_SYSNAME_ULTRASPARC
1819 bl,pn %icc, .ede3.enc.next.block_fp
1820#else
1821 bl .ede3.enc.next.block_fp
1822#endif
1823 add in0, 8, in0
1824
1825 ! If 8 or more bytes are to be encrypted after this block,
1826 ! we combine final permutation for this block with initial
1827 ! permutation for next block. Load next block:
1828
1829 load_little_endian(in0, global3, global4, local5, .LLE11)
1830
1831 ! parameter 1 original left
1832 ! parameter 2 original right
1833 ! parameter 3 left ip
1834 ! parameter 4 right ip
1835 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1836 ! 2: mov in4 to in3
1837 !
1838 ! also adds -8 to length in2 and loads loop counter to out4
1839
1840 fp_ip_macro(out0, out1, global3, global4, 1)
1841
1842 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1843
1844 mov in5, local1
1845 xor global3, out5, in5 ! iv xor next block
1846
1847 ld [in3], out0 ! key 7531
1848 add global1, 512, global3 ! address sbox 3
1849 xor global4, local1, out5 ! iv xor next block
1850
1851 ld [in3+4], out1 ! key 8642
1852 add global1, 768, global4 ! address sbox 4
1853 ba .ede3.enc.next.block_2
1854 add in1, 8, in1
1855
1856.ede3.enc.next.block_fp:
1857
1858 fp_macro(in5, out5)
1859
1860 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1861
1862 addcc in2, -8, in2 ! bytes missing when next block done
1863
1864#ifdef OPENSSL_SYSNAME_ULTRASPARC
1865 bpos,pt %icc, .ede3.enc.next.block
1866#else
1867 bpos .ede3.enc.next.block
1868#endif
1869 add in1, 8, in1
1870
1871.ede3.enc.seven.or.less:
1872
! Here in2 holds the remaining byte count minus 8, so a value of -8
! or less means no input is left.
1873 cmp in2, -8
1874
1875#ifdef OPENSSL_SYSNAME_ULTRASPARC
1876 ble,pt %icc, .ede3.enc.finish
1877#else
1878 ble .ede3.enc.finish
1879#endif
1880 nop
1881
1882 add in2, 8, local1 ! bytes to load
1883
1884 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1885 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1886
1887.ede3.enc.finish:
1888
! Store the last chaining value held in in5 and out5 back to ivec.
1889 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1890 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1891
1892 ret
1893 restore
1894
! Decryption path. The input and output stack slot names used here
! were set up in the DES_ncbc_encrypt section earlier in the file.
! ks1 and ks3 are used for decryption passes and are advanced by 120
! bytes before being saved. ks2 was saved unchanged in the delay slot
! of the branch that led here.
1895.ede3.dec:
1896
1897 STPTR in0, INPUT
1898 add in5, 120, in5
1899
1900 STPTR in1, OUTPUT
1901 mov in0, local5
1902 add in3, 120, in3
1903
1904 STPTR in3, KS1
1905 cmp in2, 0
1906
! length <= 0 returns immediately. The delay slot saves ks3 + 120.
1907#ifdef OPENSSL_SYSNAME_ULTRASPARC
1908 ble %icc, .ede3.dec.finish
1909#else
1910 ble .ede3.dec.finish
1911#endif
1912 STPTR in5, KS3
1913
1914 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
1915 load_little_endian(local7, in0, in1, local3, .LLE8)
1916
1917.ede3.dec.next.block:
1918
1919 load_little_endian(local5, in5, out5, local3, .LLE9)
1920
1921 ! parameter 6 1/2 for include encryption/decryption
1922 ! parameter 7 1 for mov in1 to in3
1923 ! parameter 8 1 for mov in3 to in4
1924 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1925
1926 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1927
1928 call .des_enc ! ks2 in3
1929 LDPTR KS1, in4
1930
1931 call .des_dec ! ks1 in4
1932 nop
1933
1934 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1935
1936 ! in2 is bytes left to be stored
1937 ! in2 is compared to 8 in the rounds
1938
1939 xor out5, in0, out4
1940#ifdef OPENSSL_SYSNAME_ULTRASPARC
1941 bl,pn %icc, .ede3.dec.seven.or.less
1942#else
1943 bl .ede3.dec.seven.or.less
1944#endif
1945 xor in5, in1, global4
1946
1947 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1948
1949 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1950
1951 STPTR local5, INPUT
1952 addcc in2, -8, in2
1953 add local7, 8, local7
1954
! Loop while bytes remain. The delay slot saves the advanced output pointer.
1955#ifdef OPENSSL_SYSNAME_ULTRASPARC
1956 bg,pt %icc, .ede3.dec.next.block
1957#else
1958 bg .ede3.dec.next.block
1959#endif
1960 STPTR local7, OUTPUT
1961
1962.ede3.dec.store.iv:
1963
1964 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1965 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1966
1967.ede3.dec.finish:
1968
1969 ret
1970 restore
1971
! Final partial block: fetch the next iv from the input first, then
! store only in2 bytes of the result and continue at .ede3.dec.store.iv.
1972.ede3.dec.seven.or.less:
1973
1974 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1975
1976 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1977
1978
1979.DES_ede3_cbc_encrypt.end:
1980 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt