diff options
Diffstat (limited to 'src/lib/libcrypto/des/asm')
| -rw-r--r-- | src/lib/libcrypto/des/asm/crypt586.pl | 208 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/des-586.pl | 255 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/des686.pl | 230 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/des_enc.m4 | 1980 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/desboth.pl | 79 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/readme | 131 |
6 files changed, 2883 insertions, 0 deletions
diff --git a/src/lib/libcrypto/des/asm/crypt586.pl b/src/lib/libcrypto/des/asm/crypt586.pl new file mode 100644 index 0000000000..1d04ed6def --- /dev/null +++ b/src/lib/libcrypto/des/asm/crypt586.pl | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # | ||
| 3 | # The inner loop instruction sequence and the IP/FP modifications are from | ||
| 4 | # Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> | ||
| 5 | # I've added the stuff needed for crypt() but I've not worried about making | ||
| 6 | # things perfect. | ||
| 7 | # | ||
| 8 | |||
# Make the perlasm directory reachable and load the x86 back end.
push @INC, "perlasm", "../../perlasm";
require "x86asm.pl";

# The first command-line argument and this script's name go to asm_init.
&asm_init($ARGV[0], "crypt586.pl");

# Registers holding the two 32-bit halves of the block being processed.
($L, $R) = ("edi", "esi");

# Emit the single routine produced by this script, then finish the output.
&external_label("DES_SPtrans");
&fcrypt_body("fcrypt_body");
&asm_finish();
| 20 | |||
# fcrypt_body(NAME)
#
# Emits the function NAME: both halves start at zero, the 16 DES rounds are
# run repeatedly under control of a stack counter initialised from &DWC(25),
# then the final permutation is applied and the two result words are stored
# through the pointer in &wparam(0).
#
# A second argument is unpacked into $do_ip but is not used by this routine.
#
# NOTE: the order of the emit calls matters, because &wparam/&swtmp are
# evaluated between the pushes (presumably they track the stack depth --
# confirm in x86asm.pl).
sub fcrypt_body
{
    local($name,$do_ip)=@_;

    &function_begin($name,"EXTRN _DES_SPtrans:DWORD");

    &comment("");
    &comment("Load the 2 words");
    $trans="ebp";

    # Clear both halves.
    &xor($L,$L);
    &xor($R,$R);

    # PIC-ification:-)
    # (a non-PIC alternative would be: &lea("edx",&DWP("DES_SPtrans")))
    &picmeup("edx","DES_SPtrans");
    &push("edx");               # read back through &swtmp(1) in D_ENCRYPT

    &mov($trans,&wparam(1));    # D_ENCRYPT swaps $trans to DES_SPtrans and back

    &push(&DWC(25));            # stack variable used as the pass counter

    &set_label("start");
    $i=0;
    while ($i<16)
    {
        my $next=$i+1;

        &comment("");
        &comment("Round $i");
        &D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx");

        &comment("");
        &comment("Round $next");
        &D_ENCRYPT($next,$R,$L,$next*2,$trans,"eax","ebx","ecx","edx");

        $i+=2;
    }

    # Swap the halves and decrement the pass counter; the jnz tests the
    # result of the dec, since only mov instructions are emitted in between.
    &mov("ebx",&swtmp(0));
    &mov("eax",$L);
    &dec("ebx");
    &mov($L,$R);
    &mov($R,"eax");
    &mov(&swtmp(0),"ebx");
    &jnz(&label("start"));

    &comment("");
    &comment("FP");
    &mov("edx",&wparam(0));

    &FP_new($R,$L,"eax",3);
    &mov(&DWP(0,"edx","",0),"eax");
    &mov(&DWP(4,"edx","",0),$L);

    &add("esp",8);              # drop the two variables pushed above

    &function_end($name);
}
| 75 | |||
# D_ENCRYPT(ROUND, L, R, S, TRANS, U, TMP1, TMP2, T)
#
# Emits one round for the crypt() variant.  L is the half that gets modified,
# R the half that is only read.  S indexes the pair of 32-bit key words at
# byte offsets S*4 and (S+1)*4 from TRANS.  U, TMP1, TMP2 and T are scratch
# registers.  ROUND is accepted but not used.
#
# The words in &wparam(2) and &wparam(3) are used as masks on R ^ (R >> 16)
# before the key is mixed in (presumably the crypt() salt masks -- confirm
# against the C caller).
#
# TRANS is reloaded from &swtmp(1) for the table lookups and restored from
# &wparam(1) at the end.  The emit order is kept exactly as scheduled.
sub D_ENCRYPT
{
    local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t)=@_;

    # Byte offsets of this round's two key words.
    my $key_lo=&n2a($S*4);
    my $key_hi=&n2a(($S+1)*4);

    # Masked mixing of R with its own upper half.
    &mov($u,&wparam(2));
    &mov($t,$R);
    &shr($t,16);
    &mov($tmp2,&wparam(3));
    &xor($t,$R);

    &and($u,$t);
    &and($t,$tmp2);

    &mov($tmp1,$u);
    &shl($tmp1,16);
    &mov($tmp2,$t);
    &shl($tmp2,16);
    &xor($u,$tmp1);
    &xor($t,$tmp2);

    # Mix in the key words and R.
    &mov($tmp1,&DWP($key_lo,$trans,"",0));
    &xor($u,$tmp1);
    &mov($tmp2,&DWP($key_hi,$trans,"",0));
    &xor($u,$R);
    &xor($t,$R);
    &xor($t,$tmp2);

    # Table lookups, one byte index at a time, xor-ed into L.
    &and($u,"0xfcfcfcfc");
    &xor($tmp1,$tmp1);
    &and($t,"0xcfcfcfcf");
    &xor($tmp2,$tmp2);
    &movb(&LB($tmp1),&LB($u));
    &movb(&LB($tmp2),&HB($u));
    &rotr($t,4);
    &mov($trans,&swtmp(1));
    &xor($L,&DWP(" ",$trans,$tmp1,0));
    &movb(&LB($tmp1),&LB($t));
    &xor($L,&DWP("0x200",$trans,$tmp2,0));
    &movb(&LB($tmp2),&HB($t));
    &shr($u,16);
    &xor($L,&DWP("0x100",$trans,$tmp1,0));
    &movb(&LB($tmp1),&HB($u));
    &shr($t,16);
    &xor($L,&DWP("0x300",$trans,$tmp2,0));
    &movb(&LB($tmp2),&HB($t));
    &and($u,"0xff");
    &and($t,"0xff");
    &mov($tmp1,&DWP("0x600",$trans,$tmp1,0));
    &xor($L,$tmp1);
    &mov($tmp1,&DWP("0x700",$trans,$tmp2,0));
    &xor($L,$tmp1);
    &mov($tmp1,&DWP("0x400",$trans,$u,0));
    &xor($L,$tmp1);
    &mov($tmp1,&DWP("0x500",$trans,$t,0));
    &xor($L,$tmp1);

    # Put TRANS back to the value taken from &wparam(1).
    &mov($trans,&wparam(1));
}
| 132 | |||
# n2a(NUMBER)
#
# Returns NUMBER formatted as a decimal integer string.
sub n2a
{
    my ($number)=@_;
    return sprintf("%d",$number);
}
| 137 | |||
# R_PERM_OP(A, B, TT, SHIFT, MASK, LAST)
#
# Emits one rotate-based permutation step.  Side effect: A is rotated left by
# SHIFT first (skipped when SHIFT is 0).  With A' the rotated value, the
# emitted code computes
#     TT = A'
#     A  = (A' ^ B) & MASK
#     TT ^= A
#     B  ^= A
# so the bits selected by MASK are exchanged between A' and B, the results
# land in TT and B, and A is left holding the masked difference.
#
# LAST is accepted for compatibility with existing callers and ignored.  The
# previous code chose between two orderings of the final two xors by testing
# a variable ($notlast) that is never defined anywhere, so that test was
# always false and this ordering was always the one emitted.  The dead
# branch has been removed; the generated code is unchanged.
sub R_PERM_OP
{
    local($a,$b,$tt,$shift,$mask,$last)=@_;

    &rotl($a,$shift) if ($shift != 0);
    &mov($tt,$a);
    &xor($a,$b);
    &and($a,$mask);
    &xor($tt,$a);
    &xor($b,$a);
    &comment("");
}
| 159 | |||
# IP_new(L, R, TT, LR)
#
# Emits the initial permutation as five R_PERM_OP steps over the registers
# L, R and TT, followed by a rotation fix-up controlled by LR:
#   TT is rotated by (LR-3) bits and R by (LR-2) bits,
#   to the left when the amount is positive and to the right when negative;
#   no instruction is emitted for an amount of zero.
# Callers in this file continue with the registers passed as R and TT.
sub IP_new
{
    local($l,$r,$tt,$lr)=@_;

    &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l);
    &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l);
    &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r);
    &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r);
    &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r);

    if    ($lr < 3) { &rotr($tt,3-$lr); }
    elsif ($lr > 3) { &rotl($tt,$lr-3); }

    if    ($lr < 2) { &rotr($r,2-$lr); }
    elsif ($lr > 2) { &rotl($r,$lr-2); }
}
| 183 | |||
# FP_new(L, R, TT, LR)
#
# Emits the final permutation.  First the rotation fix-up controlled by LR:
#   R is rotated by (LR-2) bits and L by (LR-3) bits,
#   to the right when the amount is positive and to the left when negative;
#   no instruction is emitted for an amount of zero.
# Then five R_PERM_OP steps are emitted, followed by a right rotation of TT
# by 4 bits.  Callers in this file store TT and R as the result words.
sub FP_new
{
    local($l,$r,$tt,$lr)=@_;

    if    ($lr < 2) { &rotl($r,2-$lr); }
    elsif ($lr > 2) { &rotr($r,$lr-2); }

    if    ($lr < 3) { &rotl($l,3-$lr); }
    elsif ($lr > 3) { &rotr($l,$lr-3); }

    &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r);
    &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r);
    &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l);
    &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l);
    &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r);
    &rotr($tt,4);
}
| 208 | |||
diff --git a/src/lib/libcrypto/des/asm/des-586.pl b/src/lib/libcrypto/des/asm/des-586.pl new file mode 100644 index 0000000000..60d577cc8d --- /dev/null +++ b/src/lib/libcrypto/des/asm/des-586.pl | |||
| @@ -0,0 +1,255 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # | ||
| 3 | # The inner loop instruction sequence and the IP/FP modifications are from | ||
| 4 | # Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> | ||
| 5 | # | ||
| 6 | |||
# Make the perlasm directory reachable and load the back end plus the helper
# scripts that supply &cbc and &DES_encrypt3.
push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";
require "cbc.pl";
require "desboth.pl";

# The base code is written in Microsoft operand order: op dest, source.

&asm_init($ARGV[0],"des-586.pl");

# Registers holding the two 32-bit halves of the block.
$L="edi";
$R="esi";

&external_label("DES_SPtrans");
&DES_encrypt("DES_encrypt1",1);
&DES_encrypt("DES_encrypt2",0);

# The triple-DES and CBC entry points are emitted unless $main::openbsd is
# set.  The variable used to be spelled with the old apostrophe package
# separator, which is deprecated (and warns) since Perl 5.38; "::" names
# exactly the same variable.
if (!$main::openbsd)
{
    &DES_encrypt3("DES_encrypt3",1);
    &DES_encrypt3("DES_decrypt3",0);
    &cbc("DES_ncbc_encrypt","DES_encrypt1","DES_encrypt1",0,4,5,3,5,-1);
    &cbc("DES_ede3_cbc_encrypt","DES_encrypt3","DES_decrypt3",0,6,7,3,4,5);
}

&asm_finish();
| 35 | |||
# DES_encrypt(NAME, DO_IP)
#
# Emits the function NAME.  The generated code loads two 32-bit words through
# the pointer in &wparam(0), runs the 16 rounds in forward key order when the
# flag in &wparam(2) is non-zero and in reverse key order when it is zero,
# and stores the two words back through &wparam(0).
#
# With DO_IP true the rounds are wrapped in IP_new/FP_new; otherwise the
# words are only rotated left by 3 before the rounds and right by 3 after.
#
# Prologue and epilogue are emitted by hand (function_begin_B plus explicit
# push/pop).  The emit order is significant, because &wparam is evaluated
# between the pushes.
sub DES_encrypt
{
    local($name,$do_ip)=@_;

    &function_begin_B($name,"EXTRN _DES_SPtrans:DWORD");

    &push("esi");
    &push("edi");

    &comment("");
    &comment("Load the 2 words");
    $trans="ebp";

    if ($do_ip)
    {
        &mov($R,&wparam(0));
        &xor("ecx","ecx");

        &push("ebx");
        &push("ebp");

        &mov("eax",&DWP(0,$R,"",0));
        &mov("ebx",&wparam(2));         # encrypt flag
        &mov($L,&DWP(4,$R,"",0));
        &comment("");
        &comment("IP");
        &IP_new("eax",$L,$R,3);
    }
    else
    {
        &mov("eax",&wparam(0));
        &xor("ecx","ecx");

        &push("ebx");
        &push("ebp");

        &mov($R,&DWP(0,"eax","",0));
        &mov("ebx",&wparam(2));         # encrypt flag
        &rotl($R,3);
        &mov($L,&DWP(4,"eax","",0));
        &rotl($L,3);
    }

    # PIC-ification:-)
    # (a non-PIC alternative would be: &lea($trans,&DWP("DES_SPtrans")))
    &picmeup($trans,"DES_SPtrans");

    &mov("ecx",&wparam(1));
    &cmp("ebx","0");
    &je(&label("start_decrypt"));

    # Forward key order: rounds 0..15, two per iteration.
    $i=0;
    while ($i<16)
    {
        my $next=$i+1;

        &comment("");
        &comment("Round $i");
        &D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx");

        &comment("");
        &comment("Round $next");
        &D_ENCRYPT($next,$R,$L,$next*2,$trans,"eax","ebx","ecx","edx");

        $i+=2;
    }
    &jmp(&label("end"));

    &set_label("start_decrypt");

    # Reverse key order: rounds 15..0, two per iteration.
    $i=15;
    while ($i>0)
    {
        my $prev=$i-1;

        &comment("");
        &comment("Round $i");
        &D_ENCRYPT(15-$i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx");
        &comment("");
        &comment("Round $prev");
        &D_ENCRYPT(15-$i+1,$R,$L,$prev*2,$trans,"eax","ebx","ecx","edx");

        $i-=2;
    }

    &set_label("end");

    if ($do_ip)
    {
        &comment("");
        &comment("FP");
        &mov("edx",&wparam(0));
        &FP_new($L,$R,"eax",3);

        &mov(&DWP(0,"edx","",0),"eax");
        &mov(&DWP(4,"edx","",0),$R);
    }
    else
    {
        &comment("");
        &comment("Fixup");
        &rotr($L,3);                    # r
        &mov("eax",&wparam(0));
        &rotr($R,3);                    # l
        &mov(&DWP(0,"eax","",0),$L);
        &mov(&DWP(4,"eax","",0),$R);
    }

    # Restore the registers pushed above, in reverse order.
    &pop("ebp");
    &pop("ebx");
    &pop("edi");
    &pop("esi");
    &ret();

    &function_end_B($name);
}
| 143 | |||
# D_ENCRYPT(ROUND, L, R, S, TRANS, U, TMP1, TMP2, T)
#
# Emits one DES round.  L is the half that gets modified, R the half that is
# only read.  On entry TMP2 must hold the key pointer: the two key words are
# read at byte offsets S*4 and (S+1)*4 from it.  TMP2 is then reused as a
# table index and reloaded from &wparam(1) near the end, ready for the next
# round.  TRANS is the base register for the lookup table; U, TMP1 and T are
# scratch registers.  ROUND is accepted but not used.
#
# The emit order is kept exactly as scheduled.
sub D_ENCRYPT
{
    local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t)=@_;

    # Byte offsets of this round's two key words.
    my $key_lo=&n2a($S*4);
    my $key_hi=&n2a(($S+1)*4);

    # Key mixing; the index registers are cleared along the way.
    &mov($u,&DWP($key_lo,$tmp2,"",0));
    &xor($tmp1,$tmp1);
    &mov($t,&DWP($key_hi,$tmp2,"",0));
    &xor($u,$R);
    &xor($tmp2,$tmp2);
    &xor($t,$R);
    &and($u,"0xfcfcfcfc");
    &and($t,"0xcfcfcfcf");

    # Table lookups, one byte index at a time, xor-ed into L.
    &movb(&LB($tmp1),&LB($u));
    &movb(&LB($tmp2),&HB($u));
    &rotr($t,4);
    &xor($L,&DWP(" ",$trans,$tmp1,0));
    &movb(&LB($tmp1),&LB($t));
    &xor($L,&DWP("0x200",$trans,$tmp2,0));
    &movb(&LB($tmp2),&HB($t));
    &shr($u,16);
    &xor($L,&DWP("0x100",$trans,$tmp1,0));
    &movb(&LB($tmp1),&HB($u));
    &shr($t,16);
    &xor($L,&DWP("0x300",$trans,$tmp2,0));
    &movb(&LB($tmp2),&HB($t));
    &and($u,"0xff");
    &and($t,"0xff");
    &xor($L,&DWP("0x600",$trans,$tmp1,0));
    &xor($L,&DWP("0x700",$trans,$tmp2,0));
    &mov($tmp2,&wparam(1));             # key pointer for the next round
    &xor($L,&DWP("0x400",$trans,$u,0));
    &xor($L,&DWP("0x500",$trans,$t,0));
}
| 177 | |||
# n2a(NUMBER)
#
# Returns NUMBER formatted as a decimal integer string.
sub n2a
{
    my ($number)=@_;
    return sprintf("%d",$number);
}
| 182 | |||
# R_PERM_OP(A, B, TT, SHIFT, MASK, LAST)
#
# Emits one rotate-based permutation step.  Side effect: A is rotated left by
# SHIFT first (skipped when SHIFT is 0).  With A' the rotated value, the
# emitted code computes
#     TT = A'
#     A  = (A' ^ B) & MASK
#     TT ^= A
#     B  ^= A
# so the bits selected by MASK are exchanged between A' and B, the results
# land in TT and B, and A is left holding the masked difference.
#
# LAST is accepted for compatibility with existing callers and ignored.  The
# previous code selected between two orderings of the final two xors with
# "!$last eq $b", which parses as "(!$last) eq $b" and is never true for a
# register name, so this ordering was always the one emitted.  The dead
# branch has been removed; the generated code is unchanged.
sub R_PERM_OP
{
    local($a,$b,$tt,$shift,$mask,$last)=@_;

    &rotl($a,$shift) if ($shift != 0);
    &mov($tt,$a);
    &xor($a,$b);
    &and($a,$mask);
    &xor($tt,$a);
    &xor($b,$a);
    &comment("");
}
| 206 | |||
# IP_new(L, R, TT, LR)
#
# Emits the initial permutation as five R_PERM_OP steps over the registers
# L, R and TT, followed by a rotation fix-up controlled by LR:
#   TT is rotated by (LR-3) bits and R by (LR-2) bits,
#   to the left when the amount is positive and to the right when negative;
#   no instruction is emitted for an amount of zero.
# The caller in this file continues with the registers passed as R and TT.
sub IP_new
{
    local($l,$r,$tt,$lr)=@_;

    &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l);
    &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l);
    &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r);
    &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r);
    &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r);

    if    ($lr < 3) { &rotr($tt,3-$lr); }
    elsif ($lr > 3) { &rotl($tt,$lr-3); }

    if    ($lr < 2) { &rotr($r,2-$lr); }
    elsif ($lr > 2) { &rotl($r,$lr-2); }
}
| 230 | |||
# FP_new(L, R, TT, LR)
#
# Emits the final permutation.  First the rotation fix-up controlled by LR:
#   R is rotated by (LR-2) bits and L by (LR-3) bits,
#   to the right when the amount is positive and to the left when negative;
#   no instruction is emitted for an amount of zero.
# Then five R_PERM_OP steps are emitted, followed by a right rotation of TT
# by 4 bits.  The caller in this file stores TT and R as the result words.
sub FP_new
{
    local($l,$r,$tt,$lr)=@_;

    if    ($lr < 2) { &rotl($r,2-$lr); }
    elsif ($lr > 2) { &rotr($r,$lr-2); }

    if    ($lr < 3) { &rotl($l,3-$lr); }
    elsif ($lr > 3) { &rotr($l,$lr-3); }

    &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r);
    &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r);
    &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l);
    &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l);
    &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r);
    &rotr($tt,4);
}
| 255 | |||
diff --git a/src/lib/libcrypto/des/asm/des686.pl b/src/lib/libcrypto/des/asm/des686.pl new file mode 100644 index 0000000000..d3ad5d5edd --- /dev/null +++ b/src/lib/libcrypto/des/asm/des686.pl | |||
| @@ -0,0 +1,230 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
$prog="des686.pl";

# The base code is written in Microsoft operand order: op dest, source.

# WILL NOT WORK ANYMORE WITH desboth.pl
require "desboth.pl";

# One set-up routine per supported target type.  Where a target needs a flag,
# the flag is set before the back end is loaded.
my %setup_for=(
    "elf"   => sub { require "x86unix.pl"; },
    "a.out" => sub { $aout=1; require "x86unix.pl"; },
    "sol"   => sub { $sol=1; require "x86unix.pl"; },
    "cpp"   => sub { $cpp=1; require "x86unix.pl"; },
    "win32" => sub { require "x86ms.pl"; },
);

my $setup=$setup_for{$ARGV[0]};
if (!$setup)
{
    # Unknown or missing target type: print the usage text and give up.
    print STDERR <<"EOF";
Pick one target type from
elf - linux, FreeBSD etc
a.out - old linux
sol - x86 solaris
cpp - format so x86unix.cpp can be used
win32 - Windows 95/Windows NT
EOF
    exit(1);
}
&$setup();

# Banner placed at the top of the generated file.
foreach my $banner_line (
    "Don't even think of reading this code",
    "It was automatically generated by $prog",
    "Which is a perl program used to generate the x86 assember for",
    "any of elf, a.out, Win32, or Solaris",
    "It can be found in SSLeay 0.6.5+ or in libdes 3.26+",
    "eric <eay\@cryptsoft.com>",
    "")
{
    &comment($banner_line);
}

&file("dx86xxxx");

# Registers holding the two 32-bit halves of the block.
($L, $R) = ("edi", "esi");

&DES_encrypt("DES_encrypt1",1);
&DES_encrypt("DES_encrypt2",0);

&DES_encrypt3("DES_encrypt3",1);
&DES_encrypt3("DES_decrypt3",0);

&file_end();
| 56 | |||
# DES_encrypt(NAME, DO_IP)
#
# Emits the function NAME.  The generated code loads two 32-bit words through
# the pointer in &wparam(0), optionally applies IP_new (DO_IP true), rotates
# both halves left by 3 and exchanges them, runs the 16 rounds in forward key
# order when the flag in &wparam(2) is non-zero and in reverse key order when
# it is zero, undoes the rotation, optionally applies FP_new, and stores the
# two words back through &wparam(0).
#
# The key pointer from &wparam(1) is kept in ebp for the rounds.  $ksp is
# assigned here but not read anywhere in this routine.
sub DES_encrypt
{
    local($name,$do_ip)=@_;

    &function_begin($name,"EXTRN _DES_SPtrans:DWORD");

    &comment("");
    &comment("Load the 2 words");
    &mov("eax",&wparam(0));
    &mov($L,&DWP(0,"eax","",0));
    &mov($R,&DWP(4,"eax","",0));

    $ksp=&wparam(1);

    if ($do_ip)
    {
        &comment("");
        &comment("IP");
        &IP_new($L,$R,"eax");
    }

    &comment("");
    &comment("fixup rotate");
    &rotl($R,3);
    &rotl($L,3);
    &exch($L,$R);

    &comment("");
    &comment("load counter, key_schedule and enc flag");
    &mov("eax",&wparam(2));     # encrypt flag
    &mov("ebp",&wparam(1));     # key schedule
    &cmp("eax","0");
    &je(&label("start_decrypt"));

    # Encrypting part: rounds 0..15, two per iteration.
    $i=0;
    while ($i<16)
    {
        my $next=$i+1;

        &comment("");
        &comment("Round $i");
        &D_ENCRYPT($L,$R,$i*2,"ebp","DES_SPtrans","ecx","edx","eax","ebx");

        &comment("");
        &comment("Round $next");
        &D_ENCRYPT($R,$L,$next*2,"ebp","DES_SPtrans","ecx","edx","eax","ebx");

        $i+=2;
    }
    &jmp(&label("end"));

    &set_label("start_decrypt");

    # Decrypting part: rounds 15..0, two per iteration.
    $i=15;
    while ($i>0)
    {
        my $prev=$i-1;

        &comment("");
        &comment("Round $i");
        &D_ENCRYPT($L,$R,$i*2,"ebp","DES_SPtrans","ecx","edx","eax","ebx");
        &comment("");
        &comment("Round $prev");
        &D_ENCRYPT($R,$L,$prev*2,"ebp","DES_SPtrans","ecx","edx","eax","ebx");

        $i-=2;
    }

    &set_label("end");

    &comment("");
    &comment("Fixup");
    &rotr($L,3);                # r
    &rotr($R,3);                # l

    if ($do_ip)
    {
        &comment("");
        &comment("FP");
        &FP_new($R,$L,"eax");
    }

    &mov("eax",&wparam(0));
    &mov(&DWP(0,"eax","",0),$L);
    &mov(&DWP(4,"eax","",0),$R);

    &function_end($name);
}
| 137 | |||
| 138 | |||
# D_ENCRYPT(L, R, S, KS, DESSP, U, T, TMP1, TMP2)
#
# Emits one DES round.  L is the half that gets modified, R the half that is
# only read.  The two key words are read at byte offsets S*4 and (S+1)*4 from
# the register KS.  DESSP is the symbol of the lookup table; it is pasted
# into each displacement string.  U and T receive R xor-ed with the two key
# words (T additionally rotated right by 4) and are consumed a byte at a
# time: each step masks the low byte with 0xfc into TMP1/TMP2, uses it as a
# table index, and shifts the source right by 8.  Every looked-up value is
# xor-ed into L.  A tenth argument would be unpacked into $tmp3, which is
# not used.
#
# The pair of numbers after each instruction is (lookup group, position in
# the original instruction order); the emitted order is the scheduled one and
# must be kept.
sub D_ENCRYPT
{
    local($L,$R,$S,$ks,$desSP,$u,$t,$tmp1,$tmp2,$tmp3)=@_;

    # Byte offsets of this round's two key words.
    my $key_lo=&n2a($S*4);
    my $key_hi=&n2a(($S+1)*4);

    &mov($u,&DWP($key_lo,$ks,"",0));
    &mov($t,&DWP($key_hi,$ks,"",0));
    &xor($u,$R);
    &xor($t,$R);
    &rotr($t,4);

    &mov($tmp2,$u);                                 # 1 2
    &mov($tmp1,$t);                                 # 1 1
    &and($tmp2,"0xfc");                             # 1 4
    &and($tmp1,"0xfc");                             # 1 3
    &shr($t,8);                                     # 1 5
    &xor($L,&DWP("0x100+$desSP",$tmp1,"",0));       # 1 7
    &shr($u,8);                                     # 1 6
    &mov($tmp1,&DWP(" $desSP",$tmp2,"",0));         # 1 8

    &mov($tmp2,$u);                                 # 2 2
    &xor($L,$tmp1);                                 # 1 9
    &and($tmp2,"0xfc");                             # 2 4
    &mov($tmp1,$t);                                 # 2 1
    &and($tmp1,"0xfc");                             # 2 3
    &shr($t,8);                                     # 2 5
    &xor($L,&DWP("0x300+$desSP",$tmp1,"",0));       # 2 7
    &shr($u,8);                                     # 2 6
    &mov($tmp1,&DWP("0x200+$desSP",$tmp2,"",0));    # 2 8
    &mov($tmp2,$u);                                 # 3 2

    &xor($L,$tmp1);                                 # 2 9
    &and($tmp2,"0xfc");                             # 3 4

    &mov($tmp1,$t);                                 # 3 1
    &shr($u,8);                                     # 3 6
    &and($tmp1,"0xfc");                             # 3 3
    &shr($t,8);                                     # 3 5
    &xor($L,&DWP("0x500+$desSP",$tmp1,"",0));       # 3 7
    &mov($tmp1,&DWP("0x400+$desSP",$tmp2,"",0));    # 3 8

    &and($t,"0xfc");                                # 4 1
    &xor($L,$tmp1);                                 # 3 9

    &and($u,"0xfc");                                # 4 2
    &xor($L,&DWP("0x700+$desSP",$t,"",0));          # 4 3
    &xor($L,&DWP("0x600+$desSP",$u,"",0));          # 4 4
}
| 191 | |||
# PERM_OP(A, B, TT, SHIFT, MASK)
#
# Emits one shift-based permutation step using TT as scratch:
#     TT = ((A >> SHIFT) ^ B) & MASK
#     B ^= TT
#     A ^= TT << SHIFT
# which exchanges the bits of B selected by MASK with the bits of A that sit
# SHIFT positions higher.
sub PERM_OP
{
    local($a,$b,$tt,$shift,$mask)=@_;

    # Build the masked difference in TT.
    &mov($tt,$a);
    &shr($tt,$shift);
    &xor($tt,$b);
    &and($tt,$mask);

    # Apply it to B, then shift it back up and apply it to A.
    &xor($b,$tt);
    &shl($tt,$shift);
    &xor($a,$tt);
}
| 204 | |||
# IP_new(L, R, TT)
#
# Emits the initial permutation as five PERM_OP steps on the registers L and
# R, with TT as the scratch register.
sub IP_new
{
    local($l,$r,$tt)=@_;

    # Each entry: first operand, second operand, shift, mask.
    my @steps=(
        [$r,$l, 4,"0x0f0f0f0f"],
        [$l,$r,16,"0x0000ffff"],
        [$r,$l, 2,"0x33333333"],
        [$l,$r, 8,"0x00ff00ff"],
        [$r,$l, 1,"0x55555555"],
    );

    foreach my $step (@steps)
    {
        my ($first,$second,$shift,$mask)=@$step;
        &PERM_OP($first,$second,$tt,$shift,$mask);
    }
}
| 215 | |||
# FP_new(L, R, TT)
#
# Emits the final permutation as five PERM_OP steps on the registers L and R,
# with TT as the scratch register.  The shifts and masks are those of IP_new
# in the opposite order.
sub FP_new
{
    local($l,$r,$tt)=@_;

    # Each entry: first operand, second operand, shift, mask.
    my @steps=(
        [$l,$r, 1,"0x55555555"],
        [$r,$l, 8,"0x00ff00ff"],
        [$l,$r, 2,"0x33333333"],
        [$r,$l,16,"0x0000ffff"],
        [$l,$r, 4,"0x0f0f0f0f"],
    );

    foreach my $step (@steps)
    {
        my ($first,$second,$shift,$mask)=@$step;
        &PERM_OP($first,$second,$tt,$shift,$mask);
    }
}
| 226 | |||
# n2a(NUMBER)
#
# Returns NUMBER formatted as a decimal integer string.
sub n2a
{
    my ($number)=@_;
    return sprintf("%d",$number);
}
diff --git a/src/lib/libcrypto/des/asm/des_enc.m4 b/src/lib/libcrypto/des/asm/des_enc.m4 new file mode 100644 index 0000000000..f5b1928f99 --- /dev/null +++ b/src/lib/libcrypto/des/asm/des_enc.m4 | |||
| @@ -0,0 +1,1980 @@ | |||
| 1 | ! des_enc.m4 | ||
| 2 | ! des_enc.S (generated from des_enc.m4) | ||
| 3 | ! | ||
| 4 | ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file. | ||
| 5 | ! | ||
| 6 | ! Version 1.0. 32-bit version. | ||
| 7 | ! | ||
| 8 | ! June 8, 2000. | ||
| 9 | ! | ||
| 10 | ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation | ||
| 11 | ! by Andy Polyakov. | ||
| 12 | ! | ||
| 13 | ! January 1, 2003. | ||
| 14 | ! | ||
| 15 | ! Assembler version: Copyright Svend Olaf Mikkelsen. | ||
| 16 | ! | ||
| 17 | ! Original C code: Copyright Eric A. Young. | ||
| 18 | ! | ||
| 19 | ! This code can be freely used by LibDES/SSLeay/OpenSSL users. | ||
| 20 | ! | ||
| 21 | ! The LibDES/SSLeay/OpenSSL copyright notices must be respected. | ||
| 22 | ! | ||
| 23 | ! This version can be redistributed. | ||
| 24 | ! | ||
| 25 | ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S | ||
| 26 | ! | ||
| 27 | ! Global registers 1 to 5 are used. This is the same as done by the | ||
| 28 | ! cc compiler. The UltraSPARC load/store little endian feature is used. | ||
| 29 | ! | ||
| 30 | ! Instruction grouping often refers to one CPU cycle. | ||
| 31 | ! | ||
| 32 | ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S | ||
| 33 | ! | ||
| 34 | ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S | ||
| 35 | ! | ||
| 36 | ! Performance improvement according to './apps/openssl speed des' | ||
| 37 | ! | ||
| 38 | ! 32-bit build: | ||
| 39 | ! 23% faster than cc-5.2 -xarch=v8plus -xO5 | ||
| 40 | ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 | ||
| 41 | ! 64-bit build: | ||
| 42 | ! 50% faster than cc-5.2 -xarch=v9 -xO5 | ||
| 43 | ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 | ||
| 44 | ! | ||
| 45 | |||
| 46 | .ident "des_enc.m4 2.1" | ||
| 47 | |||
| 48 | #if defined(__SUNPRO_C) && defined(__sparcv9) | ||
| 49 | # define ABI64 /* They've said -xarch=v9 at command line */ | ||
| 50 | #elif defined(__GNUC__) && defined(__arch64__) | ||
| 51 | # define ABI64 /* They've said -m64 at command line */ | ||
| 52 | #endif | ||
| 53 | |||
| 54 | #ifdef ABI64 | ||
| 55 | .register %g2,#scratch | ||
| 56 | .register %g3,#scratch | ||
| 57 | # define FRAME -192 | ||
| 58 | # define BIAS 2047 | ||
| 59 | # define LDPTR ldx | ||
| 60 | # define STPTR stx | ||
| 61 | # define ARG0 128 | ||
| 62 | # define ARGSZ 8 | ||
| 63 | # ifndef OPENSSL_SYSNAME_ULTRASPARC | ||
| 64 | # define OPENSSL_SYSNAME_ULTRASPARC | ||
| 65 | # endif | ||
| 66 | #else | ||
| 67 | # define FRAME -96 | ||
| 68 | # define BIAS 0 | ||
| 69 | # define LDPTR ld | ||
| 70 | # define STPTR st | ||
| 71 | # define ARG0 68 | ||
| 72 | # define ARGSZ 4 | ||
| 73 | #endif | ||
| 74 | |||
| 75 | #define LOOPS 7 | ||
| 76 | |||
| 77 | #define global0 %g0 | ||
| 78 | #define global1 %g1 | ||
| 79 | #define global2 %g2 | ||
| 80 | #define global3 %g3 | ||
| 81 | #define global4 %g4 | ||
| 82 | #define global5 %g5 | ||
| 83 | |||
| 84 | #define local0 %l0 | ||
| 85 | #define local1 %l1 | ||
| 86 | #define local2 %l2 | ||
| 87 | #define local3 %l3 | ||
| 88 | #define local4 %l4 | ||
| 89 | #define local5 %l5 | ||
| 90 | #define local7 %l6 | ||
| 91 | #define local6 %l7 | ||
| 92 | |||
| 93 | #define in0 %i0 | ||
| 94 | #define in1 %i1 | ||
| 95 | #define in2 %i2 | ||
| 96 | #define in3 %i3 | ||
| 97 | #define in4 %i4 | ||
| 98 | #define in5 %i5 | ||
| 99 | #define in6 %i6 | ||
| 100 | #define in7 %i7 | ||
| 101 | |||
| 102 | #define out0 %o0 | ||
| 103 | #define out1 %o1 | ||
| 104 | #define out2 %o2 | ||
| 105 | #define out3 %o3 | ||
| 106 | #define out4 %o4 | ||
| 107 | #define out5 %o5 | ||
| 108 | #define out6 %o6 | ||
| 109 | #define out7 %o7 | ||
| 110 | |||
| 111 | #define stub stb | ||
| 112 | |||
| 113 | changequote({,}) | ||
| 114 | |||
| 115 | |||
| 116 | ! Macro definitions: | ||
| 117 | |||
| 118 | |||
| 119 | ! {ip_macro} | ||
| 120 | ! | ||
| 121 | ! The logic used in initial and final permutations is the same as in | ||
| 122 | ! the C code. The permutations are done with a clever shift/xor/and | ||
| 123 | ! technique. | ||
| 124 | ! | ||
| 125 | ! The macro also loads address sbox 1 to 5 to global 1 to 5, address | ||
| 126 | ! sbox 6 to local6, and address sbox 8 to out3. | ||
| 127 | ! | ||
| 128 | ! Rotates the halves 3 left to bring the sbox bits in convenient positions. | ||
| 129 | ! | ||
| 130 | ! Loads key first round from address in parameter 5 to out0, out1. | ||
| 131 | ! | ||
| 132 | ! After the original LibDES initial permutation, the resulting left | ||
| 133 | ! is in the variable initially used for right and vice versa. The macro | ||
| 134 | ! implements the possibility to keep the halves in the original registers. | ||
| 135 | ! | ||
| 136 | ! parameter 1 left | ||
| 137 | ! parameter 2 right | ||
| 138 | ! parameter 3 result left (modify in first round) | ||
| 139 | ! parameter 4 result right (use in first round) | ||
| 140 | ! parameter 5 key address | ||
| 141 | ! parameter 6 1/2 for include encryption/decryption | ||
| 142 | ! parameter 7 1 for move in1 to in3 | ||
| 143 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 144 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 145 | |||
| 146 | define(ip_macro, { | ||
| 147 | |||
| 148 | ! {ip_macro} | ||
| 149 | ! $1 $2 $4 $3 $5 $6 $7 $8 $9 | ||
| 150 | |||
| 151 | ld [out2+256], local1 | ||
| 152 | srl $2, 4, local4 | ||
| 153 | |||
| 154 | xor local4, $1, local4 | ||
| 155 | ifelse($7,1,{mov in1, in3},{nop}) | ||
| 156 | |||
| 157 | ld [out2+260], local2 | ||
| 158 | and local4, local1, local4 | ||
| 159 | ifelse($8,1,{mov in3, in4},{}) | ||
| 160 | ifelse($8,2,{mov in4, in3},{}) | ||
| 161 | |||
| 162 | ld [out2+280], out4 ! loop counter | ||
| 163 | sll local4, 4, local1 | ||
| 164 | xor $1, local4, $1 | ||
| 165 | |||
| 166 | ld [out2+264], local3 | ||
| 167 | srl $1, 16, local4 | ||
| 168 | xor $2, local1, $2 | ||
| 169 | |||
| 170 | ifelse($9,1,{LDPTR KS3, in4},{}) | ||
| 171 | xor local4, $2, local4 | ||
| 172 | nop !sethi %hi(DES_SPtrans), global1 ! sbox addr | ||
| 173 | |||
| 174 | ifelse($9,1,{LDPTR KS2, in3},{}) | ||
| 175 | and local4, local2, local4 | ||
| 176 | nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr | ||
| 177 | |||
| 178 | sll local4, 16, local1 | ||
| 179 | xor $2, local4, $2 | ||
| 180 | |||
| 181 | srl $2, 2, local4 | ||
| 182 | xor $1, local1, $1 | ||
| 183 | |||
| 184 | sethi %hi(16711680), local5 ! 16711680 = 0x00ff0000 | ||
| 185 | xor local4, $1, local4 | ||
| 186 | |||
| 187 | and local4, local3, local4 | ||
| 188 | or local5, 255, local5 ! local5 = 0x00ff00ff, mask for the shift-by-8 step | ||
| 189 | |||
| 190 | sll local4, 2, local2 | ||
| 191 | xor $1, local4, $1 | ||
| 192 | |||
| 193 | srl $1, 8, local4 | ||
| 194 | xor $2, local2, $2 | ||
| 195 | |||
| 196 | xor local4, $2, local4 | ||
| 197 | add global1, 768, global4 ! address sbox 4 | ||
| 198 | |||
| 199 | and local4, local5, local4 | ||
| 200 | add global1, 1024, global5 ! address sbox 5 | ||
| 201 | |||
| 202 | ld [out2+272], local7 | ||
| 203 | sll local4, 8, local1 | ||
| 204 | xor $2, local4, $2 | ||
| 205 | |||
| 206 | srl $2, 1, local4 | ||
| 207 | xor $1, local1, $1 | ||
| 208 | |||
| 209 | ld [$5], out0 ! key 7531 | ||
| 210 | xor local4, $1, local4 | ||
| 211 | add global1, 256, global2 ! address sbox 2 | ||
| 212 | |||
| 213 | ld [$5+4], out1 ! key 8642 | ||
| 214 | and local4, local7, local4 | ||
| 215 | add global1, 512, global3 ! address sbox 3 | ||
| 216 | |||
| 217 | sll local4, 1, local1 | ||
| 218 | xor $1, local4, $1 | ||
| 219 | |||
| 220 | sll $1, 3, local3 | ||
| 221 | xor $2, local1, $2 | ||
| 222 | |||
| 223 | sll $2, 3, local2 | ||
| 224 | add global1, 1280, local6 ! address sbox 6 | ||
| 225 | |||
| 226 | srl $1, 29, local4 | ||
| 227 | add global1, 1792, out3 ! address sbox 8 | ||
| 228 | |||
| 229 | srl $2, 29, local1 | ||
| 230 | or local4, local3, $4 | ||
| 231 | |||
| 232 | or local2, local1, $3 | ||
| 233 | |||
| 234 | ifelse($6, 1, { | ||
| 235 | |||
| 236 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | ||
| 237 | or local2, local1, $3 | ||
| 238 | xor $4, out0, local1 | ||
| 239 | |||
| 240 | call .des_enc.1 | ||
| 241 | and local1, 252, local1 | ||
| 242 | |||
| 243 | },{}) | ||
| 244 | |||
| 245 | ifelse($6, 2, { | ||
| 246 | |||
| 247 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | ||
| 248 | or local2, local1, $3 | ||
| 249 | xor $4, out0, local1 | ||
| 250 | |||
| 251 | call .des_dec.1 | ||
| 252 | and local1, 252, local1 | ||
| 253 | |||
| 254 | },{}) | ||
| 255 | }) | ||
| 256 | |||
| 257 | |||
| 258 | ! {rounds_macro} | ||
| 259 | ! | ||
| 260 | ! The logic used in the DES rounds is the same as in the C code, | ||
| 261 | ! except that calculations for sbox 1 and sbox 5 begin before | ||
| 262 | ! the previous round is finished. | ||
| 263 | ! | ||
| 264 | ! In each round one half (work) is modified based on key and the | ||
| 265 | ! other half (use). | ||
| 266 | ! | ||
| 267 | ! In this version we do two rounds in a loop repeated 7 times | ||
| 268 | ! and two rounds separately. | ||
| 269 | ! | ||
| 270 | ! One half has the bits for the sboxes in the following positions: | ||
| 271 | ! | ||
| 272 | ! 777777xx555555xx333333xx111111xx | ||
| 273 | ! | ||
| 274 | ! 88xx666666xx444444xx222222xx8888 | ||
| 275 | ! | ||
| 276 | ! The bits for each sbox are xor-ed with the key bits for that box. | ||
| 277 | ! The above xx bits are cleared, and the result used for lookup in | ||
| 278 | ! the sbox table. Each sbox entry contains the 4 output bits permuted | ||
| 279 | ! into 32 bits according to the P permutation. | ||
| 280 | ! | ||
| 281 | ! In the description of DES, left and right are switched after | ||
| 282 | ! each round, except after last round. In this code the original | ||
| 283 | ! left and right are kept in the same register in all rounds, meaning | ||
| 284 | ! that after the 16 rounds the result for right is in the register | ||
| 285 | ! originally used for left. | ||
| 286 | ! | ||
| 287 | ! parameter 1 first work (left in first round) | ||
| 288 | ! parameter 2 first use (right in first round) | ||
| 289 | ! parameter 3 enc/dec 1/-1 | ||
| 290 | ! parameter 4 loop label | ||
| 291 | ! parameter 5 key address register | ||
| 292 | ! parameter 6 optional address for key next encryption/decryption | ||
| 293 | ! parameter 7 not empty for include retl | ||
| 294 | ! | ||
| 295 | ! also compares in2 to 8 | ||
| 296 | |||
| 297 | define(rounds_macro, { | ||
| 298 | |||
| 299 | ! {rounds_macro} - two rounds per pass of the loop at $4, then two final rounds | ||
| 300 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 301 | |||
| 302 | xor $2, out0, local1 | ||
| 303 | |||
| 304 | ld [out2+284], local5 ! 0x0000FC00 | ||
| 305 | ba $4 | ||
| 306 | and local1, 252, local1 ! executed in the delay slot of the ba above | ||
| 307 | |||
| 308 | .align 32 | ||
| 309 | |||
| 310 | $4: | ||
| 311 | ! local6 is address sbox 6 | ||
| 312 | ! out3 is address sbox 8 | ||
| 313 | ! out4 is loop counter | ||
| 314 | |||
| 315 | ld [global1+local1], local1 | ||
| 316 | xor $2, out1, out1 ! 8642 | ||
| 317 | xor $2, out0, out0 ! 7531 | ||
| 318 | fmovs %f0, %f0 ! self-move filler used for alignment | ||
| 319 | |||
| 320 | srl out1, 4, local0 ! rotate 4 right | ||
| 321 | and out0, local5, local3 ! 3 | ||
| 322 | fmovs %f0, %f0 | ||
| 323 | |||
| 324 | ld [$5+$3*8], local7 ! key 7531 next round | ||
| 325 | srl local3, 8, local3 ! 3 | ||
| 326 | and local0, 252, local2 ! 2 | ||
| 327 | fmovs %f0, %f0 | ||
| 328 | |||
| 329 | ld [global3+local3],local3 ! 3 | ||
| 330 | sll out1, 28, out1 ! rotate | ||
| 331 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | ||
| 332 | |||
| 333 | ld [global2+local2], local2 ! 2 | ||
| 334 | srl out0, 24, local1 ! 7 | ||
| 335 | or out1, local0, out1 ! rotate | ||
| 336 | |||
| 337 | ldub [out2+local1], local1 ! 7 (and 0xFC) | ||
| 338 | srl out1, 24, local0 ! 8 | ||
| 339 | and out1, local5, local4 ! 4 | ||
| 340 | |||
| 341 | ldub [out2+local0], local0 ! 8 (and 0xFC) | ||
| 342 | srl local4, 8, local4 ! 4 | ||
| 343 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | ||
| 344 | |||
| 345 | ld [global4+local4],local4 ! 4 | ||
| 346 | srl out1, 16, local2 ! 6 | ||
| 347 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | ||
| 348 | |||
| 349 | ld [out3+local0],local0 ! 8 | ||
| 350 | and local2, 252, local2 ! 6 | ||
| 351 | add global1, 1536, local5 ! address sbox 7 | ||
| 352 | |||
| 353 | ld [local6+local2], local2 ! 6 | ||
| 354 | srl out0, 16, local3 ! 5 | ||
| 355 | xor $1, local4, $1 ! 4 finished | ||
| 356 | |||
| 357 | ld [local5+local1],local1 ! 7 | ||
| 358 | and local3, 252, local3 ! 5 | ||
| 359 | xor $1, local0, $1 ! 8 finished | ||
| 360 | |||
| 361 | ld [global5+local3],local3 ! 5 | ||
| 362 | xor $1, local2, $1 ! 6 finished | ||
| 363 | subcc out4, 1, out4 | ||
| 364 | |||
| 365 | ld [$5+$3*8+4], out0 ! key 8642 next round | ||
| 366 | xor $1, local7, local2 ! sbox 5 next round | ||
| 367 | xor $1, local1, $1 ! 7 finished | ||
| 368 | |||
| 369 | srl local2, 16, local2 ! sbox 5 next round | ||
| 370 | xor $1, local3, $1 ! 5 finished | ||
| 371 | |||
| 372 | ld [$5+$3*16+4], out1 ! key 8642 next round again | ||
| 373 | and local2, 252, local2 ! sbox5 next round | ||
| 374 | ! next round | ||
| 375 | xor $1, local7, local7 ! 7531 | ||
| 376 | |||
| 377 | ld [global5+local2], local2 ! 5 | ||
| 378 | srl local7, 24, local3 ! 7 | ||
| 379 | xor $1, out0, out0 ! 8642 | ||
| 380 | |||
| 381 | ldub [out2+local3], local3 ! 7 (and 0xFC) | ||
| 382 | srl out0, 4, local0 ! rotate 4 right | ||
| 383 | and local7, 252, local1 ! 1 | ||
| 384 | |||
| 385 | sll out0, 28, out0 ! rotate | ||
| 386 | xor $2, local2, $2 ! 5 finished local2 used | ||
| 387 | |||
| 388 | srl local0, 8, local4 ! 4 | ||
| 389 | and local0, 252, local2 ! 2 | ||
| 390 | ld [local5+local3], local3 ! 7 | ||
| 391 | |||
| 392 | srl local0, 16, local5 ! 6 | ||
| 393 | or out0, local0, out0 ! rotate | ||
| 394 | ld [global2+local2], local2 ! 2 | ||
| 395 | |||
| 396 | srl out0, 24, local0 | ||
| 397 | ld [$5+$3*16], out0 ! key 7531 next round | ||
| 398 | and local4, 252, local4 ! 4 | ||
| 399 | |||
| 400 | and local5, 252, local5 ! 6 | ||
| 401 | ld [global4+local4], local4 ! 4 | ||
| 402 | xor $2, local3, $2 ! 7 finished local3 used | ||
| 403 | |||
| 404 | and local0, 252, local0 ! 8 | ||
| 405 | ld [local6+local5], local5 ! 6 | ||
| 406 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | ||
| 407 | |||
| 408 | srl local7, 8, local2 ! 3 start | ||
| 409 | ld [out3+local0], local0 ! 8 | ||
| 410 | xor $2, local4, $2 ! 4 finished | ||
| 411 | |||
| 412 | and local2, 252, local2 ! 3 | ||
| 413 | ld [global1+local1], local1 ! 1 | ||
| 414 | xor $2, local5, $2 ! 6 finished local5 used | ||
| 415 | |||
| 416 | ld [global3+local2], local2 ! 3 | ||
| 417 | xor $2, local0, $2 ! 8 finished | ||
| 418 | add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer | ||
| 419 | |||
| 420 | ld [out2+284], local5 ! 0x0000FC00 | ||
| 421 | xor $2, out0, local4 ! sbox 1 next round | ||
| 422 | xor $2, local1, $2 ! 1 finished | ||
| 423 | |||
| 424 | xor $2, local2, $2 ! 3 finished | ||
| 425 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 426 | bne,pt %icc, $4 | ||
| 427 | #else | ||
| 428 | bne $4 | ||
| 429 | #endif | ||
| 430 | and local4, 252, local1 ! sbox 1 next round | ||
| 431 | |||
| 432 | ! two rounds more: | ||
| 433 | |||
| 434 | ld [global1+local1], local1 | ||
| 435 | xor $2, out1, out1 | ||
| 436 | xor $2, out0, out0 | ||
| 437 | |||
| 438 | srl out1, 4, local0 ! rotate | ||
| 439 | and out0, local5, local3 | ||
| 440 | |||
| 441 | ld [$5+$3*8], local7 ! key 7531 | ||
| 442 | srl local3, 8, local3 | ||
| 443 | and local0, 252, local2 | ||
| 444 | |||
| 445 | ld [global3+local3],local3 | ||
| 446 | sll out1, 28, out1 ! rotate | ||
| 447 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | ||
| 448 | |||
| 449 | ld [global2+local2], local2 | ||
| 450 | srl out0, 24, local1 | ||
| 451 | or out1, local0, out1 ! rotate | ||
| 452 | |||
| 453 | ldub [out2+local1], local1 | ||
| 454 | srl out1, 24, local0 | ||
| 455 | and out1, local5, local4 | ||
| 456 | |||
| 457 | ldub [out2+local0], local0 | ||
| 458 | srl local4, 8, local4 | ||
| 459 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | ||
| 460 | |||
| 461 | ld [global4+local4],local4 | ||
| 462 | srl out1, 16, local2 | ||
| 463 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | ||
| 464 | |||
| 465 | ld [out3+local0],local0 | ||
| 466 | and local2, 252, local2 | ||
| 467 | add global1, 1536, local5 ! address sbox 7 | ||
| 468 | |||
| 469 | ld [local6+local2], local2 | ||
| 470 | srl out0, 16, local3 | ||
| 471 | xor $1, local4, $1 ! 4 finished | ||
| 472 | |||
| 473 | ld [local5+local1],local1 | ||
| 474 | and local3, 252, local3 | ||
| 475 | xor $1, local0, $1 | ||
| 476 | |||
| 477 | ld [global5+local3],local3 | ||
| 478 | xor $1, local2, $1 ! 6 finished | ||
| 479 | cmp in2, 8 ! condition codes are not consumed inside this macro | ||
| 480 | |||
| 481 | ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter | ||
| 482 | xor $1, local7, local2 ! sbox 5 next round | ||
| 483 | xor $1, local1, $1 ! 7 finished | ||
| 484 | |||
| 485 | ld [$5+$3*8+4], out0 | ||
| 486 | srl local2, 16, local2 ! sbox 5 next round | ||
| 487 | xor $1, local3, $1 ! 5 finished | ||
| 488 | |||
| 489 | and local2, 252, local2 | ||
| 490 | ! next round (two rounds more) | ||
| 491 | xor $1, local7, local7 ! 7531 | ||
| 492 | |||
| 493 | ld [global5+local2], local2 | ||
| 494 | srl local7, 24, local3 | ||
| 495 | xor $1, out0, out0 ! 8642 | ||
| 496 | |||
| 497 | ldub [out2+local3], local3 | ||
| 498 | srl out0, 4, local0 ! rotate | ||
| 499 | and local7, 252, local1 | ||
| 500 | |||
| 501 | sll out0, 28, out0 ! rotate | ||
| 502 | xor $2, local2, $2 ! 5 finished local2 used | ||
| 503 | |||
| 504 | srl local0, 8, local4 | ||
| 505 | and local0, 252, local2 | ||
| 506 | ld [local5+local3], local3 | ||
| 507 | |||
| 508 | srl local0, 16, local5 | ||
| 509 | or out0, local0, out0 ! rotate | ||
| 510 | ld [global2+local2], local2 | ||
| 511 | |||
| 512 | srl out0, 24, local0 | ||
| 513 | ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption | ||
| 514 | and local4, 252, local4 | ||
| 515 | |||
| 516 | and local5, 252, local5 | ||
| 517 | ld [global4+local4], local4 | ||
| 518 | xor $2, local3, $2 ! 7 finished local3 used | ||
| 519 | |||
| 520 | and local0, 252, local0 | ||
| 521 | ld [local6+local5], local5 | ||
| 522 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | ||
| 523 | |||
| 524 | srl local7, 8, local2 ! 3 start | ||
| 525 | ld [out3+local0], local0 | ||
| 526 | xor $2, local4, $2 | ||
| 527 | |||
| 528 | and local2, 252, local2 | ||
| 529 | ld [global1+local1], local1 | ||
| 530 | xor $2, local5, $2 ! 6 finished local5 used | ||
| 531 | |||
| 532 | ld [global3+local2], local2 | ||
| 533 | srl $1, 3, local3 ! $1 shifted 3 right, kept in local3 on exit | ||
| 534 | xor $2, local0, $2 | ||
| 535 | |||
| 536 | ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption | ||
| 537 | sll $1, 29, local4 ! $1 shifted 29 left, kept in local4 on exit | ||
| 538 | xor $2, local1, $2 | ||
| 539 | |||
| 540 | ifelse($7,{}, {}, {retl}) ! the xor below is in the delay slot when retl is emitted | ||
| 541 | xor $2, local2, $2 | ||
| 542 | }) | ||
| 543 | |||
| 544 | |||
| 545 | ! {fp_macro} | ||
| 546 | ! | ||
| 547 | ! parameter 1 right (original left) | ||
| 548 | ! parameter 2 left (original right) | ||
| 549 | ! parameter 3 1 for optional store to [in0] | ||
| 550 | ! parameter 4 1 for load input/output address to local5/7 | ||
| 551 | ! | ||
| 552 | ! The final permutation logic switches the halves, meaning that | ||
| 553 | ! left and right end up in the registers originally used. | ||
| 554 | |||
| 555 | define(fp_macro, { | ||
| 556 | |||
| 557 | ! {fp_macro} - final permutation with optional store of both halves to [in0] | ||
| 558 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 559 | |||
| 560 | ! initially undo the rotate 3 left done after initial permutation | ||
| 561 | ! original left is received shifted 3 right and 29 left in local3/4 | ||
| 562 | |||
| 563 | sll $2, 29, local1 | ||
| 564 | or local3, local4, $1 | ||
| 565 | |||
| 566 | srl $2, 3, $2 | ||
| 567 | sethi %hi(0x55555555), local2 | ||
| 568 | |||
| 569 | or $2, local1, $2 | ||
| 570 | or local2, %lo(0x55555555), local2 | ||
| 571 | |||
| 572 | srl $2, 1, local3 | ||
| 573 | sethi %hi(0x00ff00ff), local1 | ||
| 574 | xor local3, $1, local3 | ||
| 575 | or local1, %lo(0x00ff00ff), local1 | ||
| 576 | and local3, local2, local3 | ||
| 577 | sethi %hi(0x33333333), local4 | ||
| 578 | sll local3, 1, local2 | ||
| 579 | |||
| 580 | xor $1, local3, $1 | ||
| 581 | |||
| 582 | srl $1, 8, local3 | ||
| 583 | xor $2, local2, $2 | ||
| 584 | xor local3, $2, local3 | ||
| 585 | or local4, %lo(0x33333333), local4 | ||
| 586 | and local3, local1, local3 | ||
| 587 | sethi %hi(0x0000ffff), local1 | ||
| 588 | sll local3, 8, local2 | ||
| 589 | |||
| 590 | xor $2, local3, $2 | ||
| 591 | |||
| 592 | srl $2, 2, local3 | ||
| 593 | xor $1, local2, $1 | ||
| 594 | xor local3, $1, local3 | ||
| 595 | or local1, %lo(0x0000ffff), local1 | ||
| 596 | and local3, local4, local3 | ||
| 597 | sethi %hi(0x0f0f0f0f), local4 | ||
| 598 | sll local3, 2, local2 | ||
| 599 | |||
| 600 | ifelse($4,1, {LDPTR INPUT, local5}) | ||
| 601 | xor $1, local3, $1 | ||
| 602 | |||
| 603 | ifelse($4,1, {LDPTR OUTPUT, local7}) | ||
| 604 | srl $1, 16, local3 | ||
| 605 | xor $2, local2, $2 | ||
| 606 | xor local3, $2, local3 | ||
| 607 | or local4, %lo(0x0f0f0f0f), local4 | ||
| 608 | and local3, local1, local3 | ||
| 609 | sll local3, 16, local2 | ||
| 610 | |||
| 611 | xor $2, local3, local1 ! $2 continues in local1 until the xor at the end | ||
| 612 | |||
| 613 | srl local1, 4, local3 | ||
| 614 | xor $1, local2, $1 | ||
| 615 | xor local3, $1, local3 | ||
| 616 | and local3, local4, local3 | ||
| 617 | sll local3, 4, local2 | ||
| 618 | |||
| 619 | xor $1, local3, $1 | ||
| 620 | |||
| 621 | ! optional store: | ||
| 622 | |||
| 623 | ifelse($3,1, {st $1, [in0]}) | ||
| 624 | |||
| 625 | xor local1, local2, $2 | ||
| 626 | |||
| 627 | ifelse($3,1, {st $2, [in0+4]}) | ||
| 628 | |||
| 629 | }) | ||
| 630 | |||
| 631 | |||
| 632 | ! {fp_ip_macro} | ||
| 633 | ! | ||
| 634 | ! Does initial permutation for next block mixed with | ||
| 635 | ! final permutation for current block. | ||
| 636 | ! | ||
| 637 | ! parameter 1 original left | ||
| 638 | ! parameter 2 original right | ||
| 639 | ! parameter 3 left ip | ||
| 640 | ! parameter 4 right ip | ||
| 641 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
| 642 | ! 2: mov in4 to in3 | ||
| 643 | ! | ||
| 644 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
| 645 | |||
| 646 | define(fp_ip_macro, { | ||
| 647 | |||
| 648 | ! {fp_ip_macro} - final permutation into $1 and $2 interleaved with initial permutation of $3 and $4 | ||
| 649 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 650 | |||
| 651 | define({temp1},{out4}) | ||
| 652 | define({temp2},{local3}) | ||
| 653 | |||
| 654 | define({ip1},{local1}) | ||
| 655 | define({ip2},{local2}) | ||
| 656 | define({ip4},{local4}) | ||
| 657 | define({ip5},{local5}) | ||
| 658 | |||
| 659 | ! $1 in local3, local4 | ||
| 660 | |||
| 661 | ld [out2+256], ip1 | ||
| 662 | sll out5, 29, temp1 | ||
| 663 | or local3, local4, $1 | ||
| 664 | |||
| 665 | srl out5, 3, $2 | ||
| 666 | ifelse($5,2,{mov in4, in3}) | ||
| 667 | |||
| 668 | ld [out2+272], ip5 | ||
| 669 | srl $4, 4, local0 | ||
| 670 | or $2, temp1, $2 | ||
| 671 | |||
| 672 | srl $2, 1, temp1 | ||
| 673 | xor temp1, $1, temp1 | ||
| 674 | |||
| 675 | and temp1, ip5, temp1 | ||
| 676 | xor local0, $3, local0 | ||
| 677 | |||
| 678 | sll temp1, 1, temp2 | ||
| 679 | xor $1, temp1, $1 | ||
| 680 | |||
| 681 | and local0, ip1, local0 | ||
| 682 | add in2, -8, in2 ! adds -8 to length | ||
| 683 | |||
| 684 | sll local0, 4, local7 | ||
| 685 | xor $3, local0, $3 | ||
| 686 | |||
| 687 | ld [out2+268], ip4 | ||
| 688 | srl $1, 8, temp1 | ||
| 689 | xor $2, temp2, $2 | ||
| 690 | ld [out2+260], ip2 | ||
| 691 | srl $3, 16, local0 | ||
| 692 | xor $4, local7, $4 | ||
| 693 | xor temp1, $2, temp1 | ||
| 694 | xor local0, $4, local0 | ||
| 695 | and temp1, ip4, temp1 | ||
| 696 | and local0, ip2, local0 | ||
| 697 | sll temp1, 8, temp2 | ||
| 698 | xor $2, temp1, $2 | ||
| 699 | sll local0, 16, local7 | ||
| 700 | xor $4, local0, $4 | ||
| 701 | |||
| 702 | srl $2, 2, temp1 | ||
| 703 | xor $1, temp2, $1 | ||
| 704 | |||
| 705 | ld [out2+264], temp2 ! ip3 | ||
| 706 | srl $4, 2, local0 | ||
| 707 | xor $3, local7, $3 | ||
| 708 | xor temp1, $1, temp1 | ||
| 709 | xor local0, $3, local0 | ||
| 710 | and temp1, temp2, temp1 | ||
| 711 | and local0, temp2, local0 | ||
| 712 | sll temp1, 2, temp2 | ||
| 713 | xor $1, temp1, $1 | ||
| 714 | sll local0, 2, local7 | ||
| 715 | xor $3, local0, $3 | ||
| 716 | |||
| 717 | srl $1, 16, temp1 | ||
| 718 | xor $2, temp2, $2 | ||
| 719 | srl $3, 8, local0 | ||
| 720 | xor $4, local7, $4 | ||
| 721 | xor temp1, $2, temp1 | ||
| 722 | xor local0, $4, local0 | ||
| 723 | and temp1, ip2, temp1 | ||
| 724 | and local0, ip4, local0 | ||
| 725 | sll temp1, 16, temp2 | ||
| 726 | xor $2, temp1, local4 | ||
| 727 | sll local0, 8, local7 | ||
| 728 | xor $4, local0, $4 | ||
| 729 | |||
| 730 | srl $4, 1, local0 | ||
| 731 | xor $3, local7, $3 | ||
| 732 | |||
| 733 | srl local4, 4, temp1 | ||
| 734 | xor local0, $3, local0 | ||
| 735 | |||
| 736 | xor $1, temp2, $1 | ||
| 737 | and local0, ip5, local0 | ||
| 738 | |||
| 739 | sll local0, 1, local7 | ||
| 740 | xor temp1, $1, temp1 | ||
| 741 | |||
| 742 | xor $3, local0, $3 | ||
| 743 | xor $4, local7, $4 | ||
| 744 | |||
| 745 | sll $3, 3, local5 | ||
| 746 | and temp1, ip1, temp1 | ||
| 747 | |||
| 748 | sll temp1, 4, temp2 | ||
| 749 | xor $1, temp1, $1 | ||
| 750 | |||
| 751 | ifelse($5,1,{LDPTR KS2, in4}) | ||
| 752 | sll $4, 3, local2 | ||
| 753 | xor local4, temp2, $2 | ||
| 754 | |||
| 755 | ! reload since used as temporary: | ||
| 756 | |||
| 757 | ld [out2+280], out4 ! loop counter | ||
| 758 | |||
| 759 | srl $3, 29, local0 | ||
| 760 | ifelse($5,1,{add in4, 120, in4}) | ||
| 761 | |||
| 762 | ifelse($5,1,{LDPTR KS1, in3}) | ||
| 763 | srl $4, 29, local7 | ||
| 764 | |||
| 765 | or local0, local5, $4 | ||
| 766 | or local2, local7, $3 | ||
| 767 | |||
| 768 | }) | ||
| 769 | |||
| 770 | |||
| 771 | |||
| 772 | ! {load_little_endian} | ||
| 773 | ! | ||
| 774 | ! parameter 1 address | ||
| 775 | ! parameter 2 destination left | ||
| 776 | ! parameter 3 destination right | ||
| 777 | ! parameter 4 temporary | ||
| 778 | ! parameter 5 label | ||
| 779 | |||
| 780 | define(load_little_endian, { | ||
| 781 | |||
| 782 | ! {load_little_endian} - bytes at $1+0..3 go to $2 and bytes at $1+4..7 go to $3 | ||
| 783 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 784 | |||
| 785 | ! first in memory to rightmost in register | ||
| 786 | |||
| 787 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 788 | andcc $1, 3, global0 ! address not 4 byte aligned: use the byte loads at $5 | ||
| 789 | bne,pn %icc, $5 | ||
| 790 | nop | ||
| 791 | |||
| 792 | lda [$1] 0x88, $2 ! ASI 0x88 is primary little endian on SPARC V9 | ||
| 793 | add $1, 4, $4 | ||
| 794 | |||
| 795 | ba,pt %icc, $5a | ||
| 796 | lda [$4] 0x88, $3 ! delay slot | ||
| 797 | #endif | ||
| 798 | |||
| 799 | $5: | ||
| 800 | ldub [$1+3], $2 | ||
| 801 | |||
| 802 | ldub [$1+2], $4 | ||
| 803 | sll $2, 8, $2 | ||
| 804 | or $2, $4, $2 | ||
| 805 | |||
| 806 | ldub [$1+1], $4 | ||
| 807 | sll $2, 8, $2 | ||
| 808 | or $2, $4, $2 | ||
| 809 | |||
| 810 | ldub [$1+0], $4 | ||
| 811 | sll $2, 8, $2 | ||
| 812 | or $2, $4, $2 | ||
| 813 | |||
| 814 | |||
| 815 | ldub [$1+3+4], $3 | ||
| 816 | |||
| 817 | ldub [$1+2+4], $4 | ||
| 818 | sll $3, 8, $3 | ||
| 819 | or $3, $4, $3 | ||
| 820 | |||
| 821 | ldub [$1+1+4], $4 | ||
| 822 | sll $3, 8, $3 | ||
| 823 | or $3, $4, $3 | ||
| 824 | |||
| 825 | ldub [$1+0+4], $4 | ||
| 826 | sll $3, 8, $3 | ||
| 827 | or $3, $4, $3 | ||
| 828 | $5a: | ||
| 829 | |||
| 830 | }) | ||
| 831 | |||
| 832 | |||
| 833 | ! {load_little_endian_inc} | ||
| 834 | ! | ||
| 835 | ! parameter 1 address | ||
| 836 | ! parameter 2 destination left | ||
| 837 | ! parameter 3 destination right | ||
| 838 | ! parameter 4 temporary | ||
| 839 | ! parameter 5 label | ||
| 840 | ! | ||
| 841 | ! adds 8 to address | ||
| 842 | |||
| 843 | define(load_little_endian_inc, { | ||
| 844 | |||
| 845 | ! {load_little_endian_inc} - load 8 bytes little endian into $2 and $3, then $1 is 8 higher | ||
| 846 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 847 | |||
| 848 | ! first in memory to rightmost in register | ||
| 849 | |||
| 850 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 851 | andcc $1, 3, global0 ! address not 4 byte aligned: use the byte loads at $5 | ||
| 852 | bne,pn %icc, $5 | ||
| 853 | nop | ||
| 854 | |||
| 855 | lda [$1] 0x88, $2 ! ASI 0x88 is primary little endian on SPARC V9 | ||
| 856 | add $1, 4, $1 | ||
| 857 | |||
| 858 | lda [$1] 0x88, $3 | ||
| 859 | ba,pt %icc, $5a | ||
| 860 | add $1, 4, $1 ! delay slot | ||
| 861 | #endif | ||
| 862 | |||
| 863 | $5: | ||
| 864 | ldub [$1+3], $2 | ||
| 865 | |||
| 866 | ldub [$1+2], $4 | ||
| 867 | sll $2, 8, $2 | ||
| 868 | or $2, $4, $2 | ||
| 869 | |||
| 870 | ldub [$1+1], $4 | ||
| 871 | sll $2, 8, $2 | ||
| 872 | or $2, $4, $2 | ||
| 873 | |||
| 874 | ldub [$1+0], $4 | ||
| 875 | sll $2, 8, $2 | ||
| 876 | or $2, $4, $2 | ||
| 877 | |||
| 878 | ldub [$1+3+4], $3 | ||
| 879 | add $1, 8, $1 ! address advanced early, later offsets compensate with -8 | ||
| 880 | |||
| 881 | ldub [$1+2+4-8], $4 | ||
| 882 | sll $3, 8, $3 | ||
| 883 | or $3, $4, $3 | ||
| 884 | |||
| 885 | ldub [$1+1+4-8], $4 | ||
| 886 | sll $3, 8, $3 | ||
| 887 | or $3, $4, $3 | ||
| 888 | |||
| 889 | ldub [$1+0+4-8], $4 | ||
| 890 | sll $3, 8, $3 | ||
| 891 | or $3, $4, $3 | ||
| 892 | $5a: | ||
| 893 | |||
| 894 | }) | ||
| 895 | |||
| 896 | |||
| 897 | ! {load_n_bytes} | ||
| 898 | ! | ||
| 899 | ! Loads 1 to 7 bytes little endian | ||
| 900 | ! Remaining bytes are zeroed. | ||
| 901 | ! | ||
| 902 | ! parameter 1 address | ||
| 903 | ! parameter 2 length | ||
| 904 | ! parameter 3 destination register left | ||
| 905 | ! parameter 4 destination register right | ||
| 906 | ! parameter 5 temp | ||
| 907 | ! parameter 6 temp2 | ||
| 908 | ! parameter 7 label | ||
| 909 | ! parameter 8 return label | ||
| 910 | |||
| 911 | define(load_n_bytes, { | ||
| 912 | |||
| 913 | ! {load_n_bytes} - jump table dispatch on length $2, falls through from the highest byte down | ||
| 914 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | ||
| 915 | |||
| 916 | $7.0: call .+8 ! leaves the address of $7.0 in %o7 | ||
| 917 | sll $2, 2, $6 ! delay slot: table offset is 4 times length | ||
| 918 | |||
| 919 | add %o7,$7.jmp.table-$7.0,$5 | ||
| 920 | |||
| 921 | add $5, $6, $5 | ||
| 922 | mov 0, $4 | ||
| 923 | |||
| 924 | ld [$5], $5 | ||
| 925 | |||
| 926 | jmp %o7+$5 ! table entries are offsets from $7.0 | ||
| 927 | mov 0, $3 ! delay slot | ||
| 928 | |||
| 929 | $7.7: | ||
| 930 | ldub [$1+6], $5 | ||
| 931 | sll $5, 16, $5 | ||
| 932 | or $3, $5, $3 | ||
| 933 | $7.6: | ||
| 934 | ldub [$1+5], $5 | ||
| 935 | sll $5, 8, $5 | ||
| 936 | or $3, $5, $3 | ||
| 937 | $7.5: | ||
| 938 | ldub [$1+4], $5 | ||
| 939 | or $3, $5, $3 | ||
| 940 | $7.4: | ||
| 941 | ldub [$1+3], $5 | ||
| 942 | sll $5, 24, $5 | ||
| 943 | or $4, $5, $4 | ||
| 944 | $7.3: | ||
| 945 | ldub [$1+2], $5 | ||
| 946 | sll $5, 16, $5 | ||
| 947 | or $4, $5, $4 | ||
| 948 | $7.2: | ||
| 949 | ldub [$1+1], $5 | ||
| 950 | sll $5, 8, $5 | ||
| 951 | or $4, $5, $4 | ||
| 952 | $7.1: | ||
| 953 | ldub [$1+0], $5 | ||
| 954 | ba $8 | ||
| 955 | or $4, $5, $4 | ||
| 956 | |||
| 957 | .align 4 | ||
| 958 | |||
| 959 | $7.jmp.table: | ||
| 960 | .word 0 ! entry for length 0, outside the documented 1 to 7 range | ||
| 961 | .word $7.1-$7.0 | ||
| 962 | .word $7.2-$7.0 | ||
| 963 | .word $7.3-$7.0 | ||
| 964 | .word $7.4-$7.0 | ||
| 965 | .word $7.5-$7.0 | ||
| 966 | .word $7.6-$7.0 | ||
| 967 | .word $7.7-$7.0 | ||
| 968 | }) | ||
| 969 | |||
| 970 | |||
| 971 | ! {store_little_endian} | ||
| 972 | ! | ||
| 973 | ! parameter 1 address | ||
| 974 | ! parameter 2 source left | ||
| 975 | ! parameter 3 source right | ||
| 976 | ! parameter 4 temporary | ||
| 977 | |||
| 978 | define(store_little_endian, { | ||
| 979 | |||
| 980 | ! {store_little_endian} - $2 is stored to $1+0..3 and $3 to $1+4..7, $5 is the label | ||
| 981 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 982 | |||
| 983 | ! rightmost in register to first in memory | ||
| 984 | |||
| 985 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 986 | andcc $1, 3, global0 ! address not 4 byte aligned: use the byte stores at $5 | ||
| 987 | bne,pn %icc, $5 | ||
| 988 | nop | ||
| 989 | |||
| 990 | sta $2, [$1] 0x88 ! ASI 0x88 is primary little endian on SPARC V9 | ||
| 991 | add $1, 4, $4 | ||
| 992 | |||
| 993 | ba,pt %icc, $5a | ||
| 994 | sta $3, [$4] 0x88 ! delay slot | ||
| 995 | #endif | ||
| 996 | |||
| 997 | $5: | ||
| 998 | and $2, 255, $4 | ||
| 999 | stub $4, [$1+0] | ||
| 1000 | |||
| 1001 | srl $2, 8, $4 | ||
| 1002 | and $4, 255, $4 | ||
| 1003 | stub $4, [$1+1] | ||
| 1004 | |||
| 1005 | srl $2, 16, $4 | ||
| 1006 | and $4, 255, $4 | ||
| 1007 | stub $4, [$1+2] | ||
| 1008 | |||
| 1009 | srl $2, 24, $4 | ||
| 1010 | stub $4, [$1+3] | ||
| 1011 | |||
| 1012 | |||
| 1013 | and $3, 255, $4 | ||
| 1014 | stub $4, [$1+0+4] | ||
| 1015 | |||
| 1016 | srl $3, 8, $4 | ||
| 1017 | and $4, 255, $4 | ||
| 1018 | stub $4, [$1+1+4] | ||
| 1019 | |||
| 1020 | srl $3, 16, $4 | ||
| 1021 | and $4, 255, $4 | ||
| 1022 | stub $4, [$1+2+4] | ||
| 1023 | |||
| 1024 | srl $3, 24, $4 | ||
| 1025 | stub $4, [$1+3+4] | ||
| 1026 | |||
| 1027 | $5a: | ||
| 1028 | |||
| 1029 | }) | ||
| 1030 | |||
| 1031 | |||
| 1032 | ! {store_n_bytes} | ||
| 1033 | ! | ||
| 1034 | ! Stores 1 to 7 bytes little endian | ||
| 1035 | ! | ||
| 1036 | ! parameter 1 address | ||
| 1037 | ! parameter 2 length | ||
| 1038 | ! parameter 3 source register left | ||
| 1039 | ! parameter 4 source register right | ||
| 1040 | ! parameter 5 temp | ||
| 1041 | ! parameter 6 temp2 | ||
| 1042 | ! parameter 7 label | ||
| 1043 | ! parameter 8 return label | ||
| 1044 | |||
| 1045 | define(store_n_bytes, { | ||
| 1046 | |||
| 1047 | ! {store_n_bytes} - jump table dispatch on length $2, falls through from the highest byte down | ||
| 1048 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | ||
| 1049 | |||
| 1050 | $7.0: call .+8 ! leaves the address of $7.0 in %o7 | ||
| 1051 | sll $2, 2, $6 ! delay slot: table offset is 4 times length | ||
| 1052 | |||
| 1053 | add %o7,$7.jmp.table-$7.0,$5 | ||
| 1054 | |||
| 1055 | add $5, $6, $5 | ||
| 1056 | |||
| 1057 | ld [$5], $5 | ||
| 1058 | |||
| 1059 | jmp %o7+$5 ! table entries are offsets from $7.0 | ||
| 1060 | nop | ||
| 1061 | |||
| 1062 | $7.7: | ||
| 1063 | srl $3, 16, $5 | ||
| 1064 | and $5, 0xff, $5 | ||
| 1065 | stub $5, [$1+6] | ||
| 1066 | $7.6: | ||
| 1067 | srl $3, 8, $5 | ||
| 1068 | and $5, 0xff, $5 | ||
| 1069 | stub $5, [$1+5] | ||
| 1070 | $7.5: | ||
| 1071 | and $3, 0xff, $5 | ||
| 1072 | stub $5, [$1+4] | ||
| 1073 | $7.4: | ||
| 1074 | srl $4, 24, $5 | ||
| 1075 | stub $5, [$1+3] | ||
| 1076 | $7.3: | ||
| 1077 | srl $4, 16, $5 | ||
| 1078 | and $5, 0xff, $5 | ||
| 1079 | stub $5, [$1+2] | ||
| 1080 | $7.2: | ||
| 1081 | srl $4, 8, $5 | ||
| 1082 | and $5, 0xff, $5 | ||
| 1083 | stub $5, [$1+1] | ||
| 1084 | $7.1: | ||
| 1085 | and $4, 0xff, $5 | ||
| 1086 | |||
| 1087 | |||
| 1088 | ba $8 | ||
| 1089 | stub $5, [$1] ! delay slot | ||
| 1090 | |||
| 1091 | .align 4 | ||
| 1092 | |||
| 1093 | $7.jmp.table: | ||
| 1094 | |||
| 1095 | .word 0 ! entry for length 0, outside the documented 1 to 7 range | ||
| 1096 | .word $7.1-$7.0 | ||
| 1097 | .word $7.2-$7.0 | ||
| 1098 | .word $7.3-$7.0 | ||
| 1099 | .word $7.4-$7.0 | ||
| 1100 | .word $7.5-$7.0 | ||
| 1101 | .word $7.6-$7.0 | ||
| 1102 | .word $7.7-$7.0 | ||
| 1103 | }) | ||
| 1104 | |||
| 1105 | |||
| 1106 | define(testvalue,{1}) | ||
| 1107 | |||
| 1108 | define(register_init, { | ||
| 1109 | |||
| 1110 | ! For test purposes: copy one test value into the argument registers and the work registers | ||
| 1111 | |||
| 1112 | sethi %hi(testvalue), local0 | ||
| 1113 | or local0, %lo(testvalue), local0 | ||
| 1114 | |||
| 1115 | ifelse($1,{},{}, {mov local0, $1}) | ||
| 1116 | ifelse($2,{},{}, {mov local0, $2}) | ||
| 1117 | ifelse($3,{},{}, {mov local0, $3}) | ||
| 1118 | ifelse($4,{},{}, {mov local0, $4}) | ||
| 1119 | ifelse($5,{},{}, {mov local0, $5}) | ||
| 1120 | ifelse($6,{},{}, {mov local0, $6}) | ||
| 1121 | ifelse($7,{},{}, {mov local0, $7}) | ||
| 1122 | ifelse($8,{},{}, {mov local0, $8}) | ||
| 1123 | |||
| 1124 | mov local0, local1 | ||
| 1125 | mov local0, local2 | ||
| 1126 | mov local0, local3 | ||
| 1127 | mov local0, local4 | ||
| 1128 | mov local0, local5 | ||
| 1129 | mov local0, local7 | ||
| 1130 | mov local0, local6 | ||
| 1131 | mov local0, out0 | ||
| 1132 | mov local0, out1 | ||
| 1133 | mov local0, out2 | ||
| 1134 | mov local0, out3 | ||
| 1135 | mov local0, out4 | ||
| 1136 | mov local0, out5 | ||
| 1137 | mov local0, global1 | ||
| 1138 | mov local0, global2 | ||
| 1139 | mov local0, global3 | ||
| 1140 | mov local0, global4 | ||
| 1141 | mov local0, global5 | ||
| 1142 | |||
| 1143 | }) | ||
| 1144 | |||
| 1145 | .section ".text" | ||
| 1146 | |||
| 1147 | .align 32 | ||
| 1148 | |||
| 1149 | .des_enc: ! encryption rounds subroutine; the inner label .des_enc.1 is also called directly | ||
| 1150 | |||
| 1151 | ! key address in3 | ||
| 1152 | ! loads key next encryption/decryption first round from [in4] | ||
| 1153 | |||
| 1154 | rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) | ||
| 1155 | |||
| 1156 | |||
| 1157 | .align 32 | ||
| 1158 | |||
| 1159 | .des_dec: ! decryption rounds subroutine; the inner label .des_dec.1 is also called directly | ||
| 1160 | |||
| 1161 | ! implemented with out5 as first parameter to avoid | ||
| 1162 | ! register exchange in ede modes | ||
| 1163 | |||
| 1164 | ! key address in4 | ||
| 1165 | ! loads key next encryption/decryption first round from [in3] | ||
| 1166 | |||
| 1167 | rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) | ||
| 1168 | |||
| 1169 | |||
| 1170 | |||
| 1171 | ! void DES_encrypt1(data, ks, enc) | ||
| 1172 | ! ******************************* | ||
| 1173 | |||
| 1174 | .align 32 | ||
| 1175 | .global DES_encrypt1 | ||
| 1176 | .type DES_encrypt1,#function | ||
| 1177 | |||
| 1178 | DES_encrypt1: | ||
| 1179 | |||
| 1180 | save %sp, FRAME, %sp ! after save: in0 = data, in1 = ks, in2 = enc | ||
| 1181 | |||
| 1182 | call .PIC.me.up | ||
| 1183 | mov .PIC.me.up-(.-4),out0 ! delay slot; .PIC.me.up is defined outside this section | ||
| 1184 | |||
| 1185 | ld [in0], in5 ! left | ||
| 1186 | cmp in2, 0 ! enc | ||
| 1187 | |||
| 1188 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1189 | be,pn %icc, .encrypt.dec ! enc/dec | ||
| 1190 | #else | ||
| 1191 | be .encrypt.dec | ||
| 1192 | #endif | ||
| 1193 | ld [in0+4], out5 ! right, loaded in the delay slot on both paths | ||
| 1194 | |||
| 1195 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1196 | ! parameter 7 1 for move in1 to in3 | ||
| 1197 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 1198 | |||
| 1199 | ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) | ||
| 1200 | |||
| 1201 | rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used | ||
| 1202 | |||
| 1203 | fp_macro(in5, out5, 1) ! 1 for store to [in0] | ||
| 1204 | |||
| 1205 | ret | ||
| 1206 | restore | ||
| 1207 | |||
| 1208 | .encrypt.dec: | ||
| 1209 | |||
| 1210 | add in1, 120, in3 ! use last subkey for first round | ||
| 1211 | |||
| 1212 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1213 | ! parameter 7 1 for move in1 to in3 | ||
| 1214 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 1215 | |||
| 1216 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4 | ||
| 1217 | |||
| 1218 | fp_macro(out5, in5, 1) ! 1 for store to [in0] | ||
| 1219 | |||
| 1220 | ret | ||
| 1221 | restore | ||
| 1222 | |||
| 1223 | .DES_encrypt1.end: | ||
| 1224 | .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1 | ||
| 1225 | |||
| 1226 | |||
| 1227 | ! void DES_encrypt2(data, ks, enc) | ||
| 1228 | !********************************* | ||
| 1229 | |||
| 1230 | ! encrypts/decrypts without initial/final permutation | ||
| 1231 | |||
| 1232 | .align 32 | ||
| 1233 | .global DES_encrypt2 | ||
| 1234 | .type DES_encrypt2,#function | ||
| 1235 | |||
| 1236 | DES_encrypt2: | ||
| 1237 | |||
| 1238 | save %sp, FRAME, %sp ! after save: in0 = data, in1 = ks, in2 = enc | ||
| 1239 | |||
| 1240 | call .PIC.me.up | ||
| 1241 | mov .PIC.me.up-(.-4),out0 ! delay slot; .PIC.me.up is defined outside this section | ||
| 1242 | |||
| 1243 | ! Set sbox address 1 to 6 and rotate halves 3 left | ||
| 1244 | ! Errors caught by destest? Yes. Still? *NO* | ||
| 1245 | |||
| 1246 | !sethi %hi(DES_SPtrans), global1 ! address sbox 1 | ||
| 1247 | |||
| 1248 | !or global1, %lo(DES_SPtrans), global1 ! sbox 1 | ||
| 1249 | |||
| 1250 | add global1, 256, global2 ! sbox 2 | ||
| 1251 | add global1, 512, global3 ! sbox 3 | ||
| 1252 | |||
| 1253 | ld [in0], out5 ! right | ||
| 1254 | add global1, 768, global4 ! sbox 4 | ||
| 1255 | add global1, 1024, global5 ! sbox 5 | ||
| 1256 | |||
| 1257 | ld [in0+4], in5 ! left | ||
| 1258 | add global1, 1280, local6 ! sbox 6 | ||
| 1259 | add global1, 1792, out3 ! sbox 8 | ||
| 1260 | |||
| 1261 | ! rotate | ||
| 1262 | |||
| 1263 | sll in5, 3, local5 | ||
| 1264 | mov in1, in3 ! key address to in3 | ||
| 1265 | |||
| 1266 | sll out5, 3, local7 | ||
| 1267 | srl in5, 29, in5 | ||
| 1268 | |||
| 1269 | srl out5, 29, out5 | ||
| 1270 | add in5, local5, in5 | ||
| 1271 | |||
| 1272 | add out5, local7, out5 | ||
| 1273 | cmp in2, 0 | ||
| 1274 | |||
| 1275 | ! we use our own stackframe | ||
| 1276 | |||
| 1277 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1278 | be,pn %icc, .encrypt2.dec ! decryption | ||
| 1279 | #else | ||
| 1280 | be .encrypt2.dec | ||
| 1281 | #endif | ||
| 1282 | STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: save in0, it is reloaded after the rounds | ||
| 1283 | |||
| 1284 | ld [in3], out0 ! key 7531 first round | ||
| 1285 | mov LOOPS, out4 ! loop counter | ||
| 1286 | |||
| 1287 | ld [in3+4], out1 ! key 8642 first round | ||
| 1288 | sethi %hi(0x0000FC00), local5 ! note: .des_enc loads local5 again from [out2+284] | ||
| 1289 | |||
| 1290 | call .des_enc | ||
| 1291 | mov in3, in4 ! delay slot; .des_enc loads from [in4] | ||
| 1292 | |||
| 1293 | ! rotate | ||
| 1294 | sll in5, 29, in0 | ||
| 1295 | srl in5, 3, in5 | ||
| 1296 | sll out5, 29, in1 | ||
| 1297 | add in5, in0, in5 | ||
| 1298 | srl out5, 3, out5 | ||
| 1299 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 | ||
| 1300 | add out5, in1, out5 | ||
| 1301 | st in5, [in0] | ||
| 1302 | st out5, [in0+4] | ||
| 1303 | |||
| 1304 | ret | ||
| 1305 | restore | ||
| 1306 | |||
| 1307 | |||
| 1308 | .encrypt2.dec: | ||
| 1309 | |||
| 1310 | add in3, 120, in4 | ||
| 1311 | |||
| 1312 | ld [in4], out0 ! key 7531 first round | ||
| 1313 | mov LOOPS, out4 ! loop counter | ||
| 1314 | |||
| 1315 | ld [in4+4], out1 ! key 8642 first round | ||
| 1316 | sethi %hi(0x0000FC00), local5 ! note: .des_dec loads local5 again from [out2+284] | ||
| 1317 | |||
| 1318 | mov in5, local1 ! left expected in out5 | ||
| 1319 | mov out5, in5 | ||
| 1320 | |||
| 1321 | call .des_dec | ||
| 1322 | mov local1, out5 ! delay slot | ||
| 1323 | |||
| 1324 | .encrypt2.finish: | ||
| 1325 | |||
| 1326 | ! rotate | ||
| 1327 | sll in5, 29, in0 | ||
| 1328 | srl in5, 3, in5 | ||
| 1329 | sll out5, 29, in1 | ||
| 1330 | add in5, in0, in5 | ||
| 1331 | srl out5, 3, out5 | ||
| 1332 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 | ||
| 1333 | add out5, in1, out5 | ||
| 1334 | st out5, [in0] | ||
| 1335 | st in5, [in0+4] | ||
| 1336 | |||
| 1337 | ret | ||
| 1338 | restore | ||
| 1339 | |||
| 1340 | .DES_encrypt2.end: | ||
| 1341 | .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2 | ||
| 1342 | |||
| 1343 | |||
| 1344 | ! void DES_encrypt3(data, ks1, ks2, ks3) | ||
| 1345 | ! ************************************** | ||
| 1346 | ! Triple DES encryption of the two words at data: encrypt with ks1 then decrypt with ks2 then encrypt with ks3. | ||
| 1347 | .align 32 | ||
| 1348 | .global DES_encrypt3 | ||
| 1349 | .type DES_encrypt3,#function | ||
| 1350 | |||
| 1351 | DES_encrypt3: | ||
| 1352 | |||
| 1353 | save %sp, FRAME, %sp | ||
| 1354 | |||
| 1355 | call .PIC.me.up ! sets global1 to DES_SPtrans and out2 to .des_and | ||
| 1356 | mov .PIC.me.up-(.-4),out0 ! delay slot: offset from the call to .PIC.me.up | ||
| 1357 | |||
| 1358 | ld [in0], in5 ! left | ||
| 1359 | add in2, 120, in4 ! ks2 | ||
| 1360 | |||
| 1361 | ld [in0+4], out5 ! right | ||
| 1362 | mov in3, in2 ! save ks3 | ||
| 1363 | |||
| 1364 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1365 | ! parameter 7 1 for mov in1 to in3 | ||
| 1366 | ! parameter 8 1 for mov in3 to in4 | ||
| 1367 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 1368 | |||
| 1369 | ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) | ||
| 1370 | |||
| 1371 | call .des_dec ! ks2 in4 | ||
| 1372 | mov in2, in3 ! preload ks3 | ||
| 1373 | |||
| 1374 | call .des_enc ! ks3 in3 | ||
| 1375 | nop | ||
| 1376 | |||
| 1377 | fp_macro(in5, out5, 1) | ||
| 1378 | |||
| 1379 | ret | ||
| 1380 | restore | ||
| 1381 | |||
| 1382 | .DES_encrypt3.end: | ||
| 1383 | .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3 | ||
| 1384 | |||
| 1385 | |||
| 1386 | ! void DES_decrypt3(data, ks1, ks2, ks3) | ||
| 1387 | ! ************************************** | ||
| 1388 | ! Triple DES decryption of the two words at data: decrypt with ks3 then encrypt with ks2 then decrypt with ks1. | ||
| 1389 | .align 32 | ||
| 1390 | .global DES_decrypt3 | ||
| 1391 | .type DES_decrypt3,#function | ||
| 1392 | |||
| 1393 | DES_decrypt3: | ||
| 1394 | |||
| 1395 | save %sp, FRAME, %sp | ||
| 1396 | |||
| 1397 | call .PIC.me.up ! sets global1 to DES_SPtrans and out2 to .des_and | ||
| 1398 | mov .PIC.me.up-(.-4),out0 ! delay slot: offset from the call to .PIC.me.up | ||
| 1399 | |||
| 1400 | ld [in0], in5 ! left | ||
| 1401 | add in3, 120, in4 ! ks3 | ||
| 1402 | |||
| 1403 | ld [in0+4], out5 ! right | ||
| 1404 | mov in2, in3 ! ks2 | ||
| 1405 | |||
| 1406 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1407 | ! parameter 7 1 for mov in1 to in3 | ||
| 1408 | ! parameter 8 1 for mov in3 to in4 | ||
| 1409 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 1410 | |||
| 1411 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) | ||
| 1412 | |||
| 1413 | call .des_enc ! ks2 in3 | ||
| 1414 | add in1, 120, in4 ! preload ks1 | ||
| 1415 | |||
| 1416 | call .des_dec ! ks1 in4 | ||
| 1417 | nop | ||
| 1418 | |||
| 1419 | fp_macro(out5, in5, 1) | ||
| 1420 | |||
| 1421 | ret | ||
| 1422 | restore | ||
| 1423 | |||
| 1424 | .DES_decrypt3.end: | ||
| 1425 | .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 | ||
| 1426 | |||
| 1427 | .align 256 | ||
| 1428 | .type .des_and,#object | ||
| 1429 | .size .des_and,288 ! 256 table bytes plus the 8 words that follow (the last word sits at offset 284) | ||
| 1430 | |||
| 1431 | .des_and: | ||
| 1432 | |||
| 1433 | ! This table is used for AND 0xFC when it is known that register | ||
| 1434 | ! bits 8-31 are zero. Makes it possible to do three arithmetic | ||
| 1435 | ! operations in one cycle. | ||
| 1436 | |||
| 1437 | .byte 0, 0, 0, 0, 4, 4, 4, 4 | ||
| 1438 | .byte 8, 8, 8, 8, 12, 12, 12, 12 | ||
| 1439 | .byte 16, 16, 16, 16, 20, 20, 20, 20 | ||
| 1440 | .byte 24, 24, 24, 24, 28, 28, 28, 28 | ||
| 1441 | .byte 32, 32, 32, 32, 36, 36, 36, 36 | ||
| 1442 | .byte 40, 40, 40, 40, 44, 44, 44, 44 | ||
| 1443 | .byte 48, 48, 48, 48, 52, 52, 52, 52 | ||
| 1444 | .byte 56, 56, 56, 56, 60, 60, 60, 60 | ||
| 1445 | .byte 64, 64, 64, 64, 68, 68, 68, 68 | ||
| 1446 | .byte 72, 72, 72, 72, 76, 76, 76, 76 | ||
| 1447 | .byte 80, 80, 80, 80, 84, 84, 84, 84 | ||
| 1448 | .byte 88, 88, 88, 88, 92, 92, 92, 92 | ||
| 1449 | .byte 96, 96, 96, 96, 100, 100, 100, 100 | ||
| 1450 | .byte 104, 104, 104, 104, 108, 108, 108, 108 | ||
| 1451 | .byte 112, 112, 112, 112, 116, 116, 116, 116 | ||
| 1452 | .byte 120, 120, 120, 120, 124, 124, 124, 124 | ||
| 1453 | .byte 128, 128, 128, 128, 132, 132, 132, 132 | ||
| 1454 | .byte 136, 136, 136, 136, 140, 140, 140, 140 | ||
| 1455 | .byte 144, 144, 144, 144, 148, 148, 148, 148 | ||
| 1456 | .byte 152, 152, 152, 152, 156, 156, 156, 156 | ||
| 1457 | .byte 160, 160, 160, 160, 164, 164, 164, 164 | ||
| 1458 | .byte 168, 168, 168, 168, 172, 172, 172, 172 | ||
| 1459 | .byte 176, 176, 176, 176, 180, 180, 180, 180 | ||
| 1460 | .byte 184, 184, 184, 184, 188, 188, 188, 188 | ||
| 1461 | .byte 192, 192, 192, 192, 196, 196, 196, 196 | ||
| 1462 | .byte 200, 200, 200, 200, 204, 204, 204, 204 | ||
| 1463 | .byte 208, 208, 208, 208, 212, 212, 212, 212 | ||
| 1464 | .byte 216, 216, 216, 216, 220, 220, 220, 220 | ||
| 1465 | .byte 224, 224, 224, 224, 228, 228, 228, 228 | ||
| 1466 | .byte 232, 232, 232, 232, 236, 236, 236, 236 | ||
| 1467 | .byte 240, 240, 240, 240, 244, 244, 244, 244 | ||
| 1468 | .byte 248, 248, 248, 248, 252, 252, 252, 252 | ||
| 1469 | |||
| 1470 | ! 5 numbers for initial/final permutation | ||
| 1471 | |||
| 1472 | .word 0x0f0f0f0f ! offset 256 | ||
| 1473 | .word 0x0000ffff ! 260 | ||
| 1474 | .word 0x33333333 ! 264 | ||
| 1475 | .word 0x00ff00ff ! 268 | ||
| 1476 | .word 0x55555555 ! 272 | ||
| 1477 | |||
| 1478 | .word 0 ! 276 | ||
| 1479 | .word LOOPS ! 280 | ||
| 1480 | .word 0x0000FC00 ! 284 | ||
| 1481 | .PIC.DES_SPtrans: | ||
| 1482 | .word %r_disp32(DES_SPtrans) ! 32-bit displacement to DES_SPtrans, read by .PIC.me.up | ||
| 1483 | |||
| 1484 | ! input: out0 offset between .PIC.me.up and caller | ||
| 1485 | ! output: out0 pointer to .PIC.me.up | ||
| 1486 | ! out2 pointer to .des_and | ||
| 1487 | ! global1 pointer to DES_SPtrans | ||
| 1488 | .align 32 ! the condition below is the constant 1 so only its first branch is assembled | ||
| 1489 | .PIC.me.up: | ||
| 1490 | add out0,%o7,out0 ! pointer to .PIC.me.up | ||
| 1491 | #if 1 | ||
| 1492 | ld [out0+(.PIC.DES_SPtrans-.PIC.me.up)],global1 ! displacement word stored at .PIC.DES_SPtrans | ||
| 1493 | add global1,(.PIC.DES_SPtrans-.PIC.me.up),global1 ! plus offset of that word from .PIC.me.up | ||
| 1494 | add global1,out0,global1 ! plus address of .PIC.me.up gives pointer to DES_SPtrans | ||
| 1495 | #else | ||
| 1496 | # ifdef OPENSSL_PIC | ||
| 1497 | ! In case anybody wonders why this code is same for both ABI. | ||
| 1498 | ! To start with it is not. Do note LDPTR below. But of course | ||
| 1499 | ! you must be wondering why the rest of it does not contain | ||
| 1500 | ! things like %hh, %hm and %lm. Well, those are needed only | ||
| 1501 | ! if OpenSSL library *itself* will become larger than 4GB, | ||
| 1502 | ! which is not going to happen any time soon. | ||
| 1503 | sethi %hi(DES_SPtrans),global1 | ||
| 1504 | or global1,%lo(DES_SPtrans),global1 | ||
| 1505 | sethi %hi(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 | ||
| 1506 | add global1,out0,global1 | ||
| 1507 | add out2,%lo(_GLOBAL_OFFSET_TABLE_-(.PIC.me.up-.)),out2 | ||
| 1508 | LDPTR [out2+global1],global1 | ||
| 1509 | # elif 0 | ||
| 1510 | setn DES_SPtrans,out2,global1 ! synthetic instruction ! | ||
| 1511 | # elif defined(ABI64) | ||
| 1512 | sethi %hh(DES_SPtrans),out2 | ||
| 1513 | or out2,%hm(DES_SPtrans),out2 | ||
| 1514 | sethi %lm(DES_SPtrans),global1 | ||
| 1515 | or global1,%lo(DES_SPtrans),global1 | ||
| 1516 | sllx out2,32,out2 | ||
| 1517 | or out2,global1,global1 | ||
| 1518 | # else | ||
| 1519 | sethi %hi(DES_SPtrans),global1 | ||
| 1520 | or global1,%lo(DES_SPtrans),global1 | ||
| 1521 | # endif | ||
| 1522 | #endif | ||
| 1523 | retl | ||
| 1524 | add out0,.des_and-.PIC.me.up,out2 ! delay slot: out2 is pointer to .des_and | ||
| 1525 | |||
| 1526 | ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) | ||
| 1527 | ! ***************************************************************** | ||
| 1528 | ! DES in CBC mode. A zero enc selects decryption. The iv for a following call is stored back through ivec. | ||
| 1529 | |||
| 1530 | .align 32 | ||
| 1531 | .global DES_ncbc_encrypt | ||
| 1532 | .type DES_ncbc_encrypt,#function | ||
| 1533 | |||
| 1534 | DES_ncbc_encrypt: | ||
| 1535 | |||
| 1536 | save %sp, FRAME, %sp | ||
| 1537 | |||
| 1538 | define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) | ||
| 1539 | define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) | ||
| 1540 | define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | ||
| 1541 | |||
| 1542 | call .PIC.me.up ! sets global1 to DES_SPtrans and out2 to .des_and | ||
| 1543 | mov .PIC.me.up-(.-4),out0 ! delay slot: offset from the call to .PIC.me.up | ||
| 1544 | |||
| 1545 | cmp in5, 0 ! enc | ||
| 1546 | |||
| 1547 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1548 | be,pn %icc, .ncbc.dec | ||
| 1549 | #else | ||
| 1550 | be .ncbc.dec | ||
| 1551 | #endif | ||
| 1552 | STPTR in4, IVEC ! delay slot: runs on both paths and saves the ivec pointer | ||
| 1553 | |||
| 1554 | ! addr left right temp label | ||
| 1555 | load_little_endian(in4, in5, out5, local3, .LLE1) ! iv | ||
| 1556 | |||
| 1557 | addcc in2, -8, in2 ! bytes missing when first block done | ||
| 1558 | |||
| 1559 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1560 | bl,pn %icc, .ncbc.enc.seven.or.less | ||
| 1561 | #else | ||
| 1562 | bl .ncbc.enc.seven.or.less | ||
| 1563 | #endif | ||
| 1564 | mov in3, in4 ! schedule | ||
| 1565 | |||
| 1566 | .ncbc.enc.next.block: | ||
| 1567 | |||
| 1568 | load_little_endian(in0, out4, global4, local3, .LLE2) ! block | ||
| 1569 | |||
| 1570 | .ncbc.enc.next.block_1: | ||
| 1571 | |||
| 1572 | xor in5, out4, in5 ! iv xor | ||
| 1573 | xor out5, global4, out5 ! iv xor | ||
| 1574 | |||
| 1575 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 1576 | ip_macro(in5, out5, in5, out5, in3, 0, 0, 2) | ||
| 1577 | |||
| 1578 | .ncbc.enc.next.block_2: | ||
| 1579 | |||
| 1580 | !// call .des_enc ! compares in2 to 8 | ||
| 1581 | ! rounds inlined for alignment purposes | ||
| 1582 | |||
| 1583 | add global1, 768, global4 ! address sbox 4 since register used below | ||
| 1584 | |||
| 1585 | rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3 | ||
| 1586 | |||
| 1587 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1588 | bl,pn %icc, .ncbc.enc.next.block_fp | ||
| 1589 | #else | ||
| 1590 | bl .ncbc.enc.next.block_fp | ||
| 1591 | #endif | ||
| 1592 | add in0, 8, in0 ! input address | ||
| 1593 | |||
| 1594 | ! If 8 or more bytes are to be encrypted after this block, | ||
| 1595 | ! we combine final permutation for this block with initial | ||
| 1596 | ! permutation for next block. Load next block: | ||
| 1597 | |||
| 1598 | load_little_endian(in0, global3, global4, local5, .LLE12) | ||
| 1599 | |||
| 1600 | ! parameter 1 original left | ||
| 1601 | ! parameter 2 original right | ||
| 1602 | ! parameter 3 left ip | ||
| 1603 | ! parameter 4 right ip | ||
| 1604 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
| 1605 | ! 2: mov in4 to in3 | ||
| 1606 | ! | ||
| 1607 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
| 1608 | |||
| 1609 | fp_ip_macro(out0, out1, global3, global4, 2) | ||
| 1610 | |||
| 1611 | store_little_endian(in1, out0, out1, local3, .SLE10) ! block | ||
| 1612 | |||
| 1613 | ld [in3], out0 ! key 7531 first round next block | ||
| 1614 | mov in5, local1 | ||
| 1615 | xor global3, out5, in5 ! iv xor next block | ||
| 1616 | |||
| 1617 | ld [in3+4], out1 ! key 8642 | ||
| 1618 | add global1, 512, global3 ! address sbox 3 since register used | ||
| 1619 | xor global4, local1, out5 ! iv xor next block | ||
| 1620 | |||
| 1621 | ba .ncbc.enc.next.block_2 | ||
| 1622 | add in1, 8, in1 ! output address | ||
| 1623 | |||
| 1624 | .ncbc.enc.next.block_fp: | ||
| 1625 | |||
| 1626 | fp_macro(in5, out5) | ||
| 1627 | |||
| 1628 | store_little_endian(in1, in5, out5, local3, .SLE1) ! block | ||
| 1629 | |||
| 1630 | addcc in2, -8, in2 ! bytes missing when next block done | ||
| 1631 | |||
| 1632 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1633 | bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 | ||
| 1634 | #else | ||
| 1635 | bpos .ncbc.enc.next.block | ||
| 1636 | #endif | ||
| 1637 | add in1, 8, in1 | ||
| 1638 | |||
| 1639 | .ncbc.enc.seven.or.less: | ||
| 1640 | |||
| 1641 | cmp in2, -8 | ||
| 1642 | |||
| 1643 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1644 | ble,pt %icc, .ncbc.enc.finish | ||
| 1645 | #else | ||
| 1646 | ble .ncbc.enc.finish | ||
| 1647 | #endif | ||
| 1648 | nop | ||
| 1649 | |||
| 1650 | add in2, 8, local1 ! bytes to load | ||
| 1651 | |||
| 1652 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | ||
| 1653 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) | ||
| 1654 | |||
| 1655 | ! Loads 1 to 7 bytes little endian to global4, out4 | ||
| 1656 | |||
| 1657 | |||
| 1658 | .ncbc.enc.finish: | ||
| 1659 | |||
| 1660 | LDPTR IVEC, local4 | ||
| 1661 | store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec | ||
| 1662 | |||
| 1663 | ret | ||
| 1664 | restore | ||
| 1665 | |||
| 1666 | |||
| 1667 | .ncbc.dec: | ||
| 1668 | |||
| 1669 | STPTR in0, INPUT | ||
| 1670 | cmp in2, 0 ! length | ||
| 1671 | add in3, 120, in3 ! decryption uses the last subkey first | ||
| 1672 | |||
| 1673 | LDPTR IVEC, local7 ! ivec | ||
| 1674 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1675 | ble,pn %icc, .ncbc.dec.finish | ||
| 1676 | #else | ||
| 1677 | ble .ncbc.dec.finish | ||
| 1678 | #endif | ||
| 1679 | mov in3, in4 ! schedule | ||
| 1680 | |||
| 1681 | STPTR in1, OUTPUT | ||
| 1682 | mov in0, local5 ! input | ||
| 1683 | |||
| 1684 | load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec | ||
| 1685 | |||
| 1686 | .ncbc.dec.next.block: | ||
| 1687 | |||
| 1688 | load_little_endian(local5, in5, out5, local3, .LLE4) ! block | ||
| 1689 | |||
| 1690 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1691 | ! parameter 7 1 for mov in1 to in3 | ||
| 1692 | ! parameter 8 1 for mov in3 to in4 | ||
| 1693 | |||
| 1694 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4 | ||
| 1695 | |||
| 1696 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 | ||
| 1697 | |||
| 1698 | ! in2 is bytes left to be stored | ||
| 1699 | ! in2 is compared to 8 in the rounds | ||
| 1700 | |||
| 1701 | xor out5, in0, out4 ! iv xor | ||
| 1702 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1703 | bl,pn %icc, .ncbc.dec.seven.or.less | ||
| 1704 | #else | ||
| 1705 | bl .ncbc.dec.seven.or.less | ||
| 1706 | #endif | ||
| 1707 | xor in5, in1, global4 ! iv xor | ||
| 1708 | |||
| 1709 | ! Load ivec next block now, since input and output address might be the same. | ||
| 1710 | |||
| 1711 | load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv | ||
| 1712 | |||
| 1713 | store_little_endian(local7, out4, global4, local3, .SLE3) | ||
| 1714 | |||
| 1715 | STPTR local5, INPUT | ||
| 1716 | add local7, 8, local7 | ||
| 1717 | addcc in2, -8, in2 | ||
| 1718 | |||
| 1719 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1720 | bg,pt %icc, .ncbc.dec.next.block | ||
| 1721 | #else | ||
| 1722 | bg .ncbc.dec.next.block | ||
| 1723 | #endif | ||
| 1724 | STPTR local7, OUTPUT ! delay slot | ||
| 1725 | |||
| 1726 | |||
| 1727 | .ncbc.dec.store.iv: | ||
| 1728 | |||
| 1729 | LDPTR IVEC, local4 ! ivec | ||
| 1730 | store_little_endian(local4, in0, in1, local5, .SLE4) | ||
| 1731 | |||
| 1732 | .ncbc.dec.finish: | ||
| 1733 | |||
| 1734 | ret | ||
| 1735 | restore | ||
| 1736 | |||
| 1737 | .ncbc.dec.seven.or.less: | ||
| 1738 | |||
| 1739 | load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec | ||
| 1740 | |||
| 1741 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) | ||
| 1742 | |||
| 1743 | |||
| 1744 | .DES_ncbc_encrypt.end: | ||
| 1745 | .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt | ||
| 1746 | |||
| 1747 | |||
| 1748 | ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc) | ||
| 1749 | ! ************************************************************************** | ||
| 1750 | ! Triple DES in CBC mode. A zero enc selects decryption. ivec and enc are arguments 7 and 8 and are loaded through %fp. | ||
| 1751 | |||
| 1752 | .align 32 | ||
| 1753 | .global DES_ede3_cbc_encrypt | ||
| 1754 | .type DES_ede3_cbc_encrypt,#function | ||
| 1755 | |||
| 1756 | DES_ede3_cbc_encrypt: | ||
| 1757 | |||
| 1758 | save %sp, FRAME, %sp | ||
| 1759 | |||
| 1760 | define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) | ||
| 1761 | define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | ||
| 1762 | define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) | ||
| 1763 | |||
| 1764 | call .PIC.me.up ! sets global1 to DES_SPtrans and out2 to .des_and | ||
| 1765 | mov .PIC.me.up-(.-4),out0 ! delay slot: offset from the call to .PIC.me.up | ||
| 1766 | |||
| 1767 | LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc | ||
| 1768 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
| 1769 | cmp local3, 0 ! enc | ||
| 1770 | |||
| 1771 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1772 | be,pn %icc, .ede3.dec | ||
| 1773 | #else | ||
| 1774 | be .ede3.dec | ||
| 1775 | #endif | ||
| 1776 | STPTR in4, KS2 ! delay slot: runs on both paths and saves ks2 | ||
| 1777 | |||
| 1778 | STPTR in5, KS3 | ||
| 1779 | |||
| 1780 | load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec | ||
| 1781 | |||
| 1782 | addcc in2, -8, in2 ! bytes missing after next block | ||
| 1783 | |||
| 1784 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1785 | bl,pn %icc, .ede3.enc.seven.or.less | ||
| 1786 | #else | ||
| 1787 | bl .ede3.enc.seven.or.less | ||
| 1788 | #endif | ||
| 1789 | STPTR in3, KS1 ! delay slot: save ks1 | ||
| 1790 | |||
| 1791 | .ede3.enc.next.block: | ||
| 1792 | |||
| 1793 | load_little_endian(in0, out4, global4, local3, .LLE7) | ||
| 1794 | |||
| 1795 | .ede3.enc.next.block_1: | ||
| 1796 | |||
| 1797 | LDPTR KS2, in4 | ||
| 1798 | xor in5, out4, in5 ! iv xor | ||
| 1799 | xor out5, global4, out5 ! iv xor | ||
| 1800 | |||
| 1801 | LDPTR KS1, in3 | ||
| 1802 | add in4, 120, in4 ! for decryption we use last subkey first | ||
| 1803 | nop | ||
| 1804 | |||
| 1805 | ip_macro(in5, out5, in5, out5, in3) | ||
| 1806 | |||
| 1807 | .ede3.enc.next.block_2: | ||
| 1808 | |||
| 1809 | call .des_enc ! ks1 in3 | ||
| 1810 | nop | ||
| 1811 | |||
| 1812 | call .des_dec ! ks2 in4 | ||
| 1813 | LDPTR KS3, in3 | ||
| 1814 | |||
| 1815 | call .des_enc ! ks3 in3 compares in2 to 8 | ||
| 1816 | nop | ||
| 1817 | |||
| 1818 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1819 | bl,pn %icc, .ede3.enc.next.block_fp | ||
| 1820 | #else | ||
| 1821 | bl .ede3.enc.next.block_fp | ||
| 1822 | #endif | ||
| 1823 | add in0, 8, in0 | ||
| 1824 | |||
| 1825 | ! If 8 or more bytes are to be encrypted after this block, | ||
| 1826 | ! we combine final permutation for this block with initial | ||
| 1827 | ! permutation for next block. Load next block: | ||
| 1828 | |||
| 1829 | load_little_endian(in0, global3, global4, local5, .LLE11) | ||
| 1830 | |||
| 1831 | ! parameter 1 original left | ||
| 1832 | ! parameter 2 original right | ||
| 1833 | ! parameter 3 left ip | ||
| 1834 | ! parameter 4 right ip | ||
| 1835 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
| 1836 | ! 2: mov in4 to in3 | ||
| 1837 | ! | ||
| 1838 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
| 1839 | |||
| 1840 | fp_ip_macro(out0, out1, global3, global4, 1) | ||
| 1841 | |||
| 1842 | store_little_endian(in1, out0, out1, local3, .SLE9) ! block | ||
| 1843 | |||
| 1844 | mov in5, local1 | ||
| 1845 | xor global3, out5, in5 ! iv xor next block | ||
| 1846 | |||
| 1847 | ld [in3], out0 ! key 7531 | ||
| 1848 | add global1, 512, global3 ! address sbox 3 | ||
| 1849 | xor global4, local1, out5 ! iv xor next block | ||
| 1850 | |||
| 1851 | ld [in3+4], out1 ! key 8642 | ||
| 1852 | add global1, 768, global4 ! address sbox 4 | ||
| 1853 | ba .ede3.enc.next.block_2 | ||
| 1854 | add in1, 8, in1 | ||
| 1855 | |||
| 1856 | .ede3.enc.next.block_fp: | ||
| 1857 | |||
| 1858 | fp_macro(in5, out5) | ||
| 1859 | |||
| 1860 | store_little_endian(in1, in5, out5, local3, .SLE5) ! block | ||
| 1861 | |||
| 1862 | addcc in2, -8, in2 ! bytes missing when next block done | ||
| 1863 | |||
| 1864 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1865 | bpos,pt %icc, .ede3.enc.next.block | ||
| 1866 | #else | ||
| 1867 | bpos .ede3.enc.next.block | ||
| 1868 | #endif | ||
| 1869 | add in1, 8, in1 | ||
| 1870 | |||
| 1871 | .ede3.enc.seven.or.less: | ||
| 1872 | |||
| 1873 | cmp in2, -8 | ||
| 1874 | |||
| 1875 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1876 | ble,pt %icc, .ede3.enc.finish | ||
| 1877 | #else | ||
| 1878 | ble .ede3.enc.finish | ||
| 1879 | #endif | ||
| 1880 | nop | ||
| 1881 | |||
| 1882 | add in2, 8, local1 ! bytes to load | ||
| 1883 | |||
| 1884 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | ||
| 1885 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) | ||
| 1886 | |||
| 1887 | .ede3.enc.finish: | ||
| 1888 | |||
| 1889 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
| 1890 | store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec | ||
| 1891 | |||
| 1892 | ret | ||
| 1893 | restore | ||
| 1894 | |||
| 1895 | .ede3.dec: | ||
| 1896 | |||
| 1897 | STPTR in0, INPUT ! the input and output slot macros come from DES_ncbc_encrypt | ||
| 1898 | add in5, 120, in5 ! ks3: last subkey first for decryption | ||
| 1899 | |||
| 1900 | STPTR in1, OUTPUT | ||
| 1901 | mov in0, local5 | ||
| 1902 | add in3, 120, in3 ! ks1: last subkey first for decryption | ||
| 1903 | |||
| 1904 | STPTR in3, KS1 | ||
| 1905 | cmp in2, 0 | ||
| 1906 | |||
| 1907 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1908 | ble %icc, .ede3.dec.finish | ||
| 1909 | #else | ||
| 1910 | ble .ede3.dec.finish | ||
| 1911 | #endif | ||
| 1912 | STPTR in5, KS3 ! delay slot | ||
| 1913 | |||
| 1914 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv | ||
| 1915 | load_little_endian(local7, in0, in1, local3, .LLE8) | ||
| 1916 | |||
| 1917 | .ede3.dec.next.block: | ||
| 1918 | |||
| 1919 | load_little_endian(local5, in5, out5, local3, .LLE9) | ||
| 1920 | |||
| 1921 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1922 | ! parameter 7 1 for mov in1 to in3 | ||
| 1923 | ! parameter 8 1 for mov in3 to in4 | ||
| 1924 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 1925 | |||
| 1926 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 | ||
| 1927 | |||
| 1928 | call .des_enc ! ks2 in3 | ||
| 1929 | LDPTR KS1, in4 | ||
| 1930 | |||
| 1931 | call .des_dec ! ks1 in4 | ||
| 1932 | nop | ||
| 1933 | |||
| 1934 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 | ||
| 1935 | |||
| 1936 | ! in2 is bytes left to be stored | ||
| 1937 | ! in2 is compared to 8 in the rounds | ||
| 1938 | |||
| 1939 | xor out5, in0, out4 | ||
| 1940 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1941 | bl,pn %icc, .ede3.dec.seven.or.less | ||
| 1942 | #else | ||
| 1943 | bl .ede3.dec.seven.or.less | ||
| 1944 | #endif | ||
| 1945 | xor in5, in1, global4 | ||
| 1946 | |||
| 1947 | load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block | ||
| 1948 | |||
| 1949 | store_little_endian(local7, out4, global4, local3, .SLE7) ! block | ||
| 1950 | |||
| 1951 | STPTR local5, INPUT | ||
| 1952 | addcc in2, -8, in2 | ||
| 1953 | add local7, 8, local7 | ||
| 1954 | |||
| 1955 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1956 | bg,pt %icc, .ede3.dec.next.block | ||
| 1957 | #else | ||
| 1958 | bg .ede3.dec.next.block | ||
| 1959 | #endif | ||
| 1960 | STPTR local7, OUTPUT ! delay slot | ||
| 1961 | |||
| 1962 | .ede3.dec.store.iv: | ||
| 1963 | |||
| 1964 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
| 1965 | store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec | ||
| 1966 | |||
| 1967 | .ede3.dec.finish: | ||
| 1968 | |||
| 1969 | ret | ||
| 1970 | restore | ||
| 1971 | |||
| 1972 | .ede3.dec.seven.or.less: | ||
| 1973 | |||
| 1974 | load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv | ||
| 1975 | |||
| 1976 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) | ||
| 1977 | |||
| 1978 | |||
| 1979 | .DES_ede3_cbc_encrypt.end: | ||
| 1980 | .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt | ||
diff --git a/src/lib/libcrypto/des/asm/desboth.pl b/src/lib/libcrypto/des/asm/desboth.pl new file mode 100644 index 0000000000..eec00886e4 --- /dev/null +++ b/src/lib/libcrypto/des/asm/desboth.pl | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
| 3 | $L="edi"; # register used for the word loaded from offset 0 of the data in DES_encrypt3 | ||
| 4 | $R="esi"; # register used for the word loaded from offset 4 of the data in DES_encrypt3 | ||
| 5 | |||
| 6 | sub DES_encrypt3 | ||
| 7 | { | ||
| 8 | local($name,$enc)=@_; # $enc selects which of the two code variants below is generated | ||
| 9 | # Emits a routine named $name that applies IP_new, calls DES_encrypt2 three times, then applies FP_new. | ||
| 10 | &function_begin_B($name,""); | ||
| 11 | &push("ebx"); | ||
| 12 | &mov("ebx",&wparam(0)); # presumably the first argument: pointer to the two data words (confirm in x86asm.pl) | ||
| 13 | |||
| 14 | &push("ebp"); | ||
| 15 | &push("esi"); | ||
| 16 | |||
| 17 | &push("edi"); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &comment("Load the data words"); | ||
| 21 | &mov($L,&DWP(0,"ebx","",0)); | ||
| 22 | &mov($R,&DWP(4,"ebx","",0)); | ||
| 23 | &stack_push(3); # presumably reserves the three &swtmp slots written below (confirm in x86asm.pl) | ||
| 24 | |||
| 25 | &comment(""); | ||
| 26 | &comment("IP"); | ||
| 27 | &IP_new($L,$R,"edx",0); | ||
| 28 | |||
| 29 | # put them back; the key schedule arguments go to eax, edi, esi in the order they will be used | ||
| 30 | |||
| 31 | if ($enc) | ||
| 32 | { | ||
| 33 | &mov(&DWP(4,"ebx","",0),$R); | ||
| 34 | &mov("eax",&wparam(1)); | ||
| 35 | &mov(&DWP(0,"ebx","",0),"edx"); | ||
| 36 | &mov("edi",&wparam(2)); | ||
| 37 | &mov("esi",&wparam(3)); | ||
| 38 | } | ||
| 39 | else | ||
| 40 | { | ||
| 41 | &mov(&DWP(4,"ebx","",0),$R); | ||
| 42 | &mov("esi",&wparam(1)); | ||
| 43 | &mov(&DWP(0,"ebx","",0),"edx"); | ||
| 44 | &mov("edi",&wparam(2)); | ||
| 45 | &mov("eax",&wparam(3)); | ||
| 46 | } | ||
| 47 | &mov(&swtmp(2), (DWC(($enc)?"1":"0"))); # first call: flag 1 when $enc, else 0 | ||
| 48 | &mov(&swtmp(1), "eax"); # key schedule placed in eax above | ||
| 49 | &mov(&swtmp(0), "ebx"); # data pointer | ||
| 50 | &call("DES_encrypt2"); | ||
| 51 | &mov(&swtmp(2), (DWC(($enc)?"0":"1"))); # second call: flag inverted | ||
| 52 | &mov(&swtmp(1), "edi"); # key schedule from wparam(2) | ||
| 53 | &mov(&swtmp(0), "ebx"); | ||
| 54 | &call("DES_encrypt2"); | ||
| 55 | &mov(&swtmp(2), (DWC(($enc)?"1":"0"))); # third call: same flag as the first | ||
| 56 | &mov(&swtmp(1), "esi"); # key schedule placed in esi above | ||
| 57 | &mov(&swtmp(0), "ebx"); | ||
| 58 | &call("DES_encrypt2"); | ||
| 59 | |||
| 60 | &stack_pop(3); | ||
| 61 | &mov($L,&DWP(0,"ebx","",0)); | ||
| 62 | &mov($R,&DWP(4,"ebx","",0)); | ||
| 63 | |||
| 64 | &comment(""); | ||
| 65 | &comment("FP"); | ||
| 66 | &FP_new($L,$R,"eax",0); | ||
| 67 | |||
| 68 | &mov(&DWP(0,"ebx","",0),"eax"); | ||
| 69 | &mov(&DWP(4,"ebx","",0),$R); | ||
| 70 | |||
| 71 | &pop("edi"); # restore registers in reverse order of the pushes above | ||
| 72 | &pop("esi"); | ||
| 73 | &pop("ebp"); | ||
| 74 | &pop("ebx"); | ||
| 75 | &ret(); | ||
| 76 | &function_end_B($name); | ||
| 77 | } | ||
| 78 | |||
| 79 | |||
diff --git a/src/lib/libcrypto/des/asm/readme b/src/lib/libcrypto/des/asm/readme new file mode 100644 index 0000000000..1beafe253b --- /dev/null +++ b/src/lib/libcrypto/des/asm/readme | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | First up, let me say I don't like writing in assembler. It is not portable, | ||
| 2 | dependent on the particular CPU architecture release and is generally a pig | ||
| 3 | to debug and get right. Having said that, the x86 architecture is probably | ||
| 4 | the most important for speed due to number of boxes and since | ||
| 5 | it appears to be the worst architecture to get | ||
| 6 | good C compilers for. So due to this, I have lowered myself to do | ||
| 7 | assembler for the inner DES routines in libdes :-). | ||
| 8 | |||
| 9 | The file to implement in assembler is des_enc.c. Replace the following | ||
| 10 | 4 functions | ||
| 11 | des_encrypt1(DES_LONG data[2],des_key_schedule ks, int encrypt); | ||
| 12 | des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt); | ||
| 13 | des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); | ||
| 14 | des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); | ||
| 15 | |||
| 16 | They encrypt/decrypt the 64 bits held in 'data' using | ||
| 17 | the 'ks' key schedules. The only difference between the 4 functions is that | ||
| 18 | des_encrypt2() does not perform IP() or FP() on the data (this is an | ||
| 19 | optimization for when doing triple DES) and des_encrypt3() and des_decrypt3() | ||
| 20 | perform triple des. The triple DES routines are in here because it does | ||
| 21 | make a big difference to have them located near the des_encrypt2 function | ||
| 22 | at link time.. | ||
| 23 | |||
| 24 | Now as we all know, there are lots of different operating systems running on | ||
| 25 | x86 boxes, and unfortunately they normally try to make sure their assembler | ||
| 26 | formatting is not the same as other people's. | ||
| 27 | The 4 main formats I know of are | ||
| 28 | Microsoft Windows 95/Windows NT | ||
| 29 | Elf Includes Linux and FreeBSD(?). | ||
| 30 | a.out The older Linux. | ||
| 31 | Solaris Same as Elf but different comments :-(. | ||
| 32 | |||
| 33 | Now I was not overly keen to write 4 different copies of the same code, | ||
| 34 | so I wrote a few perl routines to output the correct assembler, given | ||
| 35 | a target assembler type. This code is ugly and is just a hack. | ||
| 36 | The libraries are x86unix.pl and x86ms.pl. | ||
| 37 | des586.pl, des686.pl and des-som[23].pl are the programs to actually | ||
| 38 | generate the assembler. | ||
| 39 | |||
| 40 | So to generate elf assembler | ||
| 41 | perl des-som3.pl elf >dx86-elf.s | ||
| 42 | For Windows 95/NT | ||
| 43 | perl des-som2.pl win32 >win32.asm | ||
| 44 | |||
| 45 | [ update 4 Jan 1996 ] | ||
| 46 | I have added another way to do things. | ||
| 47 | perl des-som3.pl cpp >dx86-cpp.s | ||
| 48 | generates a file that will be included by dx86unix.cpp when it is compiled. | ||
| 49 | To build for elf, a.out, solaris, bsdi etc, | ||
| 50 | cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o | ||
| 51 | cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o | ||
| 52 | cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o | ||
| 53 | cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o | ||
| 54 | This was done to cut down the number of files in the distribution. | ||
| 55 | |||
| 56 | Now the ugly part. I acquired my copy of Intel's | ||
| 57 | "Optimization's For Intel's 32-Bit Processors" and found a few interesting | ||
| 58 | things. First, the aim of the exercise is to 'extract' one byte at a time | ||
| 59 | from a word and do an array lookup. This involves getting the byte from | ||
| 60 | the 4 locations in the word and moving it to a new word and doing the lookup. | ||
| 61 | The most obvious way to do this is | ||
| 62 | xor eax, eax # clear word | ||
| 63 | movb al, cl # get low byte | ||
| 64 | xor edi DWORD PTR 0x100+des_SP[eax] # xor in word | ||
| 65 | movb al, ch # get next byte | ||
| 66 | xor edi DWORD PTR 0x300+des_SP[eax] # xor in word | ||
| 67 | shr ecx 16 | ||
| 68 | which seems ok. For the pentium, this system appears to be the best. | ||
| 69 | One has to do instruction interleaving to keep both functional units | ||
| 70 | operating, but it is basically very efficient. | ||
| 71 | |||
| 72 | Now the crunch. When a full register is used after a partial write, eg. | ||
| 73 | mov al, cl | ||
| 74 | xor edi, DWORD PTR 0x100+des_SP[eax] | ||
| 75 | 386 - 1 cycle stall | ||
| 76 | 486 - 1 cycle stall | ||
| 77 | 586 - 0 cycle stall | ||
| 78 | 686 - at least 7 cycle stall (page 22 of the above mentioned document). | ||
| 79 | |||
| 80 | So the technique that produces the best results on a pentium, according to | ||
| 81 | the documentation, will produce hideous results on a pentium pro. | ||
| 82 | |||
| 83 | To get around this, des686.pl will generate code that is not as fast on | ||
| 84 | a pentium, but should be very good on a pentium pro. | ||
| 85 | mov eax, ecx # copy word | ||
| 86 | shr ecx, 8 # line up next byte | ||
| 87 | and eax, 0fch # mask byte | ||
| 88 | xor edi, DWORD PTR 0x100+des_SP[eax] # xor in array lookup | ||
| 89 | mov eax, ecx # get word | ||
| 90 | shr ecx, 8 # line up next byte | ||
| 91 | and eax, 0fch # mask byte | ||
| 92 | xor edi, DWORD PTR 0x300+des_SP[eax] # xor in array lookup | ||
| 93 | |||
| 94 | Due to the execution units in the pentium, this actually works quite well. | ||
| 95 | For a pentium pro it should be very good. This is the type of output | ||
| 96 | Visual C++ generates. | ||
| 97 | |||
| 98 | There is a third option. Instead of using | ||
| 99 | mov al, ch | ||
| 100 | which is bad on the pentium pro, one may be able to use | ||
| 101 | movzx eax, ch | ||
| 102 | which may not incur the partial write penalty. On the pentium, | ||
| 103 | this instruction takes 4 cycles so is not worth using but on the | ||
| 104 | pentium pro it appears it may be worth while. I need access to one to | ||
| 105 | experiment :-). | ||
| 106 | |||
| 107 | eric (20 Oct 1996) | ||
| 108 | |||
| 109 | 22 Nov 1996 - I have asked people to run the 2 different versions on pentium | ||
| 110 | pros and it appears that the intel documentation is wrong. The | ||
| 111 | mov al,bh is still faster on a pentium pro, so just use the des586.pl | ||
| 112 | instead of des686.pl | ||
| 113 | |||
| 114 | 3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these | ||
| 115 | functions into des_enc.c because it does make a massive performance | ||
| 116 | difference on some boxes to have the functions' code located close to | ||
| 117 | the des_encrypt2() function. | ||
| 118 | |||
| 119 | 9 Jan 1997 - des-som2.pl is now the correct perl script to use for | ||
| 120 | pentiums. It contains an inner loop from | ||
| 121 | Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at | ||
| 122 | 273,000 per second. He had a previous version at 250,000 and the best | ||
| 123 | I was able to get was 203,000. The content has not changed, this is all | ||
| 124 | due to instruction sequencing (and actual instruction choice) which is able | ||
| 125 | to keep both functional units of the pentium going. | ||
| 126 | We may have lost the ugly register usage restrictions when x86 went 32 bit | ||
| 127 | but for the pentium it has been replaced by evil instruction ordering tricks. | ||
| 128 | |||
| 129 | 13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf. | ||
| 130 | raw DES at 281,000 per second on a pentium 100. | ||
| 131 | |||
