diff options
Diffstat (limited to '')
| -rw-r--r-- | src/lib/libcrypto/des/asm/crypt586.pl | 209 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/des-586.pl | 453 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/des_enc.m4 | 2099 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/desboth.pl | 79 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/readme | 131 |
5 files changed, 0 insertions, 2971 deletions
diff --git a/src/lib/libcrypto/des/asm/crypt586.pl b/src/lib/libcrypto/des/asm/crypt586.pl deleted file mode 100644 index e36f7d44bd..0000000000 --- a/src/lib/libcrypto/des/asm/crypt586.pl +++ /dev/null | |||
| @@ -1,209 +0,0 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # | ||
| 3 | # The inner loop instruction sequence and the IP/FP modifications are from | ||
| 4 | # Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> | ||
| 5 | # I've added the stuff needed for crypt() but I've not worried about making | ||
| 6 | # things perfect. | ||
| 7 | # | ||
| 8 | |||
| 9 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | ||
| 10 | push(@INC,"${dir}","${dir}../../perlasm"); | ||
| 11 | require "x86asm.pl"; | ||
| 12 | |||
| 13 | &asm_init($ARGV[0],"crypt586.pl"); | ||
| 14 | |||
| 15 | $L="edi"; | ||
| 16 | $R="esi"; | ||
| 17 | |||
| 18 | &external_label("DES_SPtrans"); | ||
| 19 | &fcrypt_body("fcrypt_body"); | ||
| 20 | &asm_finish(); | ||
| 21 | |||
| 22 | sub fcrypt_body | ||
| 23 | { | ||
| 24 | local($name,$do_ip)=@_; | ||
| 25 | |||
| 26 | &function_begin($name); | ||
| 27 | |||
| 28 | &comment(""); | ||
| 29 | &comment("Load the 2 words"); | ||
| 30 | $trans="ebp"; | ||
| 31 | |||
| 32 | &xor( $L, $L); | ||
| 33 | &xor( $R, $R); | ||
| 34 | |||
| 35 | # PIC-ification:-) | ||
| 36 | &picmeup("edx","DES_SPtrans"); | ||
| 37 | #if ($cpp) { &picmeup("edx","DES_SPtrans"); } | ||
| 38 | #else { &lea("edx",&DWP("DES_SPtrans")); } | ||
| 39 | &push("edx"); # becomes &swtmp(1) | ||
| 40 | # | ||
| 41 | &mov($trans,&wparam(1)); # reloaded with DES_SPtrans in D_ENCRYPT | ||
| 42 | |||
| 43 | &push(&DWC(25)); # add a variable | ||
| 44 | |||
| 45 | &set_label("start"); | ||
| 46 | for ($i=0; $i<16; $i+=2) | ||
| 47 | { | ||
| 48 | &comment(""); | ||
| 49 | &comment("Round $i"); | ||
| 50 | &D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx"); | ||
| 51 | |||
| 52 | &comment(""); | ||
| 53 | &comment("Round ".sprintf("%d",$i+1)); | ||
| 54 | &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$trans,"eax","ebx","ecx","edx"); | ||
| 55 | } | ||
| 56 | &mov("ebx", &swtmp(0)); | ||
| 57 | &mov("eax", $L); | ||
| 58 | &dec("ebx"); | ||
| 59 | &mov($L, $R); | ||
| 60 | &mov($R, "eax"); | ||
| 61 | &mov(&swtmp(0), "ebx"); | ||
| 62 | &jnz(&label("start")); | ||
| 63 | |||
| 64 | &comment(""); | ||
| 65 | &comment("FP"); | ||
| 66 | &mov("edx",&wparam(0)); | ||
| 67 | |||
| 68 | &FP_new($R,$L,"eax",3); | ||
| 69 | &mov(&DWP(0,"edx","",0),"eax"); | ||
| 70 | &mov(&DWP(4,"edx","",0),$L); | ||
| 71 | |||
| 72 | &add("esp",8); # remove variables | ||
| 73 | |||
| 74 | &function_end($name); | ||
| 75 | } | ||
| 76 | |||
| 77 | sub D_ENCRYPT | ||
| 78 | { | ||
| 79 | local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t)=@_; | ||
| 80 | |||
| 81 | &mov( $u, &wparam(2)); # 2 | ||
| 82 | &mov( $t, $R); | ||
| 83 | &shr( $t, 16); # 1 | ||
| 84 | &mov( $tmp2, &wparam(3)); # 2 | ||
| 85 | &xor( $t, $R); # 1 | ||
| 86 | |||
| 87 | &and( $u, $t); # 2 | ||
| 88 | &and( $t, $tmp2); # 2 | ||
| 89 | |||
| 90 | &mov( $tmp1, $u); | ||
| 91 | &shl( $tmp1, 16); # 1 | ||
| 92 | &mov( $tmp2, $t); | ||
| 93 | &shl( $tmp2, 16); # 1 | ||
| 94 | &xor( $u, $tmp1); # 2 | ||
| 95 | &xor( $t, $tmp2); # 2 | ||
| 96 | &mov( $tmp1, &DWP(&n2a($S*4),$trans,"",0)); # 2 | ||
| 97 | &xor( $u, $tmp1); | ||
| 98 | &mov( $tmp2, &DWP(&n2a(($S+1)*4),$trans,"",0)); # 2 | ||
| 99 | &xor( $u, $R); | ||
| 100 | &xor( $t, $R); | ||
| 101 | &xor( $t, $tmp2); | ||
| 102 | |||
| 103 | &and( $u, "0xfcfcfcfc" ); # 2 | ||
| 104 | &xor( $tmp1, $tmp1); # 1 | ||
| 105 | &and( $t, "0xcfcfcfcf" ); # 2 | ||
| 106 | &xor( $tmp2, $tmp2); | ||
| 107 | &movb( &LB($tmp1), &LB($u) ); | ||
| 108 | &movb( &LB($tmp2), &HB($u) ); | ||
| 109 | &rotr( $t, 4 ); | ||
| 110 | &mov( $trans, &swtmp(1)); | ||
| 111 | &xor( $L, &DWP(" ",$trans,$tmp1,0)); | ||
| 112 | &movb( &LB($tmp1), &LB($t) ); | ||
| 113 | &xor( $L, &DWP("0x200",$trans,$tmp2,0)); | ||
| 114 | &movb( &LB($tmp2), &HB($t) ); | ||
| 115 | &shr( $u, 16); | ||
| 116 | &xor( $L, &DWP("0x100",$trans,$tmp1,0)); | ||
| 117 | &movb( &LB($tmp1), &HB($u) ); | ||
| 118 | &shr( $t, 16); | ||
| 119 | &xor( $L, &DWP("0x300",$trans,$tmp2,0)); | ||
| 120 | &movb( &LB($tmp2), &HB($t) ); | ||
| 121 | &and( $u, "0xff" ); | ||
| 122 | &and( $t, "0xff" ); | ||
| 123 | &mov( $tmp1, &DWP("0x600",$trans,$tmp1,0)); | ||
| 124 | &xor( $L, $tmp1); | ||
| 125 | &mov( $tmp1, &DWP("0x700",$trans,$tmp2,0)); | ||
| 126 | &xor( $L, $tmp1); | ||
| 127 | &mov( $tmp1, &DWP("0x400",$trans,$u,0)); | ||
| 128 | &xor( $L, $tmp1); | ||
| 129 | &mov( $tmp1, &DWP("0x500",$trans,$t,0)); | ||
| 130 | &xor( $L, $tmp1); | ||
| 131 | &mov( $trans, &wparam(1)); | ||
| 132 | } | ||
| 133 | |||
| 134 | sub n2a | ||
| 135 | { | ||
| 136 | sprintf("%d",$_[0]); | ||
| 137 | } | ||
| 138 | |||
| 139 | # now has a side effect of rotating $a by $shift | ||
| 140 | sub R_PERM_OP | ||
| 141 | { | ||
| 142 | local($a,$b,$tt,$shift,$mask,$last)=@_; | ||
| 143 | |||
| 144 | &rotl( $a, $shift ) if ($shift != 0); | ||
| 145 | &mov( $tt, $a ); | ||
| 146 | &xor( $a, $b ); | ||
| 147 | &and( $a, $mask ); | ||
| 148 | if ($notlast eq $b) | ||
| 149 | { | ||
| 150 | &xor( $b, $a ); | ||
| 151 | &xor( $tt, $a ); | ||
| 152 | } | ||
| 153 | else | ||
| 154 | { | ||
| 155 | &xor( $tt, $a ); | ||
| 156 | &xor( $b, $a ); | ||
| 157 | } | ||
| 158 | &comment(""); | ||
| 159 | } | ||
| 160 | |||
| 161 | sub IP_new | ||
| 162 | { | ||
| 163 | local($l,$r,$tt,$lr)=@_; | ||
| 164 | |||
| 165 | &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l); | ||
| 166 | &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l); | ||
| 167 | &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r); | ||
| 168 | &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r); | ||
| 169 | &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r); | ||
| 170 | |||
| 171 | if ($lr != 3) | ||
| 172 | { | ||
| 173 | if (($lr-3) < 0) | ||
| 174 | { &rotr($tt, 3-$lr); } | ||
| 175 | else { &rotl($tt, $lr-3); } | ||
| 176 | } | ||
| 177 | if ($lr != 2) | ||
| 178 | { | ||
| 179 | if (($lr-2) < 0) | ||
| 180 | { &rotr($r, 2-$lr); } | ||
| 181 | else { &rotl($r, $lr-2); } | ||
| 182 | } | ||
| 183 | } | ||
| 184 | |||
| 185 | sub FP_new | ||
| 186 | { | ||
| 187 | local($l,$r,$tt,$lr)=@_; | ||
| 188 | |||
| 189 | if ($lr != 2) | ||
| 190 | { | ||
| 191 | if (($lr-2) < 0) | ||
| 192 | { &rotl($r, 2-$lr); } | ||
| 193 | else { &rotr($r, $lr-2); } | ||
| 194 | } | ||
| 195 | if ($lr != 3) | ||
| 196 | { | ||
| 197 | if (($lr-3) < 0) | ||
| 198 | { &rotl($l, 3-$lr); } | ||
| 199 | else { &rotr($l, $lr-3); } | ||
| 200 | } | ||
| 201 | |||
| 202 | &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); | ||
| 203 | &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r); | ||
| 204 | &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l); | ||
| 205 | &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l); | ||
| 206 | &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r); | ||
| 207 | &rotr($tt , 4); | ||
| 208 | } | ||
| 209 | |||
diff --git a/src/lib/libcrypto/des/asm/des-586.pl b/src/lib/libcrypto/des/asm/des-586.pl deleted file mode 100644 index 5b5f39cebd..0000000000 --- a/src/lib/libcrypto/des/asm/des-586.pl +++ /dev/null | |||
| @@ -1,453 +0,0 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # | ||
| 3 | # The inner loop instruction sequence and the IP/FP modifications are from | ||
| 4 | # Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> | ||
| 5 | # | ||
| 6 | |||
| 7 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | ||
| 8 | push(@INC,"${dir}","${dir}../../perlasm"); | ||
| 9 | require "x86asm.pl"; | ||
| 10 | require "cbc.pl"; | ||
| 11 | require "desboth.pl"; | ||
| 12 | |||
| 13 | # base code is in Microsoft | ||
| 14 | # op dest, source | ||
| 15 | # format. | ||
| 16 | # | ||
| 17 | |||
| 18 | &asm_init($ARGV[0],"des-586.pl"); | ||
| 19 | |||
| 20 | $L="edi"; | ||
| 21 | $R="esi"; | ||
| 22 | $trans="ebp"; | ||
| 23 | $small_footprint=1 if (grep(/\-DOPENSSL_SMALL_FOOTPRINT/,@ARGV)); | ||
| 24 | # one can discuss setting this variable to 1 unconditionally, as | ||
| 25 | # the folded loop is only 3% slower than unrolled, but >7 times smaller | ||
| 26 | |||
| 27 | &public_label("DES_SPtrans"); | ||
| 28 | |||
| 29 | &DES_encrypt_internal(); | ||
| 30 | &DES_decrypt_internal(); | ||
| 31 | &DES_encrypt("DES_encrypt1",1); | ||
| 32 | &DES_encrypt("DES_encrypt2",0); | ||
| 33 | &DES_encrypt3("DES_encrypt3",1); | ||
| 34 | &DES_encrypt3("DES_decrypt3",0); | ||
| 35 | &cbc("DES_ncbc_encrypt","DES_encrypt1","DES_encrypt1",0,4,5,3,5,-1); | ||
| 36 | &cbc("DES_ede3_cbc_encrypt","DES_encrypt3","DES_decrypt3",0,6,7,3,4,5); | ||
| 37 | &DES_SPtrans(); | ||
| 38 | |||
| 39 | &asm_finish(); | ||
| 40 | |||
| 41 | sub DES_encrypt_internal() | ||
| 42 | { | ||
| 43 | &function_begin_B("_x86_DES_encrypt"); | ||
| 44 | |||
| 45 | if ($small_footprint) | ||
| 46 | { | ||
| 47 | &lea("edx",&DWP(128,"ecx")); | ||
| 48 | &push("edx"); | ||
| 49 | &push("ecx"); | ||
| 50 | &set_label("eloop"); | ||
| 51 | &D_ENCRYPT(0,$L,$R,0,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 52 | &comment(""); | ||
| 53 | &D_ENCRYPT(1,$R,$L,2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 54 | &comment(""); | ||
| 55 | &add("ecx",16); | ||
| 56 | &cmp("ecx",&swtmp(1)); | ||
| 57 | &mov(&swtmp(0),"ecx"); | ||
| 58 | &jb(&label("eloop")); | ||
| 59 | &add("esp",8); | ||
| 60 | } | ||
| 61 | else | ||
| 62 | { | ||
| 63 | &push("ecx"); | ||
| 64 | for ($i=0; $i<16; $i+=2) | ||
| 65 | { | ||
| 66 | &comment("Round $i"); | ||
| 67 | &D_ENCRYPT($i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 68 | &comment("Round ".sprintf("%d",$i+1)); | ||
| 69 | &D_ENCRYPT($i+1,$R,$L,($i+1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 70 | } | ||
| 71 | &add("esp",4); | ||
| 72 | } | ||
| 73 | &ret(); | ||
| 74 | |||
| 75 | &function_end_B("_x86_DES_encrypt"); | ||
| 76 | } | ||
| 77 | |||
| 78 | sub DES_decrypt_internal() | ||
| 79 | { | ||
| 80 | &function_begin_B("_x86_DES_decrypt"); | ||
| 81 | |||
| 82 | if ($small_footprint) | ||
| 83 | { | ||
| 84 | &push("ecx"); | ||
| 85 | &lea("ecx",&DWP(128,"ecx")); | ||
| 86 | &push("ecx"); | ||
| 87 | &set_label("dloop"); | ||
| 88 | &D_ENCRYPT(0,$L,$R,-2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 89 | &comment(""); | ||
| 90 | &D_ENCRYPT(1,$R,$L,-4,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 91 | &comment(""); | ||
| 92 | &sub("ecx",16); | ||
| 93 | &cmp("ecx",&swtmp(1)); | ||
| 94 | &mov(&swtmp(0),"ecx"); | ||
| 95 | &ja(&label("dloop")); | ||
| 96 | &add("esp",8); | ||
| 97 | } | ||
| 98 | else | ||
| 99 | { | ||
| 100 | &push("ecx"); | ||
| 101 | for ($i=15; $i>0; $i-=2) | ||
| 102 | { | ||
| 103 | &comment("Round $i"); | ||
| 104 | &D_ENCRYPT(15-$i,$L,$R,$i*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 105 | &comment("Round ".sprintf("%d",$i-1)); | ||
| 106 | &D_ENCRYPT(15-$i+1,$R,$L,($i-1)*2,$trans,"eax","ebx","ecx","edx",&swtmp(0)); | ||
| 107 | } | ||
| 108 | &add("esp",4); | ||
| 109 | } | ||
| 110 | &ret(); | ||
| 111 | |||
| 112 | &function_end_B("_x86_DES_decrypt"); | ||
| 113 | } | ||
| 114 | |||
| 115 | sub DES_encrypt | ||
| 116 | { | ||
| 117 | local($name,$do_ip)=@_; | ||
| 118 | |||
| 119 | &function_begin_B($name); | ||
| 120 | |||
| 121 | &push("esi"); | ||
| 122 | &push("edi"); | ||
| 123 | |||
| 124 | &comment(""); | ||
| 125 | &comment("Load the 2 words"); | ||
| 126 | |||
| 127 | if ($do_ip) | ||
| 128 | { | ||
| 129 | &mov($R,&wparam(0)); | ||
| 130 | &xor( "ecx", "ecx" ); | ||
| 131 | |||
| 132 | &push("ebx"); | ||
| 133 | &push("ebp"); | ||
| 134 | |||
| 135 | &mov("eax",&DWP(0,$R,"",0)); | ||
| 136 | &mov("ebx",&wparam(2)); # get encrypt flag | ||
| 137 | &mov($L,&DWP(4,$R,"",0)); | ||
| 138 | &comment(""); | ||
| 139 | &comment("IP"); | ||
| 140 | &IP_new("eax",$L,$R,3); | ||
| 141 | } | ||
| 142 | else | ||
| 143 | { | ||
| 144 | &mov("eax",&wparam(0)); | ||
| 145 | &xor( "ecx", "ecx" ); | ||
| 146 | |||
| 147 | &push("ebx"); | ||
| 148 | &push("ebp"); | ||
| 149 | |||
| 150 | &mov($R,&DWP(0,"eax","",0)); | ||
| 151 | &mov("ebx",&wparam(2)); # get encrypt flag | ||
| 152 | &rotl($R,3); | ||
| 153 | &mov($L,&DWP(4,"eax","",0)); | ||
| 154 | &rotl($L,3); | ||
| 155 | } | ||
| 156 | |||
| 157 | # PIC-ification:-) | ||
| 158 | &call (&label("pic_point")); | ||
| 159 | &set_label("pic_point"); | ||
| 160 | &blindpop($trans); | ||
| 161 | &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans)); | ||
| 162 | |||
| 163 | &mov( "ecx", &wparam(1) ); | ||
| 164 | |||
| 165 | &cmp("ebx","0"); | ||
| 166 | &je(&label("decrypt")); | ||
| 167 | &call("_x86_DES_encrypt"); | ||
| 168 | &jmp(&label("done")); | ||
| 169 | &set_label("decrypt"); | ||
| 170 | &call("_x86_DES_decrypt"); | ||
| 171 | &set_label("done"); | ||
| 172 | |||
| 173 | if ($do_ip) | ||
| 174 | { | ||
| 175 | &comment(""); | ||
| 176 | &comment("FP"); | ||
| 177 | &mov("edx",&wparam(0)); | ||
| 178 | &FP_new($L,$R,"eax",3); | ||
| 179 | |||
| 180 | &mov(&DWP(0,"edx","",0),"eax"); | ||
| 181 | &mov(&DWP(4,"edx","",0),$R); | ||
| 182 | } | ||
| 183 | else | ||
| 184 | { | ||
| 185 | &comment(""); | ||
| 186 | &comment("Fixup"); | ||
| 187 | &rotr($L,3); # r | ||
| 188 | &mov("eax",&wparam(0)); | ||
| 189 | &rotr($R,3); # l | ||
| 190 | &mov(&DWP(0,"eax","",0),$L); | ||
| 191 | &mov(&DWP(4,"eax","",0),$R); | ||
| 192 | } | ||
| 193 | |||
| 194 | &pop("ebp"); | ||
| 195 | &pop("ebx"); | ||
| 196 | &pop("edi"); | ||
| 197 | &pop("esi"); | ||
| 198 | &ret(); | ||
| 199 | |||
| 200 | &function_end_B($name); | ||
| 201 | } | ||
| 202 | |||
| 203 | sub D_ENCRYPT | ||
| 204 | { | ||
| 205 | local($r,$L,$R,$S,$trans,$u,$tmp1,$tmp2,$t,$wp1)=@_; | ||
| 206 | |||
| 207 | &mov( $u, &DWP(&n2a($S*4),$tmp2,"",0)); | ||
| 208 | &xor( $tmp1, $tmp1); | ||
| 209 | &mov( $t, &DWP(&n2a(($S+1)*4),$tmp2,"",0)); | ||
| 210 | &xor( $u, $R); | ||
| 211 | &xor( $tmp2, $tmp2); | ||
| 212 | &xor( $t, $R); | ||
| 213 | &and( $u, "0xfcfcfcfc" ); | ||
| 214 | &and( $t, "0xcfcfcfcf" ); | ||
| 215 | &movb( &LB($tmp1), &LB($u) ); | ||
| 216 | &movb( &LB($tmp2), &HB($u) ); | ||
| 217 | &rotr( $t, 4 ); | ||
| 218 | &xor( $L, &DWP(" ",$trans,$tmp1,0)); | ||
| 219 | &movb( &LB($tmp1), &LB($t) ); | ||
| 220 | &xor( $L, &DWP("0x200",$trans,$tmp2,0)); | ||
| 221 | &movb( &LB($tmp2), &HB($t) ); | ||
| 222 | &shr( $u, 16); | ||
| 223 | &xor( $L, &DWP("0x100",$trans,$tmp1,0)); | ||
| 224 | &movb( &LB($tmp1), &HB($u) ); | ||
| 225 | &shr( $t, 16); | ||
| 226 | &xor( $L, &DWP("0x300",$trans,$tmp2,0)); | ||
| 227 | &movb( &LB($tmp2), &HB($t) ); | ||
| 228 | &and( $u, "0xff" ); | ||
| 229 | &and( $t, "0xff" ); | ||
| 230 | &xor( $L, &DWP("0x600",$trans,$tmp1,0)); | ||
| 231 | &xor( $L, &DWP("0x700",$trans,$tmp2,0)); | ||
| 232 | &mov( $tmp2, $wp1 ); | ||
| 233 | &xor( $L, &DWP("0x400",$trans,$u,0)); | ||
| 234 | &xor( $L, &DWP("0x500",$trans,$t,0)); | ||
| 235 | } | ||
| 236 | |||
| 237 | sub n2a | ||
| 238 | { | ||
| 239 | sprintf("%d",$_[0]); | ||
| 240 | } | ||
| 241 | |||
| 242 | # now has a side effect of rotating $a by $shift | ||
| 243 | sub R_PERM_OP | ||
| 244 | { | ||
| 245 | local($a,$b,$tt,$shift,$mask,$last)=@_; | ||
| 246 | |||
| 247 | &rotl( $a, $shift ) if ($shift != 0); | ||
| 248 | &mov( $tt, $a ); | ||
| 249 | &xor( $a, $b ); | ||
| 250 | &and( $a, $mask ); | ||
| 251 | # This can never succeed, and besides it is difficult to see what the | ||
| 252 | # idea was - Ben 13 Feb 99 | ||
| 253 | if (!$last eq $b) | ||
| 254 | { | ||
| 255 | &xor( $b, $a ); | ||
| 256 | &xor( $tt, $a ); | ||
| 257 | } | ||
| 258 | else | ||
| 259 | { | ||
| 260 | &xor( $tt, $a ); | ||
| 261 | &xor( $b, $a ); | ||
| 262 | } | ||
| 263 | &comment(""); | ||
| 264 | } | ||
| 265 | |||
| 266 | sub IP_new | ||
| 267 | { | ||
| 268 | local($l,$r,$tt,$lr)=@_; | ||
| 269 | |||
| 270 | &R_PERM_OP($l,$r,$tt, 4,"0xf0f0f0f0",$l); | ||
| 271 | &R_PERM_OP($r,$tt,$l,20,"0xfff0000f",$l); | ||
| 272 | &R_PERM_OP($l,$tt,$r,14,"0x33333333",$r); | ||
| 273 | &R_PERM_OP($tt,$r,$l,22,"0x03fc03fc",$r); | ||
| 274 | &R_PERM_OP($l,$r,$tt, 9,"0xaaaaaaaa",$r); | ||
| 275 | |||
| 276 | if ($lr != 3) | ||
| 277 | { | ||
| 278 | if (($lr-3) < 0) | ||
| 279 | { &rotr($tt, 3-$lr); } | ||
| 280 | else { &rotl($tt, $lr-3); } | ||
| 281 | } | ||
| 282 | if ($lr != 2) | ||
| 283 | { | ||
| 284 | if (($lr-2) < 0) | ||
| 285 | { &rotr($r, 2-$lr); } | ||
| 286 | else { &rotl($r, $lr-2); } | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | sub FP_new | ||
| 291 | { | ||
| 292 | local($l,$r,$tt,$lr)=@_; | ||
| 293 | |||
| 294 | if ($lr != 2) | ||
| 295 | { | ||
| 296 | if (($lr-2) < 0) | ||
| 297 | { &rotl($r, 2-$lr); } | ||
| 298 | else { &rotr($r, $lr-2); } | ||
| 299 | } | ||
| 300 | if ($lr != 3) | ||
| 301 | { | ||
| 302 | if (($lr-3) < 0) | ||
| 303 | { &rotl($l, 3-$lr); } | ||
| 304 | else { &rotr($l, $lr-3); } | ||
| 305 | } | ||
| 306 | |||
| 307 | &R_PERM_OP($l,$r,$tt, 0,"0xaaaaaaaa",$r); | ||
| 308 | &R_PERM_OP($tt,$r,$l,23,"0x03fc03fc",$r); | ||
| 309 | &R_PERM_OP($l,$r,$tt,10,"0x33333333",$l); | ||
| 310 | &R_PERM_OP($r,$tt,$l,18,"0xfff0000f",$l); | ||
| 311 | &R_PERM_OP($l,$tt,$r,12,"0xf0f0f0f0",$r); | ||
| 312 | &rotr($tt , 4); | ||
| 313 | } | ||
| 314 | |||
| 315 | sub DES_SPtrans | ||
| 316 | { | ||
| 317 | &set_label("DES_SPtrans",64); | ||
| 318 | &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); | ||
| 319 | &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); | ||
| 320 | &data_word(0x00080802, 0x02080800, 0x02080000, 0x00000802); | ||
| 321 | &data_word(0x02000802, 0x02000000, 0x00000000, 0x00080002); | ||
| 322 | &data_word(0x00080000, 0x00000002, 0x02000800, 0x00080800); | ||
| 323 | &data_word(0x02080802, 0x02080000, 0x00000802, 0x02000800); | ||
| 324 | &data_word(0x00000002, 0x00000800, 0x00080800, 0x02080002); | ||
| 325 | &data_word(0x00000800, 0x02000802, 0x02080002, 0x00000000); | ||
| 326 | &data_word(0x00000000, 0x02080802, 0x02000800, 0x00080002); | ||
| 327 | &data_word(0x02080800, 0x00080000, 0x00000802, 0x02000800); | ||
| 328 | &data_word(0x02080002, 0x00000800, 0x00080800, 0x02000002); | ||
| 329 | &data_word(0x00080802, 0x00000002, 0x02000002, 0x02080000); | ||
| 330 | &data_word(0x02080802, 0x00080800, 0x02080000, 0x02000802); | ||
| 331 | &data_word(0x02000000, 0x00000802, 0x00080002, 0x00000000); | ||
| 332 | &data_word(0x00080000, 0x02000000, 0x02000802, 0x02080800); | ||
| 333 | &data_word(0x00000002, 0x02080002, 0x00000800, 0x00080802); | ||
| 334 | # nibble 1 | ||
| 335 | &data_word(0x40108010, 0x00000000, 0x00108000, 0x40100000); | ||
| 336 | &data_word(0x40000010, 0x00008010, 0x40008000, 0x00108000); | ||
| 337 | &data_word(0x00008000, 0x40100010, 0x00000010, 0x40008000); | ||
| 338 | &data_word(0x00100010, 0x40108000, 0x40100000, 0x00000010); | ||
| 339 | &data_word(0x00100000, 0x40008010, 0x40100010, 0x00008000); | ||
| 340 | &data_word(0x00108010, 0x40000000, 0x00000000, 0x00100010); | ||
| 341 | &data_word(0x40008010, 0x00108010, 0x40108000, 0x40000010); | ||
| 342 | &data_word(0x40000000, 0x00100000, 0x00008010, 0x40108010); | ||
| 343 | &data_word(0x00100010, 0x40108000, 0x40008000, 0x00108010); | ||
| 344 | &data_word(0x40108010, 0x00100010, 0x40000010, 0x00000000); | ||
| 345 | &data_word(0x40000000, 0x00008010, 0x00100000, 0x40100010); | ||
| 346 | &data_word(0x00008000, 0x40000000, 0x00108010, 0x40008010); | ||
| 347 | &data_word(0x40108000, 0x00008000, 0x00000000, 0x40000010); | ||
| 348 | &data_word(0x00000010, 0x40108010, 0x00108000, 0x40100000); | ||
| 349 | &data_word(0x40100010, 0x00100000, 0x00008010, 0x40008000); | ||
| 350 | &data_word(0x40008010, 0x00000010, 0x40100000, 0x00108000); | ||
| 351 | # nibble 2 | ||
| 352 | &data_word(0x04000001, 0x04040100, 0x00000100, 0x04000101); | ||
| 353 | &data_word(0x00040001, 0x04000000, 0x04000101, 0x00040100); | ||
| 354 | &data_word(0x04000100, 0x00040000, 0x04040000, 0x00000001); | ||
| 355 | &data_word(0x04040101, 0x00000101, 0x00000001, 0x04040001); | ||
| 356 | &data_word(0x00000000, 0x00040001, 0x04040100, 0x00000100); | ||
| 357 | &data_word(0x00000101, 0x04040101, 0x00040000, 0x04000001); | ||
| 358 | &data_word(0x04040001, 0x04000100, 0x00040101, 0x04040000); | ||
| 359 | &data_word(0x00040100, 0x00000000, 0x04000000, 0x00040101); | ||
| 360 | &data_word(0x04040100, 0x00000100, 0x00000001, 0x00040000); | ||
| 361 | &data_word(0x00000101, 0x00040001, 0x04040000, 0x04000101); | ||
| 362 | &data_word(0x00000000, 0x04040100, 0x00040100, 0x04040001); | ||
| 363 | &data_word(0x00040001, 0x04000000, 0x04040101, 0x00000001); | ||
| 364 | &data_word(0x00040101, 0x04000001, 0x04000000, 0x04040101); | ||
| 365 | &data_word(0x00040000, 0x04000100, 0x04000101, 0x00040100); | ||
| 366 | &data_word(0x04000100, 0x00000000, 0x04040001, 0x00000101); | ||
| 367 | &data_word(0x04000001, 0x00040101, 0x00000100, 0x04040000); | ||
| 368 | # nibble 3 | ||
| 369 | &data_word(0x00401008, 0x10001000, 0x00000008, 0x10401008); | ||
| 370 | &data_word(0x00000000, 0x10400000, 0x10001008, 0x00400008); | ||
| 371 | &data_word(0x10401000, 0x10000008, 0x10000000, 0x00001008); | ||
| 372 | &data_word(0x10000008, 0x00401008, 0x00400000, 0x10000000); | ||
| 373 | &data_word(0x10400008, 0x00401000, 0x00001000, 0x00000008); | ||
| 374 | &data_word(0x00401000, 0x10001008, 0x10400000, 0x00001000); | ||
| 375 | &data_word(0x00001008, 0x00000000, 0x00400008, 0x10401000); | ||
| 376 | &data_word(0x10001000, 0x10400008, 0x10401008, 0x00400000); | ||
| 377 | &data_word(0x10400008, 0x00001008, 0x00400000, 0x10000008); | ||
| 378 | &data_word(0x00401000, 0x10001000, 0x00000008, 0x10400000); | ||
| 379 | &data_word(0x10001008, 0x00000000, 0x00001000, 0x00400008); | ||
| 380 | &data_word(0x00000000, 0x10400008, 0x10401000, 0x00001000); | ||
| 381 | &data_word(0x10000000, 0x10401008, 0x00401008, 0x00400000); | ||
| 382 | &data_word(0x10401008, 0x00000008, 0x10001000, 0x00401008); | ||
| 383 | &data_word(0x00400008, 0x00401000, 0x10400000, 0x10001008); | ||
| 384 | &data_word(0x00001008, 0x10000000, 0x10000008, 0x10401000); | ||
| 385 | # nibble 4 | ||
| 386 | &data_word(0x08000000, 0x00010000, 0x00000400, 0x08010420); | ||
| 387 | &data_word(0x08010020, 0x08000400, 0x00010420, 0x08010000); | ||
| 388 | &data_word(0x00010000, 0x00000020, 0x08000020, 0x00010400); | ||
| 389 | &data_word(0x08000420, 0x08010020, 0x08010400, 0x00000000); | ||
| 390 | &data_word(0x00010400, 0x08000000, 0x00010020, 0x00000420); | ||
| 391 | &data_word(0x08000400, 0x00010420, 0x00000000, 0x08000020); | ||
| 392 | &data_word(0x00000020, 0x08000420, 0x08010420, 0x00010020); | ||
| 393 | &data_word(0x08010000, 0x00000400, 0x00000420, 0x08010400); | ||
| 394 | &data_word(0x08010400, 0x08000420, 0x00010020, 0x08010000); | ||
| 395 | &data_word(0x00010000, 0x00000020, 0x08000020, 0x08000400); | ||
| 396 | &data_word(0x08000000, 0x00010400, 0x08010420, 0x00000000); | ||
| 397 | &data_word(0x00010420, 0x08000000, 0x00000400, 0x00010020); | ||
| 398 | &data_word(0x08000420, 0x00000400, 0x00000000, 0x08010420); | ||
| 399 | &data_word(0x08010020, 0x08010400, 0x00000420, 0x00010000); | ||
| 400 | &data_word(0x00010400, 0x08010020, 0x08000400, 0x00000420); | ||
| 401 | &data_word(0x00000020, 0x00010420, 0x08010000, 0x08000020); | ||
| 402 | # nibble 5 | ||
| 403 | &data_word(0x80000040, 0x00200040, 0x00000000, 0x80202000); | ||
| 404 | &data_word(0x00200040, 0x00002000, 0x80002040, 0x00200000); | ||
| 405 | &data_word(0x00002040, 0x80202040, 0x00202000, 0x80000000); | ||
| 406 | &data_word(0x80002000, 0x80000040, 0x80200000, 0x00202040); | ||
| 407 | &data_word(0x00200000, 0x80002040, 0x80200040, 0x00000000); | ||
| 408 | &data_word(0x00002000, 0x00000040, 0x80202000, 0x80200040); | ||
| 409 | &data_word(0x80202040, 0x80200000, 0x80000000, 0x00002040); | ||
| 410 | &data_word(0x00000040, 0x00202000, 0x00202040, 0x80002000); | ||
| 411 | &data_word(0x00002040, 0x80000000, 0x80002000, 0x00202040); | ||
| 412 | &data_word(0x80202000, 0x00200040, 0x00000000, 0x80002000); | ||
| 413 | &data_word(0x80000000, 0x00002000, 0x80200040, 0x00200000); | ||
| 414 | &data_word(0x00200040, 0x80202040, 0x00202000, 0x00000040); | ||
| 415 | &data_word(0x80202040, 0x00202000, 0x00200000, 0x80002040); | ||
| 416 | &data_word(0x80000040, 0x80200000, 0x00202040, 0x00000000); | ||
| 417 | &data_word(0x00002000, 0x80000040, 0x80002040, 0x80202000); | ||
| 418 | &data_word(0x80200000, 0x00002040, 0x00000040, 0x80200040); | ||
| 419 | # nibble 6 | ||
| 420 | &data_word(0x00004000, 0x00000200, 0x01000200, 0x01000004); | ||
| 421 | &data_word(0x01004204, 0x00004004, 0x00004200, 0x00000000); | ||
| 422 | &data_word(0x01000000, 0x01000204, 0x00000204, 0x01004000); | ||
| 423 | &data_word(0x00000004, 0x01004200, 0x01004000, 0x00000204); | ||
| 424 | &data_word(0x01000204, 0x00004000, 0x00004004, 0x01004204); | ||
| 425 | &data_word(0x00000000, 0x01000200, 0x01000004, 0x00004200); | ||
| 426 | &data_word(0x01004004, 0x00004204, 0x01004200, 0x00000004); | ||
| 427 | &data_word(0x00004204, 0x01004004, 0x00000200, 0x01000000); | ||
| 428 | &data_word(0x00004204, 0x01004000, 0x01004004, 0x00000204); | ||
| 429 | &data_word(0x00004000, 0x00000200, 0x01000000, 0x01004004); | ||
| 430 | &data_word(0x01000204, 0x00004204, 0x00004200, 0x00000000); | ||
| 431 | &data_word(0x00000200, 0x01000004, 0x00000004, 0x01000200); | ||
| 432 | &data_word(0x00000000, 0x01000204, 0x01000200, 0x00004200); | ||
| 433 | &data_word(0x00000204, 0x00004000, 0x01004204, 0x01000000); | ||
| 434 | &data_word(0x01004200, 0x00000004, 0x00004004, 0x01004204); | ||
| 435 | &data_word(0x01000004, 0x01004200, 0x01004000, 0x00004004); | ||
| 436 | # nibble 7 | ||
| 437 | &data_word(0x20800080, 0x20820000, 0x00020080, 0x00000000); | ||
| 438 | &data_word(0x20020000, 0x00800080, 0x20800000, 0x20820080); | ||
| 439 | &data_word(0x00000080, 0x20000000, 0x00820000, 0x00020080); | ||
| 440 | &data_word(0x00820080, 0x20020080, 0x20000080, 0x20800000); | ||
| 441 | &data_word(0x00020000, 0x00820080, 0x00800080, 0x20020000); | ||
| 442 | &data_word(0x20820080, 0x20000080, 0x00000000, 0x00820000); | ||
| 443 | &data_word(0x20000000, 0x00800000, 0x20020080, 0x20800080); | ||
| 444 | &data_word(0x00800000, 0x00020000, 0x20820000, 0x00000080); | ||
| 445 | &data_word(0x00800000, 0x00020000, 0x20000080, 0x20820080); | ||
| 446 | &data_word(0x00020080, 0x20000000, 0x00000000, 0x00820000); | ||
| 447 | &data_word(0x20800080, 0x20020080, 0x20020000, 0x00800080); | ||
| 448 | &data_word(0x20820000, 0x00000080, 0x00800080, 0x20020000); | ||
| 449 | &data_word(0x20820080, 0x00800000, 0x20800000, 0x20000080); | ||
| 450 | &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000); | ||
| 451 | &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000); | ||
| 452 | &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080); | ||
| 453 | } | ||
diff --git a/src/lib/libcrypto/des/asm/des_enc.m4 b/src/lib/libcrypto/des/asm/des_enc.m4 deleted file mode 100644 index 3280595478..0000000000 --- a/src/lib/libcrypto/des/asm/des_enc.m4 +++ /dev/null | |||
| @@ -1,2099 +0,0 @@ | |||
| 1 | ! des_enc.m4 | ||
| 2 | ! des_enc.S (generated from des_enc.m4) | ||
| 3 | ! | ||
| 4 | ! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file. | ||
| 5 | ! | ||
| 6 | ! Version 1.0. 32-bit version. | ||
| 7 | ! | ||
| 8 | ! June 8, 2000. | ||
| 9 | ! | ||
| 10 | ! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation | ||
| 11 | ! by Andy Polyakov. | ||
| 12 | ! | ||
| 13 | ! January 1, 2003. | ||
| 14 | ! | ||
| 15 | ! Assembler version: Copyright Svend Olaf Mikkelsen. | ||
| 16 | ! | ||
| 17 | ! Original C code: Copyright Eric A. Young. | ||
| 18 | ! | ||
| 19 | ! This code can be freely used by LibDES/SSLeay/OpenSSL users. | ||
| 20 | ! | ||
| 21 | ! The LibDES/SSLeay/OpenSSL copyright notices must be respected. | ||
| 22 | ! | ||
| 23 | ! This version can be redistributed. | ||
| 24 | ! | ||
| 25 | ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S | ||
| 26 | ! | ||
| 27 | ! Global registers 1 to 5 are used. This is the same as done by the | ||
| 28 | ! cc compiler. The UltraSPARC load/store little endian feature is used. | ||
| 29 | ! | ||
| 30 | ! Instruction grouping often refers to one CPU cycle. | ||
| 31 | ! | ||
| 32 | ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S | ||
| 33 | ! | ||
| 34 | ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S | ||
| 35 | ! | ||
| 36 | ! Performance improvement according to './apps/openssl speed des' | ||
| 37 | ! | ||
| 38 | ! 32-bit build: | ||
| 39 | ! 23% faster than cc-5.2 -xarch=v8plus -xO5 | ||
| 40 | ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 | ||
| 41 | ! 64-bit build: | ||
| 42 | ! 50% faster than cc-5.2 -xarch=v9 -xO5 | ||
| 43 | ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 | ||
| 44 | ! | ||
| 45 | |||
| 46 | .ident "des_enc.m4 2.1" | ||
| 47 | .file "des_enc-sparc.S" | ||
| 48 | |||
| 49 | #if defined(__SUNPRO_C) && defined(__sparcv9) | ||
| 50 | # define ABI64 /* They've said -xarch=v9 at command line */ | ||
| 51 | #elif defined(__GNUC__) && defined(__arch64__) | ||
| 52 | # define ABI64 /* They've said -m64 at command line */ | ||
| 53 | #endif | ||
| 54 | |||
| 55 | #ifdef ABI64 | ||
| 56 | .register %g2,#scratch | ||
| 57 | .register %g3,#scratch | ||
| 58 | # define FRAME -192 | ||
| 59 | # define BIAS 2047 | ||
| 60 | # define LDPTR ldx | ||
| 61 | # define STPTR stx | ||
| 62 | # define ARG0 128 | ||
| 63 | # define ARGSZ 8 | ||
| 64 | # ifndef OPENSSL_SYSNAME_ULTRASPARC | ||
| 65 | # define OPENSSL_SYSNAME_ULTRASPARC | ||
| 66 | # endif | ||
| 67 | #else | ||
| 68 | # define FRAME -96 | ||
| 69 | # define BIAS 0 | ||
| 70 | # define LDPTR ld | ||
| 71 | # define STPTR st | ||
| 72 | # define ARG0 68 | ||
| 73 | # define ARGSZ 4 | ||
| 74 | #endif | ||
| 75 | |||
| 76 | #define LOOPS 7 | ||
| 77 | |||
| 78 | #define global0 %g0 | ||
| 79 | #define global1 %g1 | ||
| 80 | #define global2 %g2 | ||
| 81 | #define global3 %g3 | ||
| 82 | #define global4 %g4 | ||
| 83 | #define global5 %g5 | ||
| 84 | |||
| 85 | #define local0 %l0 | ||
| 86 | #define local1 %l1 | ||
| 87 | #define local2 %l2 | ||
| 88 | #define local3 %l3 | ||
| 89 | #define local4 %l4 | ||
| 90 | #define local5 %l5 | ||
| 91 | #define local7 %l6 | ||
| 92 | #define local6 %l7 | ||
| 93 | |||
| 94 | #define in0 %i0 | ||
| 95 | #define in1 %i1 | ||
| 96 | #define in2 %i2 | ||
| 97 | #define in3 %i3 | ||
| 98 | #define in4 %i4 | ||
| 99 | #define in5 %i5 | ||
| 100 | #define in6 %i6 | ||
| 101 | #define in7 %i7 | ||
| 102 | |||
| 103 | #define out0 %o0 | ||
| 104 | #define out1 %o1 | ||
| 105 | #define out2 %o2 | ||
| 106 | #define out3 %o3 | ||
| 107 | #define out4 %o4 | ||
| 108 | #define out5 %o5 | ||
| 109 | #define out6 %o6 | ||
| 110 | #define out7 %o7 | ||
| 111 | |||
| 112 | #define stub stb | ||
| 113 | |||
| 114 | changequote({,}) | ||
| 115 | |||
| 116 | |||
| 117 | ! Macro definitions: | ||
| 118 | |||
| 119 | |||
| 120 | ! {ip_macro} | ||
| 121 | ! | ||
| 122 | ! The logic used in initial and final permutations is the same as in | ||
| 123 | ! the C code. The permutations are done with a clever shift, XOR, and AND | ||
| 124 | ! technique. | ||
| 125 | ! | ||
| 126 | ! The macro also loads address sbox 1 to 5 to global 1 to 5, address | ||
| 127 | ! sbox 6 to local6, and address sbox 8 to out3. | ||
| 128 | ! | ||
| 129 | ! Rotates the halves 3 left to bring the sbox bits in convenient positions. | ||
| 130 | ! | ||
| 131 | ! Loads key first round from address in parameter 5 to out0, out1. | ||
| 132 | ! | ||
| 133 | ! After the original LibDES initial permutation, the resulting left | ||
| 134 | ! is in the variable initially used for right and vice versa. The macro | ||
| 135 | ! implements the possibility to keep the halves in the original registers. | ||
| 136 | ! | ||
| 137 | ! parameter 1 left | ||
| 138 | ! parameter 2 right | ||
| 139 | ! parameter 3 result left (modify in first round) | ||
| 140 | ! parameter 4 result right (use in first round) | ||
| 141 | ! parameter 5 key address | ||
| 142 | ! parameter 6 1/2 for include encryption/decryption | ||
| 143 | ! parameter 7 1 for move in1 to in3 | ||
| 144 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 145 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 146 | |||
| 147 | define(ip_macro, { | ||
| 148 | |||
| 149 | ! {ip_macro} | ||
| 150 | ! $1 $2 $4 $3 $5 $6 $7 $8 $9 | ||
| 151 | |||
| 152 | ld [out2+256], local1 | ||
| 153 | srl $2, 4, local4 | ||
| 154 | |||
| 155 | xor local4, $1, local4 | ||
| 156 | ifelse($7,1,{mov in1, in3},{nop}) | ||
| 157 | |||
| 158 | ld [out2+260], local2 | ||
| 159 | and local4, local1, local4 | ||
| 160 | ifelse($8,1,{mov in3, in4},{}) | ||
| 161 | ifelse($8,2,{mov in4, in3},{}) | ||
| 162 | |||
| 163 | ld [out2+280], out4 ! loop counter | ||
| 164 | sll local4, 4, local1 | ||
| 165 | xor $1, local4, $1 | ||
| 166 | |||
| 167 | ld [out2+264], local3 | ||
| 168 | srl $1, 16, local4 | ||
| 169 | xor $2, local1, $2 | ||
| 170 | |||
| 171 | ifelse($9,1,{LDPTR KS3, in4},{}) | ||
| 172 | xor local4, $2, local4 | ||
| 173 | nop !sethi %hi(DES_SPtrans), global1 ! sbox addr | ||
| 174 | |||
| 175 | ifelse($9,1,{LDPTR KS2, in3},{}) | ||
| 176 | and local4, local2, local4 | ||
| 177 | nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr | ||
| 178 | |||
| 179 | sll local4, 16, local1 | ||
| 180 | xor $2, local4, $2 | ||
| 181 | |||
| 182 | srl $2, 2, local4 | ||
| 183 | xor $1, local1, $1 | ||
| 184 | |||
| 185 | sethi %hi(16711680), local5 | ||
| 186 | xor local4, $1, local4 | ||
| 187 | |||
| 188 | and local4, local3, local4 | ||
| 189 | or local5, 255, local5 | ||
| 190 | |||
| 191 | sll local4, 2, local2 | ||
| 192 | xor $1, local4, $1 | ||
| 193 | |||
| 194 | srl $1, 8, local4 | ||
| 195 | xor $2, local2, $2 | ||
| 196 | |||
| 197 | xor local4, $2, local4 | ||
| 198 | add global1, 768, global4 | ||
| 199 | |||
| 200 | and local4, local5, local4 | ||
| 201 | add global1, 1024, global5 | ||
| 202 | |||
| 203 | ld [out2+272], local7 | ||
| 204 | sll local4, 8, local1 | ||
| 205 | xor $2, local4, $2 | ||
| 206 | |||
| 207 | srl $2, 1, local4 | ||
| 208 | xor $1, local1, $1 | ||
| 209 | |||
| 210 | ld [$5], out0 ! key 7531 | ||
| 211 | xor local4, $1, local4 | ||
| 212 | add global1, 256, global2 | ||
| 213 | |||
| 214 | ld [$5+4], out1 ! key 8642 | ||
| 215 | and local4, local7, local4 | ||
| 216 | add global1, 512, global3 | ||
| 217 | |||
| 218 | sll local4, 1, local1 | ||
| 219 | xor $1, local4, $1 | ||
| 220 | |||
| 221 | sll $1, 3, local3 | ||
| 222 | xor $2, local1, $2 | ||
| 223 | |||
| 224 | sll $2, 3, local2 | ||
| 225 | add global1, 1280, local6 ! address sbox 6 | ||
| 226 | |||
| 227 | srl $1, 29, local4 | ||
| 228 | add global1, 1792, out3 ! address sbox 8 | ||
| 229 | |||
| 230 | srl $2, 29, local1 | ||
| 231 | or local4, local3, $4 | ||
| 232 | |||
| 233 | or local2, local1, $3 | ||
| 234 | |||
| 235 | ifelse($6, 1, { | ||
| 236 | |||
| 237 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | ||
| 238 | or local2, local1, $3 | ||
| 239 | xor $4, out0, local1 | ||
| 240 | |||
| 241 | call .des_enc.1 | ||
| 242 | and local1, 252, local1 | ||
| 243 | |||
| 244 | },{}) | ||
| 245 | |||
| 246 | ifelse($6, 2, { | ||
| 247 | |||
| 248 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | ||
| 249 | or local2, local1, $3 | ||
| 250 | xor $4, out0, local1 | ||
| 251 | |||
| 252 | call .des_dec.1 | ||
| 253 | and local1, 252, local1 | ||
| 254 | |||
| 255 | },{}) | ||
| 256 | }) | ||
| 257 | |||
| 258 | |||
| 259 | ! {rounds_macro} | ||
| 260 | ! | ||
| 261 | ! The logic used in the DES rounds is the same as in the C code, | ||
| 262 | ! except that calculations for sbox 1 and sbox 5 begin before | ||
| 263 | ! the previous round is finished. | ||
| 264 | ! | ||
| 265 | ! In each round one half (work) is modified based on key and the | ||
| 266 | ! other half (use). | ||
| 267 | ! | ||
| 268 | ! In this version we do two rounds in a loop repeated 7 times | ||
| 269 | ! and two rounds separately. | ||
| 270 | ! | ||
| 271 | ! One half has the bits for the sboxes in the following positions: | ||
| 272 | ! | ||
| 273 | ! 777777xx555555xx333333xx111111xx | ||
| 274 | ! | ||
| 275 | ! 88xx666666xx444444xx222222xx8888 | ||
| 276 | ! | ||
| 277 | ! The bits for each sbox are xor-ed with the key bits for that box. | ||
| 278 | ! The above xx bits are cleared, and the result used for lookup in | ||
| 279 | ! the sbox table. Each sbox entry contains the 4 output bits permuted | ||
| 280 | ! into 32 bits according to the P permutation. | ||
| 281 | ! | ||
| 282 | ! In the description of DES, left and right are switched after | ||
| 283 | ! each round, except after last round. In this code the original | ||
| 284 | ! left and right are kept in the same register in all rounds, meaning | ||
| 285 | ! that after the 16 rounds the result for right is in the register | ||
| 286 | ! originally used for left. | ||
| 287 | ! | ||
| 288 | ! parameter 1 first work (left in first round) | ||
| 289 | ! parameter 2 first use (right in first round) | ||
| 290 | ! parameter 3 enc/dec 1/-1 | ||
| 291 | ! parameter 4 loop label | ||
| 292 | ! parameter 5 key address register | ||
| 293 | ! parameter 6 optional address for key next encryption/decryption | ||
| 294 | ! parameter 7 not empty for include retl | ||
| 295 | ! | ||
| 296 | ! also compares in2 to 8 | ||
| 297 | |||
| 298 | define(rounds_macro, { | ||
| 299 | |||
| 300 | ! {rounds_macro} | ||
| 301 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 302 | |||
| 303 | xor $2, out0, local1 | ||
| 304 | |||
| 305 | ld [out2+284], local5 ! 0x0000FC00 | ||
| 306 | ba $4 | ||
| 307 | and local1, 252, local1 | ||
| 308 | |||
| 309 | .align 32 | ||
| 310 | |||
| 311 | $4: | ||
| 312 | ! local6 is address sbox 6 | ||
| 313 | ! out3 is address sbox 8 | ||
| 314 | ! out4 is loop counter | ||
| 315 | |||
| 316 | ld [global1+local1], local1 | ||
| 317 | xor $2, out1, out1 ! 8642 | ||
| 318 | xor $2, out0, out0 ! 7531 | ||
| 319 | ! fmovs %f0, %f0 ! fxor used for alignment | ||
| 320 | |||
| 321 | srl out1, 4, local0 ! rotate 4 right | ||
| 322 | and out0, local5, local3 ! 3 | ||
| 323 | ! fmovs %f0, %f0 | ||
| 324 | |||
| 325 | ld [$5+$3*8], local7 ! key 7531 next round | ||
| 326 | srl local3, 8, local3 ! 3 | ||
| 327 | and local0, 252, local2 ! 2 | ||
| 328 | ! fmovs %f0, %f0 | ||
| 329 | |||
| 330 | ld [global3+local3],local3 ! 3 | ||
| 331 | sll out1, 28, out1 ! rotate | ||
| 332 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | ||
| 333 | |||
| 334 | ld [global2+local2], local2 ! 2 | ||
| 335 | srl out0, 24, local1 ! 7 | ||
| 336 | or out1, local0, out1 ! rotate | ||
| 337 | |||
| 338 | ldub [out2+local1], local1 ! 7 (and 0xFC) | ||
| 339 | srl out1, 24, local0 ! 8 | ||
| 340 | and out1, local5, local4 ! 4 | ||
| 341 | |||
| 342 | ldub [out2+local0], local0 ! 8 (and 0xFC) | ||
| 343 | srl local4, 8, local4 ! 4 | ||
| 344 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | ||
| 345 | |||
| 346 | ld [global4+local4],local4 ! 4 | ||
| 347 | srl out1, 16, local2 ! 6 | ||
| 348 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | ||
| 349 | |||
| 350 | ld [out3+local0],local0 ! 8 | ||
| 351 | and local2, 252, local2 ! 6 | ||
| 352 | add global1, 1536, local5 ! address sbox 7 | ||
| 353 | |||
| 354 | ld [local6+local2], local2 ! 6 | ||
| 355 | srl out0, 16, local3 ! 5 | ||
| 356 | xor $1, local4, $1 ! 4 finished | ||
| 357 | |||
| 358 | ld [local5+local1],local1 ! 7 | ||
| 359 | and local3, 252, local3 ! 5 | ||
| 360 | xor $1, local0, $1 ! 8 finished | ||
| 361 | |||
| 362 | ld [global5+local3],local3 ! 5 | ||
| 363 | xor $1, local2, $1 ! 6 finished | ||
| 364 | subcc out4, 1, out4 | ||
| 365 | |||
| 366 | ld [$5+$3*8+4], out0 ! key 8642 next round | ||
| 367 | xor $1, local7, local2 ! sbox 5 next round | ||
| 368 | xor $1, local1, $1 ! 7 finished | ||
| 369 | |||
| 370 | srl local2, 16, local2 ! sbox 5 next round | ||
| 371 | xor $1, local3, $1 ! 5 finished | ||
| 372 | |||
| 373 | ld [$5+$3*16+4], out1 ! key 8642 next round again | ||
| 374 | and local2, 252, local2 ! sbox5 next round | ||
| 375 | ! next round | ||
| 376 | xor $1, local7, local7 ! 7531 | ||
| 377 | |||
| 378 | ld [global5+local2], local2 ! 5 | ||
| 379 | srl local7, 24, local3 ! 7 | ||
| 380 | xor $1, out0, out0 ! 8642 | ||
| 381 | |||
| 382 | ldub [out2+local3], local3 ! 7 (and 0xFC) | ||
| 383 | srl out0, 4, local0 ! rotate 4 right | ||
| 384 | and local7, 252, local1 ! 1 | ||
| 385 | |||
| 386 | sll out0, 28, out0 ! rotate | ||
| 387 | xor $2, local2, $2 ! 5 finished local2 used | ||
| 388 | |||
| 389 | srl local0, 8, local4 ! 4 | ||
| 390 | and local0, 252, local2 ! 2 | ||
| 391 | ld [local5+local3], local3 ! 7 | ||
| 392 | |||
| 393 | srl local0, 16, local5 ! 6 | ||
| 394 | or out0, local0, out0 ! rotate | ||
| 395 | ld [global2+local2], local2 ! 2 | ||
| 396 | |||
| 397 | srl out0, 24, local0 | ||
| 398 | ld [$5+$3*16], out0 ! key 7531 next round | ||
| 399 | and local4, 252, local4 ! 4 | ||
| 400 | |||
| 401 | and local5, 252, local5 ! 6 | ||
| 402 | ld [global4+local4], local4 ! 4 | ||
| 403 | xor $2, local3, $2 ! 7 finished local3 used | ||
| 404 | |||
| 405 | and local0, 252, local0 ! 8 | ||
| 406 | ld [local6+local5], local5 ! 6 | ||
| 407 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | ||
| 408 | |||
| 409 | srl local7, 8, local2 ! 3 start | ||
| 410 | ld [out3+local0], local0 ! 8 | ||
| 411 | xor $2, local4, $2 ! 4 finished | ||
| 412 | |||
| 413 | and local2, 252, local2 ! 3 | ||
| 414 | ld [global1+local1], local1 ! 1 | ||
| 415 | xor $2, local5, $2 ! 6 finished local5 used | ||
| 416 | |||
| 417 | ld [global3+local2], local2 ! 3 | ||
| 418 | xor $2, local0, $2 ! 8 finished | ||
| 419 | add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer | ||
| 420 | |||
| 421 | ld [out2+284], local5 ! 0x0000FC00 | ||
| 422 | xor $2, out0, local4 ! sbox 1 next round | ||
| 423 | xor $2, local1, $2 ! 1 finished | ||
| 424 | |||
| 425 | xor $2, local2, $2 ! 3 finished | ||
| 426 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 427 | bne,pt %icc, $4 | ||
| 428 | #else | ||
| 429 | bne $4 | ||
| 430 | #endif | ||
| 431 | and local4, 252, local1 ! sbox 1 next round | ||
| 432 | |||
| 433 | ! two rounds more: | ||
| 434 | |||
| 435 | ld [global1+local1], local1 | ||
| 436 | xor $2, out1, out1 | ||
| 437 | xor $2, out0, out0 | ||
| 438 | |||
| 439 | srl out1, 4, local0 ! rotate | ||
| 440 | and out0, local5, local3 | ||
| 441 | |||
| 442 | ld [$5+$3*8], local7 ! key 7531 | ||
| 443 | srl local3, 8, local3 | ||
| 444 | and local0, 252, local2 | ||
| 445 | |||
| 446 | ld [global3+local3],local3 | ||
| 447 | sll out1, 28, out1 ! rotate | ||
| 448 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | ||
| 449 | |||
| 450 | ld [global2+local2], local2 | ||
| 451 | srl out0, 24, local1 | ||
| 452 | or out1, local0, out1 ! rotate | ||
| 453 | |||
| 454 | ldub [out2+local1], local1 | ||
| 455 | srl out1, 24, local0 | ||
| 456 | and out1, local5, local4 | ||
| 457 | |||
| 458 | ldub [out2+local0], local0 | ||
| 459 | srl local4, 8, local4 | ||
| 460 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | ||
| 461 | |||
| 462 | ld [global4+local4],local4 | ||
| 463 | srl out1, 16, local2 | ||
| 464 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | ||
| 465 | |||
| 466 | ld [out3+local0],local0 | ||
| 467 | and local2, 252, local2 | ||
| 468 | add global1, 1536, local5 ! address sbox 7 | ||
| 469 | |||
| 470 | ld [local6+local2], local2 | ||
| 471 | srl out0, 16, local3 | ||
| 472 | xor $1, local4, $1 ! 4 finished | ||
| 473 | |||
| 474 | ld [local5+local1],local1 | ||
| 475 | and local3, 252, local3 | ||
| 476 | xor $1, local0, $1 | ||
| 477 | |||
| 478 | ld [global5+local3],local3 | ||
| 479 | xor $1, local2, $1 ! 6 finished | ||
| 480 | cmp in2, 8 | ||
| 481 | |||
| 482 | ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter | ||
| 483 | xor $1, local7, local2 ! sbox 5 next round | ||
| 484 | xor $1, local1, $1 ! 7 finished | ||
| 485 | |||
| 486 | ld [$5+$3*8+4], out0 | ||
| 487 | srl local2, 16, local2 ! sbox 5 next round | ||
| 488 | xor $1, local3, $1 ! 5 finished | ||
| 489 | |||
| 490 | and local2, 252, local2 | ||
| 491 | ! next round (two rounds more) | ||
| 492 | xor $1, local7, local7 ! 7531 | ||
| 493 | |||
| 494 | ld [global5+local2], local2 | ||
| 495 | srl local7, 24, local3 | ||
| 496 | xor $1, out0, out0 ! 8642 | ||
| 497 | |||
| 498 | ldub [out2+local3], local3 | ||
| 499 | srl out0, 4, local0 ! rotate | ||
| 500 | and local7, 252, local1 | ||
| 501 | |||
| 502 | sll out0, 28, out0 ! rotate | ||
| 503 | xor $2, local2, $2 ! 5 finished local2 used | ||
| 504 | |||
| 505 | srl local0, 8, local4 | ||
| 506 | and local0, 252, local2 | ||
| 507 | ld [local5+local3], local3 | ||
| 508 | |||
| 509 | srl local0, 16, local5 | ||
| 510 | or out0, local0, out0 ! rotate | ||
| 511 | ld [global2+local2], local2 | ||
| 512 | |||
| 513 | srl out0, 24, local0 | ||
| 514 | ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption | ||
| 515 | and local4, 252, local4 | ||
| 516 | |||
| 517 | and local5, 252, local5 | ||
| 518 | ld [global4+local4], local4 | ||
| 519 | xor $2, local3, $2 ! 7 finished local3 used | ||
| 520 | |||
| 521 | and local0, 252, local0 | ||
| 522 | ld [local6+local5], local5 | ||
| 523 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | ||
| 524 | |||
| 525 | srl local7, 8, local2 ! 3 start | ||
| 526 | ld [out3+local0], local0 | ||
| 527 | xor $2, local4, $2 | ||
| 528 | |||
| 529 | and local2, 252, local2 | ||
| 530 | ld [global1+local1], local1 | ||
| 531 | xor $2, local5, $2 ! 6 finished local5 used | ||
| 532 | |||
| 533 | ld [global3+local2], local2 | ||
| 534 | srl $1, 3, local3 | ||
| 535 | xor $2, local0, $2 | ||
| 536 | |||
| 537 | ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption | ||
| 538 | sll $1, 29, local4 | ||
| 539 | xor $2, local1, $2 | ||
| 540 | |||
| 541 | ifelse($7,{}, {}, {retl}) | ||
| 542 | xor $2, local2, $2 | ||
| 543 | }) | ||
| 544 | |||
| 545 | |||
| 546 | ! {fp_macro} | ||
| 547 | ! | ||
| 548 | ! parameter 1 right (original left) | ||
| 549 | ! parameter 2 left (original right) | ||
| 550 | ! parameter 3 1 for optional store to [in0] | ||
| 551 | ! parameter 4 1 for load input/output address to local5/7 | ||
| 552 | ! | ||
| 553 | ! The final permutation logic switches the halves, meaning that | ||
| 554 | ! left and right end up in the registers originally used. | ||
| 555 | |||
| 556 | define(fp_macro, { | ||
| 557 | |||
| 558 | ! {fp_macro} | ||
| 559 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 560 | |||
| 561 | ! initially undo the rotate 3 left done after initial permutation | ||
| 562 | ! original left is received shifted 3 right and 29 left in local3/4 | ||
| 563 | |||
| 564 | sll $2, 29, local1 | ||
| 565 | or local3, local4, $1 | ||
| 566 | |||
| 567 | srl $2, 3, $2 | ||
| 568 | sethi %hi(0x55555555), local2 | ||
| 569 | |||
| 570 | or $2, local1, $2 | ||
| 571 | or local2, %lo(0x55555555), local2 | ||
| 572 | |||
| 573 | srl $2, 1, local3 | ||
| 574 | sethi %hi(0x00ff00ff), local1 | ||
| 575 | xor local3, $1, local3 | ||
| 576 | or local1, %lo(0x00ff00ff), local1 | ||
| 577 | and local3, local2, local3 | ||
| 578 | sethi %hi(0x33333333), local4 | ||
| 579 | sll local3, 1, local2 | ||
| 580 | |||
| 581 | xor $1, local3, $1 | ||
| 582 | |||
| 583 | srl $1, 8, local3 | ||
| 584 | xor $2, local2, $2 | ||
| 585 | xor local3, $2, local3 | ||
| 586 | or local4, %lo(0x33333333), local4 | ||
| 587 | and local3, local1, local3 | ||
| 588 | sethi %hi(0x0000ffff), local1 | ||
| 589 | sll local3, 8, local2 | ||
| 590 | |||
| 591 | xor $2, local3, $2 | ||
| 592 | |||
| 593 | srl $2, 2, local3 | ||
| 594 | xor $1, local2, $1 | ||
| 595 | xor local3, $1, local3 | ||
| 596 | or local1, %lo(0x0000ffff), local1 | ||
| 597 | and local3, local4, local3 | ||
| 598 | sethi %hi(0x0f0f0f0f), local4 | ||
| 599 | sll local3, 2, local2 | ||
| 600 | |||
| 601 | ifelse($4,1, {LDPTR INPUT, local5}) | ||
| 602 | xor $1, local3, $1 | ||
| 603 | |||
| 604 | ifelse($4,1, {LDPTR OUTPUT, local7}) | ||
| 605 | srl $1, 16, local3 | ||
| 606 | xor $2, local2, $2 | ||
| 607 | xor local3, $2, local3 | ||
| 608 | or local4, %lo(0x0f0f0f0f), local4 | ||
| 609 | and local3, local1, local3 | ||
| 610 | sll local3, 16, local2 | ||
| 611 | |||
| 612 | xor $2, local3, local1 | ||
| 613 | |||
| 614 | srl local1, 4, local3 | ||
| 615 | xor $1, local2, $1 | ||
| 616 | xor local3, $1, local3 | ||
| 617 | and local3, local4, local3 | ||
| 618 | sll local3, 4, local2 | ||
| 619 | |||
| 620 | xor $1, local3, $1 | ||
| 621 | |||
| 622 | ! optional store: | ||
| 623 | |||
| 624 | ifelse($3,1, {st $1, [in0]}) | ||
| 625 | |||
| 626 | xor local1, local2, $2 | ||
| 627 | |||
| 628 | ifelse($3,1, {st $2, [in0+4]}) | ||
| 629 | |||
| 630 | }) | ||
| 631 | |||
| 632 | |||
| 633 | ! {fp_ip_macro} | ||
| 634 | ! | ||
| 635 | ! Does initial permutation for next block mixed with | ||
| 636 | ! final permutation for current block. | ||
| 637 | ! | ||
| 638 | ! parameter 1 original left | ||
| 639 | ! parameter 2 original right | ||
| 640 | ! parameter 3 left ip | ||
| 641 | ! parameter 4 right ip | ||
| 642 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
| 643 | ! 2: mov in4 to in3 | ||
| 644 | ! | ||
| 645 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
| 646 | |||
| 647 | define(fp_ip_macro, { | ||
| 648 | |||
| 649 | ! {fp_ip_macro} | ||
| 650 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 651 | |||
| 652 | define({temp1},{out4}) | ||
| 653 | define({temp2},{local3}) | ||
| 654 | |||
| 655 | define({ip1},{local1}) | ||
| 656 | define({ip2},{local2}) | ||
| 657 | define({ip4},{local4}) | ||
| 658 | define({ip5},{local5}) | ||
| 659 | |||
| 660 | ! $1 in local3, local4 | ||
| 661 | |||
| 662 | ld [out2+256], ip1 | ||
| 663 | sll out5, 29, temp1 | ||
| 664 | or local3, local4, $1 | ||
| 665 | |||
| 666 | srl out5, 3, $2 | ||
| 667 | ifelse($5,2,{mov in4, in3}) | ||
| 668 | |||
| 669 | ld [out2+272], ip5 | ||
| 670 | srl $4, 4, local0 | ||
| 671 | or $2, temp1, $2 | ||
| 672 | |||
| 673 | srl $2, 1, temp1 | ||
| 674 | xor temp1, $1, temp1 | ||
| 675 | |||
| 676 | and temp1, ip5, temp1 | ||
| 677 | xor local0, $3, local0 | ||
| 678 | |||
| 679 | sll temp1, 1, temp2 | ||
| 680 | xor $1, temp1, $1 | ||
| 681 | |||
| 682 | and local0, ip1, local0 | ||
| 683 | add in2, -8, in2 | ||
| 684 | |||
| 685 | sll local0, 4, local7 | ||
| 686 | xor $3, local0, $3 | ||
| 687 | |||
| 688 | ld [out2+268], ip4 | ||
| 689 | srl $1, 8, temp1 | ||
| 690 | xor $2, temp2, $2 | ||
| 691 | ld [out2+260], ip2 | ||
| 692 | srl $3, 16, local0 | ||
| 693 | xor $4, local7, $4 | ||
| 694 | xor temp1, $2, temp1 | ||
| 695 | xor local0, $4, local0 | ||
| 696 | and temp1, ip4, temp1 | ||
| 697 | and local0, ip2, local0 | ||
| 698 | sll temp1, 8, temp2 | ||
| 699 | xor $2, temp1, $2 | ||
| 700 | sll local0, 16, local7 | ||
| 701 | xor $4, local0, $4 | ||
| 702 | |||
| 703 | srl $2, 2, temp1 | ||
| 704 | xor $1, temp2, $1 | ||
| 705 | |||
| 706 | ld [out2+264], temp2 ! ip3 | ||
| 707 | srl $4, 2, local0 | ||
| 708 | xor $3, local7, $3 | ||
| 709 | xor temp1, $1, temp1 | ||
| 710 | xor local0, $3, local0 | ||
| 711 | and temp1, temp2, temp1 | ||
| 712 | and local0, temp2, local0 | ||
| 713 | sll temp1, 2, temp2 | ||
| 714 | xor $1, temp1, $1 | ||
| 715 | sll local0, 2, local7 | ||
| 716 | xor $3, local0, $3 | ||
| 717 | |||
| 718 | srl $1, 16, temp1 | ||
| 719 | xor $2, temp2, $2 | ||
| 720 | srl $3, 8, local0 | ||
| 721 | xor $4, local7, $4 | ||
| 722 | xor temp1, $2, temp1 | ||
| 723 | xor local0, $4, local0 | ||
| 724 | and temp1, ip2, temp1 | ||
| 725 | and local0, ip4, local0 | ||
| 726 | sll temp1, 16, temp2 | ||
| 727 | xor $2, temp1, local4 | ||
| 728 | sll local0, 8, local7 | ||
| 729 | xor $4, local0, $4 | ||
| 730 | |||
| 731 | srl $4, 1, local0 | ||
| 732 | xor $3, local7, $3 | ||
| 733 | |||
| 734 | srl local4, 4, temp1 | ||
| 735 | xor local0, $3, local0 | ||
| 736 | |||
| 737 | xor $1, temp2, $1 | ||
| 738 | and local0, ip5, local0 | ||
| 739 | |||
| 740 | sll local0, 1, local7 | ||
| 741 | xor temp1, $1, temp1 | ||
| 742 | |||
| 743 | xor $3, local0, $3 | ||
| 744 | xor $4, local7, $4 | ||
| 745 | |||
| 746 | sll $3, 3, local5 | ||
| 747 | and temp1, ip1, temp1 | ||
| 748 | |||
| 749 | sll temp1, 4, temp2 | ||
| 750 | xor $1, temp1, $1 | ||
| 751 | |||
| 752 | ifelse($5,1,{LDPTR KS2, in4}) | ||
| 753 | sll $4, 3, local2 | ||
| 754 | xor local4, temp2, $2 | ||
| 755 | |||
| 756 | ! reload since used as temporary: | ||
| 757 | |||
| 758 | ld [out2+280], out4 ! loop counter | ||
| 759 | |||
| 760 | srl $3, 29, local0 | ||
| 761 | ifelse($5,1,{add in4, 120, in4}) | ||
| 762 | |||
| 763 | ifelse($5,1,{LDPTR KS1, in3}) | ||
| 764 | srl $4, 29, local7 | ||
| 765 | |||
| 766 | or local0, local5, $4 | ||
| 767 | or local2, local7, $3 | ||
| 768 | |||
| 769 | }) | ||
| 770 | |||
| 771 | |||
| 772 | |||
| 773 | ! {load_little_endian} | ||
| 774 | ! | ||
| 775 | ! parameter 1 address | ||
| 776 | ! parameter 2 destination left | ||
| 777 | ! parameter 3 destination right | ||
| 778 | ! parameter 4 temporary | ||
| 779 | ! parameter 5 label | ||
| 780 | |||
| 781 | define(load_little_endian, { | ||
| 782 | |||
| 783 | ! {load_little_endian} | ||
| 784 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 785 | |||
| 786 | ! first in memory to rightmost in register | ||
| 787 | |||
| 788 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 789 | andcc $1, 3, global0 | ||
| 790 | bne,pn %icc, $5 | ||
| 791 | nop | ||
| 792 | |||
| 793 | lda [$1] 0x88, $2 | ||
| 794 | add $1, 4, $4 | ||
| 795 | |||
| 796 | ba,pt %icc, $5a | ||
| 797 | lda [$4] 0x88, $3 | ||
| 798 | #endif | ||
| 799 | |||
| 800 | $5: | ||
| 801 | ldub [$1+3], $2 | ||
| 802 | |||
| 803 | ldub [$1+2], $4 | ||
| 804 | sll $2, 8, $2 | ||
| 805 | or $2, $4, $2 | ||
| 806 | |||
| 807 | ldub [$1+1], $4 | ||
| 808 | sll $2, 8, $2 | ||
| 809 | or $2, $4, $2 | ||
| 810 | |||
| 811 | ldub [$1+0], $4 | ||
| 812 | sll $2, 8, $2 | ||
| 813 | or $2, $4, $2 | ||
| 814 | |||
| 815 | |||
| 816 | ldub [$1+3+4], $3 | ||
| 817 | |||
| 818 | ldub [$1+2+4], $4 | ||
| 819 | sll $3, 8, $3 | ||
| 820 | or $3, $4, $3 | ||
| 821 | |||
| 822 | ldub [$1+1+4], $4 | ||
| 823 | sll $3, 8, $3 | ||
| 824 | or $3, $4, $3 | ||
| 825 | |||
| 826 | ldub [$1+0+4], $4 | ||
| 827 | sll $3, 8, $3 | ||
| 828 | or $3, $4, $3 | ||
| 829 | $5a: | ||
| 830 | |||
| 831 | }) | ||
| 832 | |||
| 833 | |||
| 834 | ! {load_little_endian_inc} | ||
| 835 | ! | ||
| 836 | ! parameter 1 address | ||
| 837 | ! parameter 2 destination left | ||
| 838 | ! parameter 3 destination right | ||
| 839 | ! parameter 4 temporary | ||
| 840 | ! parameter 5 label | ||
| 841 | ! | ||
| 842 | ! adds 8 to address | ||
| 843 | |||
| 844 | define(load_little_endian_inc, { | ||
| 845 | |||
| 846 | ! {load_little_endian_inc} | ||
| 847 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 848 | |||
| 849 | ! first in memory to rightmost in register | ||
| 850 | |||
| 851 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 852 | andcc $1, 3, global0 | ||
| 853 | bne,pn %icc, $5 | ||
| 854 | nop | ||
| 855 | |||
| 856 | lda [$1] 0x88, $2 | ||
| 857 | add $1, 4, $1 | ||
| 858 | |||
| 859 | lda [$1] 0x88, $3 | ||
| 860 | ba,pt %icc, $5a | ||
| 861 | add $1, 4, $1 | ||
| 862 | #endif | ||
| 863 | |||
| 864 | $5: | ||
| 865 | ldub [$1+3], $2 | ||
| 866 | |||
| 867 | ldub [$1+2], $4 | ||
| 868 | sll $2, 8, $2 | ||
| 869 | or $2, $4, $2 | ||
| 870 | |||
| 871 | ldub [$1+1], $4 | ||
| 872 | sll $2, 8, $2 | ||
| 873 | or $2, $4, $2 | ||
| 874 | |||
| 875 | ldub [$1+0], $4 | ||
| 876 | sll $2, 8, $2 | ||
| 877 | or $2, $4, $2 | ||
| 878 | |||
| 879 | ldub [$1+3+4], $3 | ||
| 880 | add $1, 8, $1 | ||
| 881 | |||
| 882 | ldub [$1+2+4-8], $4 | ||
| 883 | sll $3, 8, $3 | ||
| 884 | or $3, $4, $3 | ||
| 885 | |||
| 886 | ldub [$1+1+4-8], $4 | ||
| 887 | sll $3, 8, $3 | ||
| 888 | or $3, $4, $3 | ||
| 889 | |||
| 890 | ldub [$1+0+4-8], $4 | ||
| 891 | sll $3, 8, $3 | ||
| 892 | or $3, $4, $3 | ||
| 893 | $5a: | ||
| 894 | |||
| 895 | }) | ||
| 896 | |||
| 897 | |||
| 898 | ! {load_n_bytes} | ||
| 899 | ! | ||
| 900 | ! Loads 1 to 7 bytes little endian | ||
| 901 | ! Remaining bytes are zeroed. | ||
| 902 | ! | ||
| 903 | ! parameter 1 address | ||
| 904 | ! parameter 2 length | ||
| 905 | ! parameter 3 destination register left | ||
| 906 | ! parameter 4 destination register right | ||
| 907 | ! parameter 5 temp | ||
| 908 | ! parameter 6 temp2 | ||
| 909 | ! parameter 7 label | ||
| 910 | ! parameter 8 return label | ||
| 911 | |||
| 912 | define(load_n_bytes, { | ||
| 913 | |||
| 914 | ! {load_n_bytes} | ||
| 915 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | ||
| 916 | |||
| 917 | $7.0: call .+8 | ||
| 918 | sll $2, 2, $6 | ||
| 919 | |||
| 920 | add %o7,$7.jmp.table-$7.0,$5 | ||
| 921 | |||
| 922 | add $5, $6, $5 | ||
| 923 | mov 0, $4 | ||
| 924 | |||
| 925 | ld [$5], $5 | ||
| 926 | |||
| 927 | jmp %o7+$5 | ||
| 928 | mov 0, $3 | ||
| 929 | |||
| 930 | $7.7: | ||
| 931 | ldub [$1+6], $5 | ||
| 932 | sll $5, 16, $5 | ||
| 933 | or $3, $5, $3 | ||
| 934 | $7.6: | ||
| 935 | ldub [$1+5], $5 | ||
| 936 | sll $5, 8, $5 | ||
| 937 | or $3, $5, $3 | ||
| 938 | $7.5: | ||
| 939 | ldub [$1+4], $5 | ||
| 940 | or $3, $5, $3 | ||
| 941 | $7.4: | ||
| 942 | ldub [$1+3], $5 | ||
| 943 | sll $5, 24, $5 | ||
| 944 | or $4, $5, $4 | ||
| 945 | $7.3: | ||
| 946 | ldub [$1+2], $5 | ||
| 947 | sll $5, 16, $5 | ||
| 948 | or $4, $5, $4 | ||
| 949 | $7.2: | ||
| 950 | ldub [$1+1], $5 | ||
| 951 | sll $5, 8, $5 | ||
| 952 | or $4, $5, $4 | ||
| 953 | $7.1: | ||
| 954 | ldub [$1+0], $5 | ||
| 955 | ba $8 | ||
| 956 | or $4, $5, $4 | ||
| 957 | |||
| 958 | .align 4 | ||
| 959 | |||
| 960 | $7.jmp.table: | ||
| 961 | .word 0 | ||
| 962 | .word $7.1-$7.0 | ||
| 963 | .word $7.2-$7.0 | ||
| 964 | .word $7.3-$7.0 | ||
| 965 | .word $7.4-$7.0 | ||
| 966 | .word $7.5-$7.0 | ||
| 967 | .word $7.6-$7.0 | ||
| 968 | .word $7.7-$7.0 | ||
| 969 | }) | ||
| 970 | |||
| 971 | |||
| 972 | ! {store_little_endian} | ||
| 973 | ! | ||
| 974 | ! parameter 1 address | ||
| 975 | ! parameter 2 source left | ||
| 976 | ! parameter 3 source right | ||
| 977 | ! parameter 4 temporary | ||
| 978 | ! parameter 5 label | ||
| 979 | define(store_little_endian, { | ||
| 980 | |||
| 981 | ! {store_little_endian} | ||
| 982 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | ||
| 983 | |||
| 984 | ! rightmost in register to first in memory | ||
| 985 | |||
| 986 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 987 | andcc $1, 3, global0 | ||
| 988 | bne,pn %icc, $5 | ||
| 989 | nop | ||
| 990 | |||
| 991 | sta $2, [$1] 0x88 | ||
| 992 | add $1, 4, $4 | ||
| 993 | |||
| 994 | ba,pt %icc, $5a | ||
| 995 | sta $3, [$4] 0x88 | ||
| 996 | #endif | ||
| 997 | |||
| 998 | $5: | ||
| 999 | and $2, 255, $4 | ||
| 1000 | stub $4, [$1+0] | ||
| 1001 | |||
| 1002 | srl $2, 8, $4 | ||
| 1003 | and $4, 255, $4 | ||
| 1004 | stub $4, [$1+1] | ||
| 1005 | |||
| 1006 | srl $2, 16, $4 | ||
| 1007 | and $4, 255, $4 | ||
| 1008 | stub $4, [$1+2] | ||
| 1009 | |||
| 1010 | srl $2, 24, $4 | ||
| 1011 | stub $4, [$1+3] | ||
| 1012 | |||
| 1013 | |||
| 1014 | and $3, 255, $4 | ||
| 1015 | stub $4, [$1+0+4] | ||
| 1016 | |||
| 1017 | srl $3, 8, $4 | ||
| 1018 | and $4, 255, $4 | ||
| 1019 | stub $4, [$1+1+4] | ||
| 1020 | |||
| 1021 | srl $3, 16, $4 | ||
| 1022 | and $4, 255, $4 | ||
| 1023 | stub $4, [$1+2+4] | ||
| 1024 | |||
| 1025 | srl $3, 24, $4 | ||
| 1026 | stub $4, [$1+3+4] | ||
| 1027 | |||
| 1028 | $5a: | ||
| 1029 | |||
| 1030 | }) | ||
| 1031 | |||
| 1032 | |||
| 1033 | ! {store_n_bytes} | ||
| 1034 | ! | ||
| 1035 | ! Stores 1 to 7 bytes little endian | ||
| 1036 | ! | ||
| 1037 | ! parameter 1 address | ||
| 1038 | ! parameter 2 length | ||
| 1039 | ! parameter 3 source register left | ||
| 1040 | ! parameter 4 source register right | ||
| 1041 | ! parameter 5 temp | ||
| 1042 | ! parameter 6 temp2 | ||
| 1043 | ! parameter 7 label | ||
| 1044 | ! parameter 8 return label | ||
| 1045 | |||
| 1046 | define(store_n_bytes, { | ||
| 1047 | |||
| 1048 | ! {store_n_bytes} | ||
| 1049 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | ||
| 1050 | |||
| 1051 | $7.0: call .+8 | ||
| 1052 | sll $2, 2, $6 | ||
| 1053 | |||
| 1054 | add %o7,$7.jmp.table-$7.0,$5 | ||
| 1055 | |||
| 1056 | add $5, $6, $5 | ||
| 1057 | |||
| 1058 | ld [$5], $5 | ||
| 1059 | |||
| 1060 | jmp %o7+$5 | ||
| 1061 | nop | ||
| 1062 | |||
| 1063 | $7.7: | ||
| 1064 | srl $3, 16, $5 | ||
| 1065 | and $5, 0xff, $5 | ||
| 1066 | stub $5, [$1+6] | ||
| 1067 | $7.6: | ||
| 1068 | srl $3, 8, $5 | ||
| 1069 | and $5, 0xff, $5 | ||
| 1070 | stub $5, [$1+5] | ||
| 1071 | $7.5: | ||
| 1072 | and $3, 0xff, $5 | ||
| 1073 | stub $5, [$1+4] | ||
| 1074 | $7.4: | ||
| 1075 | srl $4, 24, $5 | ||
| 1076 | stub $5, [$1+3] | ||
| 1077 | $7.3: | ||
| 1078 | srl $4, 16, $5 | ||
| 1079 | and $5, 0xff, $5 | ||
| 1080 | stub $5, [$1+2] | ||
| 1081 | $7.2: | ||
| 1082 | srl $4, 8, $5 | ||
| 1083 | and $5, 0xff, $5 | ||
| 1084 | stub $5, [$1+1] | ||
| 1085 | $7.1: | ||
| 1086 | and $4, 0xff, $5 | ||
| 1087 | |||
| 1088 | |||
| 1089 | ba $8 | ||
| 1090 | stub $5, [$1] | ||
| 1091 | |||
| 1092 | .align 4 | ||
| 1093 | |||
| 1094 | $7.jmp.table: | ||
| 1095 | |||
| 1096 | .word 0 | ||
| 1097 | .word $7.1-$7.0 | ||
| 1098 | .word $7.2-$7.0 | ||
| 1099 | .word $7.3-$7.0 | ||
| 1100 | .word $7.4-$7.0 | ||
| 1101 | .word $7.5-$7.0 | ||
| 1102 | .word $7.6-$7.0 | ||
| 1103 | .word $7.7-$7.0 | ||
| 1104 | }) | ||
| 1105 | |||
| 1106 | |||
| 1107 | define(testvalue,{1}) | ||
| 1108 | |||
| 1109 | define(register_init, { | ||
| 1110 | |||
| 1111 | ! For test purposes: | ||
| 1112 | |||
| 1113 | sethi %hi(testvalue), local0 | ||
| 1114 | or local0, %lo(testvalue), local0 | ||
| 1115 | |||
| 1116 | ifelse($1,{},{}, {mov local0, $1}) | ||
| 1117 | ifelse($2,{},{}, {mov local0, $2}) | ||
| 1118 | ifelse($3,{},{}, {mov local0, $3}) | ||
| 1119 | ifelse($4,{},{}, {mov local0, $4}) | ||
| 1120 | ifelse($5,{},{}, {mov local0, $5}) | ||
| 1121 | ifelse($6,{},{}, {mov local0, $6}) | ||
| 1122 | ifelse($7,{},{}, {mov local0, $7}) | ||
| 1123 | ifelse($8,{},{}, {mov local0, $8}) | ||
| 1124 | |||
| 1125 | mov local0, local1 | ||
| 1126 | mov local0, local2 | ||
| 1127 | mov local0, local3 | ||
| 1128 | mov local0, local4 | ||
| 1129 | mov local0, local5 | ||
| 1130 | mov local0, local7 | ||
| 1131 | mov local0, local6 | ||
| 1132 | mov local0, out0 | ||
| 1133 | mov local0, out1 | ||
| 1134 | mov local0, out2 | ||
| 1135 | mov local0, out3 | ||
| 1136 | mov local0, out4 | ||
| 1137 | mov local0, out5 | ||
| 1138 | mov local0, global1 | ||
| 1139 | mov local0, global2 | ||
| 1140 | mov local0, global3 | ||
| 1141 | mov local0, global4 | ||
| 1142 | mov local0, global5 | ||
| 1143 | |||
| 1144 | }) | ||
| 1145 | |||
| 1146 | .section ".text" | ||
| 1147 | |||
| 1148 | .align 32 | ||
| 1149 | |||
| 1150 | .des_enc: | ||
| 1151 | |||
| 1152 | ! key address in3 | ||
| 1153 | ! loads key next encryption/decryption first round from [in4] | ||
| 1154 | |||
| 1155 | rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) | ||
| 1156 | |||
| 1157 | |||
| 1158 | .align 32 | ||
| 1159 | |||
| 1160 | .des_dec: | ||
| 1161 | |||
| 1162 | ! implemented with out5 as first parameter to avoid | ||
| 1163 | ! register exchange in ede modes | ||
| 1164 | |||
| 1165 | ! key address in4 | ||
| 1166 | ! loads key next encryption/decryption first round from [in3] | ||
| 1167 | |||
| 1168 | rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) | ||
| 1169 | |||
| 1170 | |||
| 1171 | |||
| 1172 | ! void DES_encrypt1(data, ks, enc) - process the two 32-bit words at data in place; enc == 0 decrypts | ||
| 1173 | ! ******************************* | ||
| 1174 | |||
| 1175 | .align 32 | ||
| 1176 | .global DES_encrypt1 | ||
| 1177 | .type DES_encrypt1,#function | ||
| 1178 | |||
| 1179 | DES_encrypt1: | ||
| 1180 | |||
| 1181 | save %sp, FRAME, %sp | ||
| 1182 | |||
| 1183 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | ||
| 1184 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | ||
| 1185 | 1: call .+8 | ||
| 1186 | add %o7,global1,global1 | ||
| 1187 | sub global1,.PIC.DES_SPtrans-.des_and,out2 | ||
| 1188 | |||
| 1189 | ld [in0], in5 ! left: first word of data | ||
| 1190 | cmp in2, 0 ! enc: zero selects decryption | ||
| 1191 | |||
| 1192 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1193 | be,pn %icc, .encrypt.dec ! enc == 0: take the decryption path | ||
| 1194 | #else | ||
| 1195 | be .encrypt.dec | ||
| 1196 | #endif | ||
| 1197 | ld [in0+4], out5 ! right: second word of data, loaded in the branch delay slot | ||
| 1198 | |||
| 1199 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1200 | ! parameter 7 1 for move in1 to in3 | ||
| 1201 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 1202 | |||
| 1203 | ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) | ||
| 1204 | |||
| 1205 | rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! encryption rounds inlined, ks in in3; in4 not used | ||
| 1206 | |||
| 1207 | fp_macro(in5, out5, 1) ! 1 for store to [in0] | ||
| 1208 | |||
| 1209 | ret | ||
| 1210 | restore | ||
| 1211 | |||
| 1212 | .encrypt.dec: | ||
| 1213 | |||
| 1214 | add in1, 120, in3 ! ks + 120: use last subkey for first round | ||
| 1215 | |||
| 1216 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1217 | ! parameter 7 1 for move in1 to in3 | ||
| 1218 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 1219 | |||
| 1220 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! decryption rounds are included here, ks in in4 | ||
| 1221 | |||
| 1222 | fp_macro(out5, in5, 1) ! 1 for store to [in0] | ||
| 1223 | |||
| 1224 | ret | ||
| 1225 | restore | ||
| 1226 | |||
| 1227 | .DES_encrypt1.end: | ||
| 1228 | .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1 | ||
| 1229 | |||
| 1230 | |||
| 1231 | ! void DES_encrypt2(data, ks, enc) | ||
| 1232 | !********************************* | ||
| 1233 | |||
| 1234 | ! Encrypts (enc != 0) or decrypts (enc == 0) the two words at data in place, without initial/final permutation. | ||
| 1235 | |||
| 1236 | .align 32 | ||
| 1237 | .global DES_encrypt2 | ||
| 1238 | .type DES_encrypt2,#function | ||
| 1239 | |||
| 1240 | DES_encrypt2: | ||
| 1241 | |||
| 1242 | save %sp, FRAME, %sp | ||
| 1243 | |||
| 1244 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | ||
| 1245 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | ||
| 1246 | 1: call .+8 | ||
| 1247 | add %o7,global1,global1 | ||
| 1248 | sub global1,.PIC.DES_SPtrans-.des_and,out2 | ||
| 1249 | |||
| 1250 | ! Set sbox address 1 to 6 and rotate halves 3 left | ||
| 1251 | ! Errors caught by destest? Yes. Still? *NO* | ||
| 1252 | |||
| 1253 | !sethi %hi(DES_SPtrans), global1 ! address sbox 1 | ||
| 1254 | |||
| 1255 | !or global1, %lo(DES_SPtrans), global1 ! sbox 1 | ||
| 1256 | |||
| 1257 | add global1, 256, global2 ! sbox 2 | ||
| 1258 | add global1, 512, global3 ! sbox 3 | ||
| 1259 | |||
| 1260 | ld [in0], out5 ! right | ||
| 1261 | add global1, 768, global4 ! sbox 4 | ||
| 1262 | add global1, 1024, global5 ! sbox 5 | ||
| 1263 | |||
| 1264 | ld [in0+4], in5 ! left | ||
| 1265 | add global1, 1280, local6 ! sbox 6 | ||
| 1266 | add global1, 1792, out3 ! sbox 8 | ||
| 1267 | |||
| 1268 | ! rotate both halves left by 3 bits; local5 and local7 are temporaries | ||
| 1269 | |||
| 1270 | sll in5, 3, local5 | ||
| 1271 | mov in1, in3 ! key address to in3 | ||
| 1272 | |||
| 1273 | sll out5, 3, local7 | ||
| 1274 | srl in5, 29, in5 | ||
| 1275 | |||
| 1276 | srl out5, 29, out5 | ||
| 1277 | add in5, local5, in5 | ||
| 1278 | |||
| 1279 | add out5, local7, out5 | ||
| 1280 | cmp in2, 0 | ||
| 1281 | |||
| 1282 | ! we use our own stackframe: the data address is saved there in the delay slot below and reloaded before the final stores | ||
| 1283 | |||
| 1284 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1285 | be,pn %icc, .encrypt2.dec ! enc == 0: decryption | ||
| 1286 | #else | ||
| 1287 | be .encrypt2.dec | ||
| 1288 | #endif | ||
| 1289 | STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] | ||
| 1290 | |||
| 1291 | ld [in3], out0 ! key 7531 first round | ||
| 1292 | mov LOOPS, out4 ! loop counter | ||
| 1293 | |||
| 1294 | ld [in3+4], out1 ! key 8642 first round | ||
| 1295 | sethi %hi(0x0000FC00), local5 | ||
| 1296 | |||
| 1297 | call .des_enc | ||
| 1298 | mov in3, in4 | ||
| 1299 | |||
| 1300 | ! rotate both halves right by 3 bits, undoing the entry rotation; in0 and in1 are temporaries | ||
| 1301 | sll in5, 29, in0 | ||
| 1302 | srl in5, 3, in5 | ||
| 1303 | sll out5, 29, in1 | ||
| 1304 | add in5, in0, in5 | ||
| 1305 | srl out5, 3, out5 | ||
| 1306 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 | ||
| 1307 | add out5, in1, out5 | ||
| 1308 | st in5, [in0] | ||
| 1309 | st out5, [in0+4] | ||
| 1310 | |||
| 1311 | ret | ||
| 1312 | restore | ||
| 1313 | |||
| 1314 | |||
| 1315 | .encrypt2.dec: | ||
| 1316 | |||
| 1317 | add in3, 120, in4 | ||
| 1318 | |||
| 1319 | ld [in4], out0 ! key 7531 first round | ||
| 1320 | mov LOOPS, out4 ! loop counter | ||
| 1321 | |||
| 1322 | ld [in4+4], out1 ! key 8642 first round | ||
| 1323 | sethi %hi(0x0000FC00), local5 | ||
| 1324 | |||
| 1325 | mov in5, local1 ! swap the halves: left expected in out5 | ||
| 1326 | mov out5, in5 | ||
| 1327 | |||
| 1328 | call .des_dec | ||
| 1329 | mov local1, out5 | ||
| 1330 | |||
| 1331 | .encrypt2.finish: | ||
| 1332 | |||
| 1333 | ! rotate both halves right by 3 bits, undoing the entry rotation; in0 and in1 are temporaries | ||
| 1334 | sll in5, 29, in0 | ||
| 1335 | srl in5, 3, in5 | ||
| 1336 | sll out5, 29, in1 | ||
| 1337 | add in5, in0, in5 | ||
| 1338 | srl out5, 3, out5 | ||
| 1339 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 | ||
| 1340 | add out5, in1, out5 | ||
| 1341 | st out5, [in0] | ||
| 1342 | st in5, [in0+4] | ||
| 1343 | |||
| 1344 | ret | ||
| 1345 | restore | ||
| 1346 | |||
| 1347 | .DES_encrypt2.end: | ||
| 1348 | .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2 | ||
| 1349 | |||
| 1350 | |||
| 1351 | ! void DES_encrypt3(data, ks1, ks2, ks3) - encrypt with ks1, decrypt with ks2, encrypt with ks3, in place | ||
| 1352 | ! ************************************** | ||
| 1353 | |||
| 1354 | .align 32 | ||
| 1355 | .global DES_encrypt3 | ||
| 1356 | .type DES_encrypt3,#function | ||
| 1357 | |||
| 1358 | DES_encrypt3: | ||
| 1359 | |||
| 1360 | save %sp, FRAME, %sp | ||
| 1361 | |||
| 1362 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | ||
| 1363 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | ||
| 1364 | 1: call .+8 | ||
| 1365 | add %o7,global1,global1 | ||
| 1366 | sub global1,.PIC.DES_SPtrans-.des_and,out2 | ||
| 1367 | |||
| 1368 | ld [in0], in5 ! left | ||
| 1369 | add in2, 120, in4 ! ks2 + 120: the decryption pass starts at the last subkey | ||
| 1370 | |||
| 1371 | ld [in0+4], out5 ! right | ||
| 1372 | mov in3, in2 ! save ks3, since in3 is reused for ks1 below | ||
| 1373 | |||
| 1374 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1375 | ! parameter 7 1 for mov in1 to in3 | ||
| 1376 | ! parameter 8 1 for mov in3 to in4 | ||
| 1377 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 1378 | |||
| 1379 | ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) | ||
| 1380 | |||
| 1381 | call .des_dec | ||
| 1382 | mov in2, in3 ! delay slot: preload ks3 for the final encryption pass | ||
| 1383 | |||
| 1384 | call .des_enc | ||
| 1385 | nop | ||
| 1386 | |||
| 1387 | fp_macro(in5, out5, 1) | ||
| 1388 | |||
| 1389 | ret | ||
| 1390 | restore | ||
| 1391 | |||
| 1392 | .DES_encrypt3.end: | ||
| 1393 | .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3 | ||
| 1394 | |||
| 1395 | |||
| 1396 | ! void DES_decrypt3(data, ks1, ks2, ks3) - decrypt with ks3, encrypt with ks2, decrypt with ks1, in place | ||
| 1397 | ! ************************************** | ||
| 1398 | |||
| 1399 | .align 32 | ||
| 1400 | .global DES_decrypt3 | ||
| 1401 | .type DES_decrypt3,#function | ||
| 1402 | |||
| 1403 | DES_decrypt3: | ||
| 1404 | |||
| 1405 | save %sp, FRAME, %sp | ||
| 1406 | |||
| 1407 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | ||
| 1408 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | ||
| 1409 | 1: call .+8 | ||
| 1410 | add %o7,global1,global1 | ||
| 1411 | sub global1,.PIC.DES_SPtrans-.des_and,out2 | ||
| 1412 | |||
| 1413 | ld [in0], in5 ! left | ||
| 1414 | add in3, 120, in4 ! ks3 + 120: the first decryption pass starts at the last subkey | ||
| 1415 | |||
| 1416 | ld [in0+4], out5 ! right | ||
| 1417 | mov in2, in3 ! ks2 for the encryption pass | ||
| 1418 | |||
| 1419 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1420 | ! parameter 7 1 for mov in1 to in3 | ||
| 1421 | ! parameter 8 1 for mov in3 to in4 | ||
| 1422 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 1423 | |||
| 1424 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) | ||
| 1425 | |||
| 1426 | call .des_enc | ||
| 1427 | add in1, 120, in4 ! delay slot: preload ks1 + 120 for the final decryption pass | ||
| 1428 | |||
| 1429 | call .des_dec | ||
| 1430 | nop | ||
| 1431 | |||
| 1432 | fp_macro(out5, in5, 1) | ||
| 1433 | |||
| 1434 | ret | ||
| 1435 | restore | ||
| 1436 | |||
| 1437 | .DES_decrypt3.end: | ||
| 1438 | .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 | ||
| 1439 | |||
| 1440 | ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) | ||
| 1441 | ! ***************************************************************** | ||
| 1442 | |||
| 1443 | |||
| 1444 | .align 32 | ||
| 1445 | .global DES_ncbc_encrypt | ||
| 1446 | .type DES_ncbc_encrypt,#function | ||
| 1447 | |||
| 1448 | DES_ncbc_encrypt: | ||
| 1449 | |||
| 1450 | save %sp, FRAME, %sp | ||
| 1451 | |||
| 1452 | define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) | ||
| 1453 | define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) | ||
| 1454 | define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | ||
| 1455 | |||
| 1456 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | ||
| 1457 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | ||
| 1458 | 1: call .+8 | ||
| 1459 | add %o7,global1,global1 | ||
| 1460 | sub global1,.PIC.DES_SPtrans-.des_and,out2 | ||
| 1461 | |||
| 1462 | cmp in5, 0 ! enc | ||
| 1463 | |||
| 1464 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1465 | be,pn %icc, .ncbc.dec | ||
| 1466 | #else | ||
| 1467 | be .ncbc.dec | ||
| 1468 | #endif | ||
| 1469 | STPTR in4, IVEC | ||
| 1470 | |||
| 1471 | ! addr left right temp label | ||
| 1472 | load_little_endian(in4, in5, out5, local3, .LLE1) ! iv | ||
| 1473 | |||
| 1474 | addcc in2, -8, in2 ! bytes missing when first block done | ||
| 1475 | |||
| 1476 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1477 | bl,pn %icc, .ncbc.enc.seven.or.less | ||
| 1478 | #else | ||
| 1479 | bl .ncbc.enc.seven.or.less | ||
| 1480 | #endif | ||
| 1481 | mov in3, in4 ! schedule | ||
| 1482 | |||
| 1483 | .ncbc.enc.next.block: | ||
| 1484 | |||
| 1485 | load_little_endian(in0, out4, global4, local3, .LLE2) ! block | ||
| 1486 | |||
| 1487 | .ncbc.enc.next.block_1: | ||
| 1488 | |||
| 1489 | xor in5, out4, in5 ! iv xor | ||
| 1490 | xor out5, global4, out5 ! iv xor | ||
| 1491 | |||
| 1492 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | ||
| 1493 | ip_macro(in5, out5, in5, out5, in3, 0, 0, 2) | ||
| 1494 | |||
| 1495 | .ncbc.enc.next.block_2: | ||
| 1496 | |||
| 1497 | !// call .des_enc ! compares in2 to 8 | ||
| 1498 | ! rounds inlined for alignment purposes (replaces the call above) | ||
| 1499 | |||
| 1500 | add global1, 768, global4 ! restore sbox 4 address, since global4 was used as a temporary above | ||
| 1501 | |||
| 1502 | rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3 | ||
| 1503 | |||
| 1504 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1505 | bl,pn %icc, .ncbc.enc.next.block_fp | ||
| 1506 | #else | ||
| 1507 | bl .ncbc.enc.next.block_fp | ||
| 1508 | #endif | ||
| 1509 | add in0, 8, in0 ! advance input address (branch delay slot) | ||
| 1510 | |||
| 1511 | ! If 8 or more bytes are to be encrypted after this block, | ||
| 1512 | ! we combine final permutation for this block with initial | ||
| 1513 | ! permutation for next block. Load next block: | ||
| 1514 | |||
| 1515 | load_little_endian(in0, global3, global4, local5, .LLE12) | ||
| 1516 | |||
| 1517 | ! parameter 1 original left | ||
| 1518 | ! parameter 2 original right | ||
| 1519 | ! parameter 3 left ip | ||
| 1520 | ! parameter 4 right ip | ||
| 1521 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
| 1522 | ! 2: mov in4 to in3 | ||
| 1523 | ! | ||
| 1524 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
| 1525 | |||
| 1526 | fp_ip_macro(out0, out1, global3, global4, 2) | ||
| 1527 | |||
| 1528 | store_little_endian(in1, out0, out1, local3, .SLE10) ! block | ||
| 1529 | |||
| 1530 | ld [in3], out0 ! key 7531 first round next block | ||
| 1531 | mov in5, local1 | ||
| 1532 | xor global3, out5, in5 ! iv xor next block | ||
| 1533 | |||
| 1534 | ld [in3+4], out1 ! key 8642 | ||
| 1535 | add global1, 512, global3 ! restore sbox 3 address, since global3 held the next block | ||
| 1536 | xor global4, local1, out5 ! iv xor next block | ||
| 1537 | |||
| 1538 | ba .ncbc.enc.next.block_2 | ||
| 1539 | add in1, 8, in1 ! output address | ||
| 1540 | |||
| 1541 | .ncbc.enc.next.block_fp: | ||
| 1542 | |||
| 1543 | fp_macro(in5, out5) | ||
| 1544 | |||
| 1545 | store_little_endian(in1, in5, out5, local3, .SLE1) ! block | ||
| 1546 | |||
| 1547 | addcc in2, -8, in2 ! bytes missing when next block done | ||
| 1548 | |||
| 1549 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1550 | bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 | ||
| 1551 | #else | ||
| 1552 | bpos .ncbc.enc.next.block | ||
| 1553 | #endif | ||
| 1554 | add in1, 8, in1 | ||
| 1555 | |||
| 1556 | .ncbc.enc.seven.or.less: | ||
| 1557 | |||
| 1558 | cmp in2, -8 | ||
| 1559 | |||
| 1560 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1561 | ble,pt %icc, .ncbc.enc.finish | ||
| 1562 | #else | ||
| 1563 | ble .ncbc.enc.finish | ||
| 1564 | #endif | ||
| 1565 | nop | ||
| 1566 | |||
| 1567 | add in2, 8, local1 ! bytes to load | ||
| 1568 | |||
| 1569 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | ||
| 1570 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) | ||
| 1571 | |||
| 1572 | ! Loads 1 to 7 bytes little endian to global4, out4 | ||
| 1573 | |||
| 1574 | |||
| 1575 | .ncbc.enc.finish: | ||
| 1576 | |||
| 1577 | LDPTR IVEC, local4 | ||
| 1578 | store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec | ||
| 1579 | |||
| 1580 | ret | ||
| 1581 | restore | ||
| 1582 | |||
| 1583 | |||
| 1584 | .ncbc.dec: | ||
| 1585 | |||
| 1586 | STPTR in0, INPUT | ||
| 1587 | cmp in2, 0 ! length | ||
| 1588 | add in3, 120, in3 | ||
| 1589 | |||
| 1590 | LDPTR IVEC, local7 ! ivec | ||
| 1591 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1592 | ble,pn %icc, .ncbc.dec.finish | ||
| 1593 | #else | ||
| 1594 | ble .ncbc.dec.finish | ||
| 1595 | #endif | ||
| 1596 | mov in3, in4 ! schedule | ||
| 1597 | |||
| 1598 | STPTR in1, OUTPUT | ||
| 1599 | mov in0, local5 ! input | ||
| 1600 | |||
| 1601 | load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec | ||
| 1602 | |||
| 1603 | .ncbc.dec.next.block: | ||
| 1604 | |||
| 1605 | load_little_endian(local5, in5, out5, local3, .LLE4) ! block to decrypt | ||
| 1606 | |||
| 1607 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1608 | ! parameter 7 1 for mov in1 to in3 | ||
| 1609 | ! parameter 8 1 for mov in3 to in4 | ||
| 1610 | |||
| 1611 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4 | ||
| 1612 | |||
| 1613 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 | ||
| 1614 | |||
| 1615 | ! in2 is bytes left to be stored | ||
| 1616 | ! in2 is compared to 8 in the rounds | ||
| 1617 | |||
| 1618 | xor out5, in0, out4 ! iv xor | ||
| 1619 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1620 | bl,pn %icc, .ncbc.dec.seven.or.less | ||
| 1621 | #else | ||
| 1622 | bl .ncbc.dec.seven.or.less | ||
| 1623 | #endif | ||
| 1624 | xor in5, in1, global4 ! iv xor | ||
| 1625 | |||
| 1626 | ! Load ivec next block now, since input and output address might be the same. | ||
| 1627 | |||
| 1628 | load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! this input block becomes the iv for the next block | ||
| 1629 | |||
| 1630 | store_little_endian(local7, out4, global4, local3, .SLE3) | ||
| 1631 | |||
| 1632 | STPTR local5, INPUT | ||
| 1633 | add local7, 8, local7 | ||
| 1634 | addcc in2, -8, in2 | ||
| 1635 | |||
| 1636 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1637 | bg,pt %icc, .ncbc.dec.next.block | ||
| 1638 | #else | ||
| 1639 | bg .ncbc.dec.next.block | ||
| 1640 | #endif | ||
| 1641 | STPTR local7, OUTPUT | ||
| 1642 | |||
| 1643 | |||
| 1644 | .ncbc.dec.store.iv: | ||
| 1645 | |||
| 1646 | LDPTR IVEC, local4 ! ivec | ||
| 1647 | store_little_endian(local4, in0, in1, local5, .SLE4) | ||
| 1648 | |||
| 1649 | .ncbc.dec.finish: | ||
| 1650 | |||
| 1651 | ret | ||
| 1652 | restore | ||
| 1653 | |||
| 1654 | .ncbc.dec.seven.or.less: | ||
| 1655 | |||
| 1656 | load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec | ||
| 1657 | |||
| 1658 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) | ||
| 1659 | |||
| 1660 | |||
| 1661 | .DES_ncbc_encrypt.end: | ||
| 1662 | .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt | ||
| 1663 | |||
| 1664 | |||
| 1665 | ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc) | ||
| 1666 | ! ************************************************************************** | ||
| 1667 | |||
| 1668 | |||
| 1669 | .align 32 | ||
| 1670 | .global DES_ede3_cbc_encrypt | ||
| 1671 | .type DES_ede3_cbc_encrypt,#function | ||
| 1672 | |||
| 1673 | DES_ede3_cbc_encrypt: | ||
| 1674 | |||
| 1675 | save %sp, FRAME, %sp | ||
| 1676 | |||
| 1677 | define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) | ||
| 1678 | define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | ||
| 1679 | define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) | ||
| 1680 | |||
| 1681 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | ||
| 1682 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | ||
| 1683 | 1: call .+8 | ||
| 1684 | add %o7,global1,global1 | ||
| 1685 | sub global1,.PIC.DES_SPtrans-.des_and,out2 | ||
| 1686 | |||
| 1687 | LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc | ||
| 1688 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
| 1689 | cmp local3, 0 ! enc | ||
| 1690 | |||
| 1691 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1692 | be,pn %icc, .ede3.dec | ||
| 1693 | #else | ||
| 1694 | be .ede3.dec | ||
| 1695 | #endif | ||
| 1696 | STPTR in4, KS2 | ||
| 1697 | |||
| 1698 | STPTR in5, KS3 | ||
| 1699 | |||
| 1700 | load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec | ||
| 1701 | |||
| 1702 | addcc in2, -8, in2 ! bytes missing after next block | ||
| 1703 | |||
| 1704 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1705 | bl,pn %icc, .ede3.enc.seven.or.less | ||
| 1706 | #else | ||
| 1707 | bl .ede3.enc.seven.or.less | ||
| 1708 | #endif | ||
| 1709 | STPTR in3, KS1 | ||
| 1710 | |||
| 1711 | .ede3.enc.next.block: | ||
| 1712 | |||
| 1713 | load_little_endian(in0, out4, global4, local3, .LLE7) | ||
| 1714 | |||
| 1715 | .ede3.enc.next.block_1: | ||
| 1716 | |||
| 1717 | LDPTR KS2, in4 | ||
| 1718 | xor in5, out4, in5 ! iv xor | ||
| 1719 | xor out5, global4, out5 ! iv xor | ||
| 1720 | |||
| 1721 | LDPTR KS1, in3 | ||
| 1722 | add in4, 120, in4 ! for decryption we use last subkey first | ||
| 1723 | nop | ||
| 1724 | |||
| 1725 | ip_macro(in5, out5, in5, out5, in3) | ||
| 1726 | |||
| 1727 | .ede3.enc.next.block_2: | ||
| 1728 | |||
| 1729 | call .des_enc ! ks1 in3 | ||
| 1730 | nop | ||
| 1731 | |||
| 1732 | call .des_dec ! ks2 in4 | ||
| 1733 | LDPTR KS3, in3 | ||
| 1734 | |||
| 1735 | call .des_enc ! ks3 in3 compares in2 to 8 | ||
| 1736 | nop | ||
| 1737 | |||
| 1738 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1739 | bl,pn %icc, .ede3.enc.next.block_fp | ||
| 1740 | #else | ||
| 1741 | bl .ede3.enc.next.block_fp | ||
| 1742 | #endif | ||
| 1743 | add in0, 8, in0 | ||
| 1744 | |||
| 1745 | ! If 8 or more bytes are to be encrypted after this block, | ||
| 1746 | ! we combine final permutation for this block with initial | ||
| 1747 | ! permutation for next block. Load next block: | ||
| 1748 | |||
| 1749 | load_little_endian(in0, global3, global4, local5, .LLE11) | ||
| 1750 | |||
| 1751 | ! parameter 1 original left | ||
| 1752 | ! parameter 2 original right | ||
| 1753 | ! parameter 3 left ip | ||
| 1754 | ! parameter 4 right ip | ||
| 1755 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | ||
| 1756 | ! 2: mov in4 to in3 | ||
| 1757 | ! | ||
| 1758 | ! also adds -8 to length in2 and loads loop counter to out4 | ||
| 1759 | |||
| 1760 | fp_ip_macro(out0, out1, global3, global4, 1) | ||
| 1761 | |||
| 1762 | store_little_endian(in1, out0, out1, local3, .SLE9) ! block | ||
| 1763 | |||
| 1764 | mov in5, local1 | ||
| 1765 | xor global3, out5, in5 ! iv xor next block | ||
| 1766 | |||
| 1767 | ld [in3], out0 ! key 7531 | ||
| 1768 | add global1, 512, global3 ! address sbox 3 | ||
| 1769 | xor global4, local1, out5 ! iv xor next block | ||
| 1770 | |||
| 1771 | ld [in3+4], out1 ! key 8642 | ||
| 1772 | add global1, 768, global4 ! address sbox 4 | ||
| 1773 | ba .ede3.enc.next.block_2 | ||
| 1774 | add in1, 8, in1 | ||
| 1775 | |||
| 1776 | .ede3.enc.next.block_fp: | ||
| 1777 | |||
| 1778 | fp_macro(in5, out5) | ||
| 1779 | |||
| 1780 | store_little_endian(in1, in5, out5, local3, .SLE5) ! block | ||
| 1781 | |||
| 1782 | addcc in2, -8, in2 ! bytes missing when next block done | ||
| 1783 | |||
| 1784 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1785 | bpos,pt %icc, .ede3.enc.next.block | ||
| 1786 | #else | ||
| 1787 | bpos .ede3.enc.next.block | ||
| 1788 | #endif | ||
| 1789 | add in1, 8, in1 | ||
| 1790 | |||
| 1791 | .ede3.enc.seven.or.less: | ||
| 1792 | |||
| 1793 | cmp in2, -8 | ||
| 1794 | |||
| 1795 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1796 | ble,pt %icc, .ede3.enc.finish | ||
| 1797 | #else | ||
| 1798 | ble .ede3.enc.finish | ||
| 1799 | #endif | ||
| 1800 | nop | ||
| 1801 | |||
| 1802 | add in2, 8, local1 ! bytes to load | ||
| 1803 | |||
| 1804 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | ||
| 1805 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) | ||
| 1806 | |||
| 1807 | .ede3.enc.finish: | ||
| 1808 | |||
| 1809 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
| 1810 | store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec | ||
| 1811 | |||
| 1812 | ret | ||
| 1813 | restore | ||
| 1814 | |||
| 1815 | .ede3.dec: | ||
| 1816 | ! Decryption setup: save the arguments in our stack frame and point ks1/ks3 at their last subkeys. | ||
| 1817 | STPTR in0, INPUT | ||
| 1818 | add in5, 120, in5 ! ks3 + 120: for decryption we use last subkey first | ||
| 1819 | |||
| 1820 | STPTR in1, OUTPUT | ||
| 1821 | mov in0, local5 ! input address | ||
| 1822 | add in3, 120, in3 ! ks1 + 120: last subkey of ks1 | ||
| 1823 | |||
| 1824 | STPTR in3, KS1 | ||
| 1825 | cmp in2, 0 ! length | ||
| 1826 | ! A length of zero or less is the unlikely case, so predict not taken as the ncbc routine does. | ||
| 1827 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1828 | ble,pn %icc, .ede3.dec.finish | ||
| 1829 | #else | ||
| 1830 | ble .ede3.dec.finish | ||
| 1831 | #endif | ||
| 1832 | STPTR in5, KS3 | ||
| 1833 | |||
| 1834 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! ivec address, the seventh argument | ||
| 1835 | load_little_endian(local7, in0, in1, local3, .LLE8) | ||
| 1836 | |||
| 1837 | .ede3.dec.next.block: | ||
| 1838 | |||
| 1839 | load_little_endian(local5, in5, out5, local3, .LLE9) | ||
| 1840 | |||
| 1841 | ! parameter 6 1/2 for include encryption/decryption | ||
| 1842 | ! parameter 7 1 for mov in1 to in3 | ||
| 1843 | ! parameter 8 1 for mov in3 to in4 | ||
| 1844 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | ||
| 1845 | |||
| 1846 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 | ||
| 1847 | |||
| 1848 | call .des_enc ! ks2 in3 | ||
| 1849 | LDPTR KS1, in4 | ||
| 1850 | |||
| 1851 | call .des_dec ! ks1 in4 | ||
| 1852 | nop | ||
| 1853 | |||
| 1854 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 | ||
| 1855 | |||
| 1856 | ! in2 is bytes left to be stored | ||
| 1857 | ! in2 is compared to 8 in the rounds | ||
| 1858 | |||
| 1859 | xor out5, in0, out4 | ||
| 1860 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1861 | bl,pn %icc, .ede3.dec.seven.or.less | ||
| 1862 | #else | ||
| 1863 | bl .ede3.dec.seven.or.less | ||
| 1864 | #endif | ||
| 1865 | xor in5, in1, global4 | ||
| 1866 | |||
| 1867 | load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block | ||
| 1868 | |||
| 1869 | store_little_endian(local7, out4, global4, local3, .SLE7) ! block | ||
| 1870 | |||
| 1871 | STPTR local5, INPUT | ||
| 1872 | addcc in2, -8, in2 | ||
| 1873 | add local7, 8, local7 | ||
| 1874 | |||
| 1875 | #ifdef OPENSSL_SYSNAME_ULTRASPARC | ||
| 1876 | bg,pt %icc, .ede3.dec.next.block | ||
| 1877 | #else | ||
| 1878 | bg .ede3.dec.next.block | ||
| 1879 | #endif | ||
| 1880 | STPTR local7, OUTPUT | ||
| 1881 | |||
| 1882 | .ede3.dec.store.iv: | ||
| 1883 | |||
| 1884 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | ||
| 1885 | store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec | ||
| 1886 | |||
| 1887 | .ede3.dec.finish: | ||
| 1888 | |||
| 1889 | ret | ||
| 1890 | restore | ||
| 1891 | |||
| 1892 | .ede3.dec.seven.or.less: | ||
| 1893 | |||
| 1894 | load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv | ||
| 1895 | |||
| 1896 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) | ||
| 1897 | |||
| 1898 | |||
| 1899 | .DES_ede3_cbc_encrypt.end: | ||
| 1900 | .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt | ||
| 1901 | |||
| 1902 | .align 256 | ||
| 1903 | .type .des_and,#object | ||
| 1904 | .size .des_and,288 | ||
| 1905 | ! Size is 288 bytes: the 256-byte table plus eight 32-bit words, the last one at offset 284. | ||
| 1906 | .des_and: | ||
| 1907 | |||
| 1908 | ! This table is used for AND 0xFC when it is known that register | ||
| 1909 | ! bits 8-31 are zero. Makes it possible to do three arithmetic | ||
| 1910 | ! operations in one cycle. | ||
| 1911 | |||
| 1912 | .byte 0, 0, 0, 0, 4, 4, 4, 4 | ||
| 1913 | .byte 8, 8, 8, 8, 12, 12, 12, 12 | ||
| 1914 | .byte 16, 16, 16, 16, 20, 20, 20, 20 | ||
| 1915 | .byte 24, 24, 24, 24, 28, 28, 28, 28 | ||
| 1916 | .byte 32, 32, 32, 32, 36, 36, 36, 36 | ||
| 1917 | .byte 40, 40, 40, 40, 44, 44, 44, 44 | ||
| 1918 | .byte 48, 48, 48, 48, 52, 52, 52, 52 | ||
| 1919 | .byte 56, 56, 56, 56, 60, 60, 60, 60 | ||
| 1920 | .byte 64, 64, 64, 64, 68, 68, 68, 68 | ||
| 1921 | .byte 72, 72, 72, 72, 76, 76, 76, 76 | ||
| 1922 | .byte 80, 80, 80, 80, 84, 84, 84, 84 | ||
| 1923 | .byte 88, 88, 88, 88, 92, 92, 92, 92 | ||
| 1924 | .byte 96, 96, 96, 96, 100, 100, 100, 100 | ||
| 1925 | .byte 104, 104, 104, 104, 108, 108, 108, 108 | ||
| 1926 | .byte 112, 112, 112, 112, 116, 116, 116, 116 | ||
| 1927 | .byte 120, 120, 120, 120, 124, 124, 124, 124 | ||
| 1928 | .byte 128, 128, 128, 128, 132, 132, 132, 132 | ||
| 1929 | .byte 136, 136, 136, 136, 140, 140, 140, 140 | ||
| 1930 | .byte 144, 144, 144, 144, 148, 148, 148, 148 | ||
| 1931 | .byte 152, 152, 152, 152, 156, 156, 156, 156 | ||
| 1932 | .byte 160, 160, 160, 160, 164, 164, 164, 164 | ||
| 1933 | .byte 168, 168, 168, 168, 172, 172, 172, 172 | ||
| 1934 | .byte 176, 176, 176, 176, 180, 180, 180, 180 | ||
| 1935 | .byte 184, 184, 184, 184, 188, 188, 188, 188 | ||
| 1936 | .byte 192, 192, 192, 192, 196, 196, 196, 196 | ||
| 1937 | .byte 200, 200, 200, 200, 204, 204, 204, 204 | ||
| 1938 | .byte 208, 208, 208, 208, 212, 212, 212, 212 | ||
| 1939 | .byte 216, 216, 216, 216, 220, 220, 220, 220 | ||
| 1940 | .byte 224, 224, 224, 224, 228, 228, 228, 228 | ||
| 1941 | .byte 232, 232, 232, 232, 236, 236, 236, 236 | ||
| 1942 | .byte 240, 240, 240, 240, 244, 244, 244, 244 | ||
| 1943 | .byte 248, 248, 248, 248, 252, 252, 252, 252 | ||
| 1944 | |||
| 1945 | ! 5 numbers for initial/final permutation | ||
| 1946 | |||
| 1947 | .word 0x0f0f0f0f ! offset 256 | ||
| 1948 | .word 0x0000ffff ! 260 | ||
| 1949 | .word 0x33333333 ! 264 | ||
| 1950 | .word 0x00ff00ff ! 268 | ||
| 1951 | .word 0x55555555 ! 272 | ||
| 1952 | |||
| 1953 | .word 0 ! 276 | ||
| 1954 | .word LOOPS ! 280 | ||
| 1955 | .word 0x0000FC00 ! 284, the last word: the object ends at offset 288 | ||
| 1956 | |||
| 1957 | .global DES_SPtrans | ||
| 1958 | .type DES_SPtrans,#object | ||
| 1959 | .size DES_SPtrans,2048 | ||
| 1960 | .align 64 | ||
| 1961 | DES_SPtrans: | ||
| 1962 | .PIC.DES_SPtrans: | ||
| 1963 | ! nibble 0 | ||
| 1964 | .word 0x02080800, 0x00080000, 0x02000002, 0x02080802 | ||
| 1965 | .word 0x02000000, 0x00080802, 0x00080002, 0x02000002 | ||
| 1966 | .word 0x00080802, 0x02080800, 0x02080000, 0x00000802 | ||
| 1967 | .word 0x02000802, 0x02000000, 0x00000000, 0x00080002 | ||
| 1968 | .word 0x00080000, 0x00000002, 0x02000800, 0x00080800 | ||
| 1969 | .word 0x02080802, 0x02080000, 0x00000802, 0x02000800 | ||
| 1970 | .word 0x00000002, 0x00000800, 0x00080800, 0x02080002 | ||
| 1971 | .word 0x00000800, 0x02000802, 0x02080002, 0x00000000 | ||
| 1972 | .word 0x00000000, 0x02080802, 0x02000800, 0x00080002 | ||
| 1973 | .word 0x02080800, 0x00080000, 0x00000802, 0x02000800 | ||
| 1974 | .word 0x02080002, 0x00000800, 0x00080800, 0x02000002 | ||
| 1975 | .word 0x00080802, 0x00000002, 0x02000002, 0x02080000 | ||
| 1976 | .word 0x02080802, 0x00080800, 0x02080000, 0x02000802 | ||
| 1977 | .word 0x02000000, 0x00000802, 0x00080002, 0x00000000 | ||
| 1978 | .word 0x00080000, 0x02000000, 0x02000802, 0x02080800 | ||
| 1979 | .word 0x00000002, 0x02080002, 0x00000800, 0x00080802 | ||
| 1980 | ! nibble 1 | ||
| 1981 | .word 0x40108010, 0x00000000, 0x00108000, 0x40100000 | ||
| 1982 | .word 0x40000010, 0x00008010, 0x40008000, 0x00108000 | ||
| 1983 | .word 0x00008000, 0x40100010, 0x00000010, 0x40008000 | ||
| 1984 | .word 0x00100010, 0x40108000, 0x40100000, 0x00000010 | ||
| 1985 | .word 0x00100000, 0x40008010, 0x40100010, 0x00008000 | ||
| 1986 | .word 0x00108010, 0x40000000, 0x00000000, 0x00100010 | ||
| 1987 | .word 0x40008010, 0x00108010, 0x40108000, 0x40000010 | ||
| 1988 | .word 0x40000000, 0x00100000, 0x00008010, 0x40108010 | ||
| 1989 | .word 0x00100010, 0x40108000, 0x40008000, 0x00108010 | ||
| 1990 | .word 0x40108010, 0x00100010, 0x40000010, 0x00000000 | ||
| 1991 | .word 0x40000000, 0x00008010, 0x00100000, 0x40100010 | ||
| 1992 | .word 0x00008000, 0x40000000, 0x00108010, 0x40008010 | ||
| 1993 | .word 0x40108000, 0x00008000, 0x00000000, 0x40000010 | ||
| 1994 | .word 0x00000010, 0x40108010, 0x00108000, 0x40100000 | ||
| 1995 | .word 0x40100010, 0x00100000, 0x00008010, 0x40008000 | ||
| 1996 | .word 0x40008010, 0x00000010, 0x40100000, 0x00108000 | ||
| 1997 | ! nibble 2 | ||
| 1998 | .word 0x04000001, 0x04040100, 0x00000100, 0x04000101 | ||
| 1999 | .word 0x00040001, 0x04000000, 0x04000101, 0x00040100 | ||
| 2000 | .word 0x04000100, 0x00040000, 0x04040000, 0x00000001 | ||
| 2001 | .word 0x04040101, 0x00000101, 0x00000001, 0x04040001 | ||
| 2002 | .word 0x00000000, 0x00040001, 0x04040100, 0x00000100 | ||
| 2003 | .word 0x00000101, 0x04040101, 0x00040000, 0x04000001 | ||
| 2004 | .word 0x04040001, 0x04000100, 0x00040101, 0x04040000 | ||
| 2005 | .word 0x00040100, 0x00000000, 0x04000000, 0x00040101 | ||
| 2006 | .word 0x04040100, 0x00000100, 0x00000001, 0x00040000 | ||
| 2007 | .word 0x00000101, 0x00040001, 0x04040000, 0x04000101 | ||
| 2008 | .word 0x00000000, 0x04040100, 0x00040100, 0x04040001 | ||
| 2009 | .word 0x00040001, 0x04000000, 0x04040101, 0x00000001 | ||
| 2010 | .word 0x00040101, 0x04000001, 0x04000000, 0x04040101 | ||
| 2011 | .word 0x00040000, 0x04000100, 0x04000101, 0x00040100 | ||
| 2012 | .word 0x04000100, 0x00000000, 0x04040001, 0x00000101 | ||
| 2013 | .word 0x04000001, 0x00040101, 0x00000100, 0x04040000 | ||
| 2014 | ! nibble 3 | ||
| 2015 | .word 0x00401008, 0x10001000, 0x00000008, 0x10401008 | ||
| 2016 | .word 0x00000000, 0x10400000, 0x10001008, 0x00400008 | ||
| 2017 | .word 0x10401000, 0x10000008, 0x10000000, 0x00001008 | ||
| 2018 | .word 0x10000008, 0x00401008, 0x00400000, 0x10000000 | ||
| 2019 | .word 0x10400008, 0x00401000, 0x00001000, 0x00000008 | ||
| 2020 | .word 0x00401000, 0x10001008, 0x10400000, 0x00001000 | ||
| 2021 | .word 0x00001008, 0x00000000, 0x00400008, 0x10401000 | ||
| 2022 | .word 0x10001000, 0x10400008, 0x10401008, 0x00400000 | ||
| 2023 | .word 0x10400008, 0x00001008, 0x00400000, 0x10000008 | ||
| 2024 | .word 0x00401000, 0x10001000, 0x00000008, 0x10400000 | ||
| 2025 | .word 0x10001008, 0x00000000, 0x00001000, 0x00400008 | ||
| 2026 | .word 0x00000000, 0x10400008, 0x10401000, 0x00001000 | ||
| 2027 | .word 0x10000000, 0x10401008, 0x00401008, 0x00400000 | ||
| 2028 | .word 0x10401008, 0x00000008, 0x10001000, 0x00401008 | ||
| 2029 | .word 0x00400008, 0x00401000, 0x10400000, 0x10001008 | ||
| 2030 | .word 0x00001008, 0x10000000, 0x10000008, 0x10401000 | ||
| 2031 | ! nibble 4 | ||
| 2032 | .word 0x08000000, 0x00010000, 0x00000400, 0x08010420 | ||
| 2033 | .word 0x08010020, 0x08000400, 0x00010420, 0x08010000 | ||
| 2034 | .word 0x00010000, 0x00000020, 0x08000020, 0x00010400 | ||
| 2035 | .word 0x08000420, 0x08010020, 0x08010400, 0x00000000 | ||
| 2036 | .word 0x00010400, 0x08000000, 0x00010020, 0x00000420 | ||
| 2037 | .word 0x08000400, 0x00010420, 0x00000000, 0x08000020 | ||
| 2038 | .word 0x00000020, 0x08000420, 0x08010420, 0x00010020 | ||
| 2039 | .word 0x08010000, 0x00000400, 0x00000420, 0x08010400 | ||
| 2040 | .word 0x08010400, 0x08000420, 0x00010020, 0x08010000 | ||
| 2041 | .word 0x00010000, 0x00000020, 0x08000020, 0x08000400 | ||
| 2042 | .word 0x08000000, 0x00010400, 0x08010420, 0x00000000 | ||
| 2043 | .word 0x00010420, 0x08000000, 0x00000400, 0x00010020 | ||
| 2044 | .word 0x08000420, 0x00000400, 0x00000000, 0x08010420 | ||
| 2045 | .word 0x08010020, 0x08010400, 0x00000420, 0x00010000 | ||
| 2046 | .word 0x00010400, 0x08010020, 0x08000400, 0x00000420 | ||
| 2047 | .word 0x00000020, 0x00010420, 0x08010000, 0x08000020 | ||
| 2048 | ! nibble 5 | ||
| 2049 | .word 0x80000040, 0x00200040, 0x00000000, 0x80202000 | ||
| 2050 | .word 0x00200040, 0x00002000, 0x80002040, 0x00200000 | ||
| 2051 | .word 0x00002040, 0x80202040, 0x00202000, 0x80000000 | ||
| 2052 | .word 0x80002000, 0x80000040, 0x80200000, 0x00202040 | ||
| 2053 | .word 0x00200000, 0x80002040, 0x80200040, 0x00000000 | ||
| 2054 | .word 0x00002000, 0x00000040, 0x80202000, 0x80200040 | ||
| 2055 | .word 0x80202040, 0x80200000, 0x80000000, 0x00002040 | ||
| 2056 | .word 0x00000040, 0x00202000, 0x00202040, 0x80002000 | ||
| 2057 | .word 0x00002040, 0x80000000, 0x80002000, 0x00202040 | ||
| 2058 | .word 0x80202000, 0x00200040, 0x00000000, 0x80002000 | ||
| 2059 | .word 0x80000000, 0x00002000, 0x80200040, 0x00200000 | ||
| 2060 | .word 0x00200040, 0x80202040, 0x00202000, 0x00000040 | ||
| 2061 | .word 0x80202040, 0x00202000, 0x00200000, 0x80002040 | ||
| 2062 | .word 0x80000040, 0x80200000, 0x00202040, 0x00000000 | ||
| 2063 | .word 0x00002000, 0x80000040, 0x80002040, 0x80202000 | ||
| 2064 | .word 0x80200000, 0x00002040, 0x00000040, 0x80200040 | ||
| 2065 | ! nibble 6 | ||
| 2066 | .word 0x00004000, 0x00000200, 0x01000200, 0x01000004 | ||
| 2067 | .word 0x01004204, 0x00004004, 0x00004200, 0x00000000 | ||
| 2068 | .word 0x01000000, 0x01000204, 0x00000204, 0x01004000 | ||
| 2069 | .word 0x00000004, 0x01004200, 0x01004000, 0x00000204 | ||
| 2070 | .word 0x01000204, 0x00004000, 0x00004004, 0x01004204 | ||
| 2071 | .word 0x00000000, 0x01000200, 0x01000004, 0x00004200 | ||
| 2072 | .word 0x01004004, 0x00004204, 0x01004200, 0x00000004 | ||
| 2073 | .word 0x00004204, 0x01004004, 0x00000200, 0x01000000 | ||
| 2074 | .word 0x00004204, 0x01004000, 0x01004004, 0x00000204 | ||
| 2075 | .word 0x00004000, 0x00000200, 0x01000000, 0x01004004 | ||
| 2076 | .word 0x01000204, 0x00004204, 0x00004200, 0x00000000 | ||
| 2077 | .word 0x00000200, 0x01000004, 0x00000004, 0x01000200 | ||
| 2078 | .word 0x00000000, 0x01000204, 0x01000200, 0x00004200 | ||
| 2079 | .word 0x00000204, 0x00004000, 0x01004204, 0x01000000 | ||
| 2080 | .word 0x01004200, 0x00000004, 0x00004004, 0x01004204 | ||
| 2081 | .word 0x01000004, 0x01004200, 0x01004000, 0x00004004 | ||
| 2082 | ! nibble 7 | ||
| 2083 | .word 0x20800080, 0x20820000, 0x00020080, 0x00000000 | ||
| 2084 | .word 0x20020000, 0x00800080, 0x20800000, 0x20820080 | ||
| 2085 | .word 0x00000080, 0x20000000, 0x00820000, 0x00020080 | ||
| 2086 | .word 0x00820080, 0x20020080, 0x20000080, 0x20800000 | ||
| 2087 | .word 0x00020000, 0x00820080, 0x00800080, 0x20020000 | ||
| 2088 | .word 0x20820080, 0x20000080, 0x00000000, 0x00820000 | ||
| 2089 | .word 0x20000000, 0x00800000, 0x20020080, 0x20800080 | ||
| 2090 | .word 0x00800000, 0x00020000, 0x20820000, 0x00000080 | ||
| 2091 | .word 0x00800000, 0x00020000, 0x20000080, 0x20820080 | ||
| 2092 | .word 0x00020080, 0x20000000, 0x00000000, 0x00820000 | ||
| 2093 | .word 0x20800080, 0x20020080, 0x20020000, 0x00800080 | ||
| 2094 | .word 0x20820000, 0x00000080, 0x00800080, 0x20020000 | ||
| 2095 | .word 0x20820080, 0x00800000, 0x20800000, 0x20000080 | ||
| 2096 | .word 0x00820000, 0x00020080, 0x20020080, 0x20800000 | ||
| 2097 | .word 0x00000080, 0x20820000, 0x00820080, 0x00000000 | ||
| 2098 | .word 0x20000000, 0x20800080, 0x00020000, 0x00820080 | ||
| 2099 | |||
diff --git a/src/lib/libcrypto/des/asm/desboth.pl b/src/lib/libcrypto/des/asm/desboth.pl deleted file mode 100644 index eec00886e4..0000000000 --- a/src/lib/libcrypto/des/asm/desboth.pl +++ /dev/null | |||
| @@ -1,79 +0,0 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | |||
# Register names used for the two data words: DES_encrypt3 below loads
# [ebx+0] into $L and [ebx+4] into $R.
| 3 | $L="edi"; | ||
| 4 | $R="esi"; | ||
| 5 | |||
# DES_encrypt3($name, $enc)
#
# Emits, through the perlasm helper subs, one x86 routine labelled $name that
# runs three chained DES_encrypt2 passes over the two words addressed by
# wparam(0).  wparam(1), wparam(2) and wparam(3) are handed to DES_encrypt2
# as its second argument, one per pass.
#
#   $name - label passed to function_begin_B() and function_end_B().
#   $enc  - true:  the passes use wparam(1), wparam(2), wparam(3) and the
#                  third DES_encrypt2 argument is 1, 0, 1;
#           false: the passes use wparam(3), wparam(2), wparam(1) and the
#                  third DES_encrypt2 argument is 0, 1, 0.
#           Presumably this is encrypt-decrypt-encrypt triple DES and its
#           inverse -- confirm against the definition of DES_encrypt2.
#
# Generated code, in order:
#   1. Push ebx, ebp, esi and edi; ebx is loaded from wparam(0) and is used
#      as the data pointer for the rest of the routine.
#   2. Load $L/$R from [ebx], call stack_push(3) (presumably reserving the
#      three slots later addressed as swtmp(0..2)) and apply IP_new.  The
#      words written back to [ebx] come from edx and $R; edi and esi are then
#      overwritten with wparam values.
#   3. For each pass, store ebx in swtmp(0), the pass's wparam value in
#      swtmp(1) and the 0/1 flag in swtmp(2), then call DES_encrypt2.
#      edi and esi are read after earlier calls have returned, so the routine
#      relies on DES_encrypt2 preserving ebx, esi and edi.
#   4. Call stack_pop(3), reload $L/$R from [ebx], apply FP_new and store the
#      result back to [ebx] from eax and $R.
#   5. Pop edi, esi, ebp and ebx, then return.
#
# IP_new, FP_new and the DES_encrypt2 label are referenced here but not
# defined in this file.
#
# NOTE(review): the loads and stores in the $enc branches are interleaved,
# presumably for Pentium instruction pairing -- keep the statement order when
# editing.
| 6 | sub DES_encrypt3 | ||
| 7 | { | ||
| 8 | local($name,$enc)=@_; | ||
| 9 | |||
| 10 | &function_begin_B($name,""); | ||
| 11 | &push("ebx"); | ||
| 12 | &mov("ebx",&wparam(0)); | ||
| 13 | |||
| 14 | &push("ebp"); | ||
| 15 | &push("esi"); | ||
| 16 | |||
| 17 | &push("edi"); | ||
| 18 | |||
| 19 | &comment(""); | ||
| 20 | &comment("Load the data words"); | ||
| 21 | &mov($L,&DWP(0,"ebx","",0)); | ||
| 22 | &mov($R,&DWP(4,"ebx","",0)); | ||
| 23 | &stack_push(3); | ||
| 24 | |||
| 25 | &comment(""); | ||
| 26 | &comment("IP"); | ||
| 27 | &IP_new($L,$R,"edx",0); | ||
| 28 | |||
| 29 | # put them back | ||
| 30 | |||
| 31 | if ($enc) | ||
| 32 | { | ||
| 33 | &mov(&DWP(4,"ebx","",0),$R); | ||
| 34 | &mov("eax",&wparam(1)); | ||
| 35 | &mov(&DWP(0,"ebx","",0),"edx"); | ||
| 36 | &mov("edi",&wparam(2)); | ||
| 37 | &mov("esi",&wparam(3)); | ||
| 38 | } | ||
| 39 | else | ||
| 40 | { | ||
| 41 | &mov(&DWP(4,"ebx","",0),$R); | ||
| 42 | &mov("esi",&wparam(1)); | ||
| 43 | &mov(&DWP(0,"ebx","",0),"edx"); | ||
| 44 | &mov("edi",&wparam(2)); | ||
| 45 | &mov("eax",&wparam(3)); | ||
| 46 | } | ||
| 47 | &mov(&swtmp(2), (DWC(($enc)?"1":"0"))); | ||
| 48 | &mov(&swtmp(1), "eax"); | ||
| 49 | &mov(&swtmp(0), "ebx"); | ||
| 50 | &call("DES_encrypt2"); | ||
| 51 | &mov(&swtmp(2), (DWC(($enc)?"0":"1"))); | ||
| 52 | &mov(&swtmp(1), "edi"); | ||
| 53 | &mov(&swtmp(0), "ebx"); | ||
| 54 | &call("DES_encrypt2"); | ||
| 55 | &mov(&swtmp(2), (DWC(($enc)?"1":"0"))); | ||
| 56 | &mov(&swtmp(1), "esi"); | ||
| 57 | &mov(&swtmp(0), "ebx"); | ||
| 58 | &call("DES_encrypt2"); | ||
| 59 | |||
| 60 | &stack_pop(3); | ||
| 61 | &mov($L,&DWP(0,"ebx","",0)); | ||
| 62 | &mov($R,&DWP(4,"ebx","",0)); | ||
| 63 | |||
| 64 | &comment(""); | ||
| 65 | &comment("FP"); | ||
| 66 | &FP_new($L,$R,"eax",0); | ||
| 67 | |||
| 68 | &mov(&DWP(0,"ebx","",0),"eax"); | ||
| 69 | &mov(&DWP(4,"ebx","",0),$R); | ||
| 70 | |||
| 71 | &pop("edi"); | ||
| 72 | &pop("esi"); | ||
| 73 | &pop("ebp"); | ||
| 74 | &pop("ebx"); | ||
| 75 | &ret(); | ||
| 76 | &function_end_B($name); | ||
| 77 | } | ||
| 78 | |||
| 79 | |||
diff --git a/src/lib/libcrypto/des/asm/readme b/src/lib/libcrypto/des/asm/readme deleted file mode 100644 index 1beafe253b..0000000000 --- a/src/lib/libcrypto/des/asm/readme +++ /dev/null | |||
| @@ -1,131 +0,0 @@ | |||
| 1 | First up, let me say I don't like writing in assembler. It is not portable, | ||
| 2 | dependent on the particular CPU architecture release and is generally a pig | ||
| 3 | to debug and get right. Having said that, the x86 architecture is probably | ||
| 4 | the most important for speed due to the number of boxes and since | ||
| 5 | it appears to be the worst architecture to get | ||
| 6 | good C compilers for. So due to this, I have lowered myself to do | ||
| 7 | assembler for the inner DES routines in libdes :-). | ||
| 8 | |||
| 9 | The file to implement in assembler is des_enc.c. Replace the following | ||
| 10 | 4 functions | ||
| 11 | des_encrypt1(DES_LONG data[2],des_key_schedule ks, int encrypt); | ||
| 12 | des_encrypt2(DES_LONG data[2],des_key_schedule ks, int encrypt); | ||
| 13 | des_encrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); | ||
| 14 | des_decrypt3(DES_LONG data[2],des_key_schedule ks1,ks2,ks3); | ||
| 15 | |||
| 16 | They encrypt/decrypt the 64 bits held in 'data' using | ||
| 17 | the 'ks' key schedules. The only difference between the 4 functions is that | ||
| 18 | des_encrypt2() does not perform IP() or FP() on the data (this is an | ||
| 19 | optimization for when doing triple DES) and des_encrypt3() and des_decrypt3() | ||
| 20 | perform triple des. The triple DES routines are in here because it does | ||
| 21 | make a big difference to have them located near the des_encrypt2 function | ||
| 22 | at link time. | ||
| 23 | |||
| 24 | Now as we all know, there are lots of different operating systems running on | ||
| 25 | x86 boxes, and unfortunately they normally try to make sure their assembler | ||
| 26 | formatting is not the same as other people's. | ||
| 27 | The 4 main formats I know of are | ||
| 28 | Microsoft Windows 95/Windows NT | ||
| 29 | Elf Includes Linux and FreeBSD(?). | ||
| 30 | a.out The older Linux. | ||
| 31 | Solaris Same as Elf but different comments :-(. | ||
| 32 | |||
| 33 | Now I was not overly keen to write 4 different copies of the same code, | ||
| 34 | so I wrote a few perl routines to output the correct assembler, given | ||
| 35 | a target assembler type. This code is ugly and is just a hack. | ||
| 36 | The libraries are x86unix.pl and x86ms.pl. | ||
| 37 | des586.pl, des686.pl and des-som[23].pl are the programs to actually | ||
| 38 | generate the assembler. | ||
| 39 | |||
| 40 | So to generate elf assembler | ||
| 41 | perl des-som3.pl elf >dx86-elf.s | ||
| 42 | For Windows 95/NT | ||
| 43 | perl des-som2.pl win32 >win32.asm | ||
| 44 | |||
| 45 | [ update 4 Jan 1996 ] | ||
| 46 | I have added another way to do things. | ||
| 47 | perl des-som3.pl cpp >dx86-cpp.s | ||
| 48 | generates a file that will be included by dx86unix.cpp when it is compiled. | ||
| 49 | To build for elf, a.out, solaris, bsdi etc, | ||
| 50 | cc -E -DELF asm/dx86unix.cpp | as -o asm/dx86-elf.o | ||
| 51 | cc -E -DSOL asm/dx86unix.cpp | as -o asm/dx86-sol.o | ||
| 52 | cc -E -DOUT asm/dx86unix.cpp | as -o asm/dx86-out.o | ||
| 53 | cc -E -DBSDI asm/dx86unix.cpp | as -o asm/dx86bsdi.o | ||
| 54 | This was done to cut down the number of files in the distribution. | ||
| 55 | |||
| 56 | Now the ugly part. I acquired my copy of Intel's | ||
| 57 | "Optimization's For Intel's 32-Bit Processors" and found a few interesting | ||
| 58 | things. First, the aim of the exercise is to 'extract' one byte at a time | ||
| 59 | from a word and do an array lookup. This involves getting the byte from | ||
| 60 | the 4 locations in the word and moving it to a new word and doing the lookup. | ||
| 61 | The most obvious way to do this is | ||
| 62 | xor eax, eax # clear word | ||
| 63 | movb al, cl # get low byte | ||
| 64 | xor edi, DWORD PTR 0x100+des_SP[eax] # xor in word | ||
| 65 | movb al, ch # get next byte | ||
| 66 | xor edi, DWORD PTR 0x300+des_SP[eax] # xor in word | ||
| 67 | shr ecx, 16 | ||
| 68 | which seems ok. For the pentium, this system appears to be the best. | ||
| 69 | One has to do instruction interleaving to keep both functional units | ||
| 70 | operating, but it is basically very efficient. | ||
| 71 | |||
| 72 | Now the crunch. When a full register is used after a partial write, eg. | ||
| 73 | mov al, cl | ||
| 74 | xor edi, DWORD PTR 0x100+des_SP[eax] | ||
| 75 | 386 - 1 cycle stall | ||
| 76 | 486 - 1 cycle stall | ||
| 77 | 586 - 0 cycle stall | ||
| 78 | 686 - at least 7 cycle stall (page 22 of the above mentioned document). | ||
| 79 | |||
| 80 | So the technique that produces the best results on a pentium, according to | ||
| 81 | the documentation, will produce hideous results on a pentium pro. | ||
| 82 | |||
| 83 | To get around this, des686.pl will generate code that is not as fast on | ||
| 84 | a pentium, but should be very good on a pentium pro. | ||
| 85 | mov eax, ecx # copy word | ||
| 86 | shr ecx, 8 # line up next byte | ||
| 87 | and eax, 0fch # mask byte | ||
| 88 | xor edi, DWORD PTR 0x100+des_SP[eax] # xor in array lookup | ||
| 89 | mov eax, ecx # get word | ||
| 90 | shr ecx, 8 # line up next byte | ||
| 91 | and eax, 0fch # mask byte | ||
| 92 | xor edi, DWORD PTR 0x300+des_SP[eax] # xor in array lookup | ||
| 93 | |||
| 94 | Due to the execution units in the pentium, this actually works quite well. | ||
| 95 | For a pentium pro it should be very good. This is the type of output | ||
| 96 | Visual C++ generates. | ||
| 97 | |||
| 98 | There is a third option. Instead of using | ||
| 99 | mov al, ch | ||
| 100 | which is bad on the pentium pro, one may be able to use | ||
| 101 | movzx eax, ch | ||
| 102 | which may not incur the partial write penalty. On the pentium, | ||
| 103 | this instruction takes 4 cycles so is not worth using but on the | ||
| 104 | pentium pro it appears it may be worth while. I need access to one to | ||
| 105 | experiment :-). | ||
| 106 | |||
| 107 | eric (20 Oct 1996) | ||
| 108 | |||
| 109 | 22 Nov 1996 - I have asked people to run the 2 different versions on pentium | ||
| 110 | pros and it appears that the intel documentation is wrong. The | ||
| 111 | mov al,bh is still faster on a pentium pro, so just use the des586.pl | ||
| 112 | instead of des686.pl | ||
| 113 | |||
| 114 | 3 Dec 1996 - I added des_encrypt3/des_decrypt3 because I have moved these | ||
| 115 | functions into des_enc.c because it does make a massive performance | ||
| 116 | difference on some boxes to have the functions' code located close to | ||
| 117 | the des_encrypt2() function. | ||
| 118 | |||
| 119 | 9 Jan 1997 - des-som2.pl is now the correct perl script to use for | ||
| 120 | pentiums. It contains an inner loop from | ||
| 121 | Svend Olaf Mikkelsen <svolaf@inet.uni-c.dk> which does raw ecb DES calls at | ||
| 122 | 273,000 per second. He had a previous version at 250,000 and the best | ||
| 123 | I was able to get was 203,000. The content has not changed, this is all | ||
| 124 | due to instruction sequencing (and actual instruction choice) which is able | ||
| 125 | to keep both functional units of the pentium going. | ||
| 126 | We may have lost the ugly register usage restrictions when x86 went 32 bit | ||
| 127 | but for the pentium it has been replaced by evil instruction ordering tricks. | ||
| 128 | |||
| 129 | 13 Jan 1997 - des-som3.pl, more optimizations from Svend Olaf. | ||
| 130 | raw DES at 281,000 per second on a pentium 100. | ||
| 131 | |||
