diff options
Diffstat (limited to 'src/lib/libcrypto/cast/asm')
| -rw-r--r-- | src/lib/libcrypto/cast/asm/cast-586.pl | 176 | ||||
| -rw-r--r-- | src/lib/libcrypto/cast/asm/readme | 7 |
2 files changed, 183 insertions, 0 deletions
diff --git a/src/lib/libcrypto/cast/asm/cast-586.pl b/src/lib/libcrypto/cast/asm/cast-586.pl new file mode 100644 index 0000000000..0ed55d1905 --- /dev/null +++ b/src/lib/libcrypto/cast/asm/cast-586.pl | |||
| @@ -0,0 +1,176 @@ | |||
#!/usr/local/bin/perl

# Generator script: drives the perlasm helpers loaded below to emit the
# routines CAST_encrypt and CAST_decrypt and, unless $main::openbsd is set,
# CAST_cbc_encrypt.

# define for pentium pro friendly version
$ppro=1;

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";
require "cbc.pl";

# The first command-line argument is handed to asm_init unchanged; the third
# asm_init argument is true only when the last command-line argument is "386".
&asm_init($ARGV[0],"cast-586.pl",$ARGV[$#ARGV] eq "386");

# NOTE(review): $CAST_ROUNDS is not referenced anywhere else in this script.
$CAST_ROUNDS=16;

# Register assignments shared by CAST_encrypt and E_CAST.
$L="edi";
$R="esi";
$K="ebp";
$tmp1="ecx";
$tmp2="ebx";
$tmp3="eax";
$tmp4="edx";

# Symbol names of the four S-box tables referenced by E_CAST.
$S1="CAST_S_table0";
$S2="CAST_S_table1";
$S3="CAST_S_table2";
$S4="CAST_S_table3";

# Operation-name triples handed to E_CAST as ($OP1,$OP2,$OP3).
@F1=("add","xor","sub");
@F2=("xor","sub","add");
@F3=("sub","add","xor");

&CAST_encrypt("CAST_encrypt",1);
&CAST_encrypt("CAST_decrypt",0);

# "::" replaces the old "'" package separator, which is deprecated in modern
# perls; both spellings name the same variable.  The meaning of the numeric
# arguments is defined by cbc.pl, which is not part of this file.
&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1) unless $main::openbsd;

&asm_finish();
| 34 | |||
# CAST_encrypt($name, $enc)
#
# Emits one complete routine named $name through the perlasm helpers.
#   $name  name of the generated routine
#   $enc   true: rounds are emitted 0..15 (encrypt order);
#          false: rounds are emitted 15..0 (decrypt order)
#
# The word at offset 128 from the key register is treated as a "short key"
# flag: when it is non-zero the generated code bypasses rounds 12..15.
#
# All helper calls keep the leading "&" because several emitter names
# (push, pop, or, xor, ...) are also Perl builtins or operators.
sub CAST_encrypt {
    # local (not my) is kept deliberately so the original dynamic scoping of
    # these package variables is unchanged.
    local($name,$enc)=@_;

    local($win_ex)=<<"EOF";
EXTERN _CAST_S_table0:DWORD
EXTERN _CAST_S_table1:DWORD
EXTERN _CAST_S_table2:DWORD
EXTERN _CAST_S_table3:DWORD
EOF
    &main::external_label(qw(
        CAST_S_table0
        CAST_S_table1
        CAST_S_table2
        CAST_S_table3
    ));

    &function_begin_B($name,$win_ex);

    &comment("");

    # Prologue: the two parameters are fetched between the register saves.
    &push("ebp");
    &push("ebx");
    &mov($tmp2,&wparam(0));
    &mov($K,&wparam(1));
    &push("esi");
    &push("edi");

    &comment("Load the 2 words");
    &mov($L,&DWP(0,$tmp2,"",0));
    &mov($R,&DWP(4,$tmp2,"",0));

    &comment('Get short key flag');
    &mov($tmp3,&DWP(128,$K,"",0));
    if ($enc) {
        # Encrypt: park the flag on the stack until just before round 12.
        &push($tmp3);
    }
    else {
        # Decrypt: rounds 15..12 come first, so branch past them right away.
        &or($tmp3,$tmp3);
        &jnz(&label('cast_dec_skip'));
    }

    &xor($tmp3, $tmp3);

    # encrypting part

    # Round r uses operation triple number r % 3; the two data registers
    # trade places on every round.
    my @ops_for  = (\@F1, \@F2, \@F3);
    my @schedule = $enc ? (0 .. 15) : reverse(0 .. 15);
    my $lr_parity = $enc ? 0 : 1;    # parity of the rounds that take ($L,$R)

    for my $round (@schedule) {
        if ($enc && $round == 12) {
            &comment('test short key flag');
            &pop($tmp4);
            &or($tmp4,$tmp4);
            &jnz(&label('cast_enc_done'));
        }
        &set_label('cast_dec_skip') if !$enc && $round == 11;

        my @halves = ($round % 2 == $lr_parity) ? ($L,$R) : ($R,$L);

        # $S is not assigned anywhere in the visible script; it is still
        # passed so that E_CAST receives the same argument positions.
        &E_CAST($round,$S,@halves,$K,@{ $ops_for[$round % 3] },
                $tmp1,$tmp2,$tmp3,$tmp4);
    }

    &set_label('cast_enc_done') if $enc;
    # Why the nop? - Ben 17/1/99
    &nop();

    # Write the halves back through the first parameter: $L goes to offset 4
    # and $R to offset 0.
    &mov($tmp3,&wparam(0));
    &mov(&DWP(4,$tmp3,"",0),$L);
    &mov(&DWP(0,$tmp3,"",0),$R);
    &function_end($name);
}
| 128 | |||
# E_CAST($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4)
#
# Emits the instruction sequence for one round.
#   $i            round number; the two operands loaded at the top of the
#                 round use displacements $i*8 and $i*8+4 with register $K
#   $S            accepted for positional compatibility; never read here
#   $L, $R        data registers: $R is combined into the round input and
#                 $L is xor-ed with the round result
#   $OP1..$OP3    names of emitter subs (e.g. "add", "xor", "sub"), invoked
#                 through symbolic references, so this code relies on
#                 "strict refs" not being enabled
#   $tmp1..$tmp4  scratch registers
#
# Reads the globals $ppro and $S1..$S4.  The leading "&" on every call is
# required because emitter names such as xor/and clash with Perl operators.
sub E_CAST {
    # local (not my) is kept deliberately to preserve the original dynamic
    # scoping of these package variables.
    local($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4)=@_;
    # Ri needs to have 16 pre added.

    &comment("round $i");

    my $key_disp = $i*8;
    &mov($tmp4, &DWP($key_disp,$K,"",1));
    &mov($tmp1, &DWP($key_disp+4,$K,"",1));
    &$OP1($tmp4, $R);
    &rotl($tmp4, &LB($tmp1));

    # Split $tmp4 into four byte indices labelled A..D.  The $ppro variant
    # additionally clears $tmp1 and $tmp3 before their low bytes are written.
    &mov($tmp2, $tmp4);                 # B
    &xor($tmp1, $tmp1) if $ppro;
    &movb(&LB($tmp1), &HB($tmp4));      # A # (non-ppro: BAD BAD BAD)
    if ($ppro) {
        &and($tmp2, 0xff);
        &shr($tmp4, 16);
        &xor($tmp3, $tmp3);
    }
    else {
        &shr($tmp4, 16);
        &and($tmp2, 0xff);
    }
    &movb(&LB($tmp3), &HB($tmp4));      # C # BAD BAD BAD
    &and($tmp4, 0xff);                  # D

    # Table lookups: start from the $S1 entry, then fold in the $S2, $S3 and
    # $S4 entries with $OP2, $OP3 and $OP1 respectively.
    &mov($tmp1, &DWP($S1,"",$tmp1,4));
    my @folds = (
        [ $OP2, $S2, $tmp2 ],
        [ $OP3, $S3, $tmp3 ],
        [ $OP1, $S4, $tmp4 ],
    );
    for my $fold (@folds) {
        my ($op, $table, $index) = @$fold;
        &mov($tmp2, &DWP($table,"",$index,4));
        &$op($tmp1, $tmp2);
    }
    # XXX

    &xor($L, $tmp1);
    # XXX
}
| 176 | |||
diff --git a/src/lib/libcrypto/cast/asm/readme b/src/lib/libcrypto/cast/asm/readme new file mode 100644 index 0000000000..fbcd76289e --- /dev/null +++ b/src/lib/libcrypto/cast/asm/readme | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | There is a $ppro flag in cast-586.pl which turns on/off | ||
| 2 | generation of Pentium Pro/II friendly code. | ||
| 3 | |||
| 4 | This flag makes the inner loop one cycle longer, but generates | ||
| 5 | code that runs 30% faster on the Pentium Pro/II, while being only 7% slower | ||
| 6 | on the Pentium. By default, this flag is on. | ||
| 7 | |||
