diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib/libcrypto/aes/asm/aes-s390x.pl | 1071 |
1 files changed, 993 insertions, 78 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl index 7e01889298..445a1e6762 100644 --- a/src/lib/libcrypto/aes/asm/aes-s390x.pl +++ b/src/lib/libcrypto/aes/asm/aes-s390x.pl | |||
@@ -44,12 +44,57 @@ | |||
44 | # Unlike previous version hardware support detection takes place only | 44 | # Unlike previous version hardware support detection takes place only |
45 | # at the moment of key schedule setup, which is denoted in key->rounds. | 45 | # at the moment of key schedule setup, which is denoted in key->rounds. |
46 | # This is done, because deferred key setup can't be made MT-safe, not | 46 | # This is done, because deferred key setup can't be made MT-safe, not |
47 | # for key lengthes longer than 128 bits. | 47 | # for keys longer than 128 bits. |
48 | # | 48 | # |
49 | # Add AES_cbc_encrypt, which gives incredible performance improvement, | 49 | # Add AES_cbc_encrypt, which gives incredible performance improvement, |
50 | # it was measured to be ~6.6x. It's less than previously mentioned 8x, | 50 | # it was measured to be ~6.6x. It's less than previously mentioned 8x, |
51 | # because software implementation was optimized. | 51 | # because software implementation was optimized. |
52 | 52 | ||
53 | # May 2010. | ||
54 | # | ||
55 | # Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x | ||
56 | # performance improvement over "generic" counter mode routine relying | ||
57 | # on single-block, also hardware-assisted, AES_encrypt. "Up to" refers | ||
58 | # to the fact that exact throughput value depends on current stack | ||
59 | # frame alignment within 4KB page. In worst case you get ~75% of the | ||
60 | # maximum, but *on average* it would be as much as ~98%. Meaning that | ||
61 | # worst case is unlike, it's like hitting ravine on plateau. | ||
62 | |||
63 | # November 2010. | ||
64 | # | ||
65 | # Adapt for -m31 build. If kernel supports what's called "highgprs" | ||
66 | # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit | ||
67 | # instructions and achieve "64-bit" performance even in 31-bit legacy | ||
68 | # application context. The feature is not specific to any particular | ||
69 | # processor, as long as it's "z-CPU". Latter implies that the code | ||
70 | # remains z/Architecture specific. On z990 it was measured to perform | ||
71 | # 2x better than code generated by gcc 4.3. | ||
72 | |||
73 | # December 2010. | ||
74 | # | ||
75 | # Add support for z196 "cipher message with counter" instruction. | ||
76 | # Note however that it's disengaged, because it was measured to | ||
77 | # perform ~12% worse than vanilla km-based code... | ||
78 | |||
79 | # February 2011. | ||
80 | # | ||
81 | # Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes | ||
82 | # instructions, which deliver ~70% improvement at 8KB block size over | ||
83 | # vanilla km-based code, 37% - at most like 512-bytes block size. | ||
84 | |||
85 | $flavour = shift; | ||
86 | |||
87 | if ($flavour =~ /3[12]/) { | ||
88 | $SIZE_T=4; | ||
89 | $g=""; | ||
90 | } else { | ||
91 | $SIZE_T=8; | ||
92 | $g="g"; | ||
93 | } | ||
94 | |||
95 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | ||
96 | open STDOUT,">$output"; | ||
97 | |||
53 | $softonly=0; # allow hardware support | 98 | $softonly=0; # allow hardware support |
54 | 99 | ||
55 | $t0="%r0"; $mask="%r0"; | 100 | $t0="%r0"; $mask="%r0"; |
@@ -69,6 +114,8 @@ $rounds="%r13"; | |||
69 | $ra="%r14"; | 114 | $ra="%r14"; |
70 | $sp="%r15"; | 115 | $sp="%r15"; |
71 | 116 | ||
117 | $stdframe=16*$SIZE_T+4*8; | ||
118 | |||
72 | sub _data_word() | 119 | sub _data_word() |
73 | { my $i; | 120 | { my $i; |
74 | while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } | 121 | while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } |
@@ -210,7 +257,7 @@ $code.=<<___ if (!$softonly); | |||
210 | .Lesoft: | 257 | .Lesoft: |
211 | ___ | 258 | ___ |
212 | $code.=<<___; | 259 | $code.=<<___; |
213 | stmg %r3,$ra,24($sp) | 260 | stm${g} %r3,$ra,3*$SIZE_T($sp) |
214 | 261 | ||
215 | llgf $s0,0($inp) | 262 | llgf $s0,0($inp) |
216 | llgf $s1,4($inp) | 263 | llgf $s1,4($inp) |
@@ -220,20 +267,20 @@ $code.=<<___; | |||
220 | larl $tbl,AES_Te | 267 | larl $tbl,AES_Te |
221 | bras $ra,_s390x_AES_encrypt | 268 | bras $ra,_s390x_AES_encrypt |
222 | 269 | ||
223 | lg $out,24($sp) | 270 | l${g} $out,3*$SIZE_T($sp) |
224 | st $s0,0($out) | 271 | st $s0,0($out) |
225 | st $s1,4($out) | 272 | st $s1,4($out) |
226 | st $s2,8($out) | 273 | st $s2,8($out) |
227 | st $s3,12($out) | 274 | st $s3,12($out) |
228 | 275 | ||
229 | lmg %r6,$ra,48($sp) | 276 | lm${g} %r6,$ra,6*$SIZE_T($sp) |
230 | br $ra | 277 | br $ra |
231 | .size AES_encrypt,.-AES_encrypt | 278 | .size AES_encrypt,.-AES_encrypt |
232 | 279 | ||
233 | .type _s390x_AES_encrypt,\@function | 280 | .type _s390x_AES_encrypt,\@function |
234 | .align 16 | 281 | .align 16 |
235 | _s390x_AES_encrypt: | 282 | _s390x_AES_encrypt: |
236 | stg $ra,152($sp) | 283 | st${g} $ra,15*$SIZE_T($sp) |
237 | x $s0,0($key) | 284 | x $s0,0($key) |
238 | x $s1,4($key) | 285 | x $s1,4($key) |
239 | x $s2,8($key) | 286 | x $s2,8($key) |
@@ -397,7 +444,7 @@ _s390x_AES_encrypt: | |||
397 | or $s2,$i3 | 444 | or $s2,$i3 |
398 | or $s3,$t3 | 445 | or $s3,$t3 |
399 | 446 | ||
400 | lg $ra,152($sp) | 447 | l${g} $ra,15*$SIZE_T($sp) |
401 | xr $s0,$t0 | 448 | xr $s0,$t0 |
402 | xr $s1,$t2 | 449 | xr $s1,$t2 |
403 | x $s2,24($key) | 450 | x $s2,24($key) |
@@ -536,7 +583,7 @@ $code.=<<___ if (!$softonly); | |||
536 | .Ldsoft: | 583 | .Ldsoft: |
537 | ___ | 584 | ___ |
538 | $code.=<<___; | 585 | $code.=<<___; |
539 | stmg %r3,$ra,24($sp) | 586 | stm${g} %r3,$ra,3*$SIZE_T($sp) |
540 | 587 | ||
541 | llgf $s0,0($inp) | 588 | llgf $s0,0($inp) |
542 | llgf $s1,4($inp) | 589 | llgf $s1,4($inp) |
@@ -546,20 +593,20 @@ $code.=<<___; | |||
546 | larl $tbl,AES_Td | 593 | larl $tbl,AES_Td |
547 | bras $ra,_s390x_AES_decrypt | 594 | bras $ra,_s390x_AES_decrypt |
548 | 595 | ||
549 | lg $out,24($sp) | 596 | l${g} $out,3*$SIZE_T($sp) |
550 | st $s0,0($out) | 597 | st $s0,0($out) |
551 | st $s1,4($out) | 598 | st $s1,4($out) |
552 | st $s2,8($out) | 599 | st $s2,8($out) |
553 | st $s3,12($out) | 600 | st $s3,12($out) |
554 | 601 | ||
555 | lmg %r6,$ra,48($sp) | 602 | lm${g} %r6,$ra,6*$SIZE_T($sp) |
556 | br $ra | 603 | br $ra |
557 | .size AES_decrypt,.-AES_decrypt | 604 | .size AES_decrypt,.-AES_decrypt |
558 | 605 | ||
559 | .type _s390x_AES_decrypt,\@function | 606 | .type _s390x_AES_decrypt,\@function |
560 | .align 16 | 607 | .align 16 |
561 | _s390x_AES_decrypt: | 608 | _s390x_AES_decrypt: |
562 | stg $ra,152($sp) | 609 | st${g} $ra,15*$SIZE_T($sp) |
563 | x $s0,0($key) | 610 | x $s0,0($key) |
564 | x $s1,4($key) | 611 | x $s1,4($key) |
565 | x $s2,8($key) | 612 | x $s2,8($key) |
@@ -703,7 +750,7 @@ _s390x_AES_decrypt: | |||
703 | nr $i1,$mask | 750 | nr $i1,$mask |
704 | nr $i2,$mask | 751 | nr $i2,$mask |
705 | 752 | ||
706 | lg $ra,152($sp) | 753 | l${g} $ra,15*$SIZE_T($sp) |
707 | or $s1,$t1 | 754 | or $s1,$t1 |
708 | l $t0,16($key) | 755 | l $t0,16($key) |
709 | l $t1,20($key) | 756 | l $t1,20($key) |
@@ -732,14 +779,15 @@ ___ | |||
732 | $code.=<<___; | 779 | $code.=<<___; |
733 | # void AES_set_encrypt_key(const unsigned char *in, int bits, | 780 | # void AES_set_encrypt_key(const unsigned char *in, int bits, |
734 | # AES_KEY *key) { | 781 | # AES_KEY *key) { |
735 | .globl AES_set_encrypt_key | 782 | .globl private_AES_set_encrypt_key |
736 | .type AES_set_encrypt_key,\@function | 783 | .type private_AES_set_encrypt_key,\@function |
737 | .align 16 | 784 | .align 16 |
738 | AES_set_encrypt_key: | 785 | private_AES_set_encrypt_key: |
786 | _s390x_AES_set_encrypt_key: | ||
739 | lghi $t0,0 | 787 | lghi $t0,0 |
740 | clgr $inp,$t0 | 788 | cl${g}r $inp,$t0 |
741 | je .Lminus1 | 789 | je .Lminus1 |
742 | clgr $key,$t0 | 790 | cl${g}r $key,$t0 |
743 | je .Lminus1 | 791 | je .Lminus1 |
744 | 792 | ||
745 | lghi $t0,128 | 793 | lghi $t0,128 |
@@ -789,7 +837,8 @@ $code.=<<___ if (!$softonly); | |||
789 | je 1f | 837 | je 1f |
790 | lg %r1,24($inp) | 838 | lg %r1,24($inp) |
791 | stg %r1,24($key) | 839 | stg %r1,24($key) |
792 | 1: st $bits,236($key) # save bits | 840 | 1: st $bits,236($key) # save bits [for debugging purposes] |
841 | lgr $t0,%r5 | ||
793 | st %r5,240($key) # save km code | 842 | st %r5,240($key) # save km code |
794 | lghi %r2,0 | 843 | lghi %r2,0 |
795 | br %r14 | 844 | br %r14 |
@@ -797,7 +846,7 @@ ___ | |||
797 | $code.=<<___; | 846 | $code.=<<___; |
798 | .align 16 | 847 | .align 16 |
799 | .Lekey_internal: | 848 | .Lekey_internal: |
800 | stmg %r6,%r13,48($sp) # all non-volatile regs | 849 | stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key |
801 | 850 | ||
802 | larl $tbl,AES_Te+2048 | 851 | larl $tbl,AES_Te+2048 |
803 | 852 | ||
@@ -857,8 +906,9 @@ $code.=<<___; | |||
857 | la $key,16($key) # key+=4 | 906 | la $key,16($key) # key+=4 |
858 | la $t3,4($t3) # i++ | 907 | la $t3,4($t3) # i++ |
859 | brct $rounds,.L128_loop | 908 | brct $rounds,.L128_loop |
909 | lghi $t0,10 | ||
860 | lghi %r2,0 | 910 | lghi %r2,0 |
861 | lmg %r6,%r13,48($sp) | 911 | lm${g} %r4,%r13,4*$SIZE_T($sp) |
862 | br $ra | 912 | br $ra |
863 | 913 | ||
864 | .align 16 | 914 | .align 16 |
@@ -905,8 +955,9 @@ $code.=<<___; | |||
905 | st $s2,32($key) | 955 | st $s2,32($key) |
906 | st $s3,36($key) | 956 | st $s3,36($key) |
907 | brct $rounds,.L192_continue | 957 | brct $rounds,.L192_continue |
958 | lghi $t0,12 | ||
908 | lghi %r2,0 | 959 | lghi %r2,0 |
909 | lmg %r6,%r13,48($sp) | 960 | lm${g} %r4,%r13,4*$SIZE_T($sp) |
910 | br $ra | 961 | br $ra |
911 | 962 | ||
912 | .align 16 | 963 | .align 16 |
@@ -967,8 +1018,9 @@ $code.=<<___; | |||
967 | st $s2,40($key) | 1018 | st $s2,40($key) |
968 | st $s3,44($key) | 1019 | st $s3,44($key) |
969 | brct $rounds,.L256_continue | 1020 | brct $rounds,.L256_continue |
1021 | lghi $t0,14 | ||
970 | lghi %r2,0 | 1022 | lghi %r2,0 |
971 | lmg %r6,%r13,48($sp) | 1023 | lm${g} %r4,%r13,4*$SIZE_T($sp) |
972 | br $ra | 1024 | br $ra |
973 | 1025 | ||
974 | .align 16 | 1026 | .align 16 |
@@ -1011,42 +1063,34 @@ $code.=<<___; | |||
1011 | .Lminus1: | 1063 | .Lminus1: |
1012 | lghi %r2,-1 | 1064 | lghi %r2,-1 |
1013 | br $ra | 1065 | br $ra |
1014 | .size AES_set_encrypt_key,.-AES_set_encrypt_key | 1066 | .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key |
1015 | 1067 | ||
1016 | # void AES_set_decrypt_key(const unsigned char *in, int bits, | 1068 | # void AES_set_decrypt_key(const unsigned char *in, int bits, |
1017 | # AES_KEY *key) { | 1069 | # AES_KEY *key) { |
1018 | .globl AES_set_decrypt_key | 1070 | .globl private_AES_set_decrypt_key |
1019 | .type AES_set_decrypt_key,\@function | 1071 | .type private_AES_set_decrypt_key,\@function |
1020 | .align 16 | 1072 | .align 16 |
1021 | AES_set_decrypt_key: | 1073 | private_AES_set_decrypt_key: |
1022 | stg $key,32($sp) # I rely on AES_set_encrypt_key to | 1074 | #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to |
1023 | stg $ra,112($sp) # save non-volatile registers! | 1075 | st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key! |
1024 | bras $ra,AES_set_encrypt_key | 1076 | bras $ra,_s390x_AES_set_encrypt_key |
1025 | lg $key,32($sp) | 1077 | #l${g} $key,4*$SIZE_T($sp) |
1026 | lg $ra,112($sp) | 1078 | l${g} $ra,14*$SIZE_T($sp) |
1027 | ltgr %r2,%r2 | 1079 | ltgr %r2,%r2 |
1028 | bnzr $ra | 1080 | bnzr $ra |
1029 | ___ | 1081 | ___ |
1030 | $code.=<<___ if (!$softonly); | 1082 | $code.=<<___ if (!$softonly); |
1031 | l $t0,240($key) | 1083 | #l $t0,240($key) |
1032 | lhi $t1,16 | 1084 | lhi $t1,16 |
1033 | cr $t0,$t1 | 1085 | cr $t0,$t1 |
1034 | jl .Lgo | 1086 | jl .Lgo |
1035 | oill $t0,0x80 # set "decrypt" bit | 1087 | oill $t0,0x80 # set "decrypt" bit |
1036 | st $t0,240($key) | 1088 | st $t0,240($key) |
1037 | br $ra | 1089 | br $ra |
1038 | |||
1039 | .align 16 | ||
1040 | .Ldkey_internal: | ||
1041 | stg $key,32($sp) | ||
1042 | stg $ra,40($sp) | ||
1043 | bras $ra,.Lekey_internal | ||
1044 | lg $key,32($sp) | ||
1045 | lg $ra,40($sp) | ||
1046 | ___ | 1090 | ___ |
1047 | $code.=<<___; | 1091 | $code.=<<___; |
1048 | 1092 | .align 16 | |
1049 | .Lgo: llgf $rounds,240($key) | 1093 | .Lgo: lgr $rounds,$t0 #llgf $rounds,240($key) |
1050 | la $i1,0($key) | 1094 | la $i1,0($key) |
1051 | sllg $i2,$rounds,4 | 1095 | sllg $i2,$rounds,4 |
1052 | la $i2,0($i2,$key) | 1096 | la $i2,0($i2,$key) |
@@ -1123,13 +1167,14 @@ $code.=<<___; | |||
1123 | la $key,4($key) | 1167 | la $key,4($key) |
1124 | brct $rounds,.Lmix | 1168 | brct $rounds,.Lmix |
1125 | 1169 | ||
1126 | lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key! | 1170 | lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key! |
1127 | lghi %r2,0 | 1171 | lghi %r2,0 |
1128 | br $ra | 1172 | br $ra |
1129 | .size AES_set_decrypt_key,.-AES_set_decrypt_key | 1173 | .size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key |
1130 | ___ | 1174 | ___ |
1131 | 1175 | ||
1132 | #void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, | 1176 | ######################################################################## |
1177 | # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, | ||
1133 | # size_t length, const AES_KEY *key, | 1178 | # size_t length, const AES_KEY *key, |
1134 | # unsigned char *ivec, const int enc) | 1179 | # unsigned char *ivec, const int enc) |
1135 | { | 1180 | { |
@@ -1163,7 +1208,7 @@ $code.=<<___ if (!$softonly); | |||
1163 | l %r0,240($key) # load kmc code | 1208 | l %r0,240($key) # load kmc code |
1164 | lghi $key,15 # res=len%16, len-=res; | 1209 | lghi $key,15 # res=len%16, len-=res; |
1165 | ngr $key,$len | 1210 | ngr $key,$len |
1166 | slgr $len,$key | 1211 | sl${g}r $len,$key |
1167 | la %r1,16($sp) # parameter block - ivec || key | 1212 | la %r1,16($sp) # parameter block - ivec || key |
1168 | jz .Lkmc_truncated | 1213 | jz .Lkmc_truncated |
1169 | .long 0xb92f0042 # kmc %r4,%r2 | 1214 | .long 0xb92f0042 # kmc %r4,%r2 |
@@ -1181,34 +1226,34 @@ $code.=<<___ if (!$softonly); | |||
1181 | tmll %r0,0x80 | 1226 | tmll %r0,0x80 |
1182 | jnz .Lkmc_truncated_dec | 1227 | jnz .Lkmc_truncated_dec |
1183 | lghi %r1,0 | 1228 | lghi %r1,0 |
1184 | stg %r1,128($sp) | 1229 | stg %r1,16*$SIZE_T($sp) |
1185 | stg %r1,136($sp) | 1230 | stg %r1,16*$SIZE_T+8($sp) |
1186 | bras %r1,1f | 1231 | bras %r1,1f |
1187 | mvc 128(1,$sp),0($inp) | 1232 | mvc 16*$SIZE_T(1,$sp),0($inp) |
1188 | 1: ex $key,0(%r1) | 1233 | 1: ex $key,0(%r1) |
1189 | la %r1,16($sp) # restore parameter block | 1234 | la %r1,16($sp) # restore parameter block |
1190 | la $inp,128($sp) | 1235 | la $inp,16*$SIZE_T($sp) |
1191 | lghi $len,16 | 1236 | lghi $len,16 |
1192 | .long 0xb92f0042 # kmc %r4,%r2 | 1237 | .long 0xb92f0042 # kmc %r4,%r2 |
1193 | j .Lkmc_done | 1238 | j .Lkmc_done |
1194 | .align 16 | 1239 | .align 16 |
1195 | .Lkmc_truncated_dec: | 1240 | .Lkmc_truncated_dec: |
1196 | stg $out,64($sp) | 1241 | st${g} $out,4*$SIZE_T($sp) |
1197 | la $out,128($sp) | 1242 | la $out,16*$SIZE_T($sp) |
1198 | lghi $len,16 | 1243 | lghi $len,16 |
1199 | .long 0xb92f0042 # kmc %r4,%r2 | 1244 | .long 0xb92f0042 # kmc %r4,%r2 |
1200 | lg $out,64($sp) | 1245 | l${g} $out,4*$SIZE_T($sp) |
1201 | bras %r1,2f | 1246 | bras %r1,2f |
1202 | mvc 0(1,$out),128($sp) | 1247 | mvc 0(1,$out),16*$SIZE_T($sp) |
1203 | 2: ex $key,0(%r1) | 1248 | 2: ex $key,0(%r1) |
1204 | j .Lkmc_done | 1249 | j .Lkmc_done |
1205 | .align 16 | 1250 | .align 16 |
1206 | .Lcbc_software: | 1251 | .Lcbc_software: |
1207 | ___ | 1252 | ___ |
1208 | $code.=<<___; | 1253 | $code.=<<___; |
1209 | stmg $key,$ra,40($sp) | 1254 | stm${g} $key,$ra,5*$SIZE_T($sp) |
1210 | lhi %r0,0 | 1255 | lhi %r0,0 |
1211 | cl %r0,164($sp) | 1256 | cl %r0,`$stdframe+$SIZE_T-4`($sp) |
1212 | je .Lcbc_decrypt | 1257 | je .Lcbc_decrypt |
1213 | 1258 | ||
1214 | larl $tbl,AES_Te | 1259 | larl $tbl,AES_Te |
@@ -1219,10 +1264,10 @@ $code.=<<___; | |||
1219 | llgf $s3,12($ivp) | 1264 | llgf $s3,12($ivp) |
1220 | 1265 | ||
1221 | lghi $t0,16 | 1266 | lghi $t0,16 |
1222 | slgr $len,$t0 | 1267 | sl${g}r $len,$t0 |
1223 | brc 4,.Lcbc_enc_tail # if borrow | 1268 | brc 4,.Lcbc_enc_tail # if borrow |
1224 | .Lcbc_enc_loop: | 1269 | .Lcbc_enc_loop: |
1225 | stmg $inp,$out,16($sp) | 1270 | stm${g} $inp,$out,2*$SIZE_T($sp) |
1226 | x $s0,0($inp) | 1271 | x $s0,0($inp) |
1227 | x $s1,4($inp) | 1272 | x $s1,4($inp) |
1228 | x $s2,8($inp) | 1273 | x $s2,8($inp) |
@@ -1231,7 +1276,7 @@ $code.=<<___; | |||
1231 | 1276 | ||
1232 | bras $ra,_s390x_AES_encrypt | 1277 | bras $ra,_s390x_AES_encrypt |
1233 | 1278 | ||
1234 | lmg $inp,$key,16($sp) | 1279 | lm${g} $inp,$key,2*$SIZE_T($sp) |
1235 | st $s0,0($out) | 1280 | st $s0,0($out) |
1236 | st $s1,4($out) | 1281 | st $s1,4($out) |
1237 | st $s2,8($out) | 1282 | st $s2,8($out) |
@@ -1240,33 +1285,33 @@ $code.=<<___; | |||
1240 | la $inp,16($inp) | 1285 | la $inp,16($inp) |
1241 | la $out,16($out) | 1286 | la $out,16($out) |
1242 | lghi $t0,16 | 1287 | lghi $t0,16 |
1243 | ltgr $len,$len | 1288 | lt${g}r $len,$len |
1244 | jz .Lcbc_enc_done | 1289 | jz .Lcbc_enc_done |
1245 | slgr $len,$t0 | 1290 | sl${g}r $len,$t0 |
1246 | brc 4,.Lcbc_enc_tail # if borrow | 1291 | brc 4,.Lcbc_enc_tail # if borrow |
1247 | j .Lcbc_enc_loop | 1292 | j .Lcbc_enc_loop |
1248 | .align 16 | 1293 | .align 16 |
1249 | .Lcbc_enc_done: | 1294 | .Lcbc_enc_done: |
1250 | lg $ivp,48($sp) | 1295 | l${g} $ivp,6*$SIZE_T($sp) |
1251 | st $s0,0($ivp) | 1296 | st $s0,0($ivp) |
1252 | st $s1,4($ivp) | 1297 | st $s1,4($ivp) |
1253 | st $s2,8($ivp) | 1298 | st $s2,8($ivp) |
1254 | st $s3,12($ivp) | 1299 | st $s3,12($ivp) |
1255 | 1300 | ||
1256 | lmg %r7,$ra,56($sp) | 1301 | lm${g} %r7,$ra,7*$SIZE_T($sp) |
1257 | br $ra | 1302 | br $ra |
1258 | 1303 | ||
1259 | .align 16 | 1304 | .align 16 |
1260 | .Lcbc_enc_tail: | 1305 | .Lcbc_enc_tail: |
1261 | aghi $len,15 | 1306 | aghi $len,15 |
1262 | lghi $t0,0 | 1307 | lghi $t0,0 |
1263 | stg $t0,128($sp) | 1308 | stg $t0,16*$SIZE_T($sp) |
1264 | stg $t0,136($sp) | 1309 | stg $t0,16*$SIZE_T+8($sp) |
1265 | bras $t1,3f | 1310 | bras $t1,3f |
1266 | mvc 128(1,$sp),0($inp) | 1311 | mvc 16*$SIZE_T(1,$sp),0($inp) |
1267 | 3: ex $len,0($t1) | 1312 | 3: ex $len,0($t1) |
1268 | lghi $len,0 | 1313 | lghi $len,0 |
1269 | la $inp,128($sp) | 1314 | la $inp,16*$SIZE_T($sp) |
1270 | j .Lcbc_enc_loop | 1315 | j .Lcbc_enc_loop |
1271 | 1316 | ||
1272 | .align 16 | 1317 | .align 16 |
@@ -1275,10 +1320,10 @@ $code.=<<___; | |||
1275 | 1320 | ||
1276 | lg $t0,0($ivp) | 1321 | lg $t0,0($ivp) |
1277 | lg $t1,8($ivp) | 1322 | lg $t1,8($ivp) |
1278 | stmg $t0,$t1,128($sp) | 1323 | stmg $t0,$t1,16*$SIZE_T($sp) |
1279 | 1324 | ||
1280 | .Lcbc_dec_loop: | 1325 | .Lcbc_dec_loop: |
1281 | stmg $inp,$out,16($sp) | 1326 | stm${g} $inp,$out,2*$SIZE_T($sp) |
1282 | llgf $s0,0($inp) | 1327 | llgf $s0,0($inp) |
1283 | llgf $s1,4($inp) | 1328 | llgf $s1,4($inp) |
1284 | llgf $s2,8($inp) | 1329 | llgf $s2,8($inp) |
@@ -1287,7 +1332,7 @@ $code.=<<___; | |||
1287 | 1332 | ||
1288 | bras $ra,_s390x_AES_decrypt | 1333 | bras $ra,_s390x_AES_decrypt |
1289 | 1334 | ||
1290 | lmg $inp,$key,16($sp) | 1335 | lm${g} $inp,$key,2*$SIZE_T($sp) |
1291 | sllg $s0,$s0,32 | 1336 | sllg $s0,$s0,32 |
1292 | sllg $s2,$s2,32 | 1337 | sllg $s2,$s2,32 |
1293 | lr $s0,$s1 | 1338 | lr $s0,$s1 |
@@ -1295,15 +1340,15 @@ $code.=<<___; | |||
1295 | 1340 | ||
1296 | lg $t0,0($inp) | 1341 | lg $t0,0($inp) |
1297 | lg $t1,8($inp) | 1342 | lg $t1,8($inp) |
1298 | xg $s0,128($sp) | 1343 | xg $s0,16*$SIZE_T($sp) |
1299 | xg $s2,136($sp) | 1344 | xg $s2,16*$SIZE_T+8($sp) |
1300 | lghi $s1,16 | 1345 | lghi $s1,16 |
1301 | slgr $len,$s1 | 1346 | sl${g}r $len,$s1 |
1302 | brc 4,.Lcbc_dec_tail # if borrow | 1347 | brc 4,.Lcbc_dec_tail # if borrow |
1303 | brc 2,.Lcbc_dec_done # if zero | 1348 | brc 2,.Lcbc_dec_done # if zero |
1304 | stg $s0,0($out) | 1349 | stg $s0,0($out) |
1305 | stg $s2,8($out) | 1350 | stg $s2,8($out) |
1306 | stmg $t0,$t1,128($sp) | 1351 | stmg $t0,$t1,16*$SIZE_T($sp) |
1307 | 1352 | ||
1308 | la $inp,16($inp) | 1353 | la $inp,16($inp) |
1309 | la $out,16($out) | 1354 | la $out,16($out) |
@@ -1313,7 +1358,7 @@ $code.=<<___; | |||
1313 | stg $s0,0($out) | 1358 | stg $s0,0($out) |
1314 | stg $s2,8($out) | 1359 | stg $s2,8($out) |
1315 | .Lcbc_dec_exit: | 1360 | .Lcbc_dec_exit: |
1316 | lmg $ivp,$ra,48($sp) | 1361 | lm${g} %r6,$ra,6*$SIZE_T($sp) |
1317 | stmg $t0,$t1,0($ivp) | 1362 | stmg $t0,$t1,0($ivp) |
1318 | 1363 | ||
1319 | br $ra | 1364 | br $ra |
@@ -1321,19 +1366,889 @@ $code.=<<___; | |||
1321 | .align 16 | 1366 | .align 16 |
1322 | .Lcbc_dec_tail: | 1367 | .Lcbc_dec_tail: |
1323 | aghi $len,15 | 1368 | aghi $len,15 |
1324 | stg $s0,128($sp) | 1369 | stg $s0,16*$SIZE_T($sp) |
1325 | stg $s2,136($sp) | 1370 | stg $s2,16*$SIZE_T+8($sp) |
1326 | bras $s1,4f | 1371 | bras $s1,4f |
1327 | mvc 0(1,$out),128($sp) | 1372 | mvc 0(1,$out),16*$SIZE_T($sp) |
1328 | 4: ex $len,0($s1) | 1373 | 4: ex $len,0($s1) |
1329 | j .Lcbc_dec_exit | 1374 | j .Lcbc_dec_exit |
1330 | .size AES_cbc_encrypt,.-AES_cbc_encrypt | 1375 | .size AES_cbc_encrypt,.-AES_cbc_encrypt |
1331 | .comm OPENSSL_s390xcap_P,8,8 | 1376 | ___ |
1377 | } | ||
1378 | ######################################################################## | ||
1379 | # void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, | ||
1380 | # size_t blocks, const AES_KEY *key, | ||
1381 | # const unsigned char *ivec) | ||
1382 | { | ||
1383 | my $inp="%r2"; | ||
1384 | my $out="%r4"; # blocks and out are swapped | ||
1385 | my $len="%r3"; | ||
1386 | my $key="%r5"; my $iv0="%r5"; | ||
1387 | my $ivp="%r6"; | ||
1388 | my $fp ="%r7"; | ||
1389 | |||
1390 | $code.=<<___; | ||
1391 | .globl AES_ctr32_encrypt | ||
1392 | .type AES_ctr32_encrypt,\@function | ||
1393 | .align 16 | ||
1394 | AES_ctr32_encrypt: | ||
1395 | xgr %r3,%r4 # flip %r3 and %r4, $out and $len | ||
1396 | xgr %r4,%r3 | ||
1397 | xgr %r3,%r4 | ||
1398 | llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case | ||
1399 | ___ | ||
1400 | $code.=<<___ if (!$softonly); | ||
1401 | l %r0,240($key) | ||
1402 | lhi %r1,16 | ||
1403 | clr %r0,%r1 | ||
1404 | jl .Lctr32_software | ||
1405 | |||
1406 | stm${g} %r6,$s3,6*$SIZE_T($sp) | ||
1407 | |||
1408 | slgr $out,$inp | ||
1409 | la %r1,0($key) # %r1 is permanent copy of $key | ||
1410 | lg $iv0,0($ivp) # load ivec | ||
1411 | lg $ivp,8($ivp) | ||
1412 | |||
1413 | # prepare and allocate stack frame at the top of 4K page | ||
1414 | # with 1K reserved for eventual signal handling | ||
1415 | lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer | ||
1416 | lghi $s1,-4096 | ||
1417 | algr $s0,$sp | ||
1418 | lgr $fp,$sp | ||
1419 | ngr $s0,$s1 # align at page boundary | ||
1420 | slgr $fp,$s0 # total buffer size | ||
1421 | lgr $s2,$sp | ||
1422 | lghi $s1,1024+16 # sl[g]fi is extended-immediate facility | ||
1423 | slgr $fp,$s1 # deduct reservation to get usable buffer size | ||
1424 | # buffer size is at lest 256 and at most 3072+256-16 | ||
1425 | |||
1426 | la $sp,1024($s0) # alloca | ||
1427 | srlg $fp,$fp,4 # convert bytes to blocks, minimum 16 | ||
1428 | st${g} $s2,0($sp) # back-chain | ||
1429 | st${g} $fp,$SIZE_T($sp) | ||
1430 | |||
1431 | slgr $len,$fp | ||
1432 | brc 1,.Lctr32_hw_switch # not zero, no borrow | ||
1433 | algr $fp,$len # input is shorter than allocated buffer | ||
1434 | lghi $len,0 | ||
1435 | st${g} $fp,$SIZE_T($sp) | ||
1436 | |||
1437 | .Lctr32_hw_switch: | ||
1438 | ___ | ||
1439 | $code.=<<___ if (0); ######### kmctr code was measured to be ~12% slower | ||
1440 | larl $s0,OPENSSL_s390xcap_P | ||
1441 | lg $s0,8($s0) | ||
1442 | tmhh $s0,0x0004 # check for message_security-assist-4 | ||
1443 | jz .Lctr32_km_loop | ||
1444 | |||
1445 | llgfr $s0,%r0 | ||
1446 | lgr $s1,%r1 | ||
1447 | lghi %r0,0 | ||
1448 | la %r1,16($sp) | ||
1449 | .long 0xb92d2042 # kmctr %r4,%r2,%r2 | ||
1450 | |||
1451 | llihh %r0,0x8000 # check if kmctr supports the function code | ||
1452 | srlg %r0,%r0,0($s0) | ||
1453 | ng %r0,16($sp) | ||
1454 | lgr %r0,$s0 | ||
1455 | lgr %r1,$s1 | ||
1456 | jz .Lctr32_km_loop | ||
1457 | |||
1458 | ####### kmctr code | ||
1459 | algr $out,$inp # restore $out | ||
1460 | lgr $s1,$len # $s1 undertakes $len | ||
1461 | j .Lctr32_kmctr_loop | ||
1462 | .align 16 | ||
1463 | .Lctr32_kmctr_loop: | ||
1464 | la $s2,16($sp) | ||
1465 | lgr $s3,$fp | ||
1466 | .Lctr32_kmctr_prepare: | ||
1467 | stg $iv0,0($s2) | ||
1468 | stg $ivp,8($s2) | ||
1469 | la $s2,16($s2) | ||
1470 | ahi $ivp,1 # 32-bit increment, preserves upper half | ||
1471 | brct $s3,.Lctr32_kmctr_prepare | ||
1472 | |||
1473 | #la $inp,0($inp) # inp | ||
1474 | sllg $len,$fp,4 # len | ||
1475 | #la $out,0($out) # out | ||
1476 | la $s2,16($sp) # iv | ||
1477 | .long 0xb92da042 # kmctr $out,$s2,$inp | ||
1478 | brc 1,.-4 # pay attention to "partial completion" | ||
1479 | |||
1480 | slgr $s1,$fp | ||
1481 | brc 1,.Lctr32_kmctr_loop # not zero, no borrow | ||
1482 | algr $fp,$s1 | ||
1483 | lghi $s1,0 | ||
1484 | brc 4+1,.Lctr32_kmctr_loop # not zero | ||
1485 | |||
1486 | l${g} $sp,0($sp) | ||
1487 | lm${g} %r6,$s3,6*$SIZE_T($sp) | ||
1488 | br $ra | ||
1489 | .align 16 | ||
1490 | ___ | ||
1491 | $code.=<<___; | ||
1492 | .Lctr32_km_loop: | ||
1493 | la $s2,16($sp) | ||
1494 | lgr $s3,$fp | ||
1495 | .Lctr32_km_prepare: | ||
1496 | stg $iv0,0($s2) | ||
1497 | stg $ivp,8($s2) | ||
1498 | la $s2,16($s2) | ||
1499 | ahi $ivp,1 # 32-bit increment, preserves upper half | ||
1500 | brct $s3,.Lctr32_km_prepare | ||
1501 | |||
1502 | la $s0,16($sp) # inp | ||
1503 | sllg $s1,$fp,4 # len | ||
1504 | la $s2,16($sp) # out | ||
1505 | .long 0xb92e00a8 # km %r10,%r8 | ||
1506 | brc 1,.-4 # pay attention to "partial completion" | ||
1507 | |||
1508 | la $s2,16($sp) | ||
1509 | lgr $s3,$fp | ||
1510 | slgr $s2,$inp | ||
1511 | .Lctr32_km_xor: | ||
1512 | lg $s0,0($inp) | ||
1513 | lg $s1,8($inp) | ||
1514 | xg $s0,0($s2,$inp) | ||
1515 | xg $s1,8($s2,$inp) | ||
1516 | stg $s0,0($out,$inp) | ||
1517 | stg $s1,8($out,$inp) | ||
1518 | la $inp,16($inp) | ||
1519 | brct $s3,.Lctr32_km_xor | ||
1520 | |||
1521 | slgr $len,$fp | ||
1522 | brc 1,.Lctr32_km_loop # not zero, no borrow | ||
1523 | algr $fp,$len | ||
1524 | lghi $len,0 | ||
1525 | brc 4+1,.Lctr32_km_loop # not zero | ||
1526 | |||
1527 | l${g} $s0,0($sp) | ||
1528 | l${g} $s1,$SIZE_T($sp) | ||
1529 | la $s2,16($sp) | ||
1530 | .Lctr32_km_zap: | ||
1531 | stg $s0,0($s2) | ||
1532 | stg $s0,8($s2) | ||
1533 | la $s2,16($s2) | ||
1534 | brct $s1,.Lctr32_km_zap | ||
1535 | |||
1536 | la $sp,0($s0) | ||
1537 | lm${g} %r6,$s3,6*$SIZE_T($sp) | ||
1538 | br $ra | ||
1539 | .align 16 | ||
1540 | .Lctr32_software: | ||
1541 | ___ | ||
1542 | $code.=<<___; | ||
1543 | stm${g} $key,$ra,5*$SIZE_T($sp) | ||
1544 | sl${g}r $inp,$out | ||
1545 | larl $tbl,AES_Te | ||
1546 | llgf $t1,12($ivp) | ||
1547 | |||
1548 | .Lctr32_loop: | ||
1549 | stm${g} $inp,$out,2*$SIZE_T($sp) | ||
1550 | llgf $s0,0($ivp) | ||
1551 | llgf $s1,4($ivp) | ||
1552 | llgf $s2,8($ivp) | ||
1553 | lgr $s3,$t1 | ||
1554 | st $t1,16*$SIZE_T($sp) | ||
1555 | lgr %r4,$key | ||
1556 | |||
1557 | bras $ra,_s390x_AES_encrypt | ||
1558 | |||
1559 | lm${g} $inp,$ivp,2*$SIZE_T($sp) | ||
1560 | llgf $t1,16*$SIZE_T($sp) | ||
1561 | x $s0,0($inp,$out) | ||
1562 | x $s1,4($inp,$out) | ||
1563 | x $s2,8($inp,$out) | ||
1564 | x $s3,12($inp,$out) | ||
1565 | stm $s0,$s3,0($out) | ||
1566 | |||
1567 | la $out,16($out) | ||
1568 | ahi $t1,1 # 32-bit increment | ||
1569 | brct $len,.Lctr32_loop | ||
1570 | |||
1571 | lm${g} %r6,$ra,6*$SIZE_T($sp) | ||
1572 | br $ra | ||
1573 | .size AES_ctr32_encrypt,.-AES_ctr32_encrypt | ||
1574 | ___ | ||
1575 | } | ||
1576 | |||
1577 | ######################################################################## | ||
1578 | # void AES_xts_encrypt(const char *inp,char *out,size_t len, | ||
1579 | # const AES_KEY *key1, const AES_KEY *key2, | ||
1580 | # const unsigned char iv[16]); | ||
1581 | # | ||
1582 | { | ||
1583 | my $inp="%r2"; | ||
1584 | my $out="%r4"; # len and out are swapped | ||
1585 | my $len="%r3"; | ||
1586 | my $key1="%r5"; # $i1 | ||
1587 | my $key2="%r6"; # $i2 | ||
1588 | my $fp="%r7"; # $i3 | ||
1589 | my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame... | ||
1590 | |||
1591 | $code.=<<___; | ||
1592 | .type _s390x_xts_km,\@function | ||
1593 | .align 16 | ||
1594 | _s390x_xts_km: | ||
1595 | ___ | ||
1596 | $code.=<<___ if(1); | ||
1597 | llgfr $s0,%r0 # put aside the function code | ||
1598 | lghi $s1,0x7f | ||
1599 | nr $s1,%r0 | ||
1600 | lghi %r0,0 # query capability vector | ||
1601 | la %r1,2*$SIZE_T($sp) | ||
1602 | .long 0xb92e0042 # km %r4,%r2 | ||
1603 | llihh %r1,0x8000 | ||
1604 | srlg %r1,%r1,32($s1) # check for 32+function code | ||
1605 | ng %r1,2*$SIZE_T($sp) | ||
1606 | lgr %r0,$s0 # restore the function code | ||
1607 | la %r1,0($key1) # restore $key1 | ||
1608 | jz .Lxts_km_vanilla | ||
1609 | |||
1610 | lmg $i2,$i3,$tweak($sp) # put aside the tweak value | ||
1611 | algr $out,$inp | ||
1612 | |||
1613 | oill %r0,32 # switch to xts function code | ||
1614 | aghi $s1,-18 # | ||
1615 | sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16 | ||
1616 | la %r1,$tweak-16($sp) | ||
1617 | slgr %r1,$s1 # parameter block position | ||
1618 | lmg $s0,$s3,0($key1) # load 256 bits of key material, | ||
1619 | stmg $s0,$s3,0(%r1) # and copy it to parameter block. | ||
1620 | # yes, it contains junk and overlaps | ||
1621 | # with the tweak in 128-bit case. | ||
1622 | # it's done to avoid conditional | ||
1623 | # branch. | ||
1624 | stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value | ||
1625 | |||
1626 | .long 0xb92e0042 # km %r4,%r2 | ||
1627 | brc 1,.-4 # pay attention to "partial completion" | ||
1628 | |||
1629 | lrvg $s0,$tweak+0($sp) # load the last tweak | ||
1630 | lrvg $s1,$tweak+8($sp) | ||
1631 | stmg %r0,%r3,$tweak-32(%r1) # wipe copy of the key | ||
1632 | |||
1633 | nill %r0,0xffdf # switch back to original function code | ||
1634 | la %r1,0($key1) # restore pointer to $key1 | ||
1635 | slgr $out,$inp | ||
1636 | |||
1637 | llgc $len,2*$SIZE_T-1($sp) | ||
1638 | nill $len,0x0f # $len%=16 | ||
1639 | br $ra | ||
1640 | |||
1641 | .align 16 | ||
1642 | .Lxts_km_vanilla: | ||
1643 | ___ | ||
1644 | $code.=<<___; | ||
1645 | # prepare and allocate stack frame at the top of 4K page | ||
1646 | # with 1K reserved for eventual signal handling | ||
1647 | lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer | ||
1648 | lghi $s1,-4096 | ||
1649 | algr $s0,$sp | ||
1650 | lgr $fp,$sp | ||
1651 | ngr $s0,$s1 # align at page boundary | ||
1652 | slgr $fp,$s0 # total buffer size | ||
1653 | lgr $s2,$sp | ||
1654 | lghi $s1,1024+16 # sl[g]fi is extended-immediate facility | ||
1655 | slgr $fp,$s1 # deduct reservation to get usable buffer size | ||
1656 | # buffer size is at lest 256 and at most 3072+256-16 | ||
1657 | |||
1658 | la $sp,1024($s0) # alloca | ||
1659 | nill $fp,0xfff0 # round to 16*n | ||
1660 | st${g} $s2,0($sp) # back-chain | ||
1661 | nill $len,0xfff0 # redundant | ||
1662 | st${g} $fp,$SIZE_T($sp) | ||
1663 | |||
1664 | slgr $len,$fp | ||
1665 | brc 1,.Lxts_km_go # not zero, no borrow | ||
1666 | algr $fp,$len # input is shorter than allocated buffer | ||
1667 | lghi $len,0 | ||
1668 | st${g} $fp,$SIZE_T($sp) | ||
1669 | |||
1670 | .Lxts_km_go: | ||
1671 | lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian | ||
1672 | lrvg $s1,$tweak+8($s2) | ||
1673 | |||
1674 | la $s2,16($sp) # vector of ascending tweak values | ||
1675 | slgr $s2,$inp | ||
1676 | srlg $s3,$fp,4 | ||
1677 | j .Lxts_km_start | ||
1678 | |||
1679 | .Lxts_km_loop: | ||
1680 | la $s2,16($sp) | ||
1681 | slgr $s2,$inp | ||
1682 | srlg $s3,$fp,4 | ||
1683 | .Lxts_km_prepare: | ||
1684 | lghi $i1,0x87 | ||
1685 | srag $i2,$s1,63 # broadcast upper bit | ||
1686 | ngr $i1,$i2 # rem | ||
1687 | srlg $i2,$s0,63 # carry bit from lower half | ||
1688 | sllg $s0,$s0,1 | ||
1689 | sllg $s1,$s1,1 | ||
1690 | xgr $s0,$i1 | ||
1691 | ogr $s1,$i2 | ||
1692 | .Lxts_km_start: | ||
1693 | lrvgr $i1,$s0 # flip byte order | ||
1694 | lrvgr $i2,$s1 | ||
1695 | stg $i1,0($s2,$inp) | ||
1696 | stg $i2,8($s2,$inp) | ||
1697 | xg $i1,0($inp) | ||
1698 | xg $i2,8($inp) | ||
1699 | stg $i1,0($out,$inp) | ||
1700 | stg $i2,8($out,$inp) | ||
1701 | la $inp,16($inp) | ||
1702 | brct $s3,.Lxts_km_prepare | ||
1703 | |||
1704 | slgr $inp,$fp # rewind $inp | ||
1705 | la $s2,0($out,$inp) | ||
1706 | lgr $s3,$fp | ||
1707 | .long 0xb92e00aa # km $s2,$s2 | ||
1708 | brc 1,.-4 # pay attention to "partial completion" | ||
1709 | |||
1710 | la $s2,16($sp) | ||
1711 | slgr $s2,$inp | ||
1712 | srlg $s3,$fp,4 | ||
1713 | .Lxts_km_xor: | ||
1714 | lg $i1,0($out,$inp) | ||
1715 | lg $i2,8($out,$inp) | ||
1716 | xg $i1,0($s2,$inp) | ||
1717 | xg $i2,8($s2,$inp) | ||
1718 | stg $i1,0($out,$inp) | ||
1719 | stg $i2,8($out,$inp) | ||
1720 | la $inp,16($inp) | ||
1721 | brct $s3,.Lxts_km_xor | ||
1722 | |||
1723 | slgr $len,$fp | ||
1724 | brc 1,.Lxts_km_loop # not zero, no borrow | ||
1725 | algr $fp,$len | ||
1726 | lghi $len,0 | ||
1727 | brc 4+1,.Lxts_km_loop # not zero | ||
1728 | |||
1729 | l${g} $i1,0($sp) # back-chain | ||
1730 | llgf $fp,`2*$SIZE_T-4`($sp) # bytes used | ||
1731 | la $i2,16($sp) | ||
1732 | srlg $fp,$fp,4 | ||
1733 | .Lxts_km_zap: | ||
1734 | stg $i1,0($i2) | ||
1735 | stg $i1,8($i2) | ||
1736 | la $i2,16($i2) | ||
1737 | brct $fp,.Lxts_km_zap | ||
1738 | |||
1739 | la $sp,0($i1) | ||
1740 | llgc $len,2*$SIZE_T-1($i1) | ||
1741 | nill $len,0x0f # $len%=16 | ||
1742 | bzr $ra | ||
1743 | |||
1744 | # generate one more tweak... | ||
1745 | lghi $i1,0x87 | ||
1746 | srag $i2,$s1,63 # broadcast upper bit | ||
1747 | ngr $i1,$i2 # rem | ||
1748 | srlg $i2,$s0,63 # carry bit from lower half | ||
1749 | sllg $s0,$s0,1 | ||
1750 | sllg $s1,$s1,1 | ||
1751 | xgr $s0,$i1 | ||
1752 | ogr $s1,$i2 | ||
1753 | |||
1754 | ltr $len,$len # clear zero flag | ||
1755 | br $ra | ||
1756 | .size _s390x_xts_km,.-_s390x_xts_km | ||
1757 | |||
1758 | .globl AES_xts_encrypt | ||
1759 | .type AES_xts_encrypt,\@function | ||
1760 | .align 16 | ||
1761 | AES_xts_encrypt: | ||
1762 | xgr %r3,%r4 # flip %r3 and %r4, $out and $len | ||
1763 | xgr %r4,%r3 | ||
1764 | xgr %r3,%r4 | ||
1765 | ___ | ||
1766 | $code.=<<___ if ($SIZE_T==4); | ||
1767 | llgfr $len,$len | ||
1768 | ___ | ||
1769 | $code.=<<___; | ||
1770 | st${g} $len,1*$SIZE_T($sp) # save copy of $len | ||
1771 | srag $len,$len,4 # formally wrong, because it expands | ||
1772 | # sign byte, but who can afford asking | ||
1773 | # to process more than 2^63-1 bytes? | ||
1774 | # I use it, because it sets condition | ||
1775 | # code... | ||
1776 | bcr 8,$ra # abort if zero (i.e. less than 16) | ||
1777 | ___ | ||
1778 | $code.=<<___ if (!$softonly); | ||
1779 | llgf %r0,240($key2) | ||
1780 | lhi %r1,16 | ||
1781 | clr %r0,%r1 | ||
1782 | jl .Lxts_enc_software | ||
1783 | |||
1784 | stm${g} %r6,$s3,6*$SIZE_T($sp) | ||
1785 | st${g} $ra,14*$SIZE_T($sp) | ||
1786 | |||
1787 | sllg $len,$len,4 # $len&=~15 | ||
1788 | slgr $out,$inp | ||
1789 | |||
1790 | # generate the tweak value | ||
1791 | l${g} $s3,$stdframe($sp) # pointer to iv | ||
1792 | la $s2,$tweak($sp) | ||
1793 | lmg $s0,$s1,0($s3) | ||
1794 | lghi $s3,16 | ||
1795 | stmg $s0,$s1,0($s2) | ||
1796 | la %r1,0($key2) # $key2 is not needed anymore | ||
1797 | .long 0xb92e00aa # km $s2,$s2, generate the tweak | ||
1798 | brc 1,.-4 # can this happen? | ||
1799 | |||
1800 | l %r0,240($key1) | ||
1801 | la %r1,0($key1) # $key1 is not needed anymore | ||
1802 | bras $ra,_s390x_xts_km | ||
1803 | jz .Lxts_enc_km_done | ||
1804 | |||
1805 | aghi $inp,-16 # take one step back | ||
1806 | la $i3,0($out,$inp) # put aside real $out | ||
1807 | .Lxts_enc_km_steal: | ||
1808 | llgc $i1,16($inp) | ||
1809 | llgc $i2,0($out,$inp) | ||
1810 | stc $i1,0($out,$inp) | ||
1811 | stc $i2,16($out,$inp) | ||
1812 | la $inp,1($inp) | ||
1813 | brct $len,.Lxts_enc_km_steal | ||
1814 | |||
1815 | la $s2,0($i3) | ||
1816 | lghi $s3,16 | ||
1817 | lrvgr $i1,$s0 # flip byte order | ||
1818 | lrvgr $i2,$s1 | ||
1819 | xg $i1,0($s2) | ||
1820 | xg $i2,8($s2) | ||
1821 | stg $i1,0($s2) | ||
1822 | stg $i2,8($s2) | ||
1823 | .long 0xb92e00aa # km $s2,$s2 | ||
1824 | brc 1,.-4 # can this happen? | ||
1825 | lrvgr $i1,$s0 # flip byte order | ||
1826 | lrvgr $i2,$s1 | ||
1827 | xg $i1,0($i3) | ||
1828 | xg $i2,8($i3) | ||
1829 | stg $i1,0($i3) | ||
1830 | stg $i2,8($i3) | ||
1831 | |||
1832 | .Lxts_enc_km_done: | ||
1833 | l${g} $ra,14*$SIZE_T($sp) | ||
1834 | st${g} $sp,$tweak($sp) # wipe tweak | ||
1835 | st${g} $sp,$tweak($sp) | ||
1836 | lm${g} %r6,$s3,6*$SIZE_T($sp) | ||
1837 | br $ra | ||
1838 | .align 16 | ||
1839 | .Lxts_enc_software: | ||
1840 | ___ | ||
1841 | $code.=<<___; | ||
1842 | stm${g} %r6,$ra,6*$SIZE_T($sp) | ||
1843 | |||
1844 | slgr $out,$inp | ||
1845 | |||
1846 | xgr $s0,$s0 # clear upper half | ||
1847 | xgr $s1,$s1 | ||
1848 | lrv $s0,$stdframe+4($sp) # load secno | ||
1849 | lrv $s1,$stdframe+0($sp) | ||
1850 | xgr $s2,$s2 | ||
1851 | xgr $s3,$s3 | ||
1852 | stm${g} %r2,%r5,2*$SIZE_T($sp) | ||
1853 | la $key,0($key2) | ||
1854 | larl $tbl,AES_Te | ||
1855 | bras $ra,_s390x_AES_encrypt # generate the tweak | ||
1856 | lm${g} %r2,%r5,2*$SIZE_T($sp) | ||
1857 | stm $s0,$s3,$tweak($sp) # save the tweak | ||
1858 | j .Lxts_enc_enter | ||
1859 | |||
1860 | .align 16 | ||
1861 | .Lxts_enc_loop: | ||
1862 | lrvg $s1,$tweak+0($sp) # load the tweak in little-endian | ||
1863 | lrvg $s3,$tweak+8($sp) | ||
1864 | lghi %r1,0x87 | ||
1865 | srag %r0,$s3,63 # broadcast upper bit | ||
1866 | ngr %r1,%r0 # rem | ||
1867 | srlg %r0,$s1,63 # carry bit from lower half | ||
1868 | sllg $s1,$s1,1 | ||
1869 | sllg $s3,$s3,1 | ||
1870 | xgr $s1,%r1 | ||
1871 | ogr $s3,%r0 | ||
1872 | lrvgr $s1,$s1 # flip byte order | ||
1873 | lrvgr $s3,$s3 | ||
1874 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | ||
1875 | stg $s1,$tweak+0($sp) # save the tweak | ||
1876 | llgfr $s1,$s1 | ||
1877 | srlg $s2,$s3,32 | ||
1878 | stg $s3,$tweak+8($sp) | ||
1879 | llgfr $s3,$s3 | ||
1880 | la $inp,16($inp) # $inp+=16 | ||
1881 | .Lxts_enc_enter: | ||
1882 | x $s0,0($inp) # ^=*($inp) | ||
1883 | x $s1,4($inp) | ||
1884 | x $s2,8($inp) | ||
1885 | x $s3,12($inp) | ||
1886 | stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing | ||
1887 | la $key,0($key1) | ||
1888 | bras $ra,_s390x_AES_encrypt | ||
1889 | lm${g} %r2,%r5,2*$SIZE_T($sp) | ||
1890 | x $s0,$tweak+0($sp) # ^=tweak | ||
1891 | x $s1,$tweak+4($sp) | ||
1892 | x $s2,$tweak+8($sp) | ||
1893 | x $s3,$tweak+12($sp) | ||
1894 | st $s0,0($out,$inp) | ||
1895 | st $s1,4($out,$inp) | ||
1896 | st $s2,8($out,$inp) | ||
1897 | st $s3,12($out,$inp) | ||
1898 | brct${g} $len,.Lxts_enc_loop | ||
1899 | |||
1900 | llgc $len,`2*$SIZE_T-1`($sp) | ||
1901 | nill $len,0x0f # $len%16 | ||
1902 | jz .Lxts_enc_done | ||
1903 | |||
1904 | la $i3,0($inp,$out) # put aside real $out | ||
1905 | .Lxts_enc_steal: | ||
1906 | llgc %r0,16($inp) | ||
1907 | llgc %r1,0($out,$inp) | ||
1908 | stc %r0,0($out,$inp) | ||
1909 | stc %r1,16($out,$inp) | ||
1910 | la $inp,1($inp) | ||
1911 | brct $len,.Lxts_enc_steal | ||
1912 | la $out,0($i3) # restore real $out | ||
1913 | |||
1914 | # generate last tweak... | ||
1915 | lrvg $s1,$tweak+0($sp) # load the tweak in little-endian | ||
1916 | lrvg $s3,$tweak+8($sp) | ||
1917 | lghi %r1,0x87 | ||
1918 | srag %r0,$s3,63 # broadcast upper bit | ||
1919 | ngr %r1,%r0 # rem | ||
1920 | srlg %r0,$s1,63 # carry bit from lower half | ||
1921 | sllg $s1,$s1,1 | ||
1922 | sllg $s3,$s3,1 | ||
1923 | xgr $s1,%r1 | ||
1924 | ogr $s3,%r0 | ||
1925 | lrvgr $s1,$s1 # flip byte order | ||
1926 | lrvgr $s3,$s3 | ||
1927 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | ||
1928 | stg $s1,$tweak+0($sp) # save the tweak | ||
1929 | llgfr $s1,$s1 | ||
1930 | srlg $s2,$s3,32 | ||
1931 | stg $s3,$tweak+8($sp) | ||
1932 | llgfr $s3,$s3 | ||
1933 | |||
1934 | x $s0,0($out) # ^=*(inp)|stolen cipher-text | ||
1935 | x $s1,4($out) | ||
1936 | x $s2,8($out) | ||
1937 | x $s3,12($out) | ||
1938 | st${g} $out,4*$SIZE_T($sp) | ||
1939 | la $key,0($key1) | ||
1940 | bras $ra,_s390x_AES_encrypt | ||
1941 | l${g} $out,4*$SIZE_T($sp) | ||
1942 | x $s0,`$tweak+0`($sp) # ^=tweak | ||
1943 | x $s1,`$tweak+4`($sp) | ||
1944 | x $s2,`$tweak+8`($sp) | ||
1945 | x $s3,`$tweak+12`($sp) | ||
1946 | st $s0,0($out) | ||
1947 | st $s1,4($out) | ||
1948 | st $s2,8($out) | ||
1949 | st $s3,12($out) | ||
1950 | |||
1951 | .Lxts_enc_done: | ||
1952 | stg $sp,$tweak+0($sp) # wipe tweak | ||
1953 | stg $sp,$tweak+8($sp) | ||
1954 | lm${g} %r6,$ra,6*$SIZE_T($sp) | ||
1955 | br $ra | ||
1956 | .size AES_xts_encrypt,.-AES_xts_encrypt | ||
1957 | ___ | ||
1958 | # void AES_xts_decrypt(const char *inp,char *out,size_t len, | ||
1959 | # const AES_KEY *key1, const AES_KEY *key2,u64 secno); | ||
1960 | # | ||
1961 | $code.=<<___; | ||
1962 | .globl AES_xts_decrypt | ||
1963 | .type AES_xts_decrypt,\@function | ||
1964 | .align 16 | ||
1965 | AES_xts_decrypt: | ||
1966 | xgr %r3,%r4 # flip %r3 and %r4, $out and $len | ||
1967 | xgr %r4,%r3 | ||
1968 | xgr %r3,%r4 | ||
1969 | ___ | ||
1970 | $code.=<<___ if ($SIZE_T==4); | ||
1971 | llgfr $len,$len | ||
1972 | ___ | ||
1973 | $code.=<<___; | ||
1974 | st${g} $len,1*$SIZE_T($sp) # save copy of $len | ||
1975 | aghi $len,-16 | ||
1976 | bcr 4,$ra # abort if less than zero. formally | ||
1977 | # wrong, because $len is unsigned, | ||
1978 | # but who can afford asking to | ||
1979 | # process more than 2^63-1 bytes? | ||
1980 | tmll $len,0x0f | ||
1981 | jnz .Lxts_dec_proceed | ||
1982 | aghi $len,16 | ||
1983 | .Lxts_dec_proceed: | ||
1984 | ___ | ||
1985 | $code.=<<___ if (!$softonly); | ||
1986 | llgf %r0,240($key2) | ||
1987 | lhi %r1,16 | ||
1988 | clr %r0,%r1 | ||
1989 | jl .Lxts_dec_software | ||
1990 | |||
1991 | stm${g} %r6,$s3,6*$SIZE_T($sp) | ||
1992 | st${g} $ra,14*$SIZE_T($sp) | ||
1993 | |||
1994 | nill $len,0xfff0 # $len&=~15 | ||
1995 | slgr $out,$inp | ||
1996 | |||
1997 | # generate the tweak value | ||
1998 | l${g} $s3,$stdframe($sp) # pointer to iv | ||
1999 | la $s2,$tweak($sp) | ||
2000 | lmg $s0,$s1,0($s3) | ||
2001 | lghi $s3,16 | ||
2002 | stmg $s0,$s1,0($s2) | ||
2003 | la %r1,0($key2) # $key2 is not needed past this point | ||
2004 | .long 0xb92e00aa # km $s2,$s2, generate the tweak | ||
2005 | brc 1,.-4 # can this happen? | ||
2006 | |||
2007 | l %r0,240($key1) | ||
2008 | la %r1,0($key1) # $key1 is not needed anymore | ||
2009 | |||
2010 | ltgr $len,$len | ||
2011 | jz .Lxts_dec_km_short | ||
2012 | bras $ra,_s390x_xts_km | ||
2013 | jz .Lxts_dec_km_done | ||
2014 | |||
2015 | lrvgr $s2,$s0 # make copy in reverse byte order | ||
2016 | lrvgr $s3,$s1 | ||
2017 | j .Lxts_dec_km_2ndtweak | ||
2018 | |||
2019 | .Lxts_dec_km_short: | ||
2020 | llgc $len,`2*$SIZE_T-1`($sp) | ||
2021 | nill $len,0x0f # $len%=16 | ||
2022 | lrvg $s0,$tweak+0($sp) # load the tweak | ||
2023 | lrvg $s1,$tweak+8($sp) | ||
2024 | lrvgr $s2,$s0 # make copy in reverse byte order | ||
2025 | lrvgr $s3,$s1 | ||
2026 | |||
2027 | .Lxts_dec_km_2ndtweak: | ||
2028 | lghi $i1,0x87 | ||
2029 | srag $i2,$s1,63 # broadcast upper bit | ||
2030 | ngr $i1,$i2 # rem | ||
2031 | srlg $i2,$s0,63 # carry bit from lower half | ||
2032 | sllg $s0,$s0,1 | ||
2033 | sllg $s1,$s1,1 | ||
2034 | xgr $s0,$i1 | ||
2035 | ogr $s1,$i2 | ||
2036 | lrvgr $i1,$s0 # flip byte order | ||
2037 | lrvgr $i2,$s1 | ||
2038 | |||
2039 | xg $i1,0($inp) | ||
2040 | xg $i2,8($inp) | ||
2041 | stg $i1,0($out,$inp) | ||
2042 | stg $i2,8($out,$inp) | ||
2043 | la $i2,0($out,$inp) | ||
2044 | lghi $i3,16 | ||
2045 | .long 0xb92e0066 # km $i2,$i2 | ||
2046 | brc 1,.-4 # can this happen? | ||
2047 | lrvgr $i1,$s0 | ||
2048 | lrvgr $i2,$s1 | ||
2049 | xg $i1,0($out,$inp) | ||
2050 | xg $i2,8($out,$inp) | ||
2051 | stg $i1,0($out,$inp) | ||
2052 | stg $i2,8($out,$inp) | ||
2053 | |||
2054 | la $i3,0($out,$inp) # put aside real $out | ||
2055 | .Lxts_dec_km_steal: | ||
2056 | llgc $i1,16($inp) | ||
2057 | llgc $i2,0($out,$inp) | ||
2058 | stc $i1,0($out,$inp) | ||
2059 | stc $i2,16($out,$inp) | ||
2060 | la $inp,1($inp) | ||
2061 | brct $len,.Lxts_dec_km_steal | ||
2062 | |||
2063 | lgr $s0,$s2 | ||
2064 | lgr $s1,$s3 | ||
2065 | xg $s0,0($i3) | ||
2066 | xg $s1,8($i3) | ||
2067 | stg $s0,0($i3) | ||
2068 | stg $s1,8($i3) | ||
2069 | la $s0,0($i3) | ||
2070 | lghi $s1,16 | ||
2071 | .long 0xb92e0088 # km $s0,$s0 | ||
2072 | brc 1,.-4 # can this happen? | ||
2073 | xg $s2,0($i3) | ||
2074 | xg $s3,8($i3) | ||
2075 | stg $s2,0($i3) | ||
2076 | stg $s3,8($i3) | ||
2077 | .Lxts_dec_km_done: | ||
2078 | l${g} $ra,14*$SIZE_T($sp) | ||
2079 | st${g} $sp,$tweak($sp) # wipe tweak | ||
2080 | st${g} $sp,$tweak($sp) | ||
2081 | lm${g} %r6,$s3,6*$SIZE_T($sp) | ||
2082 | br $ra | ||
2083 | .align 16 | ||
2084 | .Lxts_dec_software: | ||
2085 | ___ | ||
2086 | $code.=<<___; | ||
2087 | stm${g} %r6,$ra,6*$SIZE_T($sp) | ||
2088 | |||
2089 | srlg $len,$len,4 | ||
2090 | slgr $out,$inp | ||
2091 | |||
2092 | xgr $s0,$s0 # clear upper half | ||
2093 | xgr $s1,$s1 | ||
2094 | lrv $s0,$stdframe+4($sp) # load secno | ||
2095 | lrv $s1,$stdframe+0($sp) | ||
2096 | xgr $s2,$s2 | ||
2097 | xgr $s3,$s3 | ||
2098 | stm${g} %r2,%r5,2*$SIZE_T($sp) | ||
2099 | la $key,0($key2) | ||
2100 | larl $tbl,AES_Te | ||
2101 | bras $ra,_s390x_AES_encrypt # generate the tweak | ||
2102 | lm${g} %r2,%r5,2*$SIZE_T($sp) | ||
2103 | larl $tbl,AES_Td | ||
2104 | lt${g}r $len,$len | ||
2105 | stm $s0,$s3,$tweak($sp) # save the tweak | ||
2106 | jz .Lxts_dec_short | ||
2107 | j .Lxts_dec_enter | ||
2108 | |||
2109 | .align 16 | ||
2110 | .Lxts_dec_loop: | ||
2111 | lrvg $s1,$tweak+0($sp) # load the tweak in little-endian | ||
2112 | lrvg $s3,$tweak+8($sp) | ||
2113 | lghi %r1,0x87 | ||
2114 | srag %r0,$s3,63 # broadcast upper bit | ||
2115 | ngr %r1,%r0 # rem | ||
2116 | srlg %r0,$s1,63 # carry bit from lower half | ||
2117 | sllg $s1,$s1,1 | ||
2118 | sllg $s3,$s3,1 | ||
2119 | xgr $s1,%r1 | ||
2120 | ogr $s3,%r0 | ||
2121 | lrvgr $s1,$s1 # flip byte order | ||
2122 | lrvgr $s3,$s3 | ||
2123 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | ||
2124 | stg $s1,$tweak+0($sp) # save the tweak | ||
2125 | llgfr $s1,$s1 | ||
2126 | srlg $s2,$s3,32 | ||
2127 | stg $s3,$tweak+8($sp) | ||
2128 | llgfr $s3,$s3 | ||
2129 | .Lxts_dec_enter: | ||
2130 | x $s0,0($inp) # tweak^=*(inp) | ||
2131 | x $s1,4($inp) | ||
2132 | x $s2,8($inp) | ||
2133 | x $s3,12($inp) | ||
2134 | stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing | ||
2135 | la $key,0($key1) | ||
2136 | bras $ra,_s390x_AES_decrypt | ||
2137 | lm${g} %r2,%r5,2*$SIZE_T($sp) | ||
2138 | x $s0,$tweak+0($sp) # ^=tweak | ||
2139 | x $s1,$tweak+4($sp) | ||
2140 | x $s2,$tweak+8($sp) | ||
2141 | x $s3,$tweak+12($sp) | ||
2142 | st $s0,0($out,$inp) | ||
2143 | st $s1,4($out,$inp) | ||
2144 | st $s2,8($out,$inp) | ||
2145 | st $s3,12($out,$inp) | ||
2146 | la $inp,16($inp) | ||
2147 | brct${g} $len,.Lxts_dec_loop | ||
2148 | |||
2149 | llgc $len,`2*$SIZE_T-1`($sp) | ||
2150 | nill $len,0x0f # $len%16 | ||
2151 | jz .Lxts_dec_done | ||
2152 | |||
2153 | # generate pair of tweaks... | ||
2154 | lrvg $s1,$tweak+0($sp) # load the tweak in little-endian | ||
2155 | lrvg $s3,$tweak+8($sp) | ||
2156 | lghi %r1,0x87 | ||
2157 | srag %r0,$s3,63 # broadcast upper bit | ||
2158 | ngr %r1,%r0 # rem | ||
2159 | srlg %r0,$s1,63 # carry bit from lower half | ||
2160 | sllg $s1,$s1,1 | ||
2161 | sllg $s3,$s3,1 | ||
2162 | xgr $s1,%r1 | ||
2163 | ogr $s3,%r0 | ||
2164 | lrvgr $i2,$s1 # flip byte order | ||
2165 | lrvgr $i3,$s3 | ||
2166 | stmg $i2,$i3,$tweak($sp) # save the 1st tweak | ||
2167 | j .Lxts_dec_2ndtweak | ||
2168 | |||
2169 | .align 16 | ||
2170 | .Lxts_dec_short: | ||
2171 | llgc $len,`2*$SIZE_T-1`($sp) | ||
2172 | nill $len,0x0f # $len%16 | ||
2173 | lrvg $s1,$tweak+0($sp) # load the tweak in little-endian | ||
2174 | lrvg $s3,$tweak+8($sp) | ||
2175 | .Lxts_dec_2ndtweak: | ||
2176 | lghi %r1,0x87 | ||
2177 | srag %r0,$s3,63 # broadcast upper bit | ||
2178 | ngr %r1,%r0 # rem | ||
2179 | srlg %r0,$s1,63 # carry bit from lower half | ||
2180 | sllg $s1,$s1,1 | ||
2181 | sllg $s3,$s3,1 | ||
2182 | xgr $s1,%r1 | ||
2183 | ogr $s3,%r0 | ||
2184 | lrvgr $s1,$s1 # flip byte order | ||
2185 | lrvgr $s3,$s3 | ||
2186 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | ||
2187 | stg $s1,$tweak-16+0($sp) # save the 2nd tweak | ||
2188 | llgfr $s1,$s1 | ||
2189 | srlg $s2,$s3,32 | ||
2190 | stg $s3,$tweak-16+8($sp) | ||
2191 | llgfr $s3,$s3 | ||
2192 | |||
2193 | x $s0,0($inp) # tweak_the_2nd^=*(inp) | ||
2194 | x $s1,4($inp) | ||
2195 | x $s2,8($inp) | ||
2196 | x $s3,12($inp) | ||
2197 | stm${g} %r2,%r3,2*$SIZE_T($sp) | ||
2198 | la $key,0($key1) | ||
2199 | bras $ra,_s390x_AES_decrypt | ||
2200 | lm${g} %r2,%r5,2*$SIZE_T($sp) | ||
2201 | x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd | ||
2202 | x $s1,$tweak-16+4($sp) | ||
2203 | x $s2,$tweak-16+8($sp) | ||
2204 | x $s3,$tweak-16+12($sp) | ||
2205 | st $s0,0($out,$inp) | ||
2206 | st $s1,4($out,$inp) | ||
2207 | st $s2,8($out,$inp) | ||
2208 | st $s3,12($out,$inp) | ||
2209 | |||
2210 | la $i3,0($out,$inp) # put aside real $out | ||
2211 | .Lxts_dec_steal: | ||
2212 | llgc %r0,16($inp) | ||
2213 | llgc %r1,0($out,$inp) | ||
2214 | stc %r0,0($out,$inp) | ||
2215 | stc %r1,16($out,$inp) | ||
2216 | la $inp,1($inp) | ||
2217 | brct $len,.Lxts_dec_steal | ||
2218 | la $out,0($i3) # restore real $out | ||
2219 | |||
2220 | lm $s0,$s3,$tweak($sp) # load the 1st tweak | ||
2221 | x $s0,0($out) # tweak^=*(inp)|stolen cipher-text | ||
2222 | x $s1,4($out) | ||
2223 | x $s2,8($out) | ||
2224 | x $s3,12($out) | ||
2225 | st${g} $out,4*$SIZE_T($sp) | ||
2226 | la $key,0($key1) | ||
2227 | bras $ra,_s390x_AES_decrypt | ||
2228 | l${g} $out,4*$SIZE_T($sp) | ||
2229 | x $s0,$tweak+0($sp) # ^=tweak | ||
2230 | x $s1,$tweak+4($sp) | ||
2231 | x $s2,$tweak+8($sp) | ||
2232 | x $s3,$tweak+12($sp) | ||
2233 | st $s0,0($out) | ||
2234 | st $s1,4($out) | ||
2235 | st $s2,8($out) | ||
2236 | st $s3,12($out) | ||
2237 | stg $sp,$tweak-16+0($sp) # wipe 2nd tweak | ||
2238 | stg $sp,$tweak-16+8($sp) | ||
2239 | .Lxts_dec_done: | ||
2240 | stg $sp,$tweak+0($sp) # wipe tweak | ||
2241 | stg $sp,$tweak+8($sp) | ||
2242 | lm${g} %r6,$ra,6*$SIZE_T($sp) | ||
2243 | br $ra | ||
2244 | .size AES_xts_decrypt,.-AES_xts_decrypt | ||
1332 | ___ | 2245 | ___ |
1333 | } | 2246 | } |
1334 | $code.=<<___; | 2247 | $code.=<<___; |
1335 | .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" | 2248 | .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" |
2249 | .comm OPENSSL_s390xcap_P,16,8 | ||
1336 | ___ | 2250 | ___ |
1337 | 2251 | ||
1338 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | 2252 | $code =~ s/\`([^\`]*)\`/eval $1/gem; |
1339 | print $code; | 2253 | print $code; |
2254 | close STDOUT; # force flush | ||