summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes/asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/aes/asm')
-rw-r--r--src/lib/libcrypto/aes/asm/aes-586.pl1533
-rw-r--r--src/lib/libcrypto/aes/asm/aes-armv4.pl1030
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ia64.S1123
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ppc.pl1176
-rw-r--r--src/lib/libcrypto/aes/asm/aes-s390x.pl1333
-rwxr-xr-xsrc/lib/libcrypto/aes/asm/aes-sparcv9.pl1181
-rwxr-xr-xsrc/lib/libcrypto/aes/asm/aes-x86_64.pl1579
7 files changed, 0 insertions, 8955 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl
deleted file mode 100644
index e771e83953..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-586.pl
+++ /dev/null
@@ -1,1533 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 3.6.
10#
11# You might fail to appreciate this module performance from the first
12# try. If compared to "vanilla" linux-ia32-icc target, i.e. considered
13# to be *the* best Intel C compiler without -KPIC, performance appears
14# to be virtually identical... But try to re-configure with shared
15# library support... Aha! Intel compiler "suddenly" lags behind by 30%
16# [on P4, more on others]:-) And if compared to position-independent
17# code generated by GNU C, this code performs *more* than *twice* as
18# fast! Yes, all this buzz about PIC means that unlike other hand-
19# coded implementations, this one was explicitly designed to be safe
20# to use even in shared library context... This also means that this
21# code isn't necessarily absolutely fastest "ever," because in order
22# to achieve position independence an extra register has to be
23# off-loaded to stack, which affects the benchmark result.
24#
25# Special note about instruction choice. Do you recall RC4_INT code
26# performing poorly on P4? It might be the time to figure out why.
27# RC4_INT code implies effective address calculations in base+offset*4
28# form. Trouble is that it seems that offset scaling turned to be
29# critical path... At least eliminating scaling resulted in 2.8x RC4
30# performance improvement [as you might recall]. As AES code is hungry
31# for scaling too, I [try to] avoid the latter by favoring off-by-2
32# shifts and masking the result with 0xFF<<2 instead of "boring" 0xFF.
33#
34# As was shown by Dean Gaudet <dean@arctic.org>, the above note turned
35# void. Performance improvement with off-by-2 shifts was observed on
36# intermediate implementation, which was spilling yet another register
37# to stack... Final offset*4 code below runs just a tad faster on P4,
38# but exhibits up to 10% improvement on other cores.
39#
40# Second version is "monolithic" replacement for aes_core.c, which in
41# addition to AES_[de|en]crypt implements AES_set_[de|en]cryption_key.
42# This made it possible to implement little-endian variant of the
43# algorithm without modifying the base C code. Motivating factor for
44# the undertaken effort was that it appeared that in tight IA-32
45# register window little-endian flavor could achieve slightly higher
46# Instruction Level Parallelism, and it indeed resulted in up to 15%
47# better performance on most recent µ-archs...
48#
49# Third version adds AES_cbc_encrypt implementation, which resulted in
50# up to 40% performance improvement of CBC benchmark results. 40% was
51# observed on P4 core, where "overall" improvement coefficient, i.e. if
52# compared to PIC generated by GCC and in CBC mode, was observed to be
53# as large as 4x:-) CBC performance is virtually identical to ECB now
54# and on some platforms even better, e.g. 17.6 "small" cycles/byte on
55# Opteron, because certain function prologues and epilogues are
56# effectively taken out of the loop...
57#
58# Version 3.2 implements compressed tables and prefetch of these tables
59# in CBC[!] mode. Former means that 3/4 of table references are now
60# misaligned, which unfortunately has negative impact on elder IA-32
61# implementations, Pentium suffered 30% penalty, PIII - 10%.
62#
63# Version 3.3 avoids L1 cache aliasing between stack frame and
64# S-boxes, and 3.4 - L1 cache aliasing even between key schedule. The
65# latter is achieved by copying the key schedule to controlled place in
66# stack. This unfortunately has rather strong impact on small block CBC
67# performance, ~2x deterioration on 16-byte block if compared to 3.3.
68#
69# Version 3.5 checks if there is L1 cache aliasing between user-supplied
70# key schedule and S-boxes and abstains from copying the former if
71# there is none. This allows end-user to consciously retain small block
72# performance by aligning key schedule in specific manner.
73#
74# Version 3.6 compresses Td4 to 256 bytes and prefetches it in ECB.
75#
76# Current ECB performance numbers for 128-bit key in CPU cycles per
77# processed byte [measure commonly used by AES benchmarkers] are:
78#
79# small footprint fully unrolled
80# P4 24 22
81# AMD K8 20 19
82# PIII 25 23
83# Pentium 81 78
84
# Make the perlasm back-end reachable and load the x86 assembler helpers
# that provide every &mov/&xor/&DWP/... emitter used below.
push @INC, qw(perlasm ../../perlasm);
require "x86asm.pl";

# First command line argument is handed to the back-end unchanged; the last
# one is compared against "386" and the resulting flag passed along
# (presumably restricting the instruction set -- confirm in x86asm.pl).
&asm_init($ARGV[0], "aes-586.pl", $ARGV[-1] eq "386");

# Register allocation: four state words, the key schedule pointer and a
# scratch accumulator.  These are package globals on purpose, every
# generator routine in this file reads them.
($s0, $s1, $s2, $s3) = qw(eax ebx ecx edx);
($key, $acc)         = qw(edi esi);

# Tunables.
#
# $compromise=128 abstains from copying the key schedule to the stack when
# encrypting inputs shorter than 128 bytes, at the cost of risking aliasing
# with the S-boxes.  In return you get way better, up to +70%, small block
# performance.
$compromise = 0;

# $small_footprint=1 code is ~5% slower [on recent micro-architectures], but
# ~5 times smaller!  Compact code is favoured to minimize cache contention
# and in the hope of "collecting" the 5% back in real-life applications...
$small_footprint = 1;

# Shift "vertically"; defaults to 0 because of its proof-of-concept
# status...
$vertical_spin = 0;
110
111# Note that there is no decvert(), as well as last encryption round is
112# performed with "horizontal" shifts. This is because this "vertical"
113# implementation [one which groups shifts on a given $s[i] to form a
114# "column," unlike "horizontal" one, which groups shifts on different
115# $s[i] to form a "row"] is work in progress. It was observed to run
116# few percents faster on Intel cores, but not AMD. On AMD K8 core it's
117# whole 12% slower:-( So we face a trade-off... Shall it be resolved
118# some day? Till then the code is considered experimental and by
119# default remains dormant...
120
# encvert($te, @s) -- emit one complete "vertical" encryption round.
#
#   $te   register holding the base address of the Te table
#   @s    the four state registers, in the order s0, s1, s2, s3
#
# All shifts of one input word are grouped together: s0 is consumed first,
# then s3, s2 and s1.  s2 and s1 are parked in the caller-provided frame at
# 4(%esp) and 8(%esp) meanwhile, and the key schedule pointer, whose register
# is borrowed as scratch, is reloaded from 12(%esp) at the very end.
# Every table reference uses scale 8 and a byte displacement of 0..3, i.e.
# it reads a (possibly misaligned) dword out of a duplicated table entry.
sub encvert()
{   my ($te,@s) = @_;
    # List form so that BOTH scratch names are lexical.  A chained
    # "my $a = ..., $b = ..." only declares the first variable and
    # assigns the second one as a package global.
    my ($v0,$v1) = ($acc,$key);

    &mov    ($v0,$s[3]);                    # copy s3
    &mov    (&DWP(4,"esp"),$s[2]);          # save s2
    &mov    ($v1,$s[0]);                    # copy s0
    &mov    (&DWP(8,"esp"),$s[1]);          # save s1

    # contributions of s0 initialise all four output words
    &movz   ($s[2],&HB($s[0]));
    &and    ($s[0],0xFF);
    &mov    ($s[0],&DWP(0,$te,$s[0],8));    # s0>>0
    &shr    ($v1,16);
    &mov    ($s[3],&DWP(3,$te,$s[2],8));    # s0>>8
    &movz   ($s[1],&HB($v1));
    &and    ($v1,0xFF);
    &mov    ($s[2],&DWP(2,$te,$v1,8));      # s0>>16
     &mov   ($v1,$v0);
    &mov    ($s[1],&DWP(1,$te,$s[1],8));    # s0>>24

    # contributions of s3 (copy held in v0/v1)
    &and    ($v0,0xFF);
    &xor    ($s[3],&DWP(0,$te,$v0,8));      # s3>>0
    &movz   ($v0,&HB($v1));
    &shr    ($v1,16);
    &xor    ($s[2],&DWP(3,$te,$v0,8));      # s3>>8
    &movz   ($v0,&HB($v1));
    &and    ($v1,0xFF);
    &xor    ($s[1],&DWP(2,$te,$v1,8));      # s3>>16
     &mov   ($v1,&DWP(4,"esp"));            # restore s2
    &xor    ($s[0],&DWP(1,$te,$v0,8));      # s3>>24

    # contributions of s2
    &mov    ($v0,$v1);
    &and    ($v1,0xFF);
    &xor    ($s[2],&DWP(0,$te,$v1,8));      # s2>>0
    &movz   ($v1,&HB($v0));
    &shr    ($v0,16);
    &xor    ($s[1],&DWP(3,$te,$v1,8));      # s2>>8
    &movz   ($v1,&HB($v0));
    &and    ($v0,0xFF);
    &xor    ($s[0],&DWP(2,$te,$v0,8));      # s2>>16
     &mov   ($v0,&DWP(8,"esp"));            # restore s1
    &xor    ($s[3],&DWP(1,$te,$v1,8));      # s2>>24

    # contributions of s1
    &mov    ($v1,$v0);
    &and    ($v0,0xFF);
    &xor    ($s[1],&DWP(0,$te,$v0,8));      # s1>>0
    &movz   ($v0,&HB($v1));
    &shr    ($v1,16);
    &xor    ($s[0],&DWP(3,$te,$v0,8));      # s1>>8
    &movz   ($v0,&HB($v1));
    &and    ($v1,0xFF);
    &xor    ($s[3],&DWP(2,$te,$v1,8));      # s1>>16
     &mov   ($key,&DWP(12,"esp"));          # reincarnate v1 as key
    &xor    ($s[2],&DWP(1,$te,$v0,8));      # s1>>24
}
176
# encstep($i, $te, @s) -- emit the code computing output word $i (0..3) of
# one "horizontal" encryption round.
#
#   $i    index of the output word being produced
#   $te   register holding the base address of the Te table
#   @s    the state registers, rotated by the caller so that $s[0] supplies
#         byte 0, $s[1] byte 1, $s[2] byte 2 and $s[3] byte 3
#
# Words 0 and 1 are parked at 4(%esp) and 8(%esp); word 2 stays in the
# accumulator; step 3 builds its result directly in $s[0], reloads the key
# pointer from 12(%esp) and moves the earlier results back into registers.
# The shifts/masks guarded by $i==1 and $i==2 are the "reordered"
# instructions of the original: they pre-process registers whose remaining
# bytes are only needed by later steps.
sub encstep()
{   my ($i,$te,@s) = @_;
    my $final = ($i==3);
    my $idx   = $key;                       # scratch index register
    my $res   = $final ? $s[0] : $acc;      # where this word is accumulated

    # --- byte 0, taken from $s[0] ---
    if (!$final) {
        &mov    ($res,$s[0]);
        &and    ($res,0xFF);
    } else {
        &mov    ($key,&DWP(12,"esp"));      # key pointer is live again
    }
    &shr    ($s[0],16) if ($i==1);
    &shr    ($s[0],24) if ($i==2);
    &mov    ($res,&DWP(0,$te,$res,8));

    # --- byte 1, taken from $s[1] ---
    $idx = $s[1] if ($final);
    &movz   ($idx,&HB($s[1]));
    &xor    ($res,&DWP(3,$te,$idx,8));

    # --- byte 2, taken from $s[2] ---
    if ($final) {
        $idx = $s[2];
        &mov    ($s[1],&DWP(4,"esp"));      # fetch parked word 0
    } else {
        &mov    ($idx,$s[2]);
        &shr    ($idx,16);
    }
    &and    ($s[1],0xFF) if ($i==2);
    &and    ($idx,0xFF);
    &xor    ($res,&DWP(2,$te,$idx,8));

    # --- byte 3, taken from $s[3] ---
    if ($final) {
        $idx = $s[3];
        &mov    ($s[2],&DWP(8,"esp"));      # fetch parked word 1
    } elsif ($i==2) {
        &movz   ($idx,&HB($s[3]));
    } else {
        &mov    ($idx,$s[3]);
        &shr    ($idx,24);
    }
    &xor    ($res,&DWP(1,$te,$idx,8));

    &mov    (&DWP(4+4*$i,"esp"),$res) if ($i<2);
    &mov    ($s[3],$acc) if ($final);       # word 2 leaves the accumulator
    &comment();
}
210
# enclast($i, $te, @s) -- emit the code producing output word $i (0..3) of
# the final encryption round.
#
#   $i    index of the output word being produced
#   $te   register holding the base address of the Te table
#   @s    the state registers, rotated by the caller exactly as for encstep
#
# Instead of xor-ing whole table words, each looked-up dword is masked down
# to a single byte lane (0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000)
# before being merged.  With the duplicated table entries, displacement 2
# puts the plain S-box byte into lanes 0 and 3 and displacement 0 puts it
# into lanes 1 and 2.
# Register and stack-slot usage mirrors encstep: words 0/1 are parked at
# 4(%esp)/8(%esp) and restored while word 3 is computed.
sub enclast()
{   my ($i,$te,@s)=@_;
    my $tmp = $key;
    my $out = $i==3?$s[0]:$acc;

    # lane 0
    if ($i==3)  { &mov  ($key,&DWP(12,"esp")); }            ##%edx
    else        { &mov  ($out,$s[0]); }
                  &and  ($out,0xFF);
    if ($i==1)  { &shr  ($s[0],16); }                       #%ebx[1]
    if ($i==2)  { &shr  ($s[0],24); }                       #%ecx[2]
                  &mov  ($out,&DWP(2,$te,$out,8));
                  &and  ($out,0x000000ff);

    # lane 1
    if ($i==3)  { $tmp=$s[1]; }                             ##%eax
                  &movz ($tmp,&HB($s[1]));
                  &mov  ($tmp,&DWP(0,$te,$tmp,8));
                  &and  ($tmp,0x0000ff00);
                  &xor  ($out,$tmp);

    # lane 2
    if ($i==3)  { $tmp=$s[2]; &mov ($s[1],&DWP(4,"esp")); } ##%ebx
    else        { &mov  ($tmp,$s[2]);   # "&" form, like every other emitter call
                  &shr  ($tmp,16); }
    if ($i==2)  { &and  ($s[1],0xFF); }                     #%edx[2]
                  &and  ($tmp,0xFF);
                  &mov  ($tmp,&DWP(0,$te,$tmp,8));
                  &and  ($tmp,0x00ff0000);
                  &xor  ($out,$tmp);

    # lane 3
    if ($i==3)  { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); } ##%ecx
    elsif($i==2){ &movz ($tmp,&HB($s[3])); }                #%ebx[2]
    else        { &mov  ($tmp,$s[3]);
                  &shr  ($tmp,24); }
                  &mov  ($tmp,&DWP(2,$te,$tmp,8));
                  &and  ($tmp,0xff000000);
                  &xor  ($out,$tmp);
    if ($i<2)   { &mov  (&DWP(4+4*$i,"esp"),$out); }
    if ($i==3)  { &mov  ($s[3],$acc); }
}
249
# _data_word(@words) -- emit every given word twice in a row via
# &data_word, which is what makes the 8-byte-stride "compressed" table
# layout.  Emission stops at the first undefined argument.
sub _data_word()
{   my @words = @_;             # private copy, arguments are left untouched
    foreach my $word (@words) {
        last unless defined($word);
        &data_word($word,$word);
    }
}
251
# Generator for the internal block-encryption routine _x86_AES_encrypt.
# Expects the state in $s0..$s3, the key schedule pointer in $key, the Te
# table address in %ebp and a stack frame already allocated by the caller
# (slots 4..16(%esp) are used as scratch).
#
# The helpers below read the register globals at call time, so they follow
# the temporary register re-mapping done for the "vertical" flavour.

# one complete inner round
my $enc_round = sub {
    if ($vertical_spin) {
        &encvert("ebp",$s0,$s1,$s2,$s3);
    } else {
        &encstep(0,"ebp",$s0,$s1,$s2,$s3);
        &encstep(1,"ebp",$s1,$s2,$s3,$s0);
        &encstep(2,"ebp",$s2,$s3,$s0,$s1);
        &encstep(3,"ebp",$s3,$s0,$s1,$s2);
    }
};

# xor the state with the four round key words found at $disp($key)
my $enc_addkey = sub {
    my ($disp) = @_;
    &xor    ($s0,&DWP($disp+0,$key));
    &xor    ($s1,&DWP($disp+4,$key));
    &xor    ($s2,&DWP($disp+8,$key));
    &xor    ($s3,&DWP($disp+12,$key));
};

# fully unrolled rounds 1..$limit-1, keyed at fixed offsets from $key
my $enc_unrolled = sub {
    my ($limit) = @_;
    for ($i=1;$i<$limit;$i++) {
        $enc_round->();
        $enc_addkey->(16*$i);
    }
};

&public_label("AES_Te");
&function_begin_C("_x86_AES_encrypt");
    if ($vertical_spin) {
        # I need high parts of volatile registers to be accessible...
        &exch   ($s1="edi",$key="ebx");
        &mov    ($s2="esi",$acc="ecx");
    }

    # note that caller is expected to allocate stack frame for me!
    &mov    (&DWP(12,"esp"),$key);          # save key

    $enc_addkey->(0);                       # xor with key

    &mov    ($acc,&DWP(240,$key));          # load key->rounds

    if ($small_footprint) {
        # looped flavour: 16(%esp) receives the address the key pointer
        # must reach for the loop to terminate
        &lea    ($acc,&DWP(-2,$acc,$acc));
        &lea    ($acc,&DWP(0,$key,$acc,8));
        &mov    (&DWP(16,"esp"),$acc);      # end of key schedule
        &align  (4);
        &set_label("loop");
            $enc_round->();
            &add    ($key,16);              # advance rd_key
            $enc_addkey->(0);
            &cmp    ($key,&DWP(16,"esp"));
            &mov    (&DWP(12,"esp"),$key);
        &jb     (&label("loop"));
    }
    else {
        # unrolled flavour: enter at the label matching the round count,
        # longer keys simply run two extra rounds per stage first
        &cmp    ($acc,10);
        &jle    (&label("10rounds"));
        &cmp    ($acc,12);
        &jle    (&label("12rounds"));

        &set_label("14rounds");
        $enc_unrolled->(3);
        &add    ($key,32);
        &mov    (&DWP(12,"esp"),$key);      # advance rd_key
        &set_label("12rounds");
        $enc_unrolled->(3);
        &add    ($key,32);
        &mov    (&DWP(12,"esp"),$key);      # advance rd_key
        &set_label("10rounds");
        $enc_unrolled->(10);
    }

    if ($vertical_spin) {
        # "reincarnate" some registers for "horizontal" spin...
        &mov    ($s1="ebx",$key="edi");
        &mov    ($s2="ecx",$acc="esi");
    }

    # the final round is always emitted in its "horizontal" form
    &enclast(0,"ebp",$s0,$s1,$s2,$s3);
    &enclast(1,"ebp",$s1,$s2,$s3,$s0);
    &enclast(2,"ebp",$s2,$s3,$s0,$s1);
    &enclast(3,"ebp",$s3,$s0,$s1,$s2);

    # last round key: next slot in the loop, 10 slots ahead when unrolled
    &add    ($key,$small_footprint?16:160);
    $enc_addkey->(0);

    &ret    ();
367
368&set_label("AES_Te",64); # Yes! I keep it in the code segment!
369 &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
370 &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
371 &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
372 &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
373 &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
374 &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
375 &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
376 &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
377 &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
378 &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
379 &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
380 &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
381 &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
382 &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
383 &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
384 &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
385 &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
386 &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
387 &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
388 &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
389 &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
390 &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
391 &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
392 &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
393 &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
394 &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
395 &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
396 &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
397 &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
398 &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
399 &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
400 &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
401 &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
402 &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
403 &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
404 &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
405 &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
406 &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
407 &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
408 &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
409 &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
410 &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
411 &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
412 &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
413 &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
414 &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
415 &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
416 &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
417 &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
418 &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
419 &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
420 &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
421 &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
422 &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
423 &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
424 &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
425 &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
426 &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
427 &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
428 &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
429 &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
430 &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
431 &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
432 &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
433#rcon:
434 &data_word(0x00000001, 0x00000002, 0x00000004, 0x00000008);
435 &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
436 &data_word(0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0);
437&function_end_B("_x86_AES_encrypt");
438
# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public single-block entry point.  It sets up the scratch frame the
# internal routine relies on, points %ebp at AES_Te in a position
# independent way, runs _x86_AES_encrypt on the 16 input bytes and stores
# the 16 result bytes.
&public_label("AES_Te");
&function_begin("AES_encrypt");
    &mov    ($acc,&wparam(0));              # load inp
    &mov    ($key,&wparam(2));              # load key

    # carve out a frame derived from a 64-byte boundary and remember the
    # caller's %esp inside it
    &mov    ($s0,"esp");
    &sub    ("esp",24);
    &and    ("esp",-64);
    &add    ("esp",4);
    &mov    (&DWP(16,"esp"),$s0);

    # obtain the current address and turn it into the address of AES_Te
    &call   (&label("pic_point"));          # make it PIC!
    &set_label("pic_point");
    &blindpop("ebp");
    &lea    ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));

    # load input data, one state word per register
    foreach my $n (0..3) {
        &mov    (($s0,$s1,$s2,$s3)[$n],&DWP(4*$n,$acc));
    }

    &call   ("_x86_AES_encrypt");

    &mov    ("esp",&DWP(16,"esp"));         # back to the caller's stack

    &mov    ($acc,&wparam(1));              # load out
    # write output data
    foreach my $n (0..3) {
        &mov    (&DWP(4*$n,$acc),($s0,$s1,$s2,$s3)[$n]);
    }
&function_end("AES_encrypt");
471
472#------------------------------------------------------------------#
473
# decstep($i, $td, @s) -- emit the code computing output word $i (0..3) of
# one decryption round.
#
#   $i    index of the output word being produced
#   $td   register holding the base address of the Td table
#   @s    the state registers, rotated by the caller so that $s[0] supplies
#         byte 0, $s[1] byte 1, $s[2] byte 2 and $s[3] byte 3
#
# Words 0 and 1 are parked at 4(%esp) and 8(%esp), word 2 stays in the
# accumulator, and step 3 works in $s[0] while pulling the earlier words
# back into their registers.  Unlike the encryption counterpart, no
# instructions are reordered here: attempts to do so did not result in
# better performance [which by the way is not a bit lower than encryption].
sub decstep()
{   my ($i,$td,@s) = @_;
    my $final = ($i==3);
    my $idx   = $key;                       # scratch index register
    my $res   = $final ? $s[0] : $acc;      # where this word is accumulated

    # --- byte 0, taken from $s[0] ---
    if ($final) { &mov  ($key,&DWP(12,"esp")); }    # key pointer is live again
    else        { &mov  ($res,$s[0]); }
    &and    ($res,0xFF);
    &mov    ($res,&DWP(0,$td,$res,8));

    # --- byte 1, taken from $s[1] ---
    $idx = $s[1] if ($final);
    &movz   ($idx,&HB($s[1]));
    &xor    ($res,&DWP(3,$td,$idx,8));

    # --- byte 2, taken from $s[2] ---
    if ($final) {
        $idx = $s[2];
        &mov    ($s[1],$acc);               # word 2 leaves the accumulator
    } else {
        &mov    ($idx,$s[2]);
    }
    &shr    ($idx,16);
    &and    ($idx,0xFF);
    &xor    ($res,&DWP(2,$td,$idx,8));

    # --- byte 3, taken from $s[3] ---
    if ($final) {
        $idx = $s[3];
        &mov    ($s[2],&DWP(8,"esp"));      # fetch parked word 1
    } else {
        &mov    ($idx,$s[3]);
    }
    &shr    ($idx,24);
    &xor    ($res,&DWP(1,$td,$idx,8));

    &mov    (&DWP(4+4*$i,"esp"),$res) if ($i<2);
    &mov    ($s[3],&DWP(4,"esp")) if ($final);      # fetch parked word 0
    &comment();
}
506
# declast($i, $td, @s) -- emit the code producing output word $i (0..3) of
# the final decryption round.
#
#   $i    index of the output word being produced
#   $td   register holding the base address of the Td table
#   @s    the state registers, rotated by the caller exactly as for decstep
#
# Each of the four bytes is looked up with a byte-sized load at offset 2048
# from $td, i.e. in the 256-byte Td4 table that follows the 256 8-byte Td
# entries, then shifted into its lane (0, 8, 16, 24) and merged.
# Register and stack-slot usage mirrors decstep.
sub declast()
{   my ($i,$td,@s)=@_;
    my $tmp = $key;
    my $out = $i==3?$s[0]:$acc;

    # lane 0
    if ($i==3)  { &mov  ($key,&DWP(12,"esp")); }
    else        { &mov  ($out,$s[0]); }
                  &and  ($out,0xFF);
                  &movz ($out,&BP(2048,$td,$out,1));

    # lane 1
    if ($i==3)  { $tmp=$s[1]; }
                  &movz ($tmp,&HB($s[1]));
                  &movz ($tmp,&BP(2048,$td,$tmp,1));
                  &shl  ($tmp,8);
                  &xor  ($out,$tmp);

    # lane 2
    if ($i==3)  { $tmp=$s[2]; &mov ($s[1],$acc); }
    else        { &mov  ($tmp,$s[2]); } # "&" form, like every other emitter call
                  &shr  ($tmp,16);
                  &and  ($tmp,0xFF);
                  &movz ($tmp,&BP(2048,$td,$tmp,1));
                  &shl  ($tmp,16);
                  &xor  ($out,$tmp);

    # lane 3
    if ($i==3)  { $tmp=$s[3]; &mov ($s[2],&DWP(8,"esp")); }
    else        { &mov  ($tmp,$s[3]); }
                  &shr  ($tmp,24);
                  &movz ($tmp,&BP(2048,$td,$tmp,1));
                  &shl  ($tmp,24);
                  &xor  ($out,$tmp);
    if ($i<2)   { &mov  (&DWP(4+4*$i,"esp"),$out); }
    if ($i==3)  { &mov  ($s[3],&DWP(4,"esp")); }
}
540
# Generator for the internal block-decryption routine _x86_AES_decrypt.
# Expects the state in $s0..$s3, the key schedule pointer in $key, the Td
# table address in %ebp and a stack frame already allocated by the caller
# (slots 4..16(%esp) are used as scratch).

# one complete inner round; note the reversed register rotation
my $dec_round = sub {
    &decstep(0,"ebp",$s0,$s3,$s2,$s1);
    &decstep(1,"ebp",$s1,$s0,$s3,$s2);
    &decstep(2,"ebp",$s2,$s1,$s0,$s3);
    &decstep(3,"ebp",$s3,$s2,$s1,$s0);
};

# xor the state with the four round key words found at $disp($key)
my $dec_addkey = sub {
    my ($disp) = @_;
    &xor    ($s0,&DWP($disp+0,$key));
    &xor    ($s1,&DWP($disp+4,$key));
    &xor    ($s2,&DWP($disp+8,$key));
    &xor    ($s3,&DWP($disp+12,$key));
};

# fully unrolled rounds 1..$limit-1, keyed at fixed offsets from $key
my $dec_unrolled = sub {
    my ($limit) = @_;
    for ($i=1;$i<$limit;$i++) {
        $dec_round->();
        $dec_addkey->(16*$i);
    }
};

&public_label("AES_Td");
&function_begin_C("_x86_AES_decrypt");
    # note that caller is expected to allocate stack frame for me!
    &mov    (&DWP(12,"esp"),$key);          # save key

    $dec_addkey->(0);                       # xor with key

    &mov    ($acc,&DWP(240,$key));          # load key->rounds

    if ($small_footprint) {
        # looped flavour: 16(%esp) receives the address the key pointer
        # must reach for the loop to terminate
        &lea    ($acc,&DWP(-2,$acc,$acc));
        &lea    ($acc,&DWP(0,$key,$acc,8));
        &mov    (&DWP(16,"esp"),$acc);      # end of key schedule
        &align  (4);
        &set_label("loop");
            $dec_round->();
            &add    ($key,16);              # advance rd_key
            $dec_addkey->(0);
            &cmp    ($key,&DWP(16,"esp"));
            &mov    (&DWP(12,"esp"),$key);
        &jb     (&label("loop"));
    }
    else {
        # unrolled flavour: enter at the label matching the round count,
        # longer keys simply run two extra rounds per stage first
        &cmp    ($acc,10);
        &jle    (&label("10rounds"));
        &cmp    ($acc,12);
        &jle    (&label("12rounds"));

        &set_label("14rounds");
        $dec_unrolled->(3);
        &add    ($key,32);
        &mov    (&DWP(12,"esp"),$key);      # advance rd_key
        &set_label("12rounds");
        $dec_unrolled->(3);
        &add    ($key,32);
        &mov    (&DWP(12,"esp"),$key);      # advance rd_key
        &set_label("10rounds");
        $dec_unrolled->(10);
    }

    # final round
    &declast(0,"ebp",$s0,$s3,$s2,$s1);
    &declast(1,"ebp",$s1,$s0,$s3,$s2);
    &declast(2,"ebp",$s2,$s1,$s0,$s3);
    &declast(3,"ebp",$s3,$s2,$s1,$s0);

    # last round key: next slot in the loop, 10 slots ahead when unrolled
    &add    ($key,$small_footprint?16:160);
    $dec_addkey->(0);

    &ret    ();
629
630&set_label("AES_Td",64); # Yes! I keep it in the code segment!
631 &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
632 &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
633 &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
634 &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
635 &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
636 &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
637 &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
638 &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
639 &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
640 &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
641 &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
642 &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
643 &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
644 &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
645 &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
646 &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
647 &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
648 &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
649 &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
650 &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
651 &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
652 &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
653 &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
654 &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
655 &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
656 &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
657 &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
658 &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
659 &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
660 &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
661 &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
662 &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
663 &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
664 &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
665 &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
666 &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
667 &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
668 &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
669 &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
670 &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
671 &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
672 &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
673 &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
674 &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
675 &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
676 &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
677 &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
678 &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
679 &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
680 &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
681 &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
682 &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
683 &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
684 &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
685 &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
686 &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
687 &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
688 &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
689 &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
690 &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
691 &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
692 &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
693 &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
694 &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
695#Td4:
696 &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
697 &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
698 &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
699 &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
700 &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
701 &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
702 &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
703 &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
704 &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
705 &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
706 &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
707 &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
708 &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
709 &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
710 &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
711 &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
712 &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
713 &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
714 &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
715 &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
716 &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
717 &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
718 &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
719 &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
720 &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
721 &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
722 &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
723 &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
724 &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
725 &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
726 &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
727 &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
728&function_end_B("_x86_AES_decrypt");
729
# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public single-block entry point.  It sets up the scratch frame the
# internal routine relies on, points %ebp at AES_Td in a position
# independent way, touches the whole Td4 table, runs _x86_AES_decrypt on the
# 16 input bytes and stores the 16 result bytes.
&public_label("AES_Td");
&function_begin("AES_decrypt");
    &mov    ($acc,&wparam(0));              # load inp
    &mov    ($key,&wparam(2));              # load key

    # carve out a frame derived from a 64-byte boundary and remember the
    # caller's %esp inside it
    &mov    ($s0,"esp");
    &sub    ("esp",24);
    &and    ("esp",-64);
    &add    ("esp",4);
    &mov    (&DWP(16,"esp"),$s0);

    # obtain the current address and turn it into the address of AES_Td
    &call   (&label("pic_point"));          # make it PIC!
    &set_label("pic_point");
    &blindpop("ebp");
    &lea    ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));

    # prefetch Td4: eight loads spaced 32 bytes apart sweep the 256-byte
    # table at AES_Td+2048; the loaded values are thrown away.  %ebp is
    # biased by 128 around the loads and restored afterwards.
    &lea    ("ebp",&DWP(2048+128,"ebp"));
    foreach my $n (0..7) {
        &mov    (($s0,$s1,$s2,$s3)[$n%4],&DWP(32*$n-128,"ebp"));
    }
    &lea    ("ebp",&DWP(-2048-128,"ebp"));

    # load input data, one state word per register
    foreach my $n (0..3) {
        &mov    (($s0,$s1,$s2,$s3)[$n],&DWP(4*$n,$acc));
    }

    &call   ("_x86_AES_decrypt");

    &mov    ("esp",&DWP(16,"esp"));         # back to the caller's stack

    &mov    ($acc,&wparam(1));              # load out
    # write output data
    foreach my $n (0..3) {
        &mov    (&DWP(4*$n,$acc),($s0,$s1,$s2,$s3)[$n]);
    }
&function_end("AES_decrypt");
774
775# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
776# size_t length, const AES_KEY *key,
777# unsigned char *ivp,const int enc);
778{
779# stack frame layout
780# -4(%esp) 0(%esp) return address
781# 0(%esp) 4(%esp) tmp1
782# 4(%esp) 8(%esp) tmp2
783# 8(%esp) 12(%esp) key
784# 12(%esp) 16(%esp) end of key schedule
785my $_esp=&DWP(16,"esp"); #saved %esp
786my $_inp=&DWP(20,"esp"); #copy of wparam(0)
787my $_out=&DWP(24,"esp"); #copy of wparam(1)
788my $_len=&DWP(28,"esp"); #copy of wparam(2)
789my $_key=&DWP(32,"esp"); #copy of wparam(3)
790my $_ivp=&DWP(36,"esp"); #copy of wparam(4)
791my $_tmp=&DWP(40,"esp"); #volatile variable
792my $ivec=&DWP(44,"esp"); #ivec[16]
793my $aes_key=&DWP(60,"esp"); #copy of aes_key
794my $mark=&DWP(60+240,"esp"); #copy of aes_key->rounds
795
# AES_cbc_encrypt entry and encrypt-side set-up.
# Returns immediately for len==0, obtains a PIC base, saves EFLAGS and
# clears DF, then branches to DECRYPT when the enc argument is zero.
# Otherwise it builds an aligned stack frame positioned so that it does
# not alias AES_Te modulo 4096, saves the arguments in the frame and
# optionally copies the key schedule onto the stack.
796&public_label("AES_Te");
797&public_label("AES_Td");
798&function_begin("AES_cbc_encrypt");
799 &mov ($s2 eq "ecx"? $s2 : "",&wparam(2)); # load len
800 &cmp ($s2,0);
801 &je (&label("enc_out")); # nothing to do for len==0
802
803 &call (&label("pic_point")); # make it PIC!
804 &set_label("pic_point");
805 &blindpop("ebp"); # %ebp = run-time address of pic_point
806
807 &pushf (); # saved flags are restored by popf in the epilogues
808 &cld (); # the rep movs/stos sequences below rely on DF=0
809
810 &cmp (&wparam(5),0); # enc==0 selects decryption
811 &je (&label("DECRYPT"));
812
813 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
814
815 # allocate aligned stack frame...
816 &lea ($key,&DWP(-64-244,"esp"));
817 &and ($key,-64);
818
819 # ... and make sure it doesn't alias with AES_Te modulo 4096
820 &mov ($s0,"ebp");
821 &lea ($s1,&DWP(2048,"ebp"));
822 &mov ($s3,$key);
823 &and ($s0,0xfff); # s = %ebp&0xfff
824 &and ($s1,0xfff); # e = (%ebp+2048)&0xfff
825 &and ($s3,0xfff); # p = %esp&0xfff
826
827 &cmp ($s3,$s1); # if (p>=e) %esp -= (p-e);
828 &jb (&label("te_break_out"));
829 &sub ($s3,$s1);
830 &sub ($key,$s3);
831 &jmp (&label("te_ok"));
832 &set_label("te_break_out"); # else %esp -= (p-s)&0xfff + framesz;
833 &sub ($s3,$s0);
834 &and ($s3,0xfff);
835 &add ($s3,64+256);
836 &sub ($key,$s3);
837 &align (4);
838 &set_label("te_ok");
839
# Fetch the arguments while %esp is still the caller-relative one;
# &wparam() is not used again on this path after the switch below.
840 &mov ($s0,&wparam(0)); # load inp
841 &mov ($s1,&wparam(1)); # load out
842 &mov ($s3,&wparam(3)); # load key
843 &mov ($acc,&wparam(4)); # load ivp
844
845 &exch ("esp",$key); # switch to the new frame; $key now holds the old %esp
846 &add ("esp",4); # reserve for return address!
847 &mov ($_esp,$key); # save %esp
848
849 &mov ($_inp,$s0); # save copy of inp
850 &mov ($_out,$s1); # save copy of out
851 &mov ($_len,$s2); # save copy of len
852 &mov ($_key,$s3); # save copy of key
853 &mov ($_ivp,$acc); # save copy of ivp
854
855 &mov ($mark,0); # copy of aes_key->rounds = 0;
# Generation-time switch: when $compromise is set, inputs shorter than
# $compromise bytes never get the key schedule copied.
856 if ($compromise) {
857 &cmp ($s2,$compromise);
858 &jb (&label("skip_ecopy"));
859 }
860 # do we copy key schedule to stack?
# d = (key - AES_Te) & 0xfff. The schedule is copied when d < 2048 or
# d >= 4096-244, and left in place otherwise.
861 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
862 &mov ($s2 eq "ecx" ? $s2 : "",244/4); # dword count for rep movsd
863 &sub ($s1,"ebp");
864 &mov ("esi",$s3); # copy source: caller's key schedule
865 &and ($s1,0xfff);
866 &lea ("edi",$aes_key); # copy destination: on-stack buffer
867 &cmp ($s1,2048);
868 &jb (&label("do_ecopy"));
869 &cmp ($s1,4096-244);
870 &jb (&label("skip_ecopy"));
871 &align (4);
872 &set_label("do_ecopy");
873 &mov ($_key,"edi"); # use the stack copy from now on
# Bytes emitted are 89 F6 F3 A5: a two-byte "mov esi,esi" followed by
# "rep movsd".
874 &data_word(0xA5F3F689); # rep movsd
875 &set_label("skip_ecopy");
876
# Encrypt path: touch the AES_Te table, then run the CBC encryption
# loop over all complete 16-byte blocks. Inputs shorter than 16 bytes
# and trailing partial blocks are handed to enc_tail.
877 &mov ($acc,$s0); # $acc = inp
878 &mov ($key,16); # 16 passes * 128 bytes = 2048 bytes of table
879 &align (4);
880 &set_label("prefetch_te");
881 &mov ($s0,&DWP(0,"ebp")); # loaded values are discarded
882 &mov ($s1,&DWP(32,"ebp"));
883 &mov ($s2,&DWP(64,"ebp"));
884 &mov ($s3,&DWP(96,"ebp"));
885 &lea ("ebp",&DWP(128,"ebp"));
886 &dec ($key);
887 &jnz (&label("prefetch_te"));
888 &sub ("ebp",2048); # back to AES_Te
889
890 &mov ($s2,$_len);
891 &mov ($key,$_ivp);
892 &test ($s2,0xFFFFFFF0);
893 &jz (&label("enc_tail")); # short input...
894
895 &mov ($s0,&DWP(0,$key)); # load iv
896 &mov ($s1,&DWP(4,$key));
897
898 &align (4);
899 &set_label("enc_loop");
# On entry $key points at the chaining value (user iv, or the previous
# ciphertext block) and $s0/$s1 already hold its first two dwords.
900 &mov ($s2,&DWP(8,$key));
901 &mov ($s3,&DWP(12,$key));
902
903 &xor ($s0,&DWP(0,$acc)); # xor input data
904 &xor ($s1,&DWP(4,$acc));
905 &xor ($s2,&DWP(8,$acc));
906 &xor ($s3,&DWP(12,$acc));
907
908 &mov ($key,$_key); # load key
909 &call ("_x86_AES_encrypt");
910
911 &mov ($acc,$_inp); # load inp
912 &mov ($key,$_out); # load out
913
914 &mov (&DWP(0,$key),$s0); # save output data
915 &mov (&DWP(4,$key),$s1);
916 &mov (&DWP(8,$key),$s2);
917 &mov (&DWP(12,$key),$s3);
918
919 &mov ($s2,$_len); # load len
920
921 &lea ($acc,&DWP(16,$acc));
922 &mov ($_inp,$acc); # save inp
923
924 &lea ($s3,&DWP(16,$key));
925 &mov ($_out,$s3); # save out
926
927 &sub ($s2,16);
928 &test ($s2,0xFFFFFFF0); # at least one more full block?
929 &mov ($_len,$s2); # save len
930 &jnz (&label("enc_loop"));
931 &test ($s2,15); # leftover bytes?
932 &jnz (&label("enc_tail"));
933 &mov ($acc,$_ivp); # load ivp
# $s2/$s3 were reused for len/out above, so reload the upper half of
# the last ciphertext block from the output buffer ($key).
934 &mov ($s2,&DWP(8,$key)); # restore last dwords
935 &mov ($s3,&DWP(12,$key));
936 &mov (&DWP(0,$acc),$s0); # save ivec
937 &mov (&DWP(4,$acc),$s1);
938 &mov (&DWP(8,$acc),$s2);
939 &mov (&DWP(12,$acc),$s3);
940
# Encrypt-path epilogue: wipe the on-stack key schedule copy if one was
# made, switch back to the caller's stack, restore EFLAGS and return.
941 &cmp ($mark,0); # was the key schedule copied?
942 &mov ("edi",$_key); # mov does not disturb the flags set by cmp
943 &je (&label("skip_ezero"));
944 # zero copy of key schedule
945 &mov ("ecx",240/4); # 240 bytes of round keys, as dwords
946 &xor ("eax","eax");
947 &align (4);
# Bytes emitted are 89 F6 F3 AB: "mov esi,esi" followed by "rep stosd".
948 &data_word(0xABF3F689); # rep stosd
# The statement terminator below was missing in the original, which
# made Perl parse this call and the following &mov as a single
# bitwise-AND expression; both calls still ran in order, but only by
# accident of operand evaluation order.
949 &set_label("skip_ezero");
950 &mov ("esp",$_esp); # back to the caller's stack
951 &popf ();
952 &set_label("enc_out");
953 &function_end_A();
# NOTE(review): presumably re-balances the assembler helper's
# stack-depth bookkeeping after the popf above so that later &wparam()
# offsets stay correct - confirm against the x86 perlasm helpers.
954 &pushf (); # kludge, never executed
955
# enc_tail: handle a final partial block (len&15 != 0, or len < 16).
# On entry $s2 = number of remaining bytes, $acc = inp and $key points
# at the current chaining value. The remaining input bytes are copied
# to out (unless processing in place), the rest of the 16-byte block in
# out is zero-filled, and enc_loop is re-entered once with inp=out and
# len=16.
956 &align (4);
957 &set_label("enc_tail");
958 &mov ($s0,$key eq "edi" ? $key : "");
959 &mov ($key,$_out); # load out
960 &push ($s0); # push ivp
961 &mov ($s1,16);
962 &sub ($s1,$s2); # $s1 = 16 - remaining = bytes to zero
963 &cmp ($key,$acc); # compare with inp
964 &je (&label("enc_in_place"));
965 &align (4);
# Bytes emitted are 89 F6 F3 A4: "mov esi,esi" followed by "rep movsb".
966 &data_word(0xA4F3F689); # rep movsb # copy input
967 &jmp (&label("enc_skip_in_place"));
968 &set_label("enc_in_place");
969 &lea ($key,&DWP(0,$key,$s2)); # skip over the bytes already in place
970 &set_label("enc_skip_in_place");
971 &mov ($s2,$s1);
972 &xor ($s0,$s0);
973 &align (4);
# Bytes emitted are 89 F6 F3 AA: "mov esi,esi" followed by "rep stosb".
974 &data_word(0xAAF3F689); # rep stosb # zero tail
975 &pop ($key); # pop ivp
976
977 &mov ($acc,$_out); # output as input
978 &mov ($s0,&DWP(0,$key)); # first half of chaining value, as enc_loop expects
979 &mov ($s1,&DWP(4,$key));
980 &mov ($_len,16); # len=16
981 &jmp (&label("enc_loop")); # one more spin...
982
983#----------------------------- DECRYPT -----------------------------#
# Decrypt-side set-up, reached from the entry code when enc==0 (after
# pushf/cld, with %ebp = address of pic_point and $s2 = len). Mirrors
# the encrypt-side set-up but against AES_Td, whose footprint is taken
# to be 2048+256 bytes.
984&align (4);
985&set_label("DECRYPT");
986 &lea ("ebp",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
987
988 # allocate aligned stack frame...
989 &lea ($key,&DWP(-64-244,"esp"));
990 &and ($key,-64);
991
992 # ... and make sure it doesn't alias with AES_Td modulo 4096
993 &mov ($s0,"ebp");
994 &lea ($s1,&DWP(2048+256,"ebp"));
995 &mov ($s3,$key);
996 &and ($s0,0xfff); # s = %ebp&0xfff
997 &and ($s1,0xfff); # e = (%ebp+2048+256)&0xfff
998 &and ($s3,0xfff); # p = %esp&0xfff
999
1000 &cmp ($s3,$s1); # if (p>=e) %esp -= (p-e);
1001 &jb (&label("td_break_out"));
1002 &sub ($s3,$s1);
1003 &sub ($key,$s3);
1004 &jmp (&label("td_ok"));
1005 &set_label("td_break_out"); # else %esp -= (p-s)&0xfff + framesz;
1006 &sub ($s3,$s0);
1007 &and ($s3,0xfff);
1008 &add ($s3,64+256);
1009 &sub ($key,$s3);
1010 &align (4);
1011 &set_label("td_ok");
1012
# Fetch the arguments while %esp is still the caller-relative one.
1013 &mov ($s0,&wparam(0)); # load inp
1014 &mov ($s1,&wparam(1)); # load out
1015 &mov ($s3,&wparam(3)); # load key
1016 &mov ($acc,&wparam(4)); # load ivp
1017
1018 &exch ("esp",$key); # switch to the new frame; $key now holds the old %esp
1019 &add ("esp",4); # reserve for return address!
1020 &mov ($_esp,$key); # save %esp
1021
1022 &mov ($_inp,$s0); # save copy of inp
1023 &mov ($_out,$s1); # save copy of out
1024 &mov ($_len,$s2); # save copy of len
1025 &mov ($_key,$s3); # save copy of key
1026 &mov ($_ivp,$acc); # save copy of ivp
1027
1028 &mov ($mark,0); # copy of aes_key->rounds = 0;
# Generation-time switch: when $compromise is set, inputs shorter than
# $compromise bytes never get the key schedule copied.
1029 if ($compromise) {
1030 &cmp ($s2,$compromise);
1031 &jb (&label("skip_dcopy"));
1032 }
1033 # do we copy key schedule to stack?
# d = (key - AES_Td) & 0xfff. The schedule is copied when
# d < 2048+256 or d >= 4096-244, and left in place otherwise.
1034 &mov ($s1 eq "ebx" ? $s1 : "",$s3);
1035 &mov ($s2 eq "ecx" ? $s2 : "",244/4); # dword count for rep movsd
1036 &sub ($s1,"ebp");
1037 &mov ("esi",$s3); # copy source: caller's key schedule
1038 &and ($s1,0xfff);
1039 &lea ("edi",$aes_key); # copy destination: on-stack buffer
1040 &cmp ($s1,2048+256);
1041 &jb (&label("do_dcopy"));
1042 &cmp ($s1,4096-244);
1043 &jb (&label("skip_dcopy"));
1044 &align (4);
1045 &set_label("do_dcopy");
1046 &mov ($_key,"edi"); # use the stack copy from now on
# Bytes emitted are 89 F6 F3 A5: "mov esi,esi" followed by "rep movsd".
1047 &data_word(0xA5F3F689); # rep movsd
1048 &set_label("skip_dcopy");
1049
# Decrypt path, inp != out: touch the AES_Td table, then decrypt block
# by block. $_tmp tracks the address of the current chaining value
# (the user iv first, afterwards the previous ciphertext block inside
# the input buffer). dec_end copies the final chaining value back to
# the user's ivp; dec_partial handles a trailing short block.
1050 &mov ($acc,$s0); # $acc = inp
1051 &mov ($key,18); # 18 passes * 128 bytes = 2048+256 bytes of table
1052 &align (4);
1053 &set_label("prefetch_td");
1054 &mov ($s0,&DWP(0,"ebp")); # loaded values are discarded
1055 &mov ($s1,&DWP(32,"ebp"));
1056 &mov ($s2,&DWP(64,"ebp"));
1057 &mov ($s3,&DWP(96,"ebp"));
1058 &lea ("ebp",&DWP(128,"ebp"));
1059 &dec ($key);
1060 &jnz (&label("prefetch_td"));
1061 &sub ("ebp",2048+256); # back to AES_Td
1062
1063 &cmp ($acc,$_out);
1064 &je (&label("dec_in_place")); # in-place processing...
1065
1066 &mov ($key,$_ivp); # load ivp
1067 &mov ($_tmp,$key);
1068
1069 &align (4);
1070 &set_label("dec_loop");
1071 &mov ($s0,&DWP(0,$acc)); # read input
1072 &mov ($s1,&DWP(4,$acc));
1073 &mov ($s2,&DWP(8,$acc));
1074 &mov ($s3,&DWP(12,$acc));
1075
1076 &mov ($key,$_key); # load key
1077 &call ("_x86_AES_decrypt");
1078
1079 &mov ($key,$_tmp); # load ivp
1080 &mov ($acc,$_len); # load len
1081 &xor ($s0,&DWP(0,$key)); # xor iv
1082 &xor ($s1,&DWP(4,$key));
1083 &xor ($s2,&DWP(8,$key));
1084 &xor ($s3,&DWP(12,$key));
1085
1086 &sub ($acc,16);
1087 &jc (&label("dec_partial")); # fewer than 16 bytes were left
1088 &mov ($_len,$acc); # save len
1089 &mov ($acc,$_inp); # load inp
1090 &mov ($key,$_out); # load out
1091
1092 &mov (&DWP(0,$key),$s0); # write output
1093 &mov (&DWP(4,$key),$s1);
1094 &mov (&DWP(8,$key),$s2);
1095 &mov (&DWP(12,$key),$s3);
1096
1097 &mov ($_tmp,$acc); # save ivp
1098 &lea ($acc,&DWP(16,$acc));
1099 &mov ($_inp,$acc); # save inp
1100
1101 &lea ($key,&DWP(16,$key));
1102 &mov ($_out,$key); # save out
1103
# ZF still reflects the "sub $acc,16" above: only mov and lea, which
# leave the flags alone, have been emitted since.
1104 &jnz (&label("dec_loop"));
1105 &mov ($key,$_tmp); # load temp ivp
1106 &set_label("dec_end");
1107 &mov ($acc,$_ivp); # load user ivp
1108 &mov ($s0,&DWP(0,$key)); # load iv
1109 &mov ($s1,&DWP(4,$key));
1110 &mov ($s2,&DWP(8,$key));
1111 &mov ($s3,&DWP(12,$key));
1112 &mov (&DWP(0,$acc),$s0); # copy back to user
1113 &mov (&DWP(4,$acc),$s1);
1114 &mov (&DWP(8,$acc),$s2);
1115 &mov (&DWP(12,$acc),$s3);
1116 &jmp (&label("dec_out"));
1117
1118 &align (4);
1119 &set_label("dec_partial");
# Here $acc = len-16 (negative). The decrypted block is parked in the
# on-stack ivec buffer and only the 16+$acc valid bytes go to out.
1120 &lea ($key,$ivec);
1121 &mov (&DWP(0,$key),$s0); # dump output to stack
1122 &mov (&DWP(4,$key),$s1);
1123 &mov (&DWP(8,$key),$s2);
1124 &mov (&DWP(12,$key),$s3);
1125 &lea ($s2 eq "ecx" ? $s2 : "",&DWP(16,$acc)); # byte count for rep movsb
1126 &mov ($acc eq "esi" ? $acc : "",$key);
1127 &mov ($key eq "edi" ? $key : "",$_out); # load out
# Bytes emitted are 89 F6 F3 A4: "mov esi,esi" followed by "rep movsb".
1128 &data_word(0xA4F3F689); # rep movsb # copy output
1129 &mov ($key,$_inp); # use inp as temp ivp
1130 &jmp (&label("dec_end"));
1131
# Decrypt path, inp == out. Each ciphertext block is first saved in
# the on-stack ivec buffer, because writing the plaintext overwrites
# it; after the output is stored, the saved ciphertext becomes the new
# iv in the user's ivp buffer.
1132 &align (4);
1133 &set_label("dec_in_place");
1134 &set_label("dec_in_place_loop");
1135 &lea ($key,$ivec);
1136 &mov ($s0,&DWP(0,$acc)); # read input
1137 &mov ($s1,&DWP(4,$acc));
1138 &mov ($s2,&DWP(8,$acc));
1139 &mov ($s3,&DWP(12,$acc));
1140
1141 &mov (&DWP(0,$key),$s0); # copy to temp
1142 &mov (&DWP(4,$key),$s1);
1143 &mov (&DWP(8,$key),$s2);
1144 &mov (&DWP(12,$key),$s3);
1145
1146 &mov ($key,$_key); # load key
1147 &call ("_x86_AES_decrypt");
1148
1149 &mov ($key,$_ivp); # load ivp
1150 &mov ($acc,$_out); # load out
1151 &xor ($s0,&DWP(0,$key)); # xor iv
1152 &xor ($s1,&DWP(4,$key));
1153 &xor ($s2,&DWP(8,$key));
1154 &xor ($s3,&DWP(12,$key));
1155
1156 &mov (&DWP(0,$acc),$s0); # write output
1157 &mov (&DWP(4,$acc),$s1);
1158 &mov (&DWP(8,$acc),$s2);
1159 &mov (&DWP(12,$acc),$s3);
1160
1161 &lea ($acc,&DWP(16,$acc));
1162 &mov ($_out,$acc); # save out
1163
1164 &lea ($acc,$ivec);
1165 &mov ($s0,&DWP(0,$acc)); # read temp
1166 &mov ($s1,&DWP(4,$acc));
1167 &mov ($s2,&DWP(8,$acc));
1168 &mov ($s3,&DWP(12,$acc));
1169
1170 &mov (&DWP(0,$key),$s0); # copy iv
1171 &mov (&DWP(4,$key),$s1);
1172 &mov (&DWP(8,$key),$s2);
1173 &mov (&DWP(12,$key),$s3);
1174
1175 &mov ($acc,$_inp); # load inp
1176
1177 &lea ($acc,&DWP(16,$acc));
1178 &mov ($_inp,$acc); # save inp
1179
1180 &mov ($s2,$_len); # load len
1181 &sub ($s2,16);
1182 &jc (&label("dec_in_place_partial")); # fewer than 16 bytes were left
1183 &mov ($_len,$s2); # save len
1184 &jnz (&label("dec_in_place_loop")); # ZF is still from the sub; mov leaves flags alone
1185 &jmp (&label("dec_out"));
1186
1187 &align (4);
1188 &set_label("dec_in_place_partial");
# Here $s2 = len-16 (negative) and $_out already points past the block
# just written. The last -$s2 bytes of that block are overwritten with
# the corresponding bytes of the ciphertext saved in ivec.
1189 # one can argue if this is actually required...
1190 &mov ($key eq "edi" ? $key : "",$_out);
1191 &lea ($acc eq "esi" ? $acc : "",$ivec);
1192 &lea ($key,&DWP(0,$key,$s2)); # destination: out + (len-16)
1193 &lea ($acc,&DWP(16,$acc,$s2)); # source: ivec + 16 + (len-16)
1194 &neg ($s2 eq "ecx" ? $s2 : ""); # byte count = 16 - len
# Bytes emitted are 89 F6 F3 A4: "mov esi,esi" followed by "rep movsb".
1195 &data_word(0xA4F3F689); # rep movsb # restore tail
1196
# Decrypt-path epilogue: wipe the on-stack key schedule copy if one was
# made, switch back to the caller's stack, restore EFLAGS and return.
1197 &align (4);
1198 &set_label("dec_out");
1199 &cmp ($mark,0); # was the key schedule copied?
1200 &mov ("edi",$_key); # mov does not disturb the flags set by cmp
1201 &je (&label("skip_dzero"));
1202 # zero copy of key schedule
1203 &mov ("ecx",240/4); # 240 bytes of round keys, as dwords
1204 &xor ("eax","eax");
1205 &align (4);
# Bytes emitted are 89 F6 F3 AB: "mov esi,esi" followed by "rep stosd".
1206 &data_word(0xABF3F689); # rep stosd
# The statement terminator below was missing in the original, which
# made Perl parse this call and the following &mov as a single
# bitwise-AND expression; both calls still ran in order, but only by
# accident of operand evaluation order.
1207 &set_label("skip_dzero");
1208 &mov ("esp",$_esp); # back to the caller's stack
1209 &popf ();
1210&function_end("AES_cbc_encrypt");
1211}
1212
1213#------------------------------------------------------------------#
1214
# enckey: emit one key-expansion core step for AES_set_encrypt_key.
# Register contract of the emitted code, as used by the callers below:
#   in:  %eax = word to be updated (rk[0] of the current group),
#        %edx = last word of the current group,
#        %ecx = rcon index, %ebp = table base (AES_Te)
#   out: %eax ^= substituted-and-rotated %edx ^ rcon[%ecx]
# Each byte of %edx is looked up in the table at %ebp and masked into
# the next-lower byte position (the lowest byte wraps to the top).
# %ebx and %esi are scratch; %edx is shifted right by 16 on the way.
1215sub enckey()
1216{
1217 &movz ("esi",&LB("edx")); # rk[i]>>0
1218 &mov ("ebx",&DWP(2,"ebp","esi",8));
1219 &movz ("esi",&HB("edx")); # rk[i]>>8
1220 &and ("ebx",0xFF000000); # byte 0 lands in bits 24-31
1221 &xor ("eax","ebx");
1222
1223 &mov ("ebx",&DWP(2,"ebp","esi",8));
1224 &shr ("edx",16);
1225 &and ("ebx",0x000000FF); # byte 1 lands in bits 0-7
1226 &movz ("esi",&LB("edx")); # rk[i]>>16
1227 &xor ("eax","ebx");
1228
1229 &mov ("ebx",&DWP(0,"ebp","esi",8));
1230 &movz ("esi",&HB("edx")); # rk[i]>>24
1231 &and ("ebx",0x0000FF00); # byte 2 lands in bits 8-15
1232 &xor ("eax","ebx");
1233
1234 &mov ("ebx",&DWP(0,"ebp","esi",8));
1235 &and ("ebx",0x00FF0000); # byte 3 lands in bits 16-23
1236 &xor ("eax","ebx");
1237
1238 &xor ("eax",&DWP(2048,"ebp","ecx",4)); # rcon
1239}
1240
1241# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1242# AES_KEY *key)
#
# Emits the encryption key-schedule set-up. The generated code returns
# (in %eax) 0 on success, -1 when userKey or key is NULL, and -2 when
# bits is not 128, 192 or 256. The third &function_begin argument
# names a second label, _x86_AES_set_encrypt_key, which
# AES_set_decrypt_key calls.
1243&public_label("AES_Te");
1244&function_begin("AES_set_encrypt_key", "", "_x86_AES_set_encrypt_key");
1245 &mov ("esi",&wparam(0)); # user supplied key
1246 &mov ("edi",&wparam(2)); # private key schedule
1247
1248 &test ("esi",-1); # NULL check
1249 &jz (&label("badpointer"));
1250 &test ("edi",-1); # NULL check
1251 &jz (&label("badpointer"));
1252
1253 &call (&label("pic_point"));
1254 &set_label("pic_point");
1255 &blindpop("ebp"); # %ebp = run-time address of pic_point
1256 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
1257
1258 &mov ("ecx",&wparam(1)); # number of bits in key
1259 &cmp ("ecx",128);
1260 &je (&label("10rounds"));
1261 &cmp ("ecx",192);
1262 &je (&label("12rounds"));
1263 &cmp ("ecx",256);
1264 &je (&label("14rounds"));
1265 &mov ("eax",-2); # invalid number of bits
1266 &jmp (&label("exit"));
1267
# 128-bit key: copy the 4 key words, then run 10 iterations (%ecx =
# 0..9, also the rcon index), each producing 4 new words and advancing
# %edi by 16 bytes.
1268 &set_label("10rounds");
1269 &mov ("eax",&DWP(0,"esi")); # copy first 4 dwords
1270 &mov ("ebx",&DWP(4,"esi"));
1271 &mov ("ecx",&DWP(8,"esi"));
1272 &mov ("edx",&DWP(12,"esi"));
1273 &mov (&DWP(0,"edi"),"eax");
1274 &mov (&DWP(4,"edi"),"ebx");
1275 &mov (&DWP(8,"edi"),"ecx");
1276 &mov (&DWP(12,"edi"),"edx");
1277
1278 &xor ("ecx","ecx"); # iteration counter / rcon index
# First pass enters at 10shortcut: %eax and %edx still hold rk[0] and
# rk[3] from the copy above.
1279 &jmp (&label("10shortcut"));
1280
1281 &align (4);
1282 &set_label("10loop");
1283 &mov ("eax",&DWP(0,"edi")); # rk[0]
1284 &mov ("edx",&DWP(12,"edi")); # rk[3]
1285 &set_label("10shortcut");
1286 &enckey ();
1287
1288 &mov (&DWP(16,"edi"),"eax"); # rk[4]
1289 &xor ("eax",&DWP(4,"edi"));
1290 &mov (&DWP(20,"edi"),"eax"); # rk[5]
1291 &xor ("eax",&DWP(8,"edi"));
1292 &mov (&DWP(24,"edi"),"eax"); # rk[6]
1293 &xor ("eax",&DWP(12,"edi"));
1294 &mov (&DWP(28,"edi"),"eax"); # rk[7]
1295 &inc ("ecx");
1296 &add ("edi",16);
1297 &cmp ("ecx",10);
1298 &jl (&label("10loop"));
1299
# %edi has advanced by 10*16 = 160 bytes, so 80(%edi) is offset 240 of
# the key structure.
1300 &mov (&DWP(80,"edi"),10); # setup number of rounds
1301 &xor ("eax","eax"); # return 0
1302 &jmp (&label("exit"));
1303
# 192-bit key: copy the 6 key words, then iterate. A full pass
# produces 6 new words and advances %edi by 24 bytes; the pass with
# %ecx == 7 stops after the first 4 words.
1304 &set_label("12rounds");
1305 &mov ("eax",&DWP(0,"esi")); # copy first 6 dwords
1306 &mov ("ebx",&DWP(4,"esi"));
1307 &mov ("ecx",&DWP(8,"esi"));
1308 &mov ("edx",&DWP(12,"esi"));
1309 &mov (&DWP(0,"edi"),"eax");
1310 &mov (&DWP(4,"edi"),"ebx");
1311 &mov (&DWP(8,"edi"),"ecx");
1312 &mov (&DWP(12,"edi"),"edx");
1313 &mov ("ecx",&DWP(16,"esi"));
1314 &mov ("edx",&DWP(20,"esi"));
1315 &mov (&DWP(16,"edi"),"ecx");
1316 &mov (&DWP(20,"edi"),"edx");
1317
1318 &xor ("ecx","ecx"); # iteration counter / rcon index
# First pass enters at 12shortcut: %eax and %edx still hold rk[0] and
# rk[5] from the copy above.
1319 &jmp (&label("12shortcut"));
1320
1321 &align (4);
1322 &set_label("12loop");
1323 &mov ("eax",&DWP(0,"edi")); # rk[0]
1324 &mov ("edx",&DWP(20,"edi")); # rk[5]
1325 &set_label("12shortcut");
1326 &enckey ();
1327
1328 &mov (&DWP(24,"edi"),"eax"); # rk[6]
1329 &xor ("eax",&DWP(4,"edi"));
1330 &mov (&DWP(28,"edi"),"eax"); # rk[7]
1331 &xor ("eax",&DWP(8,"edi"));
1332 &mov (&DWP(32,"edi"),"eax"); # rk[8]
1333 &xor ("eax",&DWP(12,"edi"));
1334 &mov (&DWP(36,"edi"),"eax"); # rk[9]
1335
1336 &cmp ("ecx",7);
1337 &je (&label("12break"));
1338 &inc ("ecx");
1339
1340 &xor ("eax",&DWP(16,"edi"));
1341 &mov (&DWP(40,"edi"),"eax"); # rk[10]
1342 &xor ("eax",&DWP(20,"edi"));
1343 &mov (&DWP(44,"edi"),"eax"); # rk[11]
1344
1345 &add ("edi",24);
1346 &jmp (&label("12loop"));
1347
1348 &set_label("12break");
# %edi has advanced by 7*24 = 168 bytes, so 72(%edi) is offset 240 of
# the key structure.
1349 &mov (&DWP(72,"edi"),12); # setup number of rounds
1350 &xor ("eax","eax"); # return 0
1351 &jmp (&label("exit"));
1352
# 256-bit key: copy the 8 key words, then iterate. A full pass
# produces 8 new words and advances %edi by 32 bytes: rk[8..11] via
# enckey, then rk[12..15] via a byte substitution of rk[11] that keeps
# every byte in place and uses no rcon. The pass with %ecx == 6 stops
# after rk[11].
1353 &set_label("14rounds");
1354 &mov ("eax",&DWP(0,"esi")); # copy first 8 dwords
1355 &mov ("ebx",&DWP(4,"esi"));
1356 &mov ("ecx",&DWP(8,"esi"));
1357 &mov ("edx",&DWP(12,"esi"));
1358 &mov (&DWP(0,"edi"),"eax");
1359 &mov (&DWP(4,"edi"),"ebx");
1360 &mov (&DWP(8,"edi"),"ecx");
1361 &mov (&DWP(12,"edi"),"edx");
1362 &mov ("eax",&DWP(16,"esi"));
1363 &mov ("ebx",&DWP(20,"esi"));
1364 &mov ("ecx",&DWP(24,"esi"));
1365 &mov ("edx",&DWP(28,"esi"));
1366 &mov (&DWP(16,"edi"),"eax");
1367 &mov (&DWP(20,"edi"),"ebx");
1368 &mov (&DWP(24,"edi"),"ecx");
1369 &mov (&DWP(28,"edi"),"edx");
1370
1371 &xor ("ecx","ecx"); # iteration counter / rcon index
# First pass enters at 14shortcut: %edx still holds rk[7] from the
# copy above, while %eax (now rk[4]) is reloaded with rk[0] there.
1372 &jmp (&label("14shortcut"));
1373
1374 &align (4);
1375 &set_label("14loop");
1376 &mov ("edx",&DWP(28,"edi")); # rk[7]
1377 &set_label("14shortcut");
1378 &mov ("eax",&DWP(0,"edi")); # rk[0]
1379
1380 &enckey ();
1381
1382 &mov (&DWP(32,"edi"),"eax"); # rk[8]
1383 &xor ("eax",&DWP(4,"edi"));
1384 &mov (&DWP(36,"edi"),"eax"); # rk[9]
1385 &xor ("eax",&DWP(8,"edi"));
1386 &mov (&DWP(40,"edi"),"eax"); # rk[10]
1387 &xor ("eax",&DWP(12,"edi"));
1388 &mov (&DWP(44,"edi"),"eax"); # rk[11]
1389
1390 &cmp ("ecx",6);
1391 &je (&label("14break"));
1392 &inc ("ecx");
1393
# Second half: rk[12] = rk[4] ^ (byte-wise table substitution of
# rk[11]); the masks keep each substituted byte in its own position.
1394 &mov ("edx","eax");
1395 &mov ("eax",&DWP(16,"edi")); # rk[4]
1396 &movz ("esi",&LB("edx")); # rk[11]>>0
1397 &mov ("ebx",&DWP(2,"ebp","esi",8));
1398 &movz ("esi",&HB("edx")); # rk[11]>>8
1399 &and ("ebx",0x000000FF);
1400 &xor ("eax","ebx");
1401
1402 &mov ("ebx",&DWP(0,"ebp","esi",8));
1403 &shr ("edx",16);
1404 &and ("ebx",0x0000FF00);
1405 &movz ("esi",&LB("edx")); # rk[11]>>16
1406 &xor ("eax","ebx");
1407
1408 &mov ("ebx",&DWP(0,"ebp","esi",8));
1409 &movz ("esi",&HB("edx")); # rk[11]>>24
1410 &and ("ebx",0x00FF0000);
1411 &xor ("eax","ebx");
1412
1413 &mov ("ebx",&DWP(2,"ebp","esi",8));
1414 &and ("ebx",0xFF000000);
1415 &xor ("eax","ebx");
1416
1417 &mov (&DWP(48,"edi"),"eax"); # rk[12]
1418 &xor ("eax",&DWP(20,"edi"));
1419 &mov (&DWP(52,"edi"),"eax"); # rk[13]
1420 &xor ("eax",&DWP(24,"edi"));
1421 &mov (&DWP(56,"edi"),"eax"); # rk[14]
1422 &xor ("eax",&DWP(28,"edi"));
1423 &mov (&DWP(60,"edi"),"eax"); # rk[15]
1424
1425 &add ("edi",32);
1426 &jmp (&label("14loop"));
1427
1428 &set_label("14break");
# %edi has advanced by 6*32 = 192 bytes, so 48(%edi) is offset 240 of
# the key structure.
1429 &mov (&DWP(48,"edi"),14); # setup number of rounds
1430 &xor ("eax","eax"); # return 0
1431 &jmp (&label("exit"));
1432
1433 &set_label("badpointer");
1434 &mov ("eax",-1); # NULL userKey or key
1435 &set_label("exit");
1436&function_end("AES_set_encrypt_key");
1437
# deckey($i,$ptr,$te,$td): emit code that transforms one 32-bit round
# key word in place, for AES_set_decrypt_key.
#   $i   - byte offset of the word relative to $ptr
#   $ptr - register holding the round-key pointer
#   $te  - register holding the AES_Te table base
#   $td  - register holding the AES_Td table base
# Each byte of the word is first mapped through a byte lookup in the
# $te table and the result then indexes the $td table at byte offsets
# 0, 3, 2 and 1 for bytes 0..3; the four dwords are XORed together and
# stored back. The emitted code clobbers %eax, %ebx and %edx.
# NOTE(review): this looks like InvMixColumns applied to the round key
# - confirm against the table layout defined earlier in the file.
#
# The original declared an empty prototype "()" although the sub takes
# four arguments; it only worked because every caller uses the "&"
# sigil, which bypasses prototype checking. The prototype is dropped.
1438sub deckey
1439{ my ($i,$ptr,$te,$td) = @_;
1440
1441 &mov ("eax",&DWP($i,$ptr));
1442 &mov ("edx","eax");
1443 &movz ("ebx",&HB("eax")); # byte 1
1444 &shr ("edx",16); # %edx = bytes 2 and 3
1445 &and ("eax",0xFF); # byte 0
1446 &movz ("eax",&BP(2,$te,"eax",8));
1447 &movz ("ebx",&BP(2,$te,"ebx",8));
1448 &mov ("eax",&DWP(0,$td,"eax",8));
1449 &xor ("eax",&DWP(3,$td,"ebx",8));
1450 &movz ("ebx",&HB("edx")); # byte 3
1451 &and ("edx",0xFF); # byte 2
1452 &movz ("edx",&BP(2,$te,"edx",8));
1453 &movz ("ebx",&BP(2,$te,"ebx",8));
1454 &xor ("eax",&DWP(2,$td,"edx",8));
1455 &xor ("eax",&DWP(1,$td,"ebx",8));
1456 &mov (&DWP($i,$ptr),"eax");
1457}
1458
1459# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
1460# AES_KEY *key)
#
# Emits the decryption key-schedule set-up in three stages:
#   1. call _x86_AES_set_encrypt_key with the same three arguments and
#      return its result unchanged if it is non-zero;
#   2. reverse the order of the 16-byte round keys in place;
#   3. run deckey over every word of round keys 1 .. rounds-1.
# The generated code returns 0 in %eax on success.
1461&public_label("AES_Td");
1462&public_label("AES_Te");
1463&function_begin_B("AES_set_decrypt_key");
1464 &mov ("eax",&wparam(0));
1465 &mov ("ecx",&wparam(1));
1466 &mov ("edx",&wparam(2));
1467 &sub ("esp",12); # build the argument block for the call
1468 &mov (&DWP(0,"esp"),"eax");
1469 &mov (&DWP(4,"esp"),"ecx");
1470 &mov (&DWP(8,"esp"),"edx");
1471 &call ("_x86_AES_set_encrypt_key");
1472 &add ("esp",12);
1473 &cmp ("eax",0);
1474 &je (&label("proceed"));
1475 &ret (); # propagate the error code
1476
1477 &set_label("proceed");
# Registers are saved by hand here; &function_end at the bottom emits
# the matching epilogue.
1478 &push ("ebp");
1479 &push ("ebx");
1480 &push ("esi");
1481 &push ("edi");
1482
1483 &mov ("esi",&wparam(2));
1484 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
1485 &lea ("ecx",&DWP(0,"","ecx",4)); # %ecx = rounds*4
1486 &lea ("edi",&DWP(0,"esi","ecx",4)); # pointer to last chunk
1487
1488 &align (4);
1489 &set_label("invert"); # invert order of chunks
# Swap the 16-byte chunks at %esi and %edi, then move both pointers
# towards the middle until they meet.
1490 &mov ("eax",&DWP(0,"esi"));
1491 &mov ("ebx",&DWP(4,"esi"));
1492 &mov ("ecx",&DWP(0,"edi"));
1493 &mov ("edx",&DWP(4,"edi"));
1494 &mov (&DWP(0,"edi"),"eax");
1495 &mov (&DWP(4,"edi"),"ebx");
1496 &mov (&DWP(0,"esi"),"ecx");
1497 &mov (&DWP(4,"esi"),"edx");
1498 &mov ("eax",&DWP(8,"esi"));
1499 &mov ("ebx",&DWP(12,"esi"));
1500 &mov ("ecx",&DWP(8,"edi"));
1501 &mov ("edx",&DWP(12,"edi"));
1502 &mov (&DWP(8,"edi"),"eax");
1503 &mov (&DWP(12,"edi"),"ebx");
1504 &mov (&DWP(8,"esi"),"ecx");
1505 &mov (&DWP(12,"esi"),"edx");
1506 &add ("esi",16);
1507 &sub ("edi",16);
1508 &cmp ("esi","edi");
1509 &jne (&label("invert"));
1510
1511 &call (&label("pic_point"));
1512 &set_label("pic_point");
1513 blindpop("ebp"); # %ebp = run-time address of pic_point
# %edi must be derived first: the second lea overwrites %ebp.
1514 &lea ("edi",&DWP(&label("AES_Td")."-".&label("pic_point"),"ebp"));
1515 &lea ("ebp",&DWP(&label("AES_Te")."-".&label("pic_point"),"ebp"));
1516
1517 &mov ("esi",&wparam(2));
1518 &mov ("ecx",&DWP(240,"esi")); # pull number of rounds
1519 &dec ("ecx"); # rounds-1 chunks are permuted
1520 &align (4);
1521 &set_label("permute"); # permute the key schedule
1522 &add ("esi",16); # pre-increment: the first chunk is skipped
1523 &deckey (0,"esi","ebp","edi");
1524 &deckey (4,"esi","ebp","edi");
1525 &deckey (8,"esi","ebp","edi");
1526 &deckey (12,"esi","ebp","edi");
1527 &dec ("ecx");
1528 &jnz (&label("permute"));
1529
1530 &xor ("eax","eax"); # return success
1531&function_end("AES_set_decrypt_key");
1532
1533&asm_finish();
diff --git a/src/lib/libcrypto/aes/asm/aes-armv4.pl b/src/lib/libcrypto/aes/asm/aes-armv4.pl
deleted file mode 100644
index 15742c1ec5..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-armv4.pl
+++ /dev/null
@@ -1,1030 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
14# Code uses a single 1K S-box and is >2 times faster than code generated
15# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
16# allows merging a logical or arithmetic operation with a shift or rotate
17# in one instruction and emitting the combined result every cycle. The module
18# is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
19# key.
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
# Symbolic names for the ARM registers interpolated into the assembly
# text below.
# $s0-$s3: the four 32-bit words of the AES state.
25$s0="r0";
26$s1="r1";
27$s2="r2";
28$s3="r3";
# $t1-$t3 and $i1-$i3: temporaries for loaded bytes, table indices and
# table entries.
29$t1="r4";
30$t2="r5";
31$t3="r6";
32$i1="r7";
33$i2="r8";
34$i3="r9";
35
# $tbl: lookup table base (AES_Te in AES_encrypt).
# $key: key schedule pointer.
# $rounds: round counter; AES_encrypt also borrows it as the in/out
# data pointer outside the round loop.
36$tbl="r10";
37$key="r11";
38$rounds="r12";
39
40$code=<<___;
41.text
42.code 32
43
44.type AES_Te,%object
45.align 5
46AES_Te:
47.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
48.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
49.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
50.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
51.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
52.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
53.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
54.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
55.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
56.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
57.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
58.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
59.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
60.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
61.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
62.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
63.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
64.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
65.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
66.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
67.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
68.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
69.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
70.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
71.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
72.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
73.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
74.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
75.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
76.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
77.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
78.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
79.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
80.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
81.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
82.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
83.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
84.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
85.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
86.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
87.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
88.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
89.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
90.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
91.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
92.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
93.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
94.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
95.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
96.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
97.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
98.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
99.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
100.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
101.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
102.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
103.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
104.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
105.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
106.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
107.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
108.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
109.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
110.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
111@ Te4[256]
112.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
113.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
114.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
115.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
116.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
117.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
118.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
119.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
120.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
121.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
122.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
123.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
124.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
125.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
126.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
127.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
128.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
129.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
130.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
131.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
132.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
133.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
134.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
135.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
136.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
137.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
138.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
139.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
140.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
141.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
142.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
143.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
144@ rcon[]
145.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
146.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
147.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
148.size AES_Te,.-AES_Te
149
150@ void AES_encrypt(const unsigned char *in, unsigned char *out,
151@ const AES_KEY *key) {
152.global AES_encrypt
153.type AES_encrypt,%function
154.align 5
155AES_encrypt:
156 sub r3,pc,#8 @ AES_encrypt
157 stmdb sp!,{r1,r4-r12,lr}
158 mov $rounds,r0 @ inp
159 mov $key,r2
160 sub $tbl,r3,#AES_encrypt-AES_Te @ Te
161
162 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
163 ldrb $t1,[$rounds,#2] @ manner...
164 ldrb $t2,[$rounds,#1]
165 ldrb $t3,[$rounds,#0]
166 orr $s0,$s0,$t1,lsl#8
167 orr $s0,$s0,$t2,lsl#16
168 orr $s0,$s0,$t3,lsl#24
169 ldrb $s1,[$rounds,#7]
170 ldrb $t1,[$rounds,#6]
171 ldrb $t2,[$rounds,#5]
172 ldrb $t3,[$rounds,#4]
173 orr $s1,$s1,$t1,lsl#8
174 orr $s1,$s1,$t2,lsl#16
175 orr $s1,$s1,$t3,lsl#24
176 ldrb $s2,[$rounds,#11]
177 ldrb $t1,[$rounds,#10]
178 ldrb $t2,[$rounds,#9]
179 ldrb $t3,[$rounds,#8]
180 orr $s2,$s2,$t1,lsl#8
181 orr $s2,$s2,$t2,lsl#16
182 orr $s2,$s2,$t3,lsl#24
183 ldrb $s3,[$rounds,#15]
184 ldrb $t1,[$rounds,#14]
185 ldrb $t2,[$rounds,#13]
186 ldrb $t3,[$rounds,#12]
187 orr $s3,$s3,$t1,lsl#8
188 orr $s3,$s3,$t2,lsl#16
189 orr $s3,$s3,$t3,lsl#24
190
191 bl _armv4_AES_encrypt
192
193 ldr $rounds,[sp],#4 @ pop out
194 mov $t1,$s0,lsr#24 @ write output in endian-neutral
195 mov $t2,$s0,lsr#16 @ manner...
196 mov $t3,$s0,lsr#8
197 strb $t1,[$rounds,#0]
198 strb $t2,[$rounds,#1]
199 strb $t3,[$rounds,#2]
200 strb $s0,[$rounds,#3]
201 mov $t1,$s1,lsr#24
202 mov $t2,$s1,lsr#16
203 mov $t3,$s1,lsr#8
204 strb $t1,[$rounds,#4]
205 strb $t2,[$rounds,#5]
206 strb $t3,[$rounds,#6]
207 strb $s1,[$rounds,#7]
208 mov $t1,$s2,lsr#24
209 mov $t2,$s2,lsr#16
210 mov $t3,$s2,lsr#8
211 strb $t1,[$rounds,#8]
212 strb $t2,[$rounds,#9]
213 strb $t3,[$rounds,#10]
214 strb $s2,[$rounds,#11]
215 mov $t1,$s3,lsr#24
216 mov $t2,$s3,lsr#16
217 mov $t3,$s3,lsr#8
218 strb $t1,[$rounds,#12]
219 strb $t2,[$rounds,#13]
220 strb $t3,[$rounds,#14]
221 strb $s3,[$rounds,#15]
222
223 ldmia sp!,{r4-r12,lr}
224 tst lr,#1
225 moveq pc,lr @ be binary compatible with V4, yet
226 bx lr @ interoperable with Thumb ISA:-)
227.size AES_encrypt,.-AES_encrypt
228
229.type _armv4_AES_encrypt,%function @ file-local helper: no .global is emitted for this symbol
230.align 2
231_armv4_AES_encrypt: @ encrypts the block held in s0-s3 in place; reads the schedule through key and the lookup table through tbl
232 str lr,[sp,#-4]! @ push lr (lr is reused as the 0xff mask below)
233 ldr $t1,[$key],#16 @ rk[0]; post-increment moves key past the first round key
234 ldr $t2,[$key,#-12] @ rk[1]
235 ldr $t3,[$key,#-8] @ rk[2]
236 ldr $i1,[$key,#-4] @ rk[3]
237 ldr $rounds,[$key,#240-16] @ round count is stored at offset 240 from the start of the schedule
238 eor $s0,$s0,$t1 @ state ^= first round key
239 eor $s1,$s1,$t2
240 eor $s2,$s2,$t3
241 eor $s3,$s3,$i1
242 sub $rounds,$rounds,#1 @ the loop performs rounds-1 rounds; the last round follows the loop
243 mov lr,#255 @ byte mask used to extract table indices
244
245.Lenc_loop: @ one round per pass: every lookup is a word load from tbl, the ror operands supply the Te1-Te3 rotations
246 and $i2,lr,$s0,lsr#8
247 and $i3,lr,$s0,lsr#16
248 and $i1,lr,$s0
249 mov $s0,$s0,lsr#24
250 ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0]
251 ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24]
252 ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8]
253 ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16]
254
255 and $i1,lr,$s1,lsr#16 @ i0
256 and $i2,lr,$s1
257 and $i3,lr,$s1,lsr#8
258 mov $s1,$s1,lsr#24
259 ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16]
260 ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24]
261 ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0]
262 ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8]
263 eor $s0,$s0,$i1,ror#8
264 eor $s1,$s1,$t1,ror#24
265 eor $t2,$t2,$i2,ror#8 @ t2 and t3 collect the terms for s2 and s3; the rest of their rotation is applied when they are folded in
266 eor $t3,$t3,$i3,ror#8
267
268 and $i1,lr,$s2,lsr#8 @ i0
269 and $i2,lr,$s2,lsr#16 @ i1
270 and $i3,lr,$s2
271 mov $s2,$s2,lsr#24
272 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8]
273 ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
274 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24]
275 ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0]
276 eor $s0,$s0,$i1,ror#16
277 eor $s1,$s1,$i2,ror#8
278 eor $s2,$s2,$t2,ror#16 @ fold the accumulated t2 terms into s2
279 eor $t3,$t3,$i3,ror#16
280
281 and $i1,lr,$s3 @ i0
282 and $i2,lr,$s3,lsr#8 @ i1
283 and $i3,lr,$s3,lsr#16 @ i2
284 mov $s3,$s3,lsr#24
285 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0]
286 ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
287 ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16]
288 ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
289 eor $s0,$s0,$i1,ror#24
290 eor $s1,$s1,$i2,ror#16
291 eor $s2,$s2,$i3,ror#8
292 eor $s3,$s3,$t3,ror#8 @ fold the accumulated t3 terms into s3
293
294 ldr $t1,[$key],#16 @ next round key; key += 16
295 ldr $t2,[$key,#-12]
296 ldr $t3,[$key,#-8]
297 ldr $i1,[$key,#-4]
298 eor $s0,$s0,$t1 @ state ^= round key
299 eor $s1,$s1,$t2
300 eor $s2,$s2,$t3
301 eor $s3,$s3,$i1
302
303 subs $rounds,$rounds,#1 @ count down the remaining full rounds
304 bne .Lenc_loop
305
306 add $tbl,$tbl,#2 @ bias tbl so the ldrb below pick the byte at offset 2 of each 4-byte entry
307
308 and $i1,lr,$s0
309 and $i2,lr,$s0,lsr#8
310 and $i3,lr,$s0,lsr#16
311 mov $s0,$s0,lsr#24
312 ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0]
313 ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24]
314 ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8]
315 ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16]
316
317 and $i1,lr,$s1,lsr#16 @ i0
318 and $i2,lr,$s1
319 and $i3,lr,$s1,lsr#8
320 mov $s1,$s1,lsr#24
321 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16]
322 ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24]
323 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0]
324 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8]
325 eor $s0,$i1,$s0,lsl#8 @ last round: output words are assembled byte by byte with shifts
326 eor $s1,$t1,$s1,lsl#24
327 eor $t2,$i2,$t2,lsl#8
328 eor $t3,$i3,$t3,lsl#8
329
330 and $i1,lr,$s2,lsr#8 @ i0
331 and $i2,lr,$s2,lsr#16 @ i1
332 and $i3,lr,$s2
333 mov $s2,$s2,lsr#24
334 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8]
335 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
336 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24]
337 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0]
338 eor $s0,$i1,$s0,lsl#8
339 eor $s1,$s1,$i2,lsl#16
340 eor $s2,$t2,$s2,lsl#24
341 eor $t3,$i3,$t3,lsl#8
342
343 and $i1,lr,$s3 @ i0
344 and $i2,lr,$s3,lsr#8 @ i1
345 and $i3,lr,$s3,lsr#16 @ i2
346 mov $s3,$s3,lsr#24
347 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0]
348 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
349 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16]
350 ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
351 eor $s0,$i1,$s0,lsl#8
352 eor $s1,$s1,$i2,lsl#8
353 eor $s2,$s2,$i3,lsl#16
354 eor $s3,$t3,$s3,lsl#24
355
356 ldr lr,[sp],#4 @ pop lr
357 ldr $t1,[$key,#0] @ last round key (key is not advanced here)
358 ldr $t2,[$key,#4]
359 ldr $t3,[$key,#8]
360 ldr $i1,[$key,#12]
361 eor $s0,$s0,$t1
362 eor $s1,$s1,$t2
363 eor $s2,$s2,$t3
364 eor $s3,$s3,$i1
365
366 sub $tbl,$tbl,#2 @ undo the bias applied before the last round
367 mov pc,lr @ return
368.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
369
370.global AES_set_encrypt_key
371.type AES_set_encrypt_key,%function
372.align 5
373AES_set_encrypt_key: @ r0 = user key bytes, r1 = key length in bits, r2 = schedule to fill; returns r0 = 0 on success, -1 on a bad argument
374 sub r3,pc,#8 @ AES_set_encrypt_key (r3 = address of this label, used to locate AES_Te below)
375 teq r0,#0
376 moveq r0,#-1 @ NULL input pointer
377 beq .Labrt
378 teq r2,#0
379 moveq r0,#-1 @ NULL schedule pointer
380 beq .Labrt
381
382 teq r1,#128 @ only 128, 192 and 256 bits are accepted
383 beq .Lok
384 teq r1,#192
385 beq .Lok
386 teq r1,#256
387 movne r0,#-1 @ any other length is rejected
388 bne .Labrt
389
390.Lok: stmdb sp!,{r4-r12,lr}
391 sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 (tbl = AES_Te+1024, indexed as bytes below)
392
393 mov $rounds,r0 @ inp
394 mov lr,r1 @ bits
395 mov $key,r2 @ key
396
397 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
398 ldrb $t1,[$rounds,#2] @ manner...
399 ldrb $t2,[$rounds,#1]
400 ldrb $t3,[$rounds,#0]
401 orr $s0,$s0,$t1,lsl#8
402 orr $s0,$s0,$t2,lsl#16
403 orr $s0,$s0,$t3,lsl#24 @ byte 0 ends up most significant
404 ldrb $s1,[$rounds,#7]
405 ldrb $t1,[$rounds,#6]
406 ldrb $t2,[$rounds,#5]
407 ldrb $t3,[$rounds,#4]
408 orr $s1,$s1,$t1,lsl#8
409 orr $s1,$s1,$t2,lsl#16
410 orr $s1,$s1,$t3,lsl#24
411 ldrb $s2,[$rounds,#11]
412 ldrb $t1,[$rounds,#10]
413 ldrb $t2,[$rounds,#9]
414 ldrb $t3,[$rounds,#8]
415 orr $s2,$s2,$t1,lsl#8
416 orr $s2,$s2,$t2,lsl#16
417 orr $s2,$s2,$t3,lsl#24
418 ldrb $s3,[$rounds,#15]
419 ldrb $t1,[$rounds,#14]
420 ldrb $t2,[$rounds,#13]
421 ldrb $t3,[$rounds,#12]
422 orr $s3,$s3,$t1,lsl#8
423 orr $s3,$s3,$t2,lsl#16
424 orr $s3,$s3,$t3,lsl#24
425 str $s0,[$key],#16 @ rk[0..3] = first 16 key bytes; key += 16
426 str $s1,[$key,#-12]
427 str $s2,[$key,#-8]
428 str $s3,[$key,#-4]
429
430 teq lr,#128
431 bne .Lnot128
432 mov $rounds,#10 @ 10 rounds; the same register also counts the loop below
433 str $rounds,[$key,#240-16] @ round count goes to offset 240 of the schedule
434 add $t3,$tbl,#256 @ rcon
435 mov lr,#255 @ byte mask
436
437.L128_loop:
438 and $t2,lr,$s3,lsr#24 @ split the last word of the previous round key into bytes
439 and $i1,lr,$s3,lsr#16
440 and $i2,lr,$s3,lsr#8
441 and $i3,lr,$s3
442 ldrb $t2,[$tbl,$t2] @ substitute each byte through Te4
443 ldrb $i1,[$tbl,$i1]
444 ldrb $i2,[$tbl,$i2]
445 ldrb $i3,[$tbl,$i3]
446 ldr $t1,[$t3],#4 @ rcon[i++]
447 orr $t2,$t2,$i1,lsl#24 @ reassemble the substituted bytes rotated left by one byte
448 orr $t2,$t2,$i2,lsl#16
449 orr $t2,$t2,$i3,lsl#8
450 eor $t2,$t2,$t1
451 eor $s0,$s0,$t2 @ rk[4]=rk[0]^...
452 eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4]
453 eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5]
454 eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6]
455 str $s0,[$key],#16
456 str $s1,[$key,#-12]
457 str $s2,[$key,#-8]
458 str $s3,[$key,#-4]
459
460 subs $rounds,$rounds,#1
461 bne .L128_loop
462 sub r2,$key,#176 @ r2 = start of the schedule again (16 + 10*16 bytes were written)
463 b .Ldone
464
465.Lnot128:
466 ldrb $i2,[$rounds,#19] @ key bytes 16..23 become rk[4] and rk[5]
467 ldrb $t1,[$rounds,#18]
468 ldrb $t2,[$rounds,#17]
469 ldrb $t3,[$rounds,#16]
470 orr $i2,$i2,$t1,lsl#8
471 orr $i2,$i2,$t2,lsl#16
472 orr $i2,$i2,$t3,lsl#24
473 ldrb $i3,[$rounds,#23]
474 ldrb $t1,[$rounds,#22]
475 ldrb $t2,[$rounds,#21]
476 ldrb $t3,[$rounds,#20]
477 orr $i3,$i3,$t1,lsl#8
478 orr $i3,$i3,$t2,lsl#16
479 orr $i3,$i3,$t3,lsl#24
480 str $i2,[$key],#8
481 str $i3,[$key,#-4]
482
483 teq lr,#192
484 bne .Lnot192
485 mov $rounds,#12 @ 12 rounds for a 192-bit key
486 str $rounds,[$key,#240-24] @ key has advanced 24 bytes, so this is schedule offset 240
487 add $t3,$tbl,#256 @ rcon
488 mov lr,#255
489 mov $rounds,#8 @ loop counter: 8 passes of 24 bytes each
490
491.L192_loop:
492 and $t2,lr,$i3,lsr#24 @ split the most recent schedule word into bytes
493 and $i1,lr,$i3,lsr#16
494 and $i2,lr,$i3,lsr#8
495 and $i3,lr,$i3
496 ldrb $t2,[$tbl,$t2]
497 ldrb $i1,[$tbl,$i1]
498 ldrb $i2,[$tbl,$i2]
499 ldrb $i3,[$tbl,$i3]
500 ldr $t1,[$t3],#4 @ rcon[i++]
501 orr $t2,$t2,$i1,lsl#24
502 orr $t2,$t2,$i2,lsl#16
503 orr $t2,$t2,$i3,lsl#8
504 eor $i3,$t2,$t1
505 eor $s0,$s0,$i3 @ rk[6]=rk[0]^...
506 eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6]
507 eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7]
508 eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8]
509 str $s0,[$key],#24
510 str $s1,[$key,#-20]
511 str $s2,[$key,#-16]
512 str $s3,[$key,#-12]
513
514 subs $rounds,$rounds,#1
515 subeq r2,$key,#216 @ finished: r2 = start of the schedule (24 + 8*24 bytes from it)
516 beq .Ldone
517
518 ldr $i1,[$key,#-32] @ rk[4] and rk[5] of the current group
519 ldr $i2,[$key,#-28]
520 eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9]
521 eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10]
522 str $i1,[$key,#-8]
523 str $i3,[$key,#-4]
524 b .L192_loop
525
526.Lnot192:
527 ldrb $i2,[$rounds,#27] @ key bytes 24..31 become rk[6] and rk[7]
528 ldrb $t1,[$rounds,#26]
529 ldrb $t2,[$rounds,#25]
530 ldrb $t3,[$rounds,#24]
531 orr $i2,$i2,$t1,lsl#8
532 orr $i2,$i2,$t2,lsl#16
533 orr $i2,$i2,$t3,lsl#24
534 ldrb $i3,[$rounds,#31]
535 ldrb $t1,[$rounds,#30]
536 ldrb $t2,[$rounds,#29]
537 ldrb $t3,[$rounds,#28]
538 orr $i3,$i3,$t1,lsl#8
539 orr $i3,$i3,$t2,lsl#16
540 orr $i3,$i3,$t3,lsl#24
541 str $i2,[$key],#8
542 str $i3,[$key,#-4]
543
544 mov $rounds,#14 @ 14 rounds for a 256-bit key
545 str $rounds,[$key,#240-32] @ key has advanced 32 bytes, so this is schedule offset 240
546 add $t3,$tbl,#256 @ rcon
547 mov lr,#255
548 mov $rounds,#7 @ loop counter: 7 passes of 32 bytes each
549
550.L256_loop:
551 and $t2,lr,$i3,lsr#24 @ split the most recent schedule word into bytes
552 and $i1,lr,$i3,lsr#16
553 and $i2,lr,$i3,lsr#8
554 and $i3,lr,$i3
555 ldrb $t2,[$tbl,$t2]
556 ldrb $i1,[$tbl,$i1]
557 ldrb $i2,[$tbl,$i2]
558 ldrb $i3,[$tbl,$i3]
559 ldr $t1,[$t3],#4 @ rcon[i++]
560 orr $t2,$t2,$i1,lsl#24
561 orr $t2,$t2,$i2,lsl#16
562 orr $t2,$t2,$i3,lsl#8
563 eor $i3,$t2,$t1
564 eor $s0,$s0,$i3 @ rk[8]=rk[0]^...
565 eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8]
566 eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9]
567 eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10]
568 str $s0,[$key],#32
569 str $s1,[$key,#-28]
570 str $s2,[$key,#-24]
571 str $s3,[$key,#-20]
572
573 subs $rounds,$rounds,#1
574 subeq r2,$key,#256 @ finished: r2 = start of the schedule (32 + 7*32 bytes from it)
575 beq .Ldone
576
577 and $t2,lr,$s3 @ second half: bytes are substituted but not rotated, and no rcon is applied
578 and $i1,lr,$s3,lsr#8
579 and $i2,lr,$s3,lsr#16
580 and $i3,lr,$s3,lsr#24
581 ldrb $t2,[$tbl,$t2]
582 ldrb $i1,[$tbl,$i1]
583 ldrb $i2,[$tbl,$i2]
584 ldrb $i3,[$tbl,$i3]
585 orr $t2,$t2,$i1,lsl#8
586 orr $t2,$t2,$i2,lsl#16
587 orr $t2,$t2,$i3,lsl#24
588
589 ldr $t1,[$key,#-48]
590 ldr $i1,[$key,#-44]
591 ldr $i2,[$key,#-40]
592 ldr $i3,[$key,#-36]
593 eor $t1,$t1,$t2 @ rk[12]=rk[4]^...
594 eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12]
595 eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13]
596 eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14]
597 str $t1,[$key,#-16]
598 str $i1,[$key,#-12]
599 str $i2,[$key,#-8]
600 str $i3,[$key,#-4]
601 b .L256_loop
602
603.Ldone: mov r0,#0 @ success
604 ldmia sp!,{r4-r12,lr}
605.Labrt: tst lr,#1 @ shared exit; also branched to from AES_set_decrypt_key
606 moveq pc,lr @ be binary compatible with V4, yet
607 bx lr @ interoperable with Thumb ISA:-)
608.size AES_set_encrypt_key,.-AES_set_encrypt_key
609
610.global AES_set_decrypt_key
611.type AES_set_decrypt_key,%function
612.align 5
613AES_set_decrypt_key: @ builds the encryption schedule via AES_set_encrypt_key, then converts it in place; returns r0 = 0 or the error from that call
614 str lr,[sp,#-4]! @ push lr
615 bl AES_set_encrypt_key
616 teq r0,#0
617 ldrne lr,[sp],#4 @ pop lr
618 bne .Labrt @ propagate the failure through the shared exit in AES_set_encrypt_key
619
620 stmdb sp!,{r4-r12}
621
622 ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2,
623 mov $key,r2 @ which is AES_KEY *key
624 mov $i1,r2 @ i1 walks forward from the first round key
625 add $i2,r2,$rounds,lsl#4 @ i2 walks backward from the last round key (16 bytes per round key)
626
627.Linv: ldr $s0,[$i1] @ swap the 16-byte round keys at i1 and i2, moving inward until the pointers meet
628 ldr $s1,[$i1,#4]
629 ldr $s2,[$i1,#8]
630 ldr $s3,[$i1,#12]
631 ldr $t1,[$i2]
632 ldr $t2,[$i2,#4]
633 ldr $t3,[$i2,#8]
634 ldr $i3,[$i2,#12]
635 str $s0,[$i2],#-16
636 str $s1,[$i2,#16+4]
637 str $s2,[$i2,#16+8]
638 str $s3,[$i2,#16+12]
639 str $t1,[$i1],#16
640 str $t2,[$i1,#-12]
641 str $t3,[$i1,#-8]
642 str $i3,[$i1,#-4]
643 teq $i1,$i2
644 bne .Linv
645___
646$mask80=$i1;
647$mask1b=$i2;
648$mask7f=$i3;
649$code.=<<___;
650 ldr $s0,[$key,#16]! @ prefetch tp1 (writeback skips the first round key)
651 mov $mask80,#0x80 @ the next six instructions build 0x80808080 and 0x1b1b1b1b
652 mov $mask1b,#0x1b
653 orr $mask80,$mask80,#0x8000
654 orr $mask1b,$mask1b,#0x1b00
655 orr $mask80,$mask80,$mask80,lsl#16
656 orr $mask1b,$mask1b,$mask1b,lsl#16
657 sub $rounds,$rounds,#1
658 mvn $mask7f,$mask80 @ 0x7f7f7f7f
659 mov $rounds,$rounds,lsl#2 @ (rounds-1)*4
660
661.Lmix: and $t1,$s0,$mask80 @ per word: top bit of each of the four packed bytes
662 and $s1,$s0,$mask7f
663 sub $t1,$t1,$t1,lsr#7 @ 0x80 becomes 0x7f in every byte whose top bit was set
664 and $t1,$t1,$mask1b @ ... which leaves 0x1b in exactly those bytes
665 eor $s1,$t1,$s1,lsl#1 @ tp2
666
667 and $t1,$s1,$mask80
668 and $s2,$s1,$mask7f
669 sub $t1,$t1,$t1,lsr#7
670 and $t1,$t1,$mask1b
671 eor $s2,$t1,$s2,lsl#1 @ tp4
672
673 and $t1,$s2,$mask80
674 and $s3,$s2,$mask7f
675 sub $t1,$t1,$t1,lsr#7
676 and $t1,$t1,$mask1b
677 eor $s3,$t1,$s3,lsl#1 @ tp8
678
679 eor $t1,$s1,$s2
680 eor $t2,$s0,$s3 @ tp9
681 eor $t1,$t1,$s3 @ tpe
682 eor $t1,$t1,$s1,ror#24
683 eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
684 eor $t1,$t1,$s2,ror#16
685 eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
686 eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24)
687
688 ldr $s0,[$key,#4] @ prefetch tp1
689 str $t1,[$key],#4 @ overwrite the schedule word in place; key += 4
690 subs $rounds,$rounds,#1
691 bne .Lmix
692
693 mov r0,#0 @ success
694 ldmia sp!,{r4-r12,lr} @ also pops the lr pushed on entry
695 tst lr,#1
696 moveq pc,lr @ be binary compatible with V4, yet
697 bx lr @ interoperable with Thumb ISA:-)
698.size AES_set_decrypt_key,.-AES_set_decrypt_key
699
700.type AES_Td,%object
701.align 5
702AES_Td: @ 256 words of decryption table, followed by the 256-byte Td4 table
703.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
704.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
705.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
706.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
707.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
708.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
709.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
710.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
711.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
712.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
713.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
714.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
715.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
716.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
717.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
718.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
719.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
720.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
721.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
722.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
723.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
724.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
725.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
726.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
727.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
728.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
729.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
730.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
731.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
732.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
733.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
734.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
735.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
736.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
737.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
738.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
739.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
740.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
741.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
742.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
743.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
744.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
745.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
746.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
747.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
748.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
749.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
750.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
751.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
752.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
753.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
754.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
755.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
756.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
757.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
758.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
759.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
760.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
761.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
762.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
763.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
764.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
765.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
766.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
767@ Td4[256]: byte table at AES_Td+1024, used by the final round in _armv4_AES_decrypt
768.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
769.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
770.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
771.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
772.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
773.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
774.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
775.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
776.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
777.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
778.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
779.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
780.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
781.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
782.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
783.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
784.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
785.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
786.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
787.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
788.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
789.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
790.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
791.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
792.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
793.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
794.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
795.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
796.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
797.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
798.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
799.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
800.size AES_Td,.-AES_Td
801
802@ void AES_decrypt(const unsigned char *in, unsigned char *out,
803@ const AES_KEY *key) {
804.global AES_decrypt
805.type AES_decrypt,%function
806.align 5
807AES_decrypt: @ r0 = in, r1 = out, r2 = key; decrypts one 16-byte block
808 sub r3,pc,#8 @ AES_decrypt (r3 = address of this label, used to locate AES_Td below)
809 stmdb sp!,{r1,r4-r12,lr} @ r1 (out) lands at the lowest address so it can be popped alone after the call
810 mov $rounds,r0 @ inp
811 mov $key,r2
812 sub $tbl,r3,#AES_decrypt-AES_Td @ Td
813
814 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
815 ldrb $t1,[$rounds,#2] @ manner...
816 ldrb $t2,[$rounds,#1]
817 ldrb $t3,[$rounds,#0]
818 orr $s0,$s0,$t1,lsl#8
819 orr $s0,$s0,$t2,lsl#16
820 orr $s0,$s0,$t3,lsl#24 @ byte 0 ends up most significant
821 ldrb $s1,[$rounds,#7]
822 ldrb $t1,[$rounds,#6]
823 ldrb $t2,[$rounds,#5]
824 ldrb $t3,[$rounds,#4]
825 orr $s1,$s1,$t1,lsl#8
826 orr $s1,$s1,$t2,lsl#16
827 orr $s1,$s1,$t3,lsl#24
828 ldrb $s2,[$rounds,#11]
829 ldrb $t1,[$rounds,#10]
830 ldrb $t2,[$rounds,#9]
831 ldrb $t3,[$rounds,#8]
832 orr $s2,$s2,$t1,lsl#8
833 orr $s2,$s2,$t2,lsl#16
834 orr $s2,$s2,$t3,lsl#24
835 ldrb $s3,[$rounds,#15]
836 ldrb $t1,[$rounds,#14]
837 ldrb $t2,[$rounds,#13]
838 ldrb $t3,[$rounds,#12]
839 orr $s3,$s3,$t1,lsl#8
840 orr $s3,$s3,$t2,lsl#16
841 orr $s3,$s3,$t3,lsl#24
842
843 bl _armv4_AES_decrypt @ transforms s0-s3 in place
844
845 ldr $rounds,[sp],#4 @ pop out
846 mov $t1,$s0,lsr#24 @ write output in endian-neutral
847 mov $t2,$s0,lsr#16 @ manner...
848 mov $t3,$s0,lsr#8
849 strb $t1,[$rounds,#0]
850 strb $t2,[$rounds,#1]
851 strb $t3,[$rounds,#2]
852 strb $s0,[$rounds,#3]
853 mov $t1,$s1,lsr#24
854 mov $t2,$s1,lsr#16
855 mov $t3,$s1,lsr#8
856 strb $t1,[$rounds,#4]
857 strb $t2,[$rounds,#5]
858 strb $t3,[$rounds,#6]
859 strb $s1,[$rounds,#7]
860 mov $t1,$s2,lsr#24
861 mov $t2,$s2,lsr#16
862 mov $t3,$s2,lsr#8
863 strb $t1,[$rounds,#8]
864 strb $t2,[$rounds,#9]
865 strb $t3,[$rounds,#10]
866 strb $s2,[$rounds,#11]
867 mov $t1,$s3,lsr#24
868 mov $t2,$s3,lsr#16
869 mov $t3,$s3,lsr#8
870 strb $t1,[$rounds,#12]
871 strb $t2,[$rounds,#13]
872 strb $t3,[$rounds,#14]
873 strb $s3,[$rounds,#15]
874
875 ldmia sp!,{r4-r12,lr} @ r1 was already popped above
876 tst lr,#1
877 moveq pc,lr @ be binary compatible with V4, yet
878 bx lr @ interoperable with Thumb ISA:-)
879.size AES_decrypt,.-AES_decrypt
880
881.type _armv4_AES_decrypt,%function @ file-local helper: no .global is emitted for this symbol
882.align 2
883_armv4_AES_decrypt: @ decrypts the block held in s0-s3 in place; reads the schedule through key and the lookup table through tbl
884 str lr,[sp,#-4]! @ push lr
885 ldr $t1,[$key],#16 @ rk[0]; post-increment moves key past the first round key
886 ldr $t2,[$key,#-12]
887 ldr $t3,[$key,#-8]
888 ldr $i1,[$key,#-4]
889 ldr $rounds,[$key,#240-16] @ round count is stored at offset 240 from the start of the schedule
890 eor $s0,$s0,$t1 @ state ^= first round key
891 eor $s1,$s1,$t2
892 eor $s2,$s2,$t3
893 eor $s3,$s3,$i1
894 sub $rounds,$rounds,#1 @ the loop performs rounds-1 rounds; the last round follows the loop
895 mov lr,#255 @ byte mask used to extract table indices
896
897.Ldec_loop: @ one round per pass: every lookup is a word load from tbl, the ror operands supply the Td1-Td3 rotations
898 and $i1,lr,$s0,lsr#16
899 and $i2,lr,$s0,lsr#8
900 and $i3,lr,$s0
901 mov $s0,$s0,lsr#24
902 ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16]
903 ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24]
904 ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8]
905 ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0]
906
907 and $i1,lr,$s1 @ i0
908 and $i2,lr,$s1,lsr#16
909 and $i3,lr,$s1,lsr#8
910 mov $s1,$s1,lsr#24
911 ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0]
912 ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24]
913 ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16]
914 ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8]
915 eor $s0,$s0,$i1,ror#24
916 eor $s1,$s1,$t1,ror#8
917 eor $t2,$i2,$t2,ror#8 @ t2 and t3 collect the terms for s2 and s3; the rest of their rotation is applied when they are folded in
918 eor $t3,$i3,$t3,ror#8
919
920 and $i1,lr,$s2,lsr#8 @ i0
921 and $i2,lr,$s2 @ i1
922 and $i3,lr,$s2,lsr#16
923 mov $s2,$s2,lsr#24
924 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8]
925 ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
926 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24]
927 ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16]
928 eor $s0,$s0,$i1,ror#16
929 eor $s1,$s1,$i2,ror#24
930 eor $s2,$s2,$t2,ror#8 @ fold the accumulated t2 terms into s2
931 eor $t3,$i3,$t3,ror#8
932
933 and $i1,lr,$s3,lsr#16 @ i0
934 and $i2,lr,$s3,lsr#8 @ i1
935 and $i3,lr,$s3 @ i2
936 mov $s3,$s3,lsr#24
937 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16]
938 ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
939 ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0]
940 ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24]
941 eor $s0,$s0,$i1,ror#8
942 eor $s1,$s1,$i2,ror#16
943 eor $s2,$s2,$i3,ror#24
944 eor $s3,$s3,$t3,ror#8 @ fold the accumulated t3 terms into s3
945
946 ldr $t1,[$key],#16 @ next round key; key += 16
947 ldr $t2,[$key,#-12]
948 ldr $t3,[$key,#-8]
949 ldr $i1,[$key,#-4]
950 eor $s0,$s0,$t1 @ state ^= round key
951 eor $s1,$s1,$t2
952 eor $s2,$s2,$t3
953 eor $s3,$s3,$i1
954
955 subs $rounds,$rounds,#1 @ count down the remaining full rounds
956 bne .Ldec_loop
957
958 add $tbl,$tbl,#1024 @ tbl now points at the Td4 byte table that follows the 256 table words
959
960 ldr $t1,[$tbl,#0] @ prefetch Td4
961 ldr $t2,[$tbl,#32] @ one load per 32 bytes across the 256-byte table;
962 ldr $t3,[$tbl,#64] @ the loaded values are not used, every destination is overwritten below
963 ldr $i1,[$tbl,#96]
964 ldr $i2,[$tbl,#128]
965 ldr $i3,[$tbl,#160]
966 ldr $t1,[$tbl,#192]
967 ldr $t2,[$tbl,#224]
968
969 and $i1,lr,$s0,lsr#16
970 and $i2,lr,$s0,lsr#8
971 and $i3,lr,$s0
972 ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24]
973 ldrb $t1,[$tbl,$i1] @ Td4[s0>>16]
974 ldrb $t2,[$tbl,$i2] @ Td4[s0>>8]
975 ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
976
977 and $i1,lr,$s1 @ i0
978 and $i2,lr,$s1,lsr#16
979 and $i3,lr,$s1,lsr#8
980 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
981 ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24]
982 ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
983 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
984 eor $s0,$i1,$s0,lsl#24 @ last round: output words are assembled byte by byte with shifts
985 eor $s1,$t1,$s1,lsl#8
986 eor $t2,$t2,$i2,lsl#8
987 eor $t3,$t3,$i3,lsl#8
988
989 and $i1,lr,$s2,lsr#8 @ i0
990 and $i2,lr,$s2 @ i1
991 and $i3,lr,$s2,lsr#16
992 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8]
993 ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
994 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
995 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
996 eor $s0,$s0,$i1,lsl#8
997 eor $s1,$i2,$s1,lsl#16
998 eor $s2,$t2,$s2,lsl#16
999 eor $t3,$t3,$i3,lsl#16
1000
1001 and $i1,lr,$s3,lsr#16 @ i0
1002 and $i2,lr,$s3,lsr#8 @ i1
1003 and $i3,lr,$s3 @ i2
1004 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16]
1005 ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
1006 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
1007 ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
1008 eor $s0,$s0,$i1,lsl#16
1009 eor $s1,$s1,$i2,lsl#8
1010 eor $s2,$i3,$s2,lsl#8
1011 eor $s3,$t3,$s3,lsl#24
1012
1013 ldr lr,[sp],#4 @ pop lr
1014 ldr $t1,[$key,#0] @ last round key (key is not advanced here)
1015 ldr $t2,[$key,#4]
1016 ldr $t3,[$key,#8]
1017 ldr $i1,[$key,#12]
1018 eor $s0,$s0,$t1
1019 eor $s1,$s1,$t2
1020 eor $s2,$s2,$t3
1021 eor $s3,$s3,$i1
1022
1023 sub $tbl,$tbl,#1024 @ restore tbl to the start of the table
1024 mov pc,lr @ return
1025.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
1026.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1027___
1028
1029$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # emit every "bx lr" as a raw .word so the output also assembles with -march=armv4
1030print $code; # write out the generated assembly
diff --git a/src/lib/libcrypto/aes/asm/aes-ia64.S b/src/lib/libcrypto/aes/asm/aes-ia64.S
deleted file mode 100644
index 7f6c4c3662..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-ia64.S
+++ /dev/null
@@ -1,1123 +0,0 @@
1// ====================================================================
2// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
3// project. Rights for redistribution and usage in source and binary
4// forms are granted according to the OpenSSL license.
5// ====================================================================
6//
7// What's wrong with compiler generated code? Compiler never uses
8// variable 'shr' which is pairable with 'extr'/'dep' instructions.
9// Then it uses 'zxt' which is an I-type, but can be replaced with
10// 'and' which in turn can be assigned to M-port [there are twice as
11// many M-ports as there are I-ports on Itanium 2]. By sacrificing a few
12// registers for small constants (255, 24 and 16) to be used with
13// 'shr' and 'and' instructions I can achieve better ILP, Instruction
14// Level Parallelism, and performance. This code outperforms GCC 3.3
15// generated code by over factor of 2 (two), GCC 3.4 - by 70% and
16// HP C - by 40%. Measured best-case scenario, i.e. aligned
17// big-endian input, ECB timing on Itanium 2 is (18 + 13*rounds)
18// ticks per block, or 9.25 CPU cycles per byte for 128 bit key.
19
20// Version 1.2 mitigates the hazard of cache-timing attacks by
21// a) compressing S-boxes from 8KB to 2KB+256B, b) scheduling
22// references to S-boxes for L2 cache latency, c) prefetching T[ed]4
23// prior to the last round. As a result performance dropped to (26 + 15*rounds)
24// ticks per block or 11 cycles per byte processed with 128-bit key.
25// This is ~16% deterioration. For reference Itanium 2 L1 cache has
26// 64 bytes line size and L2 - 128 bytes...
27
28.ident "aes-ia64.S, version 1.2"
29.ident "IA-64 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
30.explicit
31.text
32
33rk0=r8; rk1=r9; // key schedule pointers, read through LDKEY
34
35pfssave=r2; // holds ar.pfs while AES_encrypt runs
36lcsave=r10; // holds the caller's ar.lc
37prsave=r3; // holds the caller's predicate registers
38maskff=r11; // constant 0xff
39twenty4=r14; // constant 24
40sixteen=r15; // constant 16
41
42te00=r16; te11=r17; te22=r18; te33=r19; // teXY: scratch for the lookup in table X indexed by a byte of state word Y
43te01=r20; te12=r21; te23=r22; te30=r23;
44te02=r24; te13=r25; te20=r26; te31=r27;
45te03=r28; te10=r29; te21=r30; te32=r31;
46
47// these are rotating...
48t0=r32; s0=r33;
49t1=r34; s1=r35;
50t2=r36; s2=r37;
51t3=r38; s3=r39;
52
53te0=r40; te1=r41; te2=r42; te3=r43; // table base pointers; te1-te3 are derived from te0 with the TE1-TE3 offsets
54
55#if defined(_HPUX_SOURCE) && !defined(_LP64)
56# define ADDP addp4
57#else
58# define ADDP add
59#endif
60
61// Offsets from Te0
62#define TE0 0
63#define TE2 2
64#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
65#define TE1 3
66#define TE3 1
67#else
68#define TE1 1
69#define TE3 3
70#endif
71
72// This implies that AES_KEY comprises 32-bit key schedule elements
73// even on LP64 platforms.
74#ifndef KSZ
75# define KSZ 4
76# define LDKEY ld4
77#endif
78
79.proc _ia64_AES_encrypt#
80// Input: rk0-rk1
81// te0
82// te3 as AES_KEY->rounds!!!
83// s0-s3
84// maskff,twenty4,sixteen
85// Output: r16,r20,r24,r28 as s0-s3
86// Clobber: r16-r31,rk0-rk1,r32-r43
87.align 32
88_ia64_AES_encrypt:
89 .prologue
90 .altrp b6
91 .body
92{ .mmi; alloc r16=ar.pfs,12,0,0,8
93 LDKEY t0=[rk0],2*KSZ
94 mov pr.rot=1<<16 }
95{ .mmi; LDKEY t1=[rk1],2*KSZ
96 add te1=TE1,te0
97 add te3=-3,te3 };; // te3 = rounds-3, moved into ar.lc below
98{ .mib; LDKEY t2=[rk0],2*KSZ
99 mov ar.ec=2 }
100{ .mib; LDKEY t3=[rk1],2*KSZ
101 add te2=TE2,te0
102 brp.loop.imp .Le_top,.Le_end-16 };;
103
104{ .mmi; xor s0=s0,t0 // state ^= first round key
105 xor s1=s1,t1
106 mov ar.lc=te3 }
107{ .mmi; xor s2=s2,t2
108 xor s3=s3,t3
109 add te3=TE3,te0 };; // te3 is now a table pointer like te1 and te2
110
111.align 32
112.Le_top: // NOTE(review): the trailing N/M tags look like <cycle>/<output word> - confirm
113{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
114 (p0) and te33=s3,maskff // 0/0:s3&0xff
115 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
116{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
117 (p0) and te30=s0,maskff // 0/1:s0&0xff
118 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
119{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
120 (p0) shladd te33=te33,3,te3 // 1/0:te3+s3&0xff
121 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
122{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
123 (p0) shladd te30=te30,3,te3 // 1/1:te3+s0
124 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
125{ .mmi; (p0) ld4 te33=[te33] // 2/0:te3[s3&0xff]
126 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
127 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
128{ .mmi; (p0) ld4 te30=[te30] // 2/1:te3[s0]
129 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
130 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
131{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
132 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
133 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
134{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
135 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
136 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
137{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
138 (p0) shladd te21=te21,3,te2 // 4/3:te2+s1>>8
139 (p0) extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
140{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
141 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
142 (p0) shr.u te13=s3,sixteen };; // 4/2:s3>>16
143{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
144 (p0) shladd te11=te11,3,te1 // 5/0:te1+s1>>16
145 (p0) extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
146{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
147 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
148 (p0) and te31=s1,maskff };; // 5/2:s1&0xff
149{ .mmi; (p0) ld4 te11=[te11] // 6/0:te1[s1>>16]
150 (p0) shladd te12=te12,3,te1 // 6/1:te1+s2>>16
151 (p0) extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
152{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
153 (p0) shladd te03=te03,3,te0 // 6/3:te0+s3>>24
154 (p0) and te32=s2,maskff };; // 6/3:s2&0xff
155
156{ .mmi; (p0) ld4 te12=[te12] // 7/1:te1[s2>>16]
157 (p0) shladd te31=te31,3,te3 // 7/2:te3+s1&0xff
158 (p0) and te13=te13,maskff} // 7/2:s3>>16&0xff
159{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
160 (p0) shladd te32=te32,3,te3 // 7/3:te3+s2
161 (p0) xor t0=t0,te33 };; // 7/0:
162{ .mmi; (p0) ld4 te31=[te31] // 8/2:te3[s1]
163 (p0) shladd te13=te13,3,te1 // 8/2:te1+s3>>16
164 (p0) xor t0=t0,te22 } // 8/0:
165{ .mmi; (p0) ld4 te32=[te32] // 8/3:te3[s2]
166 (p0) shladd te10=te10,3,te1 // 8/3:te1+s0>>16
167 (p0) xor t1=t1,te30 };; // 8/1:
168{ .mmi; (p0) ld4 te13=[te13] // 9/2:te1[s3>>16]
169 (p0) ld4 te10=[te10] // 9/3:te1[s0>>16]
170 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
171{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
172 (p0) xor t2=t2,te20 // 10[9]/2:
173 (p0) xor t3=t3,te21 };; // 10[9]/3:
174{ .mmi; (p0) xor t0=t0,te11 // 11[10]/0:done!
175 (p0) xor t1=t1,te01 // 11[10]/1:
176 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
177{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
178 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
179{ .mmi; (p0) xor t1=t1,te12 // 13[11]/1:done!
180 (p0) xor t2=t2,te31 // 13[11]/2:
181 (p0) xor t3=t3,te32 } // 13[11]/3:
182{ .mmi; (p17) add te0=2048,te0 // 13[11]/ under p17 the pointers move 2048 bytes up, to the data prefetched as Te4 below
183 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
184{ .mib; (p0) xor t2=t2,te13 // 14[12]/2:done!
185 (p17) add te2=2048+128-TE2,te2} // 14[12]/
186{ .mib; (p0) xor t3=t3,te10 // 14[12]/3:done!
187 (p17) add te3=2048+192-TE3,te3 // 14[12]/
188 br.ctop.sptk .Le_top };;
189.Le_end:
190
191
192{ .mmi; ld8 te12=[te0] // prefetch Te4
193 ld8 te31=[te1] }
194{ .mmi; ld8 te10=[te2]
195 ld8 te32=[te3] }
196
197{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
198 and te33=s3,maskff // 0/0:s3&0xff
199 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
200{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
201 and te30=s0,maskff // 0/1:s0&0xff
202 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
203{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
204 add te33=te33,te0 // 1/0:te0+s3&0xff
205 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
206{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
207 add te30=te30,te0 // 1/1:te0+s0
208 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
209{ .mmi; ld1 te33=[te33] // 2/0:te0[s3&0xff]
210 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
211 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
212{ .mmi; ld1 te30=[te30] // 2/1:te0[s0]
213 add te23=te23,te0 // 2/1:te0+s3>>8
214 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
215{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
216 add te20=te20,te0 // 3/2:te0+s0>>8
217 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
218{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
219 add te00=te00,te0 // 3/0:te0+s0>>24
220 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
221{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
222 add te21=te21,te0 // 4/3:te0+s1>>8
223 extr.u te11=s1,16,8 } // 4/0:s1>>16&0xff
224{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
225 add te01=te01,te0 // 4/1:te0+s1>>24
226 shr.u te13=s3,sixteen };; // 4/2:s3>>16
227{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
228 add te11=te11,te0 // 5/0:te0+s1>>16
229 extr.u te12=s2,16,8 } // 5/1:s2>>16&0xff
230{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
231 add te02=te02,te0 // 5/2:te0+s2>>24
232 and te31=s1,maskff };; // 5/2:s1&0xff
233{ .mmi; ld1 te11=[te11] // 6/0:te0[s1>>16]
234 add te12=te12,te0 // 6/1:te0+s2>>16
235 extr.u te10=s0,16,8 } // 6/3:s0>>16&0xff
236{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
237 add te03=te03,te0 // 6/3:te0+s3>>24
238 and te32=s2,maskff };; // 6/3:s2&0xff
239
240{ .mmi; ld1 te12=[te12] // 7/1:te0[s2>>16]
241 add te31=te31,te0 // 7/2:te0+s1&0xff
242 dep te33=te22,te33,8,8} // 7/0:
243{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
244 add te32=te32,te0 // 7/3:te0+s2
245 and te13=te13,maskff};; // 7/2:s3>>16&0xff
246{ .mmi; ld1 te31=[te31] // 8/2:te0[s1]
247 add te13=te13,te0 // 8/2:te0+s3>>16
248 dep te30=te23,te30,8,8} // 8/1:
249{ .mmi; ld1 te32=[te32] // 8/3:te0[s2]
250 add te10=te10,te0 // 8/3:te0+s0>>16
251 shl te00=te00,twenty4};; // 8/0:
252{ .mii; ld1 te13=[te13] // 9/2:te0[s3>>16]
253 dep te33=te11,te33,16,8 // 9/0:
254 shl te01=te01,twenty4};; // 9/1:
255{ .mii; ld1 te10=[te10] // 10/3:te0[s0>>16]
256 dep te31=te20,te31,8,8 // 10/2:
257 shl te02=te02,twenty4};; // 10/2:
258{ .mii; xor t0=t0,te33 // 11/0:
259 dep te32=te21,te32,8,8 // 11/3:
260 shl te12=te12,sixteen};; // 11/1:
261{ .mii; xor r16=t0,te00 // 12/0:done!
262 dep te31=te13,te31,16,8 // 12/2:
263 shl te03=te03,twenty4};; // 12/3:
264{ .mmi; xor t1=t1,te01 // 13/1:
265 xor t2=t2,te02 // 13/2:
266 dep te32=te10,te32,16,8};; // 13/3:
267{ .mmi; xor t1=t1,te30 // 14/1:
268 xor r24=t2,te31 // 14/2:done!
269 xor t3=t3,te32 };; // 14/3:
270{ .mib; xor r20=t1,te12 // 15/1:done!
271 xor r28=t3,te03 // 15/3:done!
272 br.ret.sptk b6 };;
273.endp _ia64_AES_encrypt#
274
275// void AES_encrypt (const void *in,void *out,const AES_KEY *key);
//
// Public wrapper around _ia64_AES_encrypt (in0=in, in1=out, in2=key).
// It gathers the 16 input bytes into four 32-bit words s0-s3 with the
// first byte of each word in bits 31..24, prepares the core routine's
// arguments, calls it through b6, and scatters the result words
// r16/r20/r24/r28 to the output buffer in the same byte order.
// Arguments prepared for the core: out1/out3/out5/out7 = s0-s3,
// out8 = address of AES_Te (computed ip-relative), out11 = AES_KEY->rounds,
// plus rk0/rk1, twenty4, sixteen and maskff (symbolic names whose
// register assignments are defined outside this procedure).
// ar.pfs, pr, ar.lc and psr.um are saved on entry and restored before
// returning; um.ac is cleared in between.
276.global AES_encrypt#
277.proc AES_encrypt#
278.align 32
279AES_encrypt:
280 .prologue
281 .save ar.pfs,pfssave
// Frame: 3 inputs, 1 local, 12 outputs. out0 = in & 3 is the input
// misalignment; r3 = ip is the anchor for the ip-relative table address.
282{ .mmi; alloc pfssave=ar.pfs,3,1,12,0
283 and out0=3,in0
284 mov r3=ip }
285{ .mmi; ADDP in0=0,in0
286 mov loc0=psr.um
287 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
288
289{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
290 add out8=(AES_Te#-AES_encrypt#),r3 // Te0
291 .save pr,prsave
292 mov prsave=pr }
293{ .mmi; rum 1<<3 // clear um.ac
294 .save ar.lc,lcsave
295 mov lcsave=ar.lc };;
296
297 .body
// Aligned fast path, assembled only when _HPUX_SOURCE is defined: the
// block is read and written with 4-byte loads/stores. p6 first flags a
// misaligned input (branch to the byte-wise loader), then is recomputed
// from out & 3 to flag a misaligned output. When _HPUX_SOURCE is not
// defined, execution falls straight through to .Le_i_unaligned.
298#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
299{ .mib; cmp.ne p6,p0=out0,r0
300 add out0=4,in0
301(p6) br.dpnt.many .Le_i_unaligned };;
302
303{ .mmi; ld4 out1=[in0],8 // s0
304 and out9=3,in1
305 mov twenty4=24 }
306{ .mmi; ld4 out3=[out0],8 // s1
307 ADDP rk0=0,in2
308 mov sixteen=16 };;
309{ .mmi; ld4 out5=[in0] // s2
310 cmp.ne p6,p0=out9,r0
311 mov maskff=0xff }
312{ .mmb; ld4 out7=[out0] // s3
313 ADDP rk1=KSZ,in2
314 br.call.sptk.many b6=_ia64_AES_encrypt };;
315
316{ .mib; ADDP in0=4,in1
317 ADDP in1=0,in1
318(p6) br.spnt .Le_o_unaligned };;
319
320{ .mii; mov psr.um=loc0
321 mov ar.pfs=pfssave
322 mov ar.lc=lcsave };;
323{ .mmi; st4 [in1]=r16,8 // s0
324 st4 [in0]=r20,8 // s1
325 mov pr=prsave,0x1ffff };;
326{ .mmb; st4 [in1]=r24 // s2
327 st4 [in0]=r28 // s3
328 br.ret.sptk.many b0 };;
329#endif
330
// Byte-wise input. in0/out0/out2/out4 walk bytes 0..3 of each word in
// steps of 4. Byte 3 is loaded directly into the s register (bits 7..0);
// bytes 0, 1 and 2 are then deposited at bits 24, 16 and 8.
331.align 32
332.Le_i_unaligned:
333{ .mmi; add out0=1,in0
334 add out2=2,in0
335 add out4=3,in0 };;
336{ .mmi; ld1 r16=[in0],4
337 ld1 r17=[out0],4 }//;;
338{ .mmi; ld1 r18=[out2],4
339 ld1 out1=[out4],4 };; // s0
340{ .mmi; ld1 r20=[in0],4
341 ld1 r21=[out0],4 }//;;
342{ .mmi; ld1 r22=[out2],4
343 ld1 out3=[out4],4 };; // s1
344{ .mmi; ld1 r24=[in0],4
345 ld1 r25=[out0],4 }//;;
346{ .mmi; ld1 r26=[out2],4
347 ld1 out5=[out4],4 };; // s2
348{ .mmi; ld1 r28=[in0]
349 ld1 r29=[out0] }//;;
350{ .mmi; ld1 r30=[out2]
351 ld1 out7=[out4] };; // s3
352
353{ .mii;
354 dep out1=r16,out1,24,8 //;;
355 dep out3=r20,out3,24,8 }//;;
356{ .mii; ADDP rk0=0,in2
357 dep out5=r24,out5,24,8 //;;
358 dep out7=r28,out7,24,8 };;
359{ .mii; ADDP rk1=KSZ,in2
360 dep out1=r17,out1,16,8 //;;
361 dep out3=r21,out3,16,8 }//;;
362{ .mii; mov twenty4=24
363 dep out5=r25,out5,16,8 //;;
364 dep out7=r29,out7,16,8 };;
365{ .mii; mov sixteen=16
366 dep out1=r18,out1,8,8 //;;
367 dep out3=r22,out3,8,8 }//;;
368{ .mii; mov maskff=0xff
369 dep out5=r26,out5,8,8 //;;
370 dep out7=r30,out7,8,8 };;
371
372{ .mib; br.call.sptk.many b6=_ia64_AES_encrypt };;
373
// Byte-wise output. Reached by fall-through when the call above returns,
// or by branch from the aligned path when out is misaligned.
// out0..out3 address bytes 0..3 of each word. st1 stores only the low
// byte of its source register, so the shr.u results need no masking.
// The saved pr, ar.pfs, ar.lc and psr.um are restored along the way.
374.Le_o_unaligned:
375{ .mii; ADDP out0=0,in1
376 extr.u r17=r16,8,8 // s0
377 shr.u r19=r16,twenty4 }//;;
378{ .mii; ADDP out1=1,in1
379 extr.u r18=r16,16,8
380 shr.u r23=r20,twenty4 }//;; // s1
381{ .mii; ADDP out2=2,in1
382 extr.u r21=r20,8,8
383 shr.u r22=r20,sixteen }//;;
384{ .mii; ADDP out3=3,in1
385 extr.u r25=r24,8,8 // s2
386 shr.u r27=r24,twenty4 };;
387{ .mii; st1 [out3]=r16,4
388 extr.u r26=r24,16,8
389 shr.u r31=r28,twenty4 }//;; // s3
390{ .mii; st1 [out2]=r17,4
391 extr.u r29=r28,8,8
392 shr.u r30=r28,sixteen }//;;
393
394{ .mmi; st1 [out1]=r18,4
395 st1 [out0]=r19,4 };;
396{ .mmi; st1 [out3]=r20,4
397 st1 [out2]=r21,4 }//;;
398{ .mmi; st1 [out1]=r22,4
399 st1 [out0]=r23,4 };;
400{ .mmi; st1 [out3]=r24,4
401 st1 [out2]=r25,4
402 mov pr=prsave,0x1ffff }//;;
403{ .mmi; st1 [out1]=r26,4
404 st1 [out0]=r27,4
405 mov ar.pfs=pfssave };;
406{ .mmi; st1 [out3]=r28
407 st1 [out2]=r29
408 mov ar.lc=lcsave }//;;
409{ .mmi; st1 [out1]=r30
410 st1 [out0]=r31 }
411{ .mfb; mov psr.um=loc0 // restore user mask
412 br.ret.sptk.many b0 };;
413.endp AES_encrypt#
414
415// *AES_decrypt are autogenerated by the following script:
//
// What the script does (it is kept under "#if 0" and never assembled):
//  - prints this header and a verbatim copy of its own source ($0);
//  - filters its input, skipping everything before the line containing
//    ".proc _ia64_AES_encrypt" and stopping after the (already renamed)
//    ".endp AES_decrypt" line;
//  - swaps the temporaries te11<->te13, te33<->te31, te12<->te10 and
//    te30<->te32, changing the state word each one is extracted from
//    (e.g. "te11=s1" becomes "te13=s3"). The renames go through an
//    intermediate "td" prefix that is mapped back to "te" at the end,
//    so an already-renamed register is never matched a second time;
//  - renames AES_encrypt -> AES_decrypt, .Le_ -> .Ld_ and
//    AES_Te# -> AES_Td#.
// Only text matching these patterns is rewritten, so trailing comments
// that mention bare s0-s3 or "Te0" are not updated in the output.
416#if 0
417#!/usr/bin/env perl
418print "// *AES_decrypt are autogenerated by the following script:\n#if 0\n";
419open(PROG,'<'.$0); while(<PROG>) { print; } close(PROG);
420print "#endif\n";
421while(<>) {
422 $process=1 if (/\.proc\s+_ia64_AES_encrypt/);
423 next if (!$process);
424
425 #s/te00=s0/td00=s0/; s/te00/td00/g;
426 s/te11=s1/td13=s3/; s/te11/td13/g;
427 #s/te22=s2/td22=s2/; s/te22/td22/g;
428 s/te33=s3/td31=s1/; s/te33/td31/g;
429
430 #s/te01=s1/td01=s1/; s/te01/td01/g;
431 s/te12=s2/td10=s0/; s/te12/td10/g;
432 #s/te23=s3/td23=s3/; s/te23/td23/g;
433 s/te30=s0/td32=s2/; s/te30/td32/g;
434
435 #s/te02=s2/td02=s2/; s/te02/td02/g;
436 s/te13=s3/td11=s1/; s/te13/td11/g;
437 #s/te20=s0/td20=s0/; s/te20/td20/g;
438 s/te31=s1/td33=s3/; s/te31/td33/g;
439
440 #s/te03=s3/td03=s3/; s/te03/td03/g;
441 s/te10=s0/td12=s2/; s/te10/td12/g;
442 #s/te21=s1/td21=s1/; s/te21/td21/g;
443 s/te32=s2/td30=s0/; s/te32/td30/g;
444
445 s/td/te/g;
446
447 s/AES_encrypt/AES_decrypt/g;
448 s/\.Le_/.Ld_/g;
449 s/AES_Te#/AES_Td#/g;
450
451 print;
452
453 exit if (/\.endp\s+AES_decrypt/);
454}
455#endif
456.proc _ia64_AES_decrypt#
457// Input: rk0-rk1
458// te0
459// te3 as AES_KEY->rounds!!!
460// s0-s3
461// maskff,twenty4,sixteen
462// Output: r16,r20,r24,r28 as s0-s3
463// Clobber: r16-r31,rk0-rk1,r32-r43
//
// Core decryption routine; returns through b6 (see .altrp) with the four
// result words in r16/r20/r24/r28.
// NOTE(review): the te* names are inherited from the encrypt routine;
// AES_decrypt passes the address of AES_Td as the table base, so te0 is
// presumably the base of the decryption tables here. Several trailing
// comments below name a different source register than the instruction
// uses (e.g. "and te31=s1,maskff // 0/0:s3&0xff"); treat the
// instructions, not those comments, as authoritative.
464.align 32
465_ia64_AES_decrypt:
466 .prologue
467 .altrp b6
468 .body
// Setup: load round key 0 into t0-t3 (rk0/rk1 each advance by 2*KSZ),
// derive te1/te2 from te0, and turn the round count passed in te3 into
// the loop count (rounds-3) before te3 is reused as a table pointer.
// pr.rot=1<<16 leaves p16 as the only rotating predicate set; ar.ec=2
// gives the counted loop one extra (epilogue) pass.
469{ .mmi; alloc r16=ar.pfs,12,0,0,8
470 LDKEY t0=[rk0],2*KSZ
471 mov pr.rot=1<<16 }
472{ .mmi; LDKEY t1=[rk1],2*KSZ
473 add te1=TE1,te0
474 add te3=-3,te3 };;
475{ .mib; LDKEY t2=[rk0],2*KSZ
476 mov ar.ec=2 }
477{ .mib; LDKEY t3=[rk1],2*KSZ
478 add te2=TE2,te0
479 brp.loop.imp .Ld_top,.Ld_end-16 };;
480
// Initial AddRoundKey: s ^= rk[0..3].
481{ .mmi; xor s0=s0,t0
482 xor s1=s1,t1
483 mov ar.lc=te3 }
484{ .mmi; xor s2=s2,t2
485 xor s3=s3,t3
486 add te3=TE3,te0 };;
487
// Main loop, one full round per pass. Each state byte is scaled by 8
// (shladd ..,3,..) to index the 8-byte table entries through te0..te3,
// and the four ld4 results for each output word are XORed into the
// round key held in t0-t3.
// With ar.lc=rounds-3 and ar.ec=2 the body should execute rounds-1
// times before br.ctop falls through (TODO confirm against the IA-64
// br.ctop definition).
// NOTE(review): no explicit t->s copy is visible; presumably s0-s3 and
// t0-t3 are mapped onto rotating registers that br.ctop renames between
// passes - confirm against the register defines earlier in the file.
// p17 is cleared on every pass in which p16 is set, so the (p17) adds
// take effect only on the final pass, where they re-point te0..te3 at
// offsets 2048, 2048+64, 2048+128 and 2048+192 from the table base,
// i.e. into the byte table that follows the 2048 bytes of word entries.
488.align 32
489.Ld_top:
490{ .mmi; (p0) LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
491 (p0) and te31=s1,maskff // 0/0:s3&0xff
492 (p0) extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
493{ .mmi; (p0) LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
494 (p0) and te32=s2,maskff // 0/1:s0&0xff
495 (p0) shr.u te00=s0,twenty4 };; // 0/0:s0>>24
496{ .mmi; (p0) LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
497 (p0) shladd te31=te31,3,te3 // 1/0:te0+s0>>24
498 (p0) extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
499{ .mmi; (p0) LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
500 (p0) shladd te32=te32,3,te3 // 1/1:te3+s0
501 (p0) shr.u te01=s1,twenty4 };; // 1/1:s1>>24
502{ .mmi; (p0) ld4 te31=[te31] // 2/0:te3[s3&0xff]
503 (p0) shladd te22=te22,3,te2 // 2/0:te2+s2>>8&0xff
504 (p0) extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
505{ .mmi; (p0) ld4 te32=[te32] // 2/1:te3[s0]
506 (p0) shladd te23=te23,3,te2 // 2/1:te2+s3>>8
507 (p0) shr.u te02=s2,twenty4 };; // 2/2:s2>>24
508{ .mmi; (p0) ld4 te22=[te22] // 3/0:te2[s2>>8]
509 (p0) shladd te20=te20,3,te2 // 3/2:te2+s0>>8
510 (p0) extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
511{ .mmi; (p0) ld4 te23=[te23] // 3/1:te2[s3>>8]
512 (p0) shladd te00=te00,3,te0 // 3/0:te0+s0>>24
513 (p0) shr.u te03=s3,twenty4 };; // 3/3:s3>>24
514{ .mmi; (p0) ld4 te20=[te20] // 4/2:te2[s0>>8]
515 (p0) shladd te21=te21,3,te2 // 4/3:te3+s2
516 (p0) extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff
517{ .mmi; (p0) ld4 te00=[te00] // 4/0:te0[s0>>24]
518 (p0) shladd te01=te01,3,te0 // 4/1:te0+s1>>24
519 (p0) shr.u te11=s1,sixteen };; // 4/2:s3>>16
520{ .mmi; (p0) ld4 te21=[te21] // 5/3:te2[s1>>8]
521 (p0) shladd te13=te13,3,te1 // 5/0:te1+s1>>16
522 (p0) extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff
523{ .mmi; (p0) ld4 te01=[te01] // 5/1:te0[s1>>24]
524 (p0) shladd te02=te02,3,te0 // 5/2:te0+s2>>24
525 (p0) and te33=s3,maskff };; // 5/2:s1&0xff
526{ .mmi; (p0) ld4 te13=[te13] // 6/0:te1[s1>>16]
527 (p0) shladd te10=te10,3,te1 // 6/1:te1+s2>>16
528 (p0) extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff
529{ .mmi; (p0) ld4 te02=[te02] // 6/2:te0[s2>>24]
530 (p0) shladd te03=te03,3,te0 // 6/3:te1+s0>>16
531 (p0) and te30=s0,maskff };; // 6/3:s2&0xff
532
533{ .mmi; (p0) ld4 te10=[te10] // 7/1:te1[s2>>16]
534 (p0) shladd te33=te33,3,te3 // 7/2:te3+s1&0xff
535 (p0) and te11=te11,maskff} // 7/2:s3>>16&0xff
536{ .mmi; (p0) ld4 te03=[te03] // 7/3:te0[s3>>24]
537 (p0) shladd te30=te30,3,te3 // 7/3:te3+s2
538 (p0) xor t0=t0,te31 };; // 7/0:
539{ .mmi; (p0) ld4 te33=[te33] // 8/2:te3[s1]
540 (p0) shladd te11=te11,3,te1 // 8/2:te1+s3>>16
541 (p0) xor t0=t0,te22 } // 8/0:
542{ .mmi; (p0) ld4 te30=[te30] // 8/3:te3[s2]
543 (p0) shladd te12=te12,3,te1 // 8/3:te1+s0>>16
544 (p0) xor t1=t1,te32 };; // 8/1:
545{ .mmi; (p0) ld4 te11=[te11] // 9/2:te1[s3>>16]
546 (p0) ld4 te12=[te12] // 9/3:te1[s0>>16]
547 (p0) xor t0=t0,te00 };; // 9/0: !L2 scheduling
548{ .mmi; (p0) xor t1=t1,te23 // 10[9]/1:
549 (p0) xor t2=t2,te20 // 10[9]/2:
550 (p0) xor t3=t3,te21 };; // 10[9]/3:
551{ .mmi; (p0) xor t0=t0,te13 // 11[10]/0:done!
552 (p0) xor t1=t1,te01 // 11[10]/1:
553 (p0) xor t2=t2,te02 };; // 11[10]/2: !L2 scheduling
554{ .mmi; (p0) xor t3=t3,te03 // 12[10]/3:
555 (p16) cmp.eq p0,p17=r0,r0 };; // 12[10]/clear (p17)
556{ .mmi; (p0) xor t1=t1,te10 // 13[11]/1:done!
557 (p0) xor t2=t2,te33 // 13[11]/2:
558 (p0) xor t3=t3,te30 } // 13[11]/3:
559{ .mmi; (p17) add te0=2048,te0 // 13[11]/
560 (p17) add te1=2048+64-TE1,te1};; // 13[11]/
561{ .mib; (p0) xor t2=t2,te11 // 14[12]/2:done!
562 (p17) add te2=2048+128-TE2,te2} // 14[12]/
563{ .mib; (p0) xor t3=t3,te12 // 14[12]/3:done!
564 (p17) add te3=2048+192-TE3,te3 // 14[12]/
565 br.ctop.sptk .Ld_top };;
566.Ld_end:
567
568
// Last round. The four ld8 only touch the 256-byte table at 64-byte
// steps: their target registers are rewritten below before being read.
// All lookups are single-byte loads (ld1) through te0; the bytes are
// merged into words with shl/dep and XORed with the final round key.
569{ .mmi; ld8 te10=[te0] // prefetch Td4
570 ld8 te33=[te1] }
571{ .mmi; ld8 te12=[te2]
572 ld8 te30=[te3] }
573
574{ .mmi; LDKEY t0=[rk0],2*KSZ // 0/0:rk[0]
575 and te31=s1,maskff // 0/0:s3&0xff
576 extr.u te22=s2,8,8 } // 0/0:s2>>8&0xff
577{ .mmi; LDKEY t1=[rk1],2*KSZ // 0/1:rk[1]
578 and te32=s2,maskff // 0/1:s0&0xff
579 shr.u te00=s0,twenty4 };; // 0/0:s0>>24
580{ .mmi; LDKEY t2=[rk0],2*KSZ // 1/2:rk[2]
581 add te31=te31,te0 // 1/0:te0+s0>>24
582 extr.u te23=s3,8,8 } // 1/1:s3>>8&0xff
583{ .mmi; LDKEY t3=[rk1],2*KSZ // 1/3:rk[3]
584 add te32=te32,te0 // 1/1:te0+s0
585 shr.u te01=s1,twenty4 };; // 1/1:s1>>24
586{ .mmi; ld1 te31=[te31] // 2/0:te0[s3&0xff]
587 add te22=te22,te0 // 2/0:te0+s2>>8&0xff
588 extr.u te20=s0,8,8 } // 2/2:s0>>8&0xff
589{ .mmi; ld1 te32=[te32] // 2/1:te0[s0]
590 add te23=te23,te0 // 2/1:te0+s3>>8
591 shr.u te02=s2,twenty4 };; // 2/2:s2>>24
592{ .mmi; ld1 te22=[te22] // 3/0:te0[s2>>8]
593 add te20=te20,te0 // 3/2:te0+s0>>8
594 extr.u te21=s1,8,8 } // 3/3:s1>>8&0xff
595{ .mmi; ld1 te23=[te23] // 3/1:te0[s3>>8]
596 add te00=te00,te0 // 3/0:te0+s0>>24
597 shr.u te03=s3,twenty4 };; // 3/3:s3>>24
598{ .mmi; ld1 te20=[te20] // 4/2:te0[s0>>8]
599 add te21=te21,te0 // 4/3:te0+s2
600 extr.u te13=s3,16,8 } // 4/0:s1>>16&0xff
601{ .mmi; ld1 te00=[te00] // 4/0:te0[s0>>24]
602 add te01=te01,te0 // 4/1:te0+s1>>24
603 shr.u te11=s1,sixteen };; // 4/2:s3>>16
604{ .mmi; ld1 te21=[te21] // 5/3:te0[s1>>8]
605 add te13=te13,te0 // 5/0:te0+s1>>16
606 extr.u te10=s0,16,8 } // 5/1:s2>>16&0xff
607{ .mmi; ld1 te01=[te01] // 5/1:te0[s1>>24]
608 add te02=te02,te0 // 5/2:te0+s2>>24
609 and te33=s3,maskff };; // 5/2:s1&0xff
610{ .mmi; ld1 te13=[te13] // 6/0:te0[s1>>16]
611 add te10=te10,te0 // 6/1:te0+s2>>16
612 extr.u te12=s2,16,8 } // 6/3:s0>>16&0xff
613{ .mmi; ld1 te02=[te02] // 6/2:te0[s2>>24]
614 add te03=te03,te0 // 6/3:te0+s0>>16
615 and te30=s0,maskff };; // 6/3:s2&0xff
616
617{ .mmi; ld1 te10=[te10] // 7/1:te0[s2>>16]
618 add te33=te33,te0 // 7/2:te0+s1&0xff
619 dep te31=te22,te31,8,8} // 7/0:
620{ .mmi; ld1 te03=[te03] // 7/3:te0[s3>>24]
621 add te30=te30,te0 // 7/3:te0+s2
622 and te11=te11,maskff};; // 7/2:s3>>16&0xff
623{ .mmi; ld1 te33=[te33] // 8/2:te0[s1]
624 add te11=te11,te0 // 8/2:te0+s3>>16
625 dep te32=te23,te32,8,8} // 8/1:
626{ .mmi; ld1 te30=[te30] // 8/3:te0[s2]
627 add te12=te12,te0 // 8/3:te0+s0>>16
628 shl te00=te00,twenty4};; // 8/0:
629{ .mii; ld1 te11=[te11] // 9/2:te0[s3>>16]
630 dep te31=te13,te31,16,8 // 9/0:
631 shl te01=te01,twenty4};; // 9/1:
632{ .mii; ld1 te12=[te12] // 10/3:te0[s0>>16]
633 dep te33=te20,te33,8,8 // 10/2:
634 shl te02=te02,twenty4};; // 10/2:
635{ .mii; xor t0=t0,te31 // 11/0:
636 dep te30=te21,te30,8,8 // 11/3:
637 shl te10=te10,sixteen};; // 11/1:
638{ .mii; xor r16=t0,te00 // 12/0:done!
639 dep te33=te11,te33,16,8 // 12/2:
640 shl te03=te03,twenty4};; // 12/3:
641{ .mmi; xor t1=t1,te01 // 13/1:
642 xor t2=t2,te02 // 13/2:
643 dep te30=te12,te30,16,8};; // 13/3:
644{ .mmi; xor t1=t1,te32 // 14/1:
645 xor r24=t2,te33 // 14/2:done!
646 xor t3=t3,te30 };; // 14/3:
647{ .mib; xor r20=t1,te10 // 15/1:done!
648 xor r28=t3,te03 // 15/3:done!
649 br.ret.sptk b6 };;
650.endp _ia64_AES_decrypt#
651
652// void AES_decrypt (const void *in,void *out,const AES_KEY *key);
//
// Public wrapper around _ia64_AES_decrypt (in0=in, in1=out, in2=key).
// It gathers the 16 input bytes into four 32-bit words s0-s3 with the
// first byte of each word in bits 31..24, prepares the core routine's
// arguments, calls it through b6, and scatters the result words
// r16/r20/r24/r28 to the output buffer in the same byte order.
// Arguments prepared for the core: out1/out3/out5/out7 = s0-s3,
// out8 = address of AES_Td (computed ip-relative), out11 = AES_KEY->rounds,
// plus rk0/rk1, twenty4, sixteen and maskff (symbolic names whose
// register assignments are defined outside this procedure).
// ar.pfs, pr, ar.lc and psr.um are saved on entry and restored before
// returning; um.ac is cleared in between.
653.global AES_decrypt#
654.proc AES_decrypt#
655.align 32
656AES_decrypt:
657 .prologue
658 .save ar.pfs,pfssave
// Frame: 3 inputs, 1 local, 12 outputs. out0 = in & 3 is the input
// misalignment; r3 = ip is the anchor for the ip-relative table address.
659{ .mmi; alloc pfssave=ar.pfs,3,1,12,0
660 and out0=3,in0
661 mov r3=ip }
662{ .mmi; ADDP in0=0,in0
663 mov loc0=psr.um
664 ADDP out11=KSZ*60,in2 };; // &AES_KEY->rounds
665
666{ .mmi; ld4 out11=[out11] // AES_KEY->rounds
667 add out8=(AES_Td#-AES_decrypt#),r3 // Td0
668 .save pr,prsave
669 mov prsave=pr }
670{ .mmi; rum 1<<3 // clear um.ac
671 .save ar.lc,lcsave
672 mov lcsave=ar.lc };;
673
674 .body
// Aligned fast path, assembled only when _HPUX_SOURCE is defined: the
// block is read and written with 4-byte loads/stores. p6 first flags a
// misaligned input (branch to the byte-wise loader), then is recomputed
// from out & 3 to flag a misaligned output. When _HPUX_SOURCE is not
// defined, execution falls straight through to .Ld_i_unaligned.
675#if defined(_HPUX_SOURCE) // HPUX is big-endian, cut 15+15 cycles...
676{ .mib; cmp.ne p6,p0=out0,r0
677 add out0=4,in0
678(p6) br.dpnt.many .Ld_i_unaligned };;
679
680{ .mmi; ld4 out1=[in0],8 // s0
681 and out9=3,in1
682 mov twenty4=24 }
683{ .mmi; ld4 out3=[out0],8 // s1
684 ADDP rk0=0,in2
685 mov sixteen=16 };;
686{ .mmi; ld4 out5=[in0] // s2
687 cmp.ne p6,p0=out9,r0
688 mov maskff=0xff }
689{ .mmb; ld4 out7=[out0] // s3
690 ADDP rk1=KSZ,in2
691 br.call.sptk.many b6=_ia64_AES_decrypt };;
692
693{ .mib; ADDP in0=4,in1
694 ADDP in1=0,in1
695(p6) br.spnt .Ld_o_unaligned };;
696
697{ .mii; mov psr.um=loc0
698 mov ar.pfs=pfssave
699 mov ar.lc=lcsave };;
700{ .mmi; st4 [in1]=r16,8 // s0
701 st4 [in0]=r20,8 // s1
702 mov pr=prsave,0x1ffff };;
703{ .mmb; st4 [in1]=r24 // s2
704 st4 [in0]=r28 // s3
705 br.ret.sptk.many b0 };;
706#endif
707
// Byte-wise input. in0/out0/out2/out4 walk bytes 0..3 of each word in
// steps of 4. Byte 3 is loaded directly into the s register (bits 7..0);
// bytes 0, 1 and 2 are then deposited at bits 24, 16 and 8.
708.align 32
709.Ld_i_unaligned:
710{ .mmi; add out0=1,in0
711 add out2=2,in0
712 add out4=3,in0 };;
713{ .mmi; ld1 r16=[in0],4
714 ld1 r17=[out0],4 }//;;
715{ .mmi; ld1 r18=[out2],4
716 ld1 out1=[out4],4 };; // s0
717{ .mmi; ld1 r20=[in0],4
718 ld1 r21=[out0],4 }//;;
719{ .mmi; ld1 r22=[out2],4
720 ld1 out3=[out4],4 };; // s1
721{ .mmi; ld1 r24=[in0],4
722 ld1 r25=[out0],4 }//;;
723{ .mmi; ld1 r26=[out2],4
724 ld1 out5=[out4],4 };; // s2
725{ .mmi; ld1 r28=[in0]
726 ld1 r29=[out0] }//;;
727{ .mmi; ld1 r30=[out2]
728 ld1 out7=[out4] };; // s3
729
730{ .mii;
731 dep out1=r16,out1,24,8 //;;
732 dep out3=r20,out3,24,8 }//;;
733{ .mii; ADDP rk0=0,in2
734 dep out5=r24,out5,24,8 //;;
735 dep out7=r28,out7,24,8 };;
736{ .mii; ADDP rk1=KSZ,in2
737 dep out1=r17,out1,16,8 //;;
738 dep out3=r21,out3,16,8 }//;;
739{ .mii; mov twenty4=24
740 dep out5=r25,out5,16,8 //;;
741 dep out7=r29,out7,16,8 };;
742{ .mii; mov sixteen=16
743 dep out1=r18,out1,8,8 //;;
744 dep out3=r22,out3,8,8 }//;;
745{ .mii; mov maskff=0xff
746 dep out5=r26,out5,8,8 //;;
747 dep out7=r30,out7,8,8 };;
748
749{ .mib; br.call.sptk.many b6=_ia64_AES_decrypt };;
750
// Byte-wise output. Reached by fall-through when the call above returns,
// or by branch from the aligned path when out is misaligned.
// out0..out3 address bytes 0..3 of each word. st1 stores only the low
// byte of its source register, so the shr.u results need no masking.
// The saved pr, ar.pfs, ar.lc and psr.um are restored along the way.
751.Ld_o_unaligned:
752{ .mii; ADDP out0=0,in1
753 extr.u r17=r16,8,8 // s0
754 shr.u r19=r16,twenty4 }//;;
755{ .mii; ADDP out1=1,in1
756 extr.u r18=r16,16,8
757 shr.u r23=r20,twenty4 }//;; // s1
758{ .mii; ADDP out2=2,in1
759 extr.u r21=r20,8,8
760 shr.u r22=r20,sixteen }//;;
761{ .mii; ADDP out3=3,in1
762 extr.u r25=r24,8,8 // s2
763 shr.u r27=r24,twenty4 };;
764{ .mii; st1 [out3]=r16,4
765 extr.u r26=r24,16,8
766 shr.u r31=r28,twenty4 }//;; // s3
767{ .mii; st1 [out2]=r17,4
768 extr.u r29=r28,8,8
769 shr.u r30=r28,sixteen }//;;
770
771{ .mmi; st1 [out1]=r18,4
772 st1 [out0]=r19,4 };;
773{ .mmi; st1 [out3]=r20,4
774 st1 [out2]=r21,4 }//;;
775{ .mmi; st1 [out1]=r22,4
776 st1 [out0]=r23,4 };;
777{ .mmi; st1 [out3]=r24,4
778 st1 [out2]=r25,4
779 mov pr=prsave,0x1ffff }//;;
780{ .mmi; st1 [out1]=r26,4
781 st1 [out0]=r27,4
782 mov ar.pfs=pfssave };;
783{ .mmi; st1 [out3]=r28
784 st1 [out2]=r29
785 mov ar.lc=lcsave }//;;
786{ .mmi; st1 [out1]=r30
787 st1 [out0]=r31 }
788{ .mfb; mov psr.um=loc0 // restore user mask
789 br.ret.sptk.many b0 };;
790.endp AES_decrypt#
791
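792// Leave the lookup tables in the .text segment: AES_encrypt/AES_decrypt address them ip-relative (AES_Te#-AES_encrypt#, AES_Td#-AES_decrypt#).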
793.align 64
794.global AES_Te#
795.type AES_Te#,@object
796AES_Te: data4 0xc66363a5,0xc66363a5, 0xf87c7c84,0xf87c7c84
797 data4 0xee777799,0xee777799, 0xf67b7b8d,0xf67b7b8d
798 data4 0xfff2f20d,0xfff2f20d, 0xd66b6bbd,0xd66b6bbd
799 data4 0xde6f6fb1,0xde6f6fb1, 0x91c5c554,0x91c5c554
800 data4 0x60303050,0x60303050, 0x02010103,0x02010103
801 data4 0xce6767a9,0xce6767a9, 0x562b2b7d,0x562b2b7d
802 data4 0xe7fefe19,0xe7fefe19, 0xb5d7d762,0xb5d7d762
803 data4 0x4dababe6,0x4dababe6, 0xec76769a,0xec76769a
804 data4 0x8fcaca45,0x8fcaca45, 0x1f82829d,0x1f82829d
805 data4 0x89c9c940,0x89c9c940, 0xfa7d7d87,0xfa7d7d87
806 data4 0xeffafa15,0xeffafa15, 0xb25959eb,0xb25959eb
807 data4 0x8e4747c9,0x8e4747c9, 0xfbf0f00b,0xfbf0f00b
808 data4 0x41adadec,0x41adadec, 0xb3d4d467,0xb3d4d467
809 data4 0x5fa2a2fd,0x5fa2a2fd, 0x45afafea,0x45afafea
810 data4 0x239c9cbf,0x239c9cbf, 0x53a4a4f7,0x53a4a4f7
811 data4 0xe4727296,0xe4727296, 0x9bc0c05b,0x9bc0c05b
812 data4 0x75b7b7c2,0x75b7b7c2, 0xe1fdfd1c,0xe1fdfd1c
813 data4 0x3d9393ae,0x3d9393ae, 0x4c26266a,0x4c26266a
814 data4 0x6c36365a,0x6c36365a, 0x7e3f3f41,0x7e3f3f41
815 data4 0xf5f7f702,0xf5f7f702, 0x83cccc4f,0x83cccc4f
816 data4 0x6834345c,0x6834345c, 0x51a5a5f4,0x51a5a5f4
817 data4 0xd1e5e534,0xd1e5e534, 0xf9f1f108,0xf9f1f108
818 data4 0xe2717193,0xe2717193, 0xabd8d873,0xabd8d873
819 data4 0x62313153,0x62313153, 0x2a15153f,0x2a15153f
820 data4 0x0804040c,0x0804040c, 0x95c7c752,0x95c7c752
821 data4 0x46232365,0x46232365, 0x9dc3c35e,0x9dc3c35e
822 data4 0x30181828,0x30181828, 0x379696a1,0x379696a1
823 data4 0x0a05050f,0x0a05050f, 0x2f9a9ab5,0x2f9a9ab5
824 data4 0x0e070709,0x0e070709, 0x24121236,0x24121236
825 data4 0x1b80809b,0x1b80809b, 0xdfe2e23d,0xdfe2e23d
826 data4 0xcdebeb26,0xcdebeb26, 0x4e272769,0x4e272769
827 data4 0x7fb2b2cd,0x7fb2b2cd, 0xea75759f,0xea75759f
828 data4 0x1209091b,0x1209091b, 0x1d83839e,0x1d83839e
829 data4 0x582c2c74,0x582c2c74, 0x341a1a2e,0x341a1a2e
830 data4 0x361b1b2d,0x361b1b2d, 0xdc6e6eb2,0xdc6e6eb2
831 data4 0xb45a5aee,0xb45a5aee, 0x5ba0a0fb,0x5ba0a0fb
832 data4 0xa45252f6,0xa45252f6, 0x763b3b4d,0x763b3b4d
833 data4 0xb7d6d661,0xb7d6d661, 0x7db3b3ce,0x7db3b3ce
834 data4 0x5229297b,0x5229297b, 0xdde3e33e,0xdde3e33e
835 data4 0x5e2f2f71,0x5e2f2f71, 0x13848497,0x13848497
836 data4 0xa65353f5,0xa65353f5, 0xb9d1d168,0xb9d1d168
837 data4 0x00000000,0x00000000, 0xc1eded2c,0xc1eded2c
838 data4 0x40202060,0x40202060, 0xe3fcfc1f,0xe3fcfc1f
839 data4 0x79b1b1c8,0x79b1b1c8, 0xb65b5bed,0xb65b5bed
840 data4 0xd46a6abe,0xd46a6abe, 0x8dcbcb46,0x8dcbcb46
841 data4 0x67bebed9,0x67bebed9, 0x7239394b,0x7239394b
842 data4 0x944a4ade,0x944a4ade, 0x984c4cd4,0x984c4cd4
843 data4 0xb05858e8,0xb05858e8, 0x85cfcf4a,0x85cfcf4a
844 data4 0xbbd0d06b,0xbbd0d06b, 0xc5efef2a,0xc5efef2a
845 data4 0x4faaaae5,0x4faaaae5, 0xedfbfb16,0xedfbfb16
846 data4 0x864343c5,0x864343c5, 0x9a4d4dd7,0x9a4d4dd7
847 data4 0x66333355,0x66333355, 0x11858594,0x11858594
848 data4 0x8a4545cf,0x8a4545cf, 0xe9f9f910,0xe9f9f910
849 data4 0x04020206,0x04020206, 0xfe7f7f81,0xfe7f7f81
850 data4 0xa05050f0,0xa05050f0, 0x783c3c44,0x783c3c44
851 data4 0x259f9fba,0x259f9fba, 0x4ba8a8e3,0x4ba8a8e3
852 data4 0xa25151f3,0xa25151f3, 0x5da3a3fe,0x5da3a3fe
853 data4 0x804040c0,0x804040c0, 0x058f8f8a,0x058f8f8a
854 data4 0x3f9292ad,0x3f9292ad, 0x219d9dbc,0x219d9dbc
855 data4 0x70383848,0x70383848, 0xf1f5f504,0xf1f5f504
856 data4 0x63bcbcdf,0x63bcbcdf, 0x77b6b6c1,0x77b6b6c1
857 data4 0xafdada75,0xafdada75, 0x42212163,0x42212163
858 data4 0x20101030,0x20101030, 0xe5ffff1a,0xe5ffff1a
859 data4 0xfdf3f30e,0xfdf3f30e, 0xbfd2d26d,0xbfd2d26d
860 data4 0x81cdcd4c,0x81cdcd4c, 0x180c0c14,0x180c0c14
861 data4 0x26131335,0x26131335, 0xc3ecec2f,0xc3ecec2f
862 data4 0xbe5f5fe1,0xbe5f5fe1, 0x359797a2,0x359797a2
863 data4 0x884444cc,0x884444cc, 0x2e171739,0x2e171739
864 data4 0x93c4c457,0x93c4c457, 0x55a7a7f2,0x55a7a7f2
865 data4 0xfc7e7e82,0xfc7e7e82, 0x7a3d3d47,0x7a3d3d47
866 data4 0xc86464ac,0xc86464ac, 0xba5d5de7,0xba5d5de7
867 data4 0x3219192b,0x3219192b, 0xe6737395,0xe6737395
868 data4 0xc06060a0,0xc06060a0, 0x19818198,0x19818198
869 data4 0x9e4f4fd1,0x9e4f4fd1, 0xa3dcdc7f,0xa3dcdc7f
870 data4 0x44222266,0x44222266, 0x542a2a7e,0x542a2a7e
871 data4 0x3b9090ab,0x3b9090ab, 0x0b888883,0x0b888883
872 data4 0x8c4646ca,0x8c4646ca, 0xc7eeee29,0xc7eeee29
873 data4 0x6bb8b8d3,0x6bb8b8d3, 0x2814143c,0x2814143c
874 data4 0xa7dede79,0xa7dede79, 0xbc5e5ee2,0xbc5e5ee2
875 data4 0x160b0b1d,0x160b0b1d, 0xaddbdb76,0xaddbdb76
876 data4 0xdbe0e03b,0xdbe0e03b, 0x64323256,0x64323256
877 data4 0x743a3a4e,0x743a3a4e, 0x140a0a1e,0x140a0a1e
878 data4 0x924949db,0x924949db, 0x0c06060a,0x0c06060a
879 data4 0x4824246c,0x4824246c, 0xb85c5ce4,0xb85c5ce4
880 data4 0x9fc2c25d,0x9fc2c25d, 0xbdd3d36e,0xbdd3d36e
881 data4 0x43acacef,0x43acacef, 0xc46262a6,0xc46262a6
882 data4 0x399191a8,0x399191a8, 0x319595a4,0x319595a4
883 data4 0xd3e4e437,0xd3e4e437, 0xf279798b,0xf279798b
884 data4 0xd5e7e732,0xd5e7e732, 0x8bc8c843,0x8bc8c843
885 data4 0x6e373759,0x6e373759, 0xda6d6db7,0xda6d6db7
886 data4 0x018d8d8c,0x018d8d8c, 0xb1d5d564,0xb1d5d564
887 data4 0x9c4e4ed2,0x9c4e4ed2, 0x49a9a9e0,0x49a9a9e0
888 data4 0xd86c6cb4,0xd86c6cb4, 0xac5656fa,0xac5656fa
889 data4 0xf3f4f407,0xf3f4f407, 0xcfeaea25,0xcfeaea25
890 data4 0xca6565af,0xca6565af, 0xf47a7a8e,0xf47a7a8e
891 data4 0x47aeaee9,0x47aeaee9, 0x10080818,0x10080818
892 data4 0x6fbabad5,0x6fbabad5, 0xf0787888,0xf0787888
893 data4 0x4a25256f,0x4a25256f, 0x5c2e2e72,0x5c2e2e72
894 data4 0x381c1c24,0x381c1c24, 0x57a6a6f1,0x57a6a6f1
895 data4 0x73b4b4c7,0x73b4b4c7, 0x97c6c651,0x97c6c651
896 data4 0xcbe8e823,0xcbe8e823, 0xa1dddd7c,0xa1dddd7c
897 data4 0xe874749c,0xe874749c, 0x3e1f1f21,0x3e1f1f21
898 data4 0x964b4bdd,0x964b4bdd, 0x61bdbddc,0x61bdbddc
899 data4 0x0d8b8b86,0x0d8b8b86, 0x0f8a8a85,0x0f8a8a85
900 data4 0xe0707090,0xe0707090, 0x7c3e3e42,0x7c3e3e42
901 data4 0x71b5b5c4,0x71b5b5c4, 0xcc6666aa,0xcc6666aa
902 data4 0x904848d8,0x904848d8, 0x06030305,0x06030305
903 data4 0xf7f6f601,0xf7f6f601, 0x1c0e0e12,0x1c0e0e12
904 data4 0xc26161a3,0xc26161a3, 0x6a35355f,0x6a35355f
905 data4 0xae5757f9,0xae5757f9, 0x69b9b9d0,0x69b9b9d0
906 data4 0x17868691,0x17868691, 0x99c1c158,0x99c1c158
907 data4 0x3a1d1d27,0x3a1d1d27, 0x279e9eb9,0x279e9eb9
908 data4 0xd9e1e138,0xd9e1e138, 0xebf8f813,0xebf8f813
909 data4 0x2b9898b3,0x2b9898b3, 0x22111133,0x22111133
910 data4 0xd26969bb,0xd26969bb, 0xa9d9d970,0xa9d9d970
911 data4 0x078e8e89,0x078e8e89, 0x339494a7,0x339494a7
912 data4 0x2d9b9bb6,0x2d9b9bb6, 0x3c1e1e22,0x3c1e1e22
913 data4 0x15878792,0x15878792, 0xc9e9e920,0xc9e9e920
914 data4 0x87cece49,0x87cece49, 0xaa5555ff,0xaa5555ff
915 data4 0x50282878,0x50282878, 0xa5dfdf7a,0xa5dfdf7a
916 data4 0x038c8c8f,0x038c8c8f, 0x59a1a1f8,0x59a1a1f8
917 data4 0x09898980,0x09898980, 0x1a0d0d17,0x1a0d0d17
918 data4 0x65bfbfda,0x65bfbfda, 0xd7e6e631,0xd7e6e631
919 data4 0x844242c6,0x844242c6, 0xd06868b8,0xd06868b8
920 data4 0x824141c3,0x824141c3, 0x299999b0,0x299999b0
921 data4 0x5a2d2d77,0x5a2d2d77, 0x1e0f0f11,0x1e0f0f11
922 data4 0x7bb0b0cb,0x7bb0b0cb, 0xa85454fc,0xa85454fc
923 data4 0x6dbbbbd6,0x6dbbbbd6, 0x2c16163a,0x2c16163a
924// Te4: 256 one-byte entries (the AES S-box), starting 2048 bytes past AES_Te
925 data1 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
926 data1 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
927 data1 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
928 data1 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
929 data1 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
930 data1 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
931 data1 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
932 data1 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
933 data1 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
934 data1 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
935 data1 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
936 data1 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
937 data1 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
938 data1 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
939 data1 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
940 data1 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
941 data1 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
942 data1 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
943 data1 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
944 data1 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
945 data1 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
946 data1 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
947 data1 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
948 data1 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
949 data1 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
950 data1 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
951 data1 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
952 data1 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
953 data1 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
954 data1 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
955 data1 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
956 data1 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
957.size AES_Te#,2048+256 // HP-UX assembler fails to ".-AES_Te#"
958
959.align 64
960.global AES_Td#
961.type AES_Td#,@object
962AES_Td: data4 0x51f4a750,0x51f4a750, 0x7e416553,0x7e416553
963 data4 0x1a17a4c3,0x1a17a4c3, 0x3a275e96,0x3a275e96
964 data4 0x3bab6bcb,0x3bab6bcb, 0x1f9d45f1,0x1f9d45f1
965 data4 0xacfa58ab,0xacfa58ab, 0x4be30393,0x4be30393
966 data4 0x2030fa55,0x2030fa55, 0xad766df6,0xad766df6
967 data4 0x88cc7691,0x88cc7691, 0xf5024c25,0xf5024c25
968 data4 0x4fe5d7fc,0x4fe5d7fc, 0xc52acbd7,0xc52acbd7
969 data4 0x26354480,0x26354480, 0xb562a38f,0xb562a38f
970 data4 0xdeb15a49,0xdeb15a49, 0x25ba1b67,0x25ba1b67
971 data4 0x45ea0e98,0x45ea0e98, 0x5dfec0e1,0x5dfec0e1
972 data4 0xc32f7502,0xc32f7502, 0x814cf012,0x814cf012
973 data4 0x8d4697a3,0x8d4697a3, 0x6bd3f9c6,0x6bd3f9c6
974 data4 0x038f5fe7,0x038f5fe7, 0x15929c95,0x15929c95
975 data4 0xbf6d7aeb,0xbf6d7aeb, 0x955259da,0x955259da
976 data4 0xd4be832d,0xd4be832d, 0x587421d3,0x587421d3
977 data4 0x49e06929,0x49e06929, 0x8ec9c844,0x8ec9c844
978 data4 0x75c2896a,0x75c2896a, 0xf48e7978,0xf48e7978
979 data4 0x99583e6b,0x99583e6b, 0x27b971dd,0x27b971dd
980 data4 0xbee14fb6,0xbee14fb6, 0xf088ad17,0xf088ad17
981 data4 0xc920ac66,0xc920ac66, 0x7dce3ab4,0x7dce3ab4
982 data4 0x63df4a18,0x63df4a18, 0xe51a3182,0xe51a3182
983 data4 0x97513360,0x97513360, 0x62537f45,0x62537f45
984 data4 0xb16477e0,0xb16477e0, 0xbb6bae84,0xbb6bae84
985 data4 0xfe81a01c,0xfe81a01c, 0xf9082b94,0xf9082b94
986 data4 0x70486858,0x70486858, 0x8f45fd19,0x8f45fd19
987 data4 0x94de6c87,0x94de6c87, 0x527bf8b7,0x527bf8b7
988 data4 0xab73d323,0xab73d323, 0x724b02e2,0x724b02e2
989 data4 0xe31f8f57,0xe31f8f57, 0x6655ab2a,0x6655ab2a
990 data4 0xb2eb2807,0xb2eb2807, 0x2fb5c203,0x2fb5c203
991 data4 0x86c57b9a,0x86c57b9a, 0xd33708a5,0xd33708a5
992 data4 0x302887f2,0x302887f2, 0x23bfa5b2,0x23bfa5b2
993 data4 0x02036aba,0x02036aba, 0xed16825c,0xed16825c
994 data4 0x8acf1c2b,0x8acf1c2b, 0xa779b492,0xa779b492
995 data4 0xf307f2f0,0xf307f2f0, 0x4e69e2a1,0x4e69e2a1
996 data4 0x65daf4cd,0x65daf4cd, 0x0605bed5,0x0605bed5
997 data4 0xd134621f,0xd134621f, 0xc4a6fe8a,0xc4a6fe8a
998 data4 0x342e539d,0x342e539d, 0xa2f355a0,0xa2f355a0
999 data4 0x058ae132,0x058ae132, 0xa4f6eb75,0xa4f6eb75
1000 data4 0x0b83ec39,0x0b83ec39, 0x4060efaa,0x4060efaa
1001 data4 0x5e719f06,0x5e719f06, 0xbd6e1051,0xbd6e1051
1002 data4 0x3e218af9,0x3e218af9, 0x96dd063d,0x96dd063d
1003 data4 0xdd3e05ae,0xdd3e05ae, 0x4de6bd46,0x4de6bd46
1004 data4 0x91548db5,0x91548db5, 0x71c45d05,0x71c45d05
1005 data4 0x0406d46f,0x0406d46f, 0x605015ff,0x605015ff
1006 data4 0x1998fb24,0x1998fb24, 0xd6bde997,0xd6bde997
1007 data4 0x894043cc,0x894043cc, 0x67d99e77,0x67d99e77
1008 data4 0xb0e842bd,0xb0e842bd, 0x07898b88,0x07898b88
1009 data4 0xe7195b38,0xe7195b38, 0x79c8eedb,0x79c8eedb
1010 data4 0xa17c0a47,0xa17c0a47, 0x7c420fe9,0x7c420fe9
1011 data4 0xf8841ec9,0xf8841ec9, 0x00000000,0x00000000
1012 data4 0x09808683,0x09808683, 0x322bed48,0x322bed48
1013 data4 0x1e1170ac,0x1e1170ac, 0x6c5a724e,0x6c5a724e
1014 data4 0xfd0efffb,0xfd0efffb, 0x0f853856,0x0f853856
1015 data4 0x3daed51e,0x3daed51e, 0x362d3927,0x362d3927
1016 data4 0x0a0fd964,0x0a0fd964, 0x685ca621,0x685ca621
1017 data4 0x9b5b54d1,0x9b5b54d1, 0x24362e3a,0x24362e3a
1018 data4 0x0c0a67b1,0x0c0a67b1, 0x9357e70f,0x9357e70f
1019 data4 0xb4ee96d2,0xb4ee96d2, 0x1b9b919e,0x1b9b919e
1020 data4 0x80c0c54f,0x80c0c54f, 0x61dc20a2,0x61dc20a2
1021 data4 0x5a774b69,0x5a774b69, 0x1c121a16,0x1c121a16
1022 data4 0xe293ba0a,0xe293ba0a, 0xc0a02ae5,0xc0a02ae5
1023 data4 0x3c22e043,0x3c22e043, 0x121b171d,0x121b171d
1024 data4 0x0e090d0b,0x0e090d0b, 0xf28bc7ad,0xf28bc7ad
1025 data4 0x2db6a8b9,0x2db6a8b9, 0x141ea9c8,0x141ea9c8
1026 data4 0x57f11985,0x57f11985, 0xaf75074c,0xaf75074c
1027 data4 0xee99ddbb,0xee99ddbb, 0xa37f60fd,0xa37f60fd
1028 data4 0xf701269f,0xf701269f, 0x5c72f5bc,0x5c72f5bc
1029 data4 0x44663bc5,0x44663bc5, 0x5bfb7e34,0x5bfb7e34
1030 data4 0x8b432976,0x8b432976, 0xcb23c6dc,0xcb23c6dc
1031 data4 0xb6edfc68,0xb6edfc68, 0xb8e4f163,0xb8e4f163
1032 data4 0xd731dcca,0xd731dcca, 0x42638510,0x42638510
1033 data4 0x13972240,0x13972240, 0x84c61120,0x84c61120
1034 data4 0x854a247d,0x854a247d, 0xd2bb3df8,0xd2bb3df8
1035 data4 0xaef93211,0xaef93211, 0xc729a16d,0xc729a16d
1036 data4 0x1d9e2f4b,0x1d9e2f4b, 0xdcb230f3,0xdcb230f3
1037 data4 0x0d8652ec,0x0d8652ec, 0x77c1e3d0,0x77c1e3d0
1038 data4 0x2bb3166c,0x2bb3166c, 0xa970b999,0xa970b999
1039 data4 0x119448fa,0x119448fa, 0x47e96422,0x47e96422
1040 data4 0xa8fc8cc4,0xa8fc8cc4, 0xa0f03f1a,0xa0f03f1a
1041 data4 0x567d2cd8,0x567d2cd8, 0x223390ef,0x223390ef
1042 data4 0x87494ec7,0x87494ec7, 0xd938d1c1,0xd938d1c1
1043 data4 0x8ccaa2fe,0x8ccaa2fe, 0x98d40b36,0x98d40b36
1044 data4 0xa6f581cf,0xa6f581cf, 0xa57ade28,0xa57ade28
1045 data4 0xdab78e26,0xdab78e26, 0x3fadbfa4,0x3fadbfa4
1046 data4 0x2c3a9de4,0x2c3a9de4, 0x5078920d,0x5078920d
1047 data4 0x6a5fcc9b,0x6a5fcc9b, 0x547e4662,0x547e4662
1048 data4 0xf68d13c2,0xf68d13c2, 0x90d8b8e8,0x90d8b8e8
1049 data4 0x2e39f75e,0x2e39f75e, 0x82c3aff5,0x82c3aff5
1050 data4 0x9f5d80be,0x9f5d80be, 0x69d0937c,0x69d0937c
1051 data4 0x6fd52da9,0x6fd52da9, 0xcf2512b3,0xcf2512b3
1052 data4 0xc8ac993b,0xc8ac993b, 0x10187da7,0x10187da7
1053 data4 0xe89c636e,0xe89c636e, 0xdb3bbb7b,0xdb3bbb7b
1054 data4 0xcd267809,0xcd267809, 0x6e5918f4,0x6e5918f4
1055 data4 0xec9ab701,0xec9ab701, 0x834f9aa8,0x834f9aa8
1056 data4 0xe6956e65,0xe6956e65, 0xaaffe67e,0xaaffe67e
1057 data4 0x21bccf08,0x21bccf08, 0xef15e8e6,0xef15e8e6
1058 data4 0xbae79bd9,0xbae79bd9, 0x4a6f36ce,0x4a6f36ce
1059 data4 0xea9f09d4,0xea9f09d4, 0x29b07cd6,0x29b07cd6
1060 data4 0x31a4b2af,0x31a4b2af, 0x2a3f2331,0x2a3f2331
1061 data4 0xc6a59430,0xc6a59430, 0x35a266c0,0x35a266c0
1062 data4 0x744ebc37,0x744ebc37, 0xfc82caa6,0xfc82caa6
1063 data4 0xe090d0b0,0xe090d0b0, 0x33a7d815,0x33a7d815
1064 data4 0xf104984a,0xf104984a, 0x41ecdaf7,0x41ecdaf7
1065 data4 0x7fcd500e,0x7fcd500e, 0x1791f62f,0x1791f62f
1066 data4 0x764dd68d,0x764dd68d, 0x43efb04d,0x43efb04d
1067 data4 0xccaa4d54,0xccaa4d54, 0xe49604df,0xe49604df
1068 data4 0x9ed1b5e3,0x9ed1b5e3, 0x4c6a881b,0x4c6a881b
1069 data4 0xc12c1fb8,0xc12c1fb8, 0x4665517f,0x4665517f
1070 data4 0x9d5eea04,0x9d5eea04, 0x018c355d,0x018c355d
1071 data4 0xfa877473,0xfa877473, 0xfb0b412e,0xfb0b412e
1072 data4 0xb3671d5a,0xb3671d5a, 0x92dbd252,0x92dbd252
1073 data4 0xe9105633,0xe9105633, 0x6dd64713,0x6dd64713
1074 data4 0x9ad7618c,0x9ad7618c, 0x37a10c7a,0x37a10c7a
1075 data4 0x59f8148e,0x59f8148e, 0xeb133c89,0xeb133c89
1076 data4 0xcea927ee,0xcea927ee, 0xb761c935,0xb761c935
1077 data4 0xe11ce5ed,0xe11ce5ed, 0x7a47b13c,0x7a47b13c
1078 data4 0x9cd2df59,0x9cd2df59, 0x55f2733f,0x55f2733f
1079 data4 0x1814ce79,0x1814ce79, 0x73c737bf,0x73c737bf
1080 data4 0x53f7cdea,0x53f7cdea, 0x5ffdaa5b,0x5ffdaa5b
1081 data4 0xdf3d6f14,0xdf3d6f14, 0x7844db86,0x7844db86
1082 data4 0xcaaff381,0xcaaff381, 0xb968c43e,0xb968c43e
1083 data4 0x3824342c,0x3824342c, 0xc2a3405f,0xc2a3405f
1084 data4 0x161dc372,0x161dc372, 0xbce2250c,0xbce2250c
1085 data4 0x283c498b,0x283c498b, 0xff0d9541,0xff0d9541
1086 data4 0x39a80171,0x39a80171, 0x080cb3de,0x080cb3de
1087 data4 0xd8b4e49c,0xd8b4e49c, 0x6456c190,0x6456c190
1088 data4 0x7bcb8461,0x7bcb8461, 0xd532b670,0xd532b670
1089 data4 0x486c5c74,0x486c5c74, 0xd0b85742,0xd0b85742
1090// Td4: 256 one-byte entries (the inverse AES S-box), starting 2048 bytes past AES_Td
1091 data1 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
1092 data1 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
1093 data1 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
1094 data1 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
1095 data1 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
1096 data1 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
1097 data1 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
1098 data1 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
1099 data1 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
1100 data1 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
1101 data1 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
1102 data1 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
1103 data1 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
1104 data1 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
1105 data1 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
1106 data1 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
1107 data1 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
1108 data1 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
1109 data1 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
1110 data1 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
1111 data1 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
1112 data1 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
1113 data1 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
1114 data1 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
1115 data1 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
1116 data1 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
1117 data1 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
1118 data1 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
1119 data1 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
1120 data1 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
1121 data1 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
1122 data1 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1123.size AES_Td#,2048+256 // HP-UX assembler fails to ".-AES_Td#"
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl
deleted file mode 100644
index ce427655ef..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-ppc.pl
+++ /dev/null
@@ -1,1176 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, page boundaries, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
15# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact runs
17# at 1/3 of ppc_AES_decrypt speed.
18
# The first command-line argument names the target flavour.  It decides
# the pointer size ($SIZE_T, in bytes) and the matching mnemonics for
# store-with-update ($STU), load ($POP) and store ($PUSH) that the
# generated prologues and epilogues are built from.
$flavour = shift;

# /64/ is tried before /32/, so a flavour string matching both is treated
# as 64-bit; a string matching neither aborts the script.
($SIZE_T, $STU, $POP, $PUSH) =
      $flavour =~ /64/ ? (8, "stdu", "ld",  "std")
    : $flavour =~ /32/ ? (4, "stwu", "lwz", "stw")
    :                    die "nonsense $flavour";
32
# Locate the ppc-xlate.pl translator: first in the directory this script
# was invoked from, then in ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Route everything written to STDOUT through the translator.  The next
# command-line argument (presumably the output file name) is handed to
# the translator unchanged; when it is absent an empty string is passed.
#
# The low-precedence "or" matters: with "||" the die attached to the
# shifted argument rather than to open(), so a failed open() was never
# reported and a missing argument was misreported as a failure to call
# the translator.
my $output = shift;
$output = "" unless defined $output;
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Size of the stack frame used by the generated entry points:
# 32 pointer-sized slots.
$FRAME=32*$SIZE_T;
41
# Append one ".long" directive per argument to the global $code buffer.
#
# Each 32-bit word is written twice on its line, so every table entry
# occupies 8 bytes in the generated assembly.
#
# Arguments: a list of integer table words.
# Returns:   nothing useful; the effect is the text appended to $code.
#
# No prototype is declared: the sub consumes an argument list, and the
# former empty "()" prototype only worked because callers use the
# &_data_word(...) form, which bypasses prototype checking.
sub _data_word
{
    my $word;

    # Test with defined() rather than for truth: 0x00000000 is a
    # legitimate table entry and must not end the loop early.
    while (defined($word = shift)) {
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
}
46
# Register allocation.  Each variable holds the name of the PowerPC
# general-purpose register that plays that role in the code emitted below.

# Stack pointer, TOC pointer and the three routine arguments.
($sp, $toc, $inp, $out, $key) = ("r1", "r2", "r3", "r4", "r5");

# Table base pointers.  $Tbl0 shares r3 with $inp, and $Tbl3 initially
# shares r2 with $toc.
($Tbl0, $Tbl1, $Tbl2, $Tbl3) = ("r3", "r6", "r7", "r2");

# The four 32-bit state words.
($s0, $s1, $s2, $s3) = ("r8", "r9", "r10", "r11");

# Temporaries (these receive the round-key words in the loops below).
($t0, $t1, $t2, $t3) = ("r12", "r13", "r14", "r15");

# Sixteen scratch accumulators in r16..r31.
($acc00, $acc01, $acc02, $acc03,
 $acc04, $acc05, $acc06, $acc07,
 $acc08, $acc09, $acc10, $acc11,
 $acc12, $acc13, $acc14, $acc15) = map { "r$_" } 16 .. 31;

# stay away from TLS pointer
# With 8-byte pointers the temporary that would live in r13 is moved to
# r0.  Otherwise $Tbl3 is moved off r2 onto the register $t0 had, and $t0
# itself moves to r0.  The bare die calls are sanity checks that the
# assignments above still match what this fix-up expects.
if ($SIZE_T == 8) {
    die if ($t1 ne "r13");
    $t1 = "r0";
}
else {
    die if ($Tbl3 ne "r2");
    $Tbl3 = $t0;
    $t0   = "r0";
}

# The compact routines load the 0x80808080 / 0x1b1b1b1b constants into
# the registers otherwise used for $Tbl2 and $Tbl3.
($mask80, $mask1b) = ($Tbl2, $Tbl3);
93
# Emit the two position-independent table locators, LAES_Te and LAES_Td.
#
# Each one saves LR in r0, executes "bcl 20,31,$+4" (a branch-and-link to
# the very next instruction, which leaves that instruction's address in
# LR), copies LR into $Tbl0, adds the distance from that instruction to
# the first entry of its table, restores LR from r0 and returns.  Note
# that r0 is clobbered by a call to either locator.
#
# Layout the offsets rely on (".align 7" starts a 128-byte slot):
#   offset   0  LAES_Te: 6 instructions (24 bytes) + 8 bytes of padding
#   offset  32  LAES_Td: 6 instructions (24 bytes) + padding up to 128
#   offset 128  first table entry, emitted right after this heredoc
# The address captured by mflr is 8 bytes past each label, hence 128-8
# for LAES_Te.  LAES_Td starts 32 bytes later and must also skip the
# 2048-byte word table and the 256-byte byte table that follow, hence
# 128-8-32+2048+256.
#
# The backquoted expressions are left as text here; presumably they are
# evaluated when $code is post-processed later in the file (not visible
# in this part) -- TODO confirm.
94$code.=<<___;
95.machine "any"
96.text
97
98.align 7
99LAES_Te:
100 mflr r0
101 bcl 20,31,\$+4
102 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
103 addi $Tbl0,$Tbl0,`128-8`
104 mtlr r0
105 blr
106 .space `32-24`
107LAES_Td:
108 mflr r0
109 bcl 20,31,\$+4
110 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
111 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
112 mtlr r0
113 blr
114 .space `128-32-24`
115___
116&_data_word(
117 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
181$code.=<<___;
182.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214___
215&_data_word(
216 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
280$code.=<<___;
281.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
315.globl .AES_encrypt
316.align 7
317.AES_encrypt:
318 mflr r0
319 $STU $sp,-$FRAME($sp)
320
321 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
322 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
323 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
324 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
325 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
326 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
327 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
328 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
329 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
330 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
331 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
332 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
333 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
334 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
335 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
336 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
337 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
338 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
339 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
340 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
341 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
342
343 lwz $s0,0($inp)
344 lwz $s1,4($inp)
345 lwz $s2,8($inp)
346 lwz $s3,12($inp)
347 bl LAES_Te
348 bl Lppc_AES_encrypt_compact
349 stw $s0,0($out)
350 stw $s1,4($out)
351 stw $s2,8($out)
352 stw $s3,12($out)
353
354 $POP r0,`$FRAME-$SIZE_T*21`($sp)
355 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
356 $POP r13,`$FRAME-$SIZE_T*19`($sp)
357 $POP r14,`$FRAME-$SIZE_T*18`($sp)
358 $POP r15,`$FRAME-$SIZE_T*17`($sp)
359 $POP r16,`$FRAME-$SIZE_T*16`($sp)
360 $POP r17,`$FRAME-$SIZE_T*15`($sp)
361 $POP r18,`$FRAME-$SIZE_T*14`($sp)
362 $POP r19,`$FRAME-$SIZE_T*13`($sp)
363 $POP r20,`$FRAME-$SIZE_T*12`($sp)
364 $POP r21,`$FRAME-$SIZE_T*11`($sp)
365 $POP r22,`$FRAME-$SIZE_T*10`($sp)
366 $POP r23,`$FRAME-$SIZE_T*9`($sp)
367 $POP r24,`$FRAME-$SIZE_T*8`($sp)
368 $POP r25,`$FRAME-$SIZE_T*7`($sp)
369 $POP r26,`$FRAME-$SIZE_T*6`($sp)
370 $POP r27,`$FRAME-$SIZE_T*5`($sp)
371 $POP r28,`$FRAME-$SIZE_T*4`($sp)
372 $POP r29,`$FRAME-$SIZE_T*3`($sp)
373 $POP r30,`$FRAME-$SIZE_T*2`($sp)
374 $POP r31,`$FRAME-$SIZE_T*1`($sp)
375 mtlr r0
376 addi $sp,$sp,$FRAME
377 blr
378
379.align 4
380Lppc_AES_encrypt:
381 lwz $acc00,240($key)
382 lwz $t0,0($key)
383 lwz $t1,4($key)
384 lwz $t2,8($key)
385 lwz $t3,12($key)
386 addi $Tbl1,$Tbl0,3
387 addi $Tbl2,$Tbl0,2
388 addi $Tbl3,$Tbl0,1
389 addi $acc00,$acc00,-1
390 addi $key,$key,16
391 xor $s0,$s0,$t0
392 xor $s1,$s1,$t1
393 xor $s2,$s2,$t2
394 xor $s3,$s3,$t3
395 mtctr $acc00
396.align 4
397Lenc_loop:
398 rlwinm $acc00,$s0,`32-24+3`,21,28
399 rlwinm $acc01,$s1,`32-24+3`,21,28
400 lwz $t0,0($key)
401 lwz $t1,4($key)
402 rlwinm $acc02,$s2,`32-24+3`,21,28
403 rlwinm $acc03,$s3,`32-24+3`,21,28
404 lwz $t2,8($key)
405 lwz $t3,12($key)
406 rlwinm $acc04,$s1,`32-16+3`,21,28
407 rlwinm $acc05,$s2,`32-16+3`,21,28
408 lwzx $acc00,$Tbl0,$acc00
409 lwzx $acc01,$Tbl0,$acc01
410 rlwinm $acc06,$s3,`32-16+3`,21,28
411 rlwinm $acc07,$s0,`32-16+3`,21,28
412 lwzx $acc02,$Tbl0,$acc02
413 lwzx $acc03,$Tbl0,$acc03
414 rlwinm $acc08,$s2,`32-8+3`,21,28
415 rlwinm $acc09,$s3,`32-8+3`,21,28
416 lwzx $acc04,$Tbl1,$acc04
417 lwzx $acc05,$Tbl1,$acc05
418 rlwinm $acc10,$s0,`32-8+3`,21,28
419 rlwinm $acc11,$s1,`32-8+3`,21,28
420 lwzx $acc06,$Tbl1,$acc06
421 lwzx $acc07,$Tbl1,$acc07
422 rlwinm $acc12,$s3,`0+3`,21,28
423 rlwinm $acc13,$s0,`0+3`,21,28
424 lwzx $acc08,$Tbl2,$acc08
425 lwzx $acc09,$Tbl2,$acc09
426 rlwinm $acc14,$s1,`0+3`,21,28
427 rlwinm $acc15,$s2,`0+3`,21,28
428 lwzx $acc10,$Tbl2,$acc10
429 lwzx $acc11,$Tbl2,$acc11
430 xor $t0,$t0,$acc00
431 xor $t1,$t1,$acc01
432 lwzx $acc12,$Tbl3,$acc12
433 lwzx $acc13,$Tbl3,$acc13
434 xor $t2,$t2,$acc02
435 xor $t3,$t3,$acc03
436 lwzx $acc14,$Tbl3,$acc14
437 lwzx $acc15,$Tbl3,$acc15
438 xor $t0,$t0,$acc04
439 xor $t1,$t1,$acc05
440 xor $t2,$t2,$acc06
441 xor $t3,$t3,$acc07
442 xor $t0,$t0,$acc08
443 xor $t1,$t1,$acc09
444 xor $t2,$t2,$acc10
445 xor $t3,$t3,$acc11
446 xor $s0,$t0,$acc12
447 xor $s1,$t1,$acc13
448 xor $s2,$t2,$acc14
449 xor $s3,$t3,$acc15
450 addi $key,$key,16
451 bdnz- Lenc_loop
452
453 addi $Tbl2,$Tbl0,2048
454 nop
455 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
456 lwz $acc09,`2048+32`($Tbl0)
457 lwz $acc10,`2048+64`($Tbl0)
458 lwz $acc11,`2048+96`($Tbl0)
459 lwz $acc08,`2048+128`($Tbl0)
460 lwz $acc09,`2048+160`($Tbl0)
461 lwz $acc10,`2048+192`($Tbl0)
462 lwz $acc11,`2048+224`($Tbl0)
463 rlwinm $acc00,$s0,`32-24`,24,31
464 rlwinm $acc01,$s1,`32-24`,24,31
465 lwz $t0,0($key)
466 lwz $t1,4($key)
467 rlwinm $acc02,$s2,`32-24`,24,31
468 rlwinm $acc03,$s3,`32-24`,24,31
469 lwz $t2,8($key)
470 lwz $t3,12($key)
471 rlwinm $acc04,$s1,`32-16`,24,31
472 rlwinm $acc05,$s2,`32-16`,24,31
473 lbzx $acc00,$Tbl2,$acc00
474 lbzx $acc01,$Tbl2,$acc01
475 rlwinm $acc06,$s3,`32-16`,24,31
476 rlwinm $acc07,$s0,`32-16`,24,31
477 lbzx $acc02,$Tbl2,$acc02
478 lbzx $acc03,$Tbl2,$acc03
479 rlwinm $acc08,$s2,`32-8`,24,31
480 rlwinm $acc09,$s3,`32-8`,24,31
481 lbzx $acc04,$Tbl2,$acc04
482 lbzx $acc05,$Tbl2,$acc05
483 rlwinm $acc10,$s0,`32-8`,24,31
484 rlwinm $acc11,$s1,`32-8`,24,31
485 lbzx $acc06,$Tbl2,$acc06
486 lbzx $acc07,$Tbl2,$acc07
487 rlwinm $acc12,$s3,`0`,24,31
488 rlwinm $acc13,$s0,`0`,24,31
489 lbzx $acc08,$Tbl2,$acc08
490 lbzx $acc09,$Tbl2,$acc09
491 rlwinm $acc14,$s1,`0`,24,31
492 rlwinm $acc15,$s2,`0`,24,31
493 lbzx $acc10,$Tbl2,$acc10
494 lbzx $acc11,$Tbl2,$acc11
495 rlwinm $s0,$acc00,24,0,7
496 rlwinm $s1,$acc01,24,0,7
497 lbzx $acc12,$Tbl2,$acc12
498 lbzx $acc13,$Tbl2,$acc13
499 rlwinm $s2,$acc02,24,0,7
500 rlwinm $s3,$acc03,24,0,7
501 lbzx $acc14,$Tbl2,$acc14
502 lbzx $acc15,$Tbl2,$acc15
503 rlwimi $s0,$acc04,16,8,15
504 rlwimi $s1,$acc05,16,8,15
505 rlwimi $s2,$acc06,16,8,15
506 rlwimi $s3,$acc07,16,8,15
507 rlwimi $s0,$acc08,8,16,23
508 rlwimi $s1,$acc09,8,16,23
509 rlwimi $s2,$acc10,8,16,23
510 rlwimi $s3,$acc11,8,16,23
511 or $s0,$s0,$acc12
512 or $s1,$s1,$acc13
513 or $s2,$s2,$acc14
514 or $s3,$s3,$acc15
515 xor $s0,$s0,$t0
516 xor $s1,$s1,$t1
517 xor $s2,$s2,$t2
518 xor $s3,$s3,$t3
519 blr
520
521.align 4
522Lppc_AES_encrypt_compact:
523 lwz $acc00,240($key)
524 lwz $t0,0($key)
525 lwz $t1,4($key)
526 lwz $t2,8($key)
527 lwz $t3,12($key)
528 addi $Tbl1,$Tbl0,2048
529 lis $mask80,0x8080
530 lis $mask1b,0x1b1b
531 addi $key,$key,16
532 ori $mask80,$mask80,0x8080
533 ori $mask1b,$mask1b,0x1b1b
534 mtctr $acc00
535.align 4
536Lenc_compact_loop:
537 xor $s0,$s0,$t0
538 xor $s1,$s1,$t1
539 xor $s2,$s2,$t2
540 xor $s3,$s3,$t3
541 rlwinm $acc00,$s0,`32-24`,24,31
542 rlwinm $acc01,$s1,`32-24`,24,31
543 rlwinm $acc02,$s2,`32-24`,24,31
544 rlwinm $acc03,$s3,`32-24`,24,31
545 lbzx $acc00,$Tbl1,$acc00
546 lbzx $acc01,$Tbl1,$acc01
547 rlwinm $acc04,$s1,`32-16`,24,31
548 rlwinm $acc05,$s2,`32-16`,24,31
549 lbzx $acc02,$Tbl1,$acc02
550 lbzx $acc03,$Tbl1,$acc03
551 rlwinm $acc06,$s3,`32-16`,24,31
552 rlwinm $acc07,$s0,`32-16`,24,31
553 lbzx $acc04,$Tbl1,$acc04
554 lbzx $acc05,$Tbl1,$acc05
555 rlwinm $acc08,$s2,`32-8`,24,31
556 rlwinm $acc09,$s3,`32-8`,24,31
557 lbzx $acc06,$Tbl1,$acc06
558 lbzx $acc07,$Tbl1,$acc07
559 rlwinm $acc10,$s0,`32-8`,24,31
560 rlwinm $acc11,$s1,`32-8`,24,31
561 lbzx $acc08,$Tbl1,$acc08
562 lbzx $acc09,$Tbl1,$acc09
563 rlwinm $acc12,$s3,`0`,24,31
564 rlwinm $acc13,$s0,`0`,24,31
565 lbzx $acc10,$Tbl1,$acc10
566 lbzx $acc11,$Tbl1,$acc11
567 rlwinm $acc14,$s1,`0`,24,31
568 rlwinm $acc15,$s2,`0`,24,31
569 lbzx $acc12,$Tbl1,$acc12
570 lbzx $acc13,$Tbl1,$acc13
571 rlwinm $s0,$acc00,24,0,7
572 rlwinm $s1,$acc01,24,0,7
573 lbzx $acc14,$Tbl1,$acc14
574 lbzx $acc15,$Tbl1,$acc15
575 rlwinm $s2,$acc02,24,0,7
576 rlwinm $s3,$acc03,24,0,7
577 rlwimi $s0,$acc04,16,8,15
578 rlwimi $s1,$acc05,16,8,15
579 rlwimi $s2,$acc06,16,8,15
580 rlwimi $s3,$acc07,16,8,15
581 rlwimi $s0,$acc08,8,16,23
582 rlwimi $s1,$acc09,8,16,23
583 rlwimi $s2,$acc10,8,16,23
584 rlwimi $s3,$acc11,8,16,23
585 lwz $t0,0($key)
586 lwz $t1,4($key)
587 or $s0,$s0,$acc12
588 or $s1,$s1,$acc13
589 lwz $t2,8($key)
590 lwz $t3,12($key)
591 or $s2,$s2,$acc14
592 or $s3,$s3,$acc15
593
594 addi $key,$key,16
595 bdz Lenc_compact_done
596
597 and $acc00,$s0,$mask80 # r1=r0&0x80808080
598 and $acc01,$s1,$mask80
599 and $acc02,$s2,$mask80
600 and $acc03,$s3,$mask80
601 srwi $acc04,$acc00,7 # r1>>7
602 srwi $acc05,$acc01,7
603 srwi $acc06,$acc02,7
604 srwi $acc07,$acc03,7
605 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
606 andc $acc09,$s1,$mask80
607 andc $acc10,$s2,$mask80
608 andc $acc11,$s3,$mask80
609 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
610 sub $acc01,$acc01,$acc05
611 sub $acc02,$acc02,$acc06
612 sub $acc03,$acc03,$acc07
613 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
614 add $acc09,$acc09,$acc09
615 add $acc10,$acc10,$acc10
616 add $acc11,$acc11,$acc11
617 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
618 and $acc01,$acc01,$mask1b
619 and $acc02,$acc02,$mask1b
620 and $acc03,$acc03,$mask1b
621 xor $acc00,$acc00,$acc08 # r2
622 xor $acc01,$acc01,$acc09
623 xor $acc02,$acc02,$acc10
624 xor $acc03,$acc03,$acc11
625
626 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
627 rotlwi $acc13,$s1,16
628 rotlwi $acc14,$s2,16
629 rotlwi $acc15,$s3,16
630 xor $s0,$s0,$acc00 # r0^r2
631 xor $s1,$s1,$acc01
632 xor $s2,$s2,$acc02
633 xor $s3,$s3,$acc03
634 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
635 rotrwi $s1,$s1,24
636 rotrwi $s2,$s2,24
637 rotrwi $s3,$s3,24
638 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
639 xor $s1,$s1,$acc01
640 xor $s2,$s2,$acc02
641 xor $s3,$s3,$acc03
642 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
643 rotlwi $acc09,$acc13,8
644 rotlwi $acc10,$acc14,8
645 rotlwi $acc11,$acc15,8
646 xor $s0,$s0,$acc12 #
647 xor $s1,$s1,$acc13
648 xor $s2,$s2,$acc14
649 xor $s3,$s3,$acc15
650 xor $s0,$s0,$acc08 #
651 xor $s1,$s1,$acc09
652 xor $s2,$s2,$acc10
653 xor $s3,$s3,$acc11
654
655 b Lenc_compact_loop
656.align 4
657Lenc_compact_done:
658 xor $s0,$s0,$t0
659 xor $s1,$s1,$t1
660 xor $s2,$s2,$t2
661 xor $s3,$s3,$t3
662 blr
663
664.globl .AES_decrypt
665.align 7
666.AES_decrypt:
667 mflr r0
668 $STU $sp,-$FRAME($sp)
669
670 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
671 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
672 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
673 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
674 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
675 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
676 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
677 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
678 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
679 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
680 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
681 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
682 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
683 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
684 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
685 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
686 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
687 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
688 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
689 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
690 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
691
692 lwz $s0,0($inp)
693 lwz $s1,4($inp)
694 lwz $s2,8($inp)
695 lwz $s3,12($inp)
696 bl LAES_Td
697 bl Lppc_AES_decrypt_compact
698 stw $s0,0($out)
699 stw $s1,4($out)
700 stw $s2,8($out)
701 stw $s3,12($out)
702
703 $POP r0,`$FRAME-$SIZE_T*21`($sp)
704 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
705 $POP r13,`$FRAME-$SIZE_T*19`($sp)
706 $POP r14,`$FRAME-$SIZE_T*18`($sp)
707 $POP r15,`$FRAME-$SIZE_T*17`($sp)
708 $POP r16,`$FRAME-$SIZE_T*16`($sp)
709 $POP r17,`$FRAME-$SIZE_T*15`($sp)
710 $POP r18,`$FRAME-$SIZE_T*14`($sp)
711 $POP r19,`$FRAME-$SIZE_T*13`($sp)
712 $POP r20,`$FRAME-$SIZE_T*12`($sp)
713 $POP r21,`$FRAME-$SIZE_T*11`($sp)
714 $POP r22,`$FRAME-$SIZE_T*10`($sp)
715 $POP r23,`$FRAME-$SIZE_T*9`($sp)
716 $POP r24,`$FRAME-$SIZE_T*8`($sp)
717 $POP r25,`$FRAME-$SIZE_T*7`($sp)
718 $POP r26,`$FRAME-$SIZE_T*6`($sp)
719 $POP r27,`$FRAME-$SIZE_T*5`($sp)
720 $POP r28,`$FRAME-$SIZE_T*4`($sp)
721 $POP r29,`$FRAME-$SIZE_T*3`($sp)
722 $POP r30,`$FRAME-$SIZE_T*2`($sp)
723 $POP r31,`$FRAME-$SIZE_T*1`($sp)
724 mtlr r0
725 addi $sp,$sp,$FRAME
726 blr
727
728.align 4
729Lppc_AES_decrypt:
730 lwz $acc00,240($key)
731 lwz $t0,0($key)
732 lwz $t1,4($key)
733 lwz $t2,8($key)
734 lwz $t3,12($key)
735 addi $Tbl1,$Tbl0,3
736 addi $Tbl2,$Tbl0,2
737 addi $Tbl3,$Tbl0,1
738 addi $acc00,$acc00,-1
739 addi $key,$key,16
740 xor $s0,$s0,$t0
741 xor $s1,$s1,$t1
742 xor $s2,$s2,$t2
743 xor $s3,$s3,$t3
744 mtctr $acc00
745.align 4
746Ldec_loop:
747 rlwinm $acc00,$s0,`32-24+3`,21,28
748 rlwinm $acc01,$s1,`32-24+3`,21,28
749 lwz $t0,0($key)
750 lwz $t1,4($key)
751 rlwinm $acc02,$s2,`32-24+3`,21,28
752 rlwinm $acc03,$s3,`32-24+3`,21,28
753 lwz $t2,8($key)
754 lwz $t3,12($key)
755 rlwinm $acc04,$s3,`32-16+3`,21,28
756 rlwinm $acc05,$s0,`32-16+3`,21,28
757 lwzx $acc00,$Tbl0,$acc00
758 lwzx $acc01,$Tbl0,$acc01
759 rlwinm $acc06,$s1,`32-16+3`,21,28
760 rlwinm $acc07,$s2,`32-16+3`,21,28
761 lwzx $acc02,$Tbl0,$acc02
762 lwzx $acc03,$Tbl0,$acc03
763 rlwinm $acc08,$s2,`32-8+3`,21,28
764 rlwinm $acc09,$s3,`32-8+3`,21,28
765 lwzx $acc04,$Tbl1,$acc04
766 lwzx $acc05,$Tbl1,$acc05
767 rlwinm $acc10,$s0,`32-8+3`,21,28
768 rlwinm $acc11,$s1,`32-8+3`,21,28
769 lwzx $acc06,$Tbl1,$acc06
770 lwzx $acc07,$Tbl1,$acc07
771 rlwinm $acc12,$s1,`0+3`,21,28
772 rlwinm $acc13,$s2,`0+3`,21,28
773 lwzx $acc08,$Tbl2,$acc08
774 lwzx $acc09,$Tbl2,$acc09
775 rlwinm $acc14,$s3,`0+3`,21,28
776 rlwinm $acc15,$s0,`0+3`,21,28
777 lwzx $acc10,$Tbl2,$acc10
778 lwzx $acc11,$Tbl2,$acc11
779 xor $t0,$t0,$acc00
780 xor $t1,$t1,$acc01
781 lwzx $acc12,$Tbl3,$acc12
782 lwzx $acc13,$Tbl3,$acc13
783 xor $t2,$t2,$acc02
784 xor $t3,$t3,$acc03
785 lwzx $acc14,$Tbl3,$acc14
786 lwzx $acc15,$Tbl3,$acc15
787 xor $t0,$t0,$acc04
788 xor $t1,$t1,$acc05
789 xor $t2,$t2,$acc06
790 xor $t3,$t3,$acc07
791 xor $t0,$t0,$acc08
792 xor $t1,$t1,$acc09
793 xor $t2,$t2,$acc10
794 xor $t3,$t3,$acc11
795 xor $s0,$t0,$acc12
796 xor $s1,$t1,$acc13
797 xor $s2,$t2,$acc14
798 xor $s3,$t3,$acc15
799 addi $key,$key,16
800 bdnz- Ldec_loop
801
802 addi $Tbl2,$Tbl0,2048
803 nop
804 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
805 lwz $acc09,`2048+32`($Tbl0)
806 lwz $acc10,`2048+64`($Tbl0)
807 lwz $acc11,`2048+96`($Tbl0)
808 lwz $acc08,`2048+128`($Tbl0)
809 lwz $acc09,`2048+160`($Tbl0)
810 lwz $acc10,`2048+192`($Tbl0)
811 lwz $acc11,`2048+224`($Tbl0)
812 rlwinm $acc00,$s0,`32-24`,24,31
813 rlwinm $acc01,$s1,`32-24`,24,31
814 lwz $t0,0($key)
815 lwz $t1,4($key)
816 rlwinm $acc02,$s2,`32-24`,24,31
817 rlwinm $acc03,$s3,`32-24`,24,31
818 lwz $t2,8($key)
819 lwz $t3,12($key)
820 rlwinm $acc04,$s3,`32-16`,24,31
821 rlwinm $acc05,$s0,`32-16`,24,31
822 lbzx $acc00,$Tbl2,$acc00
823 lbzx $acc01,$Tbl2,$acc01
824 rlwinm $acc06,$s1,`32-16`,24,31
825 rlwinm $acc07,$s2,`32-16`,24,31
826 lbzx $acc02,$Tbl2,$acc02
827 lbzx $acc03,$Tbl2,$acc03
828 rlwinm $acc08,$s2,`32-8`,24,31
829 rlwinm $acc09,$s3,`32-8`,24,31
830 lbzx $acc04,$Tbl2,$acc04
831 lbzx $acc05,$Tbl2,$acc05
832 rlwinm $acc10,$s0,`32-8`,24,31
833 rlwinm $acc11,$s1,`32-8`,24,31
834 lbzx $acc06,$Tbl2,$acc06
835 lbzx $acc07,$Tbl2,$acc07
836 rlwinm $acc12,$s1,`0`,24,31
837 rlwinm $acc13,$s2,`0`,24,31
838 lbzx $acc08,$Tbl2,$acc08
839 lbzx $acc09,$Tbl2,$acc09
840 rlwinm $acc14,$s3,`0`,24,31
841 rlwinm $acc15,$s0,`0`,24,31
842 lbzx $acc10,$Tbl2,$acc10
843 lbzx $acc11,$Tbl2,$acc11
844 rlwinm $s0,$acc00,24,0,7
845 rlwinm $s1,$acc01,24,0,7
846 lbzx $acc12,$Tbl2,$acc12
847 lbzx $acc13,$Tbl2,$acc13
848 rlwinm $s2,$acc02,24,0,7
849 rlwinm $s3,$acc03,24,0,7
850 lbzx $acc14,$Tbl2,$acc14
851 lbzx $acc15,$Tbl2,$acc15
852 rlwimi $s0,$acc04,16,8,15
853 rlwimi $s1,$acc05,16,8,15
854 rlwimi $s2,$acc06,16,8,15
855 rlwimi $s3,$acc07,16,8,15
856 rlwimi $s0,$acc08,8,16,23
857 rlwimi $s1,$acc09,8,16,23
858 rlwimi $s2,$acc10,8,16,23
859 rlwimi $s3,$acc11,8,16,23
860 or $s0,$s0,$acc12
861 or $s1,$s1,$acc13
862 or $s2,$s2,$acc14
863 or $s3,$s3,$acc15
864 xor $s0,$s0,$t0
865 xor $s1,$s1,$t1
866 xor $s2,$s2,$t2
867 xor $s3,$s3,$t3
868 blr
869
870.align 4
871Lppc_AES_decrypt_compact:
872 lwz $acc00,240($key)
873 lwz $t0,0($key)
874 lwz $t1,4($key)
875 lwz $t2,8($key)
876 lwz $t3,12($key)
877 addi $Tbl1,$Tbl0,2048
878 lis $mask80,0x8080
879 lis $mask1b,0x1b1b
880 addi $key,$key,16
881 ori $mask80,$mask80,0x8080
882 ori $mask1b,$mask1b,0x1b1b
883___
# 64-bit builds only: copy the low 32 bits of each mask register into its
# own upper 32 bits, turning the 0x80808080 / 0x1b1b1b1b constants loaded
# just above into full 64-bit patterns.  The 64-bit decrypt path further
# down packs two state words into one register and applies these masks to
# both halves at once.
884$code.=<<___ if ($SIZE_T==8);
885 insrdi $mask80,$mask80,32,0
886 insrdi $mask1b,$mask1b,32,0
887___
888$code.=<<___;
889 mtctr $acc00
890.align 4
891Ldec_compact_loop:
892 xor $s0,$s0,$t0
893 xor $s1,$s1,$t1
894 xor $s2,$s2,$t2
895 xor $s3,$s3,$t3
896 rlwinm $acc00,$s0,`32-24`,24,31
897 rlwinm $acc01,$s1,`32-24`,24,31
898 rlwinm $acc02,$s2,`32-24`,24,31
899 rlwinm $acc03,$s3,`32-24`,24,31
900 lbzx $acc00,$Tbl1,$acc00
901 lbzx $acc01,$Tbl1,$acc01
902 rlwinm $acc04,$s3,`32-16`,24,31
903 rlwinm $acc05,$s0,`32-16`,24,31
904 lbzx $acc02,$Tbl1,$acc02
905 lbzx $acc03,$Tbl1,$acc03
906 rlwinm $acc06,$s1,`32-16`,24,31
907 rlwinm $acc07,$s2,`32-16`,24,31
908 lbzx $acc04,$Tbl1,$acc04
909 lbzx $acc05,$Tbl1,$acc05
910 rlwinm $acc08,$s2,`32-8`,24,31
911 rlwinm $acc09,$s3,`32-8`,24,31
912 lbzx $acc06,$Tbl1,$acc06
913 lbzx $acc07,$Tbl1,$acc07
914 rlwinm $acc10,$s0,`32-8`,24,31
915 rlwinm $acc11,$s1,`32-8`,24,31
916 lbzx $acc08,$Tbl1,$acc08
917 lbzx $acc09,$Tbl1,$acc09
918 rlwinm $acc12,$s1,`0`,24,31
919 rlwinm $acc13,$s2,`0`,24,31
920 lbzx $acc10,$Tbl1,$acc10
921 lbzx $acc11,$Tbl1,$acc11
922 rlwinm $acc14,$s3,`0`,24,31
923 rlwinm $acc15,$s0,`0`,24,31
924 lbzx $acc12,$Tbl1,$acc12
925 lbzx $acc13,$Tbl1,$acc13
926 rlwinm $s0,$acc00,24,0,7
927 rlwinm $s1,$acc01,24,0,7
928 lbzx $acc14,$Tbl1,$acc14
929 lbzx $acc15,$Tbl1,$acc15
930 rlwinm $s2,$acc02,24,0,7
931 rlwinm $s3,$acc03,24,0,7
932 rlwimi $s0,$acc04,16,8,15
933 rlwimi $s1,$acc05,16,8,15
934 rlwimi $s2,$acc06,16,8,15
935 rlwimi $s3,$acc07,16,8,15
936 rlwimi $s0,$acc08,8,16,23
937 rlwimi $s1,$acc09,8,16,23
938 rlwimi $s2,$acc10,8,16,23
939 rlwimi $s3,$acc11,8,16,23
940 lwz $t0,0($key)
941 lwz $t1,4($key)
942 or $s0,$s0,$acc12
943 or $s1,$s1,$acc13
944 lwz $t2,8($key)
945 lwz $t3,12($key)
946 or $s2,$s2,$acc14
947 or $s3,$s3,$acc15
948
949 addi $key,$key,16
950 bdz Ldec_compact_done
951___
952$code.=<<___ if ($SIZE_T==8);
953 # vectorized permutation improves decrypt performance by 10%
954 insrdi $s0,$s1,32,0
955 insrdi $s2,$s3,32,0
956
957 and $acc00,$s0,$mask80 # r1=r0&0x80808080
958 and $acc02,$s2,$mask80
959 srdi $acc04,$acc00,7 # r1>>7
960 srdi $acc06,$acc02,7
961 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
962 andc $acc10,$s2,$mask80
963 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
964 sub $acc02,$acc02,$acc06
965 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
966 add $acc10,$acc10,$acc10
967 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
968 and $acc02,$acc02,$mask1b
969 xor $acc00,$acc00,$acc08 # r2
970 xor $acc02,$acc02,$acc10
971
972 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
973 and $acc06,$acc02,$mask80
974 srdi $acc08,$acc04,7 # r1>>7
975 srdi $acc10,$acc06,7
976 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
977 andc $acc14,$acc02,$mask80
978 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
979 sub $acc06,$acc06,$acc10
980 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
981 add $acc14,$acc14,$acc14
982 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
983 and $acc06,$acc06,$mask1b
984 xor $acc04,$acc04,$acc12 # r4
985 xor $acc06,$acc06,$acc14
986
987 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
988 and $acc10,$acc06,$mask80
989 srdi $acc12,$acc08,7 # r1>>7
990 srdi $acc14,$acc10,7
991 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
992 sub $acc10,$acc10,$acc14
993 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
994 andc $acc14,$acc06,$mask80
995 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
996 add $acc14,$acc14,$acc14
997 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
998 and $acc10,$acc10,$mask1b
999 xor $acc08,$acc08,$acc12 # r8
1000 xor $acc10,$acc10,$acc14
1001
1002 xor $acc00,$acc00,$s0 # r2^r0
1003 xor $acc02,$acc02,$s2
1004 xor $acc04,$acc04,$s0 # r4^r0
1005 xor $acc06,$acc06,$s2
1006
1007 extrdi $acc01,$acc00,32,0
1008 extrdi $acc03,$acc02,32,0
1009 extrdi $acc05,$acc04,32,0
1010 extrdi $acc07,$acc06,32,0
1011 extrdi $acc09,$acc08,32,0
1012 extrdi $acc11,$acc10,32,0
1013___
1014$code.=<<___ if ($SIZE_T==4);
1015 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1016 and $acc01,$s1,$mask80
1017 and $acc02,$s2,$mask80
1018 and $acc03,$s3,$mask80
1019 srwi $acc04,$acc00,7 # r1>>7
1020 srwi $acc05,$acc01,7
1021 srwi $acc06,$acc02,7
1022 srwi $acc07,$acc03,7
1023 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1024 andc $acc09,$s1,$mask80
1025 andc $acc10,$s2,$mask80
1026 andc $acc11,$s3,$mask80
1027 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1028 sub $acc01,$acc01,$acc05
1029 sub $acc02,$acc02,$acc06
1030 sub $acc03,$acc03,$acc07
1031 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1032 add $acc09,$acc09,$acc09
1033 add $acc10,$acc10,$acc10
1034 add $acc11,$acc11,$acc11
1035 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1036 and $acc01,$acc01,$mask1b
1037 and $acc02,$acc02,$mask1b
1038 and $acc03,$acc03,$mask1b
1039 xor $acc00,$acc00,$acc08 # r2
1040 xor $acc01,$acc01,$acc09
1041 xor $acc02,$acc02,$acc10
1042 xor $acc03,$acc03,$acc11
1043
1044 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1045 and $acc05,$acc01,$mask80
1046 and $acc06,$acc02,$mask80
1047 and $acc07,$acc03,$mask80
1048 srwi $acc08,$acc04,7 # r1>>7
1049 srwi $acc09,$acc05,7
1050 srwi $acc10,$acc06,7
1051 srwi $acc11,$acc07,7
1052 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1053 andc $acc13,$acc01,$mask80
1054 andc $acc14,$acc02,$mask80
1055 andc $acc15,$acc03,$mask80
1056 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1057 sub $acc05,$acc05,$acc09
1058 sub $acc06,$acc06,$acc10
1059 sub $acc07,$acc07,$acc11
1060 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1061 add $acc13,$acc13,$acc13
1062 add $acc14,$acc14,$acc14
1063 add $acc15,$acc15,$acc15
1064 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1065 and $acc05,$acc05,$mask1b
1066 and $acc06,$acc06,$mask1b
1067 and $acc07,$acc07,$mask1b
1068 xor $acc04,$acc04,$acc12 # r4
1069 xor $acc05,$acc05,$acc13
1070 xor $acc06,$acc06,$acc14
1071 xor $acc07,$acc07,$acc15
1072
1073 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1074 and $acc09,$acc05,$mask80
1075 and $acc10,$acc06,$mask80
1076 and $acc11,$acc07,$mask80
1077 srwi $acc12,$acc08,7 # r1>>7
1078 srwi $acc13,$acc09,7
1079 srwi $acc14,$acc10,7
1080 srwi $acc15,$acc11,7
1081 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1082 sub $acc09,$acc09,$acc13
1083 sub $acc10,$acc10,$acc14
1084 sub $acc11,$acc11,$acc15
1085 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1086 andc $acc13,$acc05,$mask80
1087 andc $acc14,$acc06,$mask80
1088 andc $acc15,$acc07,$mask80
1089 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1090 add $acc13,$acc13,$acc13
1091 add $acc14,$acc14,$acc14
1092 add $acc15,$acc15,$acc15
1093 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1094 and $acc09,$acc09,$mask1b
1095 and $acc10,$acc10,$mask1b
1096 and $acc11,$acc11,$mask1b
1097 xor $acc08,$acc08,$acc12 # r8
1098 xor $acc09,$acc09,$acc13
1099 xor $acc10,$acc10,$acc14
1100 xor $acc11,$acc11,$acc15
1101
1102 xor $acc00,$acc00,$s0 # r2^r0
1103 xor $acc01,$acc01,$s1
1104 xor $acc02,$acc02,$s2
1105 xor $acc03,$acc03,$s3
1106 xor $acc04,$acc04,$s0 # r4^r0
1107 xor $acc05,$acc05,$s1
1108 xor $acc06,$acc06,$s2
1109 xor $acc07,$acc07,$s3
1110___
1111$code.=<<___;
1112 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1113 rotrwi $s1,$s1,8
1114 rotrwi $s2,$s2,8
1115 rotrwi $s3,$s3,8
1116 xor $s0,$s0,$acc00 # ^= r2^r0
1117 xor $s1,$s1,$acc01
1118 xor $s2,$s2,$acc02
1119 xor $s3,$s3,$acc03
1120 xor $acc00,$acc00,$acc08
1121 xor $acc01,$acc01,$acc09
1122 xor $acc02,$acc02,$acc10
1123 xor $acc03,$acc03,$acc11
1124 xor $s0,$s0,$acc04 # ^= r4^r0
1125 xor $s1,$s1,$acc05
1126 xor $s2,$s2,$acc06
1127 xor $s3,$s3,$acc07
1128 rotrwi $acc00,$acc00,24
1129 rotrwi $acc01,$acc01,24
1130 rotrwi $acc02,$acc02,24
1131 rotrwi $acc03,$acc03,24
1132 xor $acc04,$acc04,$acc08
1133 xor $acc05,$acc05,$acc09
1134 xor $acc06,$acc06,$acc10
1135 xor $acc07,$acc07,$acc11
1136 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1137 xor $s1,$s1,$acc09
1138 xor $s2,$s2,$acc10
1139 xor $s3,$s3,$acc11
1140 rotrwi $acc04,$acc04,16
1141 rotrwi $acc05,$acc05,16
1142 rotrwi $acc06,$acc06,16
1143 rotrwi $acc07,$acc07,16
1144 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1145 xor $s1,$s1,$acc01
1146 xor $s2,$s2,$acc02
1147 xor $s3,$s3,$acc03
1148 rotrwi $acc08,$acc08,8
1149 rotrwi $acc09,$acc09,8
1150 rotrwi $acc10,$acc10,8
1151 rotrwi $acc11,$acc11,8
1152 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1153 xor $s1,$s1,$acc05
1154 xor $s2,$s2,$acc06
1155 xor $s3,$s3,$acc07
1156 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1157 xor $s1,$s1,$acc09
1158 xor $s2,$s2,$acc10
1159 xor $s3,$s3,$acc11
1160
1161 b Ldec_compact_loop
1162.align 4
1163Ldec_compact_done:
1164 xor $s0,$s0,$t0
1165 xor $s1,$s1,$t1
1166 xor $s2,$s2,$t2
1167 xor $s3,$s3,$t3
1168 blr
1169.long 0
1170.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1171.align 7
1172___
1173
1174$code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every back-quoted expression in the generated text with the result of evaluating it as Perl
1175print $code; # emit the finished assembly on STDOUT
1176close STDOUT or die "error closing STDOUT: $!"; # buffered write errors only surface at close; die instead of leaving a silently truncated output file
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl
deleted file mode 100644
index 4b27afd92f..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-s390x.pl
+++ /dev/null
@@ -1,1333 +0,0 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for s390x.
11
12# April 2007.
13#
14# Software performance improvement over gcc-generated code is ~70% and
15# in absolute terms is ~73 cycles per byte processed with 128-bit key.
16# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17# *strictly* in-order execution and issued instruction [in this case
18# load value from memory is critical] has to complete before execution
19# flow proceeds. S-boxes are compressed to 2KB[+256B].
20#
21# As for hardware acceleration support. It's basically a "teaser," as
22# it can and should be improved in several ways. Most notably support
23# for CBC is not utilized, nor multiple blocks are ever processed.
24# Then software key schedule can be postponed till hardware support
25# detection... Performance improvement over assembler is reportedly
26# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27# support is implemented.
28
29# May 2007.
30#
31# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32# for 128-bit keys, if hardware support is detected.
33
34# January 2009.
35#
36# Add support for hardware AES192/256 and reschedule instructions to
37# minimize/avoid Address Generation Interlock hazard and to favour
38# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39# almost 50% on z9. The gain is smaller on z10, because being dual-
40# issue z10 makes it impossible to eliminate the interlock condition:
41# critical path is not long enough. Yet it spends ~24 cycles per byte
42# processed with 128-bit key.
43#
44# Unlike previous version hardware support detection takes place only
45# at the moment of key schedule setup, which is denoted in key->rounds.
46# This is done, because deferred key setup can't be made MT-safe, not
47# for key lengths longer than 128 bits.
48#
49# Add AES_cbc_encrypt, which gives incredible performance improvement,
50# it was measured to be ~6.6x. It's less than previously mentioned 8x,
51# because software implementation was optimized.
52
53$softonly=0; # allow hardware support; when 0 the heredocs guarded by "if (!$softonly)" (km/kmc paths) are emitted
54
55$t0="%r0"; $mask="%r0"; # $t0 and $mask are two names for the same register
56$t1="%r1"; # scratch
57$t2="%r2"; $inp="%r2"; # $t2 shares %r2 with the input pointer
58$t3="%r3"; $out="%r3"; $bits="%r3"; # $t3 shares %r3 with the output pointer / key-size argument
59$key="%r4"; # pointer to the key schedule
60$i1="%r5"; # $i1-$i3: scratch registers holding table offsets
61$i2="%r6";
62$i3="%r7";
63$s0="%r8"; # $s0-$s3: the four 32-bit words of the block being processed
64$s1="%r9";
65$s2="%r10";
66$s3="%r11";
67$tbl="%r12"; # base address of the lookup table (loaded with larl)
68$rounds="%r13"; # loop counter for the brct round loops
69$ra="%r14"; # return address; the round loops also borrow it as a scratch index
70$sp="%r15"; # stack pointer, base of the register save area
71
72sub _data_word # append each argument to $code as one ".long" line holding the same 32-bit word twice
73{ my $i; # no prototype: the sub consumes a list; the former "()" only worked because callers use the &-form, which skips prototype checks
74 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } # each entry is therefore 8 bytes wide; stops at the first undefined argument
75}
76
77$code=<<___;
78.text
79
80.type AES_Te,\@object
81.align 256
82AES_Te:
83___
84&_data_word(
85 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
149$code.=<<___;
150# Te4[256]
151.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
152.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
153.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
154.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
155.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
156.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
157.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
158.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
159.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
160.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
161.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
162.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
163.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
164.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
165.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
166.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
167.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
168.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
169.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
170.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
171.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
172.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
173.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
174.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
175.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
176.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
177.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
178.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
179.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
180.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
181.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
182.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
183# rcon[]
184.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
185.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
186.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
187.align 256
188.size AES_Te,.-AES_Te
189
190# void AES_encrypt(const unsigned char *inp, unsigned char *out,
191# const AES_KEY *key) {
192.globl AES_encrypt
193.type AES_encrypt,\@function
194AES_encrypt:
195___
196$code.=<<___ if (!$softonly); # hardware (km) fast path, emitted only when hardware support is allowed
197 l %r0,240($key) # word at offset 240 of the key structure: round count or km function code
198 lhi %r1,16
199 clr %r0,%r1 # software key setup stores 10/12/14 there, hardware key setup stores 18-20
200 jl .Lesoft # below 16: no hardware key, use the table-driven code
201
202 la %r1,0($key) # %r1 = address of the key material handed to km
203 #la %r2,0($inp)
204 la %r4,0($out) # %r4 = output pointer; %r3 is reused for the length next
205 lghi %r3,16 # single block length
206 .long 0xb92e0042 # km %r4,%r2
207 brc 1,.-4 # can this happen?
208 br %r14
209.align 64
210.Lesoft:
211___
212$code.=<<___;
213 stmg %r3,$ra,24($sp) # save %r3 (out pointer) through %r14 in the save area
214
215 llgf $s0,0($inp) # load the 16-byte input block as four zero-extended words
216 llgf $s1,4($inp)
217 llgf $s2,8($inp)
218 llgf $s3,12($inp)
219
220 larl $tbl,AES_Te # table base for the round function
221 bras $ra,_s390x_AES_encrypt # result is returned in the four state registers
222
223 lg $out,24($sp) # reload the output pointer: %r3 doubles as a temporary in the core routine
224 st $s0,0($out)
225 st $s1,4($out)
226 st $s2,8($out)
227 st $s3,12($out)
228
229 lmg %r6,$ra,48($sp) # restore %r6-%r14
230 br $ra
231.size AES_encrypt,.-AES_encrypt
232
233.type _s390x_AES_encrypt,\@function
234.align 16
235_s390x_AES_encrypt:
236 stg $ra,152($sp) # save the return address: %r14 is borrowed as a scratch index below
237 x $s0,0($key) # initial round-key addition: state ^= first four key words
238 x $s1,4($key)
239 x $s2,8($key)
240 x $s3,12($key)
241 l $rounds,240($key) # round count stored at offset 240 of the key structure
242 llill $mask,`0xff<<3` # byte mask pre-scaled by 8, because every table entry is 8 bytes (word stored twice)
243 aghi $rounds,-1 # the loop runs rounds-1 times; the last round is unrolled after it
244 j .Lenc_loop
245.align 16
246.Lenc_loop:
247 sllg $t1,$s0,`0+3` # split s0 into four byte indices, each already multiplied by 8
248 srlg $t2,$s0,`8-3`
249 srlg $t3,$s0,`16-3`
250 srl $s0,`24-3`
251 nr $s0,$mask
252 ngr $t1,$mask
253 nr $t2,$mask
254 nr $t3,$mask
255
256 srlg $i1,$s1,`16-3` # i0
257 sllg $i2,$s1,`0+3`
258 srlg $i3,$s1,`8-3`
259 srl $s1,`24-3`
260 nr $i1,$mask
261 nr $s1,$mask
262 ngr $i2,$mask
263 nr $i3,$mask
264
265 l $s0,0($s0,$tbl) # Te0[s0>>24]
266 l $t1,1($t1,$tbl) # Te3[s0>>0]
267 l $t2,2($t2,$tbl) # Te2[s0>>8]
268 l $t3,3($t3,$tbl) # Te1[s0>>16]
269
270 x $s0,3($i1,$tbl) # Te1[s1>>16]
271 l $s1,0($s1,$tbl) # Te0[s1>>24]
272 x $t2,1($i2,$tbl) # Te3[s1>>0]
273 x $t3,2($i3,$tbl) # Te2[s1>>8]
274
275 srlg $i1,$s2,`8-3` # i0
276 srlg $i2,$s2,`16-3` # i1
277 nr $i1,$mask
278 nr $i2,$mask
279 sllg $i3,$s2,`0+3`
280 srl $s2,`24-3`
281 nr $s2,$mask
282 ngr $i3,$mask
283
284 xr $s1,$t1
285 srlg $ra,$s3,`8-3` # i1
286 sllg $t1,$s3,`0+3` # i0
287 nr $ra,$mask
288 la $key,16($key) # advance to the next round key
289 ngr $t1,$mask
290
291 x $s0,2($i1,$tbl) # Te2[s2>>8]
292 x $s1,3($i2,$tbl) # Te1[s2>>16]
293 l $s2,0($s2,$tbl) # Te0[s2>>24]
294 x $t3,1($i3,$tbl) # Te3[s2>>0]
295
296 srlg $i3,$s3,`16-3` # i2
297 xr $s2,$t2
298 srl $s3,`24-3`
299 nr $i3,$mask
300 nr $s3,$mask
301
302 x $s0,0($key) # add the round key for this round
303 x $s1,4($key)
304 x $s2,8($key)
305 x $t3,12($key)
306
307 x $s0,1($t1,$tbl) # Te3[s3>>0]
308 x $s1,2($ra,$tbl) # Te2[s3>>8]
309 x $s2,3($i3,$tbl) # Te1[s3>>16]
310 l $s3,0($s3,$tbl) # Te0[s3>>24]
311 xr $s3,$t3
312
313 brct $rounds,.Lenc_loop
314 .align 16
315
316 sllg $t1,$s0,`0+3` # last round: single bytes (Te4) are looked up and reassembled with shifts and ORs
317 srlg $t2,$s0,`8-3`
318 ngr $t1,$mask
319 srlg $t3,$s0,`16-3`
320 srl $s0,`24-3`
321 nr $s0,$mask
322 nr $t2,$mask
323 nr $t3,$mask
324
325 srlg $i1,$s1,`16-3` # i0
326 sllg $i2,$s1,`0+3`
327 ngr $i2,$mask
328 srlg $i3,$s1,`8-3`
329 srl $s1,`24-3`
330 nr $i1,$mask
331 nr $s1,$mask
332 nr $i3,$mask
333
334 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
335 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
336 sll $s0,24
337 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
338 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
339 sll $t2,8
340 sll $t3,16
341
342 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
343 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
344 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
345 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
346 sll $i1,16
347 sll $s1,24
348 sll $i3,8
349 or $s0,$i1
350 or $s1,$t1
351 or $t2,$i2
352 or $t3,$i3
353
354 srlg $i1,$s2,`8-3` # i0
355 srlg $i2,$s2,`16-3` # i1
356 nr $i1,$mask
357 nr $i2,$mask
358 sllg $i3,$s2,`0+3`
359 srl $s2,`24-3`
360 ngr $i3,$mask
361 nr $s2,$mask
362
363 sllg $t1,$s3,`0+3` # i0
364 srlg $ra,$s3,`8-3` # i1
365 ngr $t1,$mask
366
367 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
368 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
369 sll $i1,8
370 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
371 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
372 sll $i2,16
373 nr $ra,$mask
374 sll $s2,24
375 or $s0,$i1
376 or $s1,$i2
377 or $s2,$t2
378 or $t3,$i3
379
380 srlg $i3,$s3,`16-3` # i2
381 srl $s3,`24-3`
382 nr $i3,$mask
383 nr $s3,$mask
384
385 l $t0,16($key) # final round key: 16 bytes past the key pointer left by the loop
386 l $t2,20($key)
387
388 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
389 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
390 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
391 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
392 sll $i2,8
393 sll $i3,16
394 sll $s3,24
395 or $s0,$i1
396 or $s1,$i2
397 or $s2,$i3
398 or $s3,$t3
399
400 lg $ra,152($sp) # restore the return address saved on entry
401 xr $s0,$t0
402 xr $s1,$t2
403 x $s2,24($key)
404 x $s3,28($key)
405
406 br $ra
407.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
408___
409
410$code.=<<___;
411.type AES_Td,\@object
412.align 256
413AES_Td:
414___
415&_data_word(
416 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
417 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
418 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
419 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
420 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
421 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
422 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
423 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
424 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
425 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
426 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
427 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
428 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
429 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
430 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
431 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
432 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
433 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
434 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
435 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
436 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
437 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
438 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
439 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
440 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
441 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
442 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
443 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
444 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
445 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
446 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
447 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
448 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
449 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
450 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
451 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
452 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
453 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
454 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
455 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
456 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
457 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
458 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
459 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
460 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
461 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
462 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
463 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
464 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
465 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
466 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
467 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
468 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
469 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
470 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
471 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
472 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
473 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
474 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
475 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
476 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
477 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
478 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
479 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
480$code.=<<___;
481# Td4[256]
482.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
483.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
484.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
485.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
486.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
487.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
488.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
489.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
490.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
491.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
492.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
493.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
494.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
495.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
496.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
497.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
498.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
499.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
500.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
501.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
502.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
503.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
504.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
505.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
506.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
507.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
508.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
509.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
510.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
511.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
512.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
513.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
514.size AES_Td,.-AES_Td
515
516# void AES_decrypt(const unsigned char *inp, unsigned char *out,
517# const AES_KEY *key) {
518.globl AES_decrypt
519.type AES_decrypt,\@function
520AES_decrypt:
521___
522$code.=<<___ if (!$softonly); # hardware (km) fast path, emitted only when hardware support is allowed
523 l %r0,240($key) # word at offset 240 of the key structure: round count or km function code
524 lhi %r1,16
525 clr %r0,%r1 # values of 16 and above are treated as a km function code
526 jl .Ldsoft # below 16: use the table-driven code
527
528 la %r1,0($key) # %r1 = address of the key material handed to km
529 #la %r2,0($inp)
530 la %r4,0($out) # %r4 = output pointer; %r3 is reused for the length next
531 lghi %r3,16 # single block length
532 .long 0xb92e0042 # km %r4,%r2
533 brc 1,.-4 # can this happen?
534 br %r14
535.align 64
536.Ldsoft:
537___
538$code.=<<___;
539 stmg %r3,$ra,24($sp) # save %r3 (out pointer) through %r14 in the save area
540
541 llgf $s0,0($inp) # load the 16-byte input block as four zero-extended words
542 llgf $s1,4($inp)
543 llgf $s2,8($inp)
544 llgf $s3,12($inp)
545
546 larl $tbl,AES_Td # table base for the inverse round function
547 bras $ra,_s390x_AES_decrypt # result is returned in the four state registers
548
549 lg $out,24($sp) # reload the output pointer: %r3 doubles as a temporary in the core routine
550 st $s0,0($out)
551 st $s1,4($out)
552 st $s2,8($out)
553 st $s3,12($out)
554
555 lmg %r6,$ra,48($sp) # restore %r6-%r14
556 br $ra
557.size AES_decrypt,.-AES_decrypt
558
559.type _s390x_AES_decrypt,\@function
560.align 16
561_s390x_AES_decrypt:
562 stg $ra,152($sp) # save the return address: %r14 is borrowed as a scratch index below
563 x $s0,0($key) # initial round-key addition: state ^= first four key words
564 x $s1,4($key)
565 x $s2,8($key)
566 x $s3,12($key)
567 l $rounds,240($key) # round count stored at offset 240 of the key structure
568 llill $mask,`0xff<<3` # byte mask pre-scaled by 8, because every table entry is 8 bytes (word stored twice)
569 aghi $rounds,-1 # the loop runs rounds-1 times; the last round is unrolled after it
570 j .Ldec_loop
571.align 16
572.Ldec_loop:
573 srlg $t1,$s0,`16-3` # split s0 into four byte indices, each already multiplied by 8
574 srlg $t2,$s0,`8-3`
575 sllg $t3,$s0,`0+3`
576 srl $s0,`24-3`
577 nr $s0,$mask
578 nr $t1,$mask
579 nr $t2,$mask
580 ngr $t3,$mask
581
582 sllg $i1,$s1,`0+3` # i0
583 srlg $i2,$s1,`16-3`
584 srlg $i3,$s1,`8-3`
585 srl $s1,`24-3`
586 ngr $i1,$mask
587 nr $s1,$mask
588 nr $i2,$mask
589 nr $i3,$mask
590
591 l $s0,0($s0,$tbl) # Td0[s0>>24]
592 l $t1,3($t1,$tbl) # Td1[s0>>16]
593 l $t2,2($t2,$tbl) # Td2[s0>>8]
594 l $t3,1($t3,$tbl) # Td3[s0>>0]
595
596 x $s0,1($i1,$tbl) # Td3[s1>>0]
597 l $s1,0($s1,$tbl) # Td0[s1>>24]
598 x $t2,3($i2,$tbl) # Td1[s1>>16]
599 x $t3,2($i3,$tbl) # Td2[s1>>8]
600
601 srlg $i1,$s2,`8-3` # i0
602 sllg $i2,$s2,`0+3` # i1
603 srlg $i3,$s2,`16-3`
604 srl $s2,`24-3`
605 nr $i1,$mask
606 ngr $i2,$mask
607 nr $s2,$mask
608 nr $i3,$mask
609
610 xr $s1,$t1
611 srlg $ra,$s3,`8-3` # i1
612 srlg $t1,$s3,`16-3` # i0
613 nr $ra,$mask
614 la $key,16($key) # advance to the next round key
615 nr $t1,$mask
616
617 x $s0,2($i1,$tbl) # Td2[s2>>8]
618 x $s1,1($i2,$tbl) # Td3[s2>>0]
619 l $s2,0($s2,$tbl) # Td0[s2>>24]
620 x $t3,3($i3,$tbl) # Td1[s2>>16]
621
622 sllg $i3,$s3,`0+3` # i2
623 srl $s3,`24-3`
624 ngr $i3,$mask
625 nr $s3,$mask
626
627 xr $s2,$t2
628 x $s0,0($key) # add the round key for this round
629 x $s1,4($key)
630 x $s2,8($key)
631 x $t3,12($key)
632
633 x $s0,3($t1,$tbl) # Td1[s3>>16]
634 x $s1,2($ra,$tbl) # Td2[s3>>8]
635 x $s2,1($i3,$tbl) # Td3[s3>>0]
636 l $s3,0($s3,$tbl) # Td0[s3>>24]
637 xr $s3,$t3
638
639 brct $rounds,.Ldec_loop
640 .align 16
641
642 l $t1,`2048+0`($tbl) # prefetch Td4
643 l $t2,`2048+64`($tbl) # loads 64 bytes apart across the 256-byte Td4 table; the values are overwritten below
644 l $t3,`2048+128`($tbl)
645 l $i1,`2048+192`($tbl)
646 llill $mask,0xff # unscaled byte mask: Td4 is a byte table starting at offset 2048
647
648 srlg $i3,$s0,24 # i0
649 srlg $t1,$s0,16
650 srlg $t2,$s0,8
651 nr $s0,$mask # i3
652 nr $t1,$mask
653
654 srlg $i1,$s1,24
655 nr $t2,$mask
656 srlg $i2,$s1,16
657 srlg $ra,$s1,8
658 nr $s1,$mask # i0
659 nr $i2,$mask
660 nr $ra,$mask
661
662 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
663 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
664 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
665 sll $t1,16
666 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
667 sllg $s0,$i3,24
668 sll $t2,8
669
670 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
671 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
672 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
673 sll $i1,24
674 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
675 sll $i2,16
676 sll $i3,8
677 or $s0,$s1
678 or $t1,$i1
679 or $t2,$i2
680 or $t3,$i3
681
682 srlg $i1,$s2,8 # i0
683 srlg $i2,$s2,24
684 srlg $i3,$s2,16
685 nr $s2,$mask # i1
686 nr $i1,$mask
687 nr $i3,$mask
688 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
689 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
690 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
691 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
692 sll $i1,8
693 sll $i2,24
694 or $s0,$i1
695 sll $i3,16
696 or $t2,$i2
697 or $t3,$i3
698
699 srlg $i1,$s3,16 # i0
700 srlg $i2,$s3,8 # i1
701 srlg $i3,$s3,24
702 nr $s3,$mask # i2
703 nr $i1,$mask
704 nr $i2,$mask
705
706 lg $ra,152($sp) # restore the return address saved on entry
707 or $s1,$t1
708 l $t0,16($key) # final round key: 16 bytes past the key pointer left by the loop
709 l $t1,20($key)
710
711 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
712 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
713 sll $i1,16
714 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
715 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
716 sll $i2,8
717 sll $s3,24
718 or $s0,$i1
719 or $s1,$i2
720 or $s2,$t2
721 or $s3,$t3
722
723 xr $s0,$t0
724 xr $s1,$t1
725 x $s2,24($key)
726 x $s3,28($key)
727
728 br $ra
729.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
730___
731
732$code.=<<___;
733# void AES_set_encrypt_key(const unsigned char *in, int bits,
734# AES_KEY *key) {
735.globl AES_set_encrypt_key
736.type AES_set_encrypt_key,\@function
737.align 16
738AES_set_encrypt_key:
739 lghi $t0,0
740 clgr $inp,$t0
741 je .Lminus1
742 clgr $key,$t0
743 je .Lminus1
744
745 lghi $t0,128
746 clr $bits,$t0
747 je .Lproceed
748 lghi $t0,192
749 clr $bits,$t0
750 je .Lproceed
751 lghi $t0,256
752 clr $bits,$t0
753 je .Lproceed
754 lghi %r2,-2
755 br %r14
756
757.align 16
758.Lproceed:
759___
760$code.=<<___ if (!$softonly);
761 # convert bits to km code, [128,192,256]->[18,19,20]
762 lhi %r5,-128
763 lhi %r0,18
764 ar %r5,$bits
765 srl %r5,6
766 ar %r5,%r0
767
768 lghi %r0,0 # query capability vector
769 la %r1,16($sp)
770 .long 0xb92f0042 # kmc %r4,%r2
771
772 llihh %r1,0x8000
773 srlg %r1,%r1,0(%r5)
774 ng %r1,16($sp)
775 jz .Lekey_internal
776
777 lmg %r0,%r1,0($inp) # just copy 128 bits...
778 stmg %r0,%r1,0($key)
779 lhi %r0,192
780 cr $bits,%r0
781 jl 1f
782 lg %r1,16($inp)
783 stg %r1,16($key)
784 je 1f
785 lg %r1,24($inp)
786 stg %r1,24($key)
7871: st $bits,236($key) # save bits
788 st %r5,240($key) # save km code
789 lghi %r2,0
790 br %r14
791___
792$code.=<<___;
793.align 16
794.Lekey_internal:
795 stmg %r6,%r13,48($sp) # all non-volatile regs
796
797 larl $tbl,AES_Te+2048
798
799 llgf $s0,0($inp)
800 llgf $s1,4($inp)
801 llgf $s2,8($inp)
802 llgf $s3,12($inp)
803 st $s0,0($key)
804 st $s1,4($key)
805 st $s2,8($key)
806 st $s3,12($key)
807 lghi $t0,128
808 cr $bits,$t0
809 jne .Lnot128
810
811 llill $mask,0xff
812 lghi $t3,0 # i=0
813 lghi $rounds,10
814 st $rounds,240($key)
815
816 llgfr $t2,$s3 # temp=rk[3]
817 srlg $i1,$s3,8
818 srlg $i2,$s3,16
819 srlg $i3,$s3,24
820 nr $t2,$mask
821 nr $i1,$mask
822 nr $i2,$mask
823
824.align 16
825.L128_loop:
826 la $t2,0($t2,$tbl)
827 la $i1,0($i1,$tbl)
828 la $i2,0($i2,$tbl)
829 la $i3,0($i3,$tbl)
830 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
831 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
832 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
833 icm $t2,1,0($i3) # Te4[rk[3]>>24]
834 x $t2,256($t3,$tbl) # rcon[i]
835 xr $s0,$t2 # rk[4]=rk[0]^...
836 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
837 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
838 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
839
840 llgfr $t2,$s3 # temp=rk[3]
841 srlg $i1,$s3,8
842 srlg $i2,$s3,16
843 nr $t2,$mask
844 nr $i1,$mask
845 srlg $i3,$s3,24
846 nr $i2,$mask
847
848 st $s0,16($key)
849 st $s1,20($key)
850 st $s2,24($key)
851 st $s3,28($key)
852 la $key,16($key) # key+=4
853 la $t3,4($t3) # i++
854 brct $rounds,.L128_loop
855 lghi %r2,0
856 lmg %r6,%r13,48($sp)
857 br $ra
858
859.align 16
860.Lnot128:
861 llgf $t0,16($inp)
862 llgf $t1,20($inp)
863 st $t0,16($key)
864 st $t1,20($key)
865 lghi $t0,192
866 cr $bits,$t0
867 jne .Lnot192
868
869 llill $mask,0xff
870 lghi $t3,0 # i=0
871 lghi $rounds,12
872 st $rounds,240($key)
873 lghi $rounds,8
874
875 srlg $i1,$t1,8
876 srlg $i2,$t1,16
877 srlg $i3,$t1,24
878 nr $t1,$mask
879 nr $i1,$mask
880 nr $i2,$mask
881
882.align 16
883.L192_loop:
884 la $t1,0($t1,$tbl)
885 la $i1,0($i1,$tbl)
886 la $i2,0($i2,$tbl)
887 la $i3,0($i3,$tbl)
888 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
889 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
890 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
891 icm $t1,1,0($i3) # Te4[rk[5]>>24]
892 x $t1,256($t3,$tbl) # rcon[i]
893 xr $s0,$t1 # rk[6]=rk[0]^...
894 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
895 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
896 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
897
898 st $s0,24($key)
899 st $s1,28($key)
900 st $s2,32($key)
901 st $s3,36($key)
902 brct $rounds,.L192_continue
903 lghi %r2,0
904 lmg %r6,%r13,48($sp)
905 br $ra
906
907.align 16
908.L192_continue:
909 lgr $t1,$s3
910 x $t1,16($key) # rk[10]=rk[4]^rk[9]
911 st $t1,40($key)
912 x $t1,20($key) # rk[11]=rk[5]^rk[10]
913 st $t1,44($key)
914
915 srlg $i1,$t1,8
916 srlg $i2,$t1,16
917 srlg $i3,$t1,24
918 nr $t1,$mask
919 nr $i1,$mask
920 nr $i2,$mask
921
922 la $key,24($key) # key+=6
923 la $t3,4($t3) # i++
924 j .L192_loop
925
926.align 16
927.Lnot192:
928 llgf $t0,24($inp)
929 llgf $t1,28($inp)
930 st $t0,24($key)
931 st $t1,28($key)
932 llill $mask,0xff
933 lghi $t3,0 # i=0
934 lghi $rounds,14
935 st $rounds,240($key)
936 lghi $rounds,7
937
938 srlg $i1,$t1,8
939 srlg $i2,$t1,16
940 srlg $i3,$t1,24
941 nr $t1,$mask
942 nr $i1,$mask
943 nr $i2,$mask
944
945.align 16
946.L256_loop:
947 la $t1,0($t1,$tbl)
948 la $i1,0($i1,$tbl)
949 la $i2,0($i2,$tbl)
950 la $i3,0($i3,$tbl)
951 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
952 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
953 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
954 icm $t1,1,0($i3) # Te4[rk[7]>>24]
955 x $t1,256($t3,$tbl) # rcon[i]
956 xr $s0,$t1 # rk[8]=rk[0]^...
957 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
958 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
959 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
960 st $s0,32($key)
961 st $s1,36($key)
962 st $s2,40($key)
963 st $s3,44($key)
964 brct $rounds,.L256_continue
965 lghi %r2,0
966 lmg %r6,%r13,48($sp)
967 br $ra
968
969.align 16
970.L256_continue:
971 lgr $t1,$s3 # temp=rk[11]
972 srlg $i1,$s3,8
973 srlg $i2,$s3,16
974 srlg $i3,$s3,24
975 nr $t1,$mask
976 nr $i1,$mask
977 nr $i2,$mask
978 la $t1,0($t1,$tbl)
979 la $i1,0($i1,$tbl)
980 la $i2,0($i2,$tbl)
981 la $i3,0($i3,$tbl)
982 llgc $t1,0($t1) # Te4[rk[11]>>0]
983 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
984 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
985 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
986 x $t1,16($key) # rk[12]=rk[4]^...
987 st $t1,48($key)
988 x $t1,20($key) # rk[13]=rk[5]^rk[12]
989 st $t1,52($key)
990 x $t1,24($key) # rk[14]=rk[6]^rk[13]
991 st $t1,56($key)
992 x $t1,28($key) # rk[15]=rk[7]^rk[14]
993 st $t1,60($key)
994
995 srlg $i1,$t1,8
996 srlg $i2,$t1,16
997 srlg $i3,$t1,24
998 nr $t1,$mask
999 nr $i1,$mask
1000 nr $i2,$mask
1001
1002 la $key,32($key) # key+=8
1003 la $t3,4($t3) # i++
1004 j .L256_loop
1005
1006.Lminus1:
1007 lghi %r2,-1
1008 br $ra
1009.size AES_set_encrypt_key,.-AES_set_encrypt_key
1010
1011# void AES_set_decrypt_key(const unsigned char *in, int bits,
1012# AES_KEY *key) {
1013.globl AES_set_decrypt_key
1014.type AES_set_decrypt_key,\@function
1015.align 16
1016AES_set_decrypt_key:
1017 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1018 stg $ra,112($sp) # save non-volatile registers!
1019 bras $ra,AES_set_encrypt_key
1020 lg $key,32($sp)
1021 lg $ra,112($sp)
1022 ltgr %r2,%r2
1023 bnzr $ra
1024___
# The prologue emitted above spills $key and $ra, calls AES_set_encrypt_key
# to build the encryption schedule, and returns straight away when that call
# left a non-zero status in %r2.
#
# Hardware-capable builds only (!$softonly): when the word at 240($key) is 16
# or greater it is tagged with the 0x80 "decrypt" bit and the function
# returns without touching the schedule; smaller values branch to .Lgo.
# .Ldkey_internal is a second entry point that builds the schedule through
# .Lekey_internal and then falls through to .Lgo as well.
1025$code.=<<___ if (!$softonly);
1026 l $t0,240($key)
1027 lhi $t1,16
1028 cr $t0,$t1
1029 jl .Lgo
1030 oill $t0,0x80 # set "decrypt" bit
1031 st $t0,240($key)
1032 br $ra
1033
1034.align 16
1035.Ldkey_internal:
1036 stg $key,32($sp)
1037 stg $ra,40($sp)
1038 bras $ra,.Lekey_internal
1039 lg $key,32($sp)
1040 lg $ra,40($sp)
1041___
# .Lgo/.Linv: reverse the order of the 16-byte round keys in place.  $i1
# walks up from the first round key, $i2 walks down from the last one
# (key + 16*rounds), and rounds/2 iterations swap one pair each.
1042$code.=<<___;
1043
1044.Lgo: llgf $rounds,240($key)
1045 la $i1,0($key)
1046 sllg $i2,$rounds,4
1047 la $i2,0($i2,$key)
1048 srl $rounds,1
1049 lghi $t1,-16
1050
1051.align 16
1052.Linv: lmg $s0,$s1,0($i1)
1053 lmg $s2,$s3,0($i2)
1054 stmg $s0,$s1,0($i2)
1055 stmg $s2,$s3,0($i1)
1056 la $i1,16($i1)
1057 la $i2,0($t1,$i2)
1058 brct $rounds,.Linv
1059___
# .Lmix: rewrite the (rounds-1)*4 words that start at 16($key), i.e. every
# round key except the first and the last.  tp2, tp4 and tp8 are successive
# byte-wise doublings of the word tp1: the 0x80 mask selects bytes whose top
# bit is set, (m - (m>>7)) & 0x1b turns each of them into the 0x1b reduction
# constant, and that is XORed with the word shifted left by one under the
# 0xfe mask.  The value stored back is the XOR of the rotations named in the
# trailing comments.
# NOTE(review): this looks like AES InvMixColumns applied to the round keys;
# confirm against the C reference implementation.
# The three mask registers reuse $i1..$i3, which .Linv no longer needs.
# The epilogue restores %r6-%r13 (saved by AES_set_encrypt_key) and
# returns 0 in %r2.
1060$mask80=$i1;
1061$mask1b=$i2;
1062$maskfe=$i3;
1063$code.=<<___;
1064 llgf $rounds,240($key)
1065 aghi $rounds,-1
1066 sll $rounds,2 # (rounds-1)*4
1067 llilh $mask80,0x8080
1068 llilh $mask1b,0x1b1b
1069 llilh $maskfe,0xfefe
1070 oill $mask80,0x8080
1071 oill $mask1b,0x1b1b
1072 oill $maskfe,0xfefe
1073
1074.align 16
1075.Lmix: l $s0,16($key) # tp1
1076 lr $s1,$s0
1077 ngr $s1,$mask80
1078 srlg $t1,$s1,7
1079 slr $s1,$t1
1080 nr $s1,$mask1b
1081 sllg $t1,$s0,1
1082 nr $t1,$maskfe
1083 xr $s1,$t1 # tp2
1084
1085 lr $s2,$s1
1086 ngr $s2,$mask80
1087 srlg $t1,$s2,7
1088 slr $s2,$t1
1089 nr $s2,$mask1b
1090 sllg $t1,$s1,1
1091 nr $t1,$maskfe
1092 xr $s2,$t1 # tp4
1093
1094 lr $s3,$s2
1095 ngr $s3,$mask80
1096 srlg $t1,$s3,7
1097 slr $s3,$t1
1098 nr $s3,$mask1b
1099 sllg $t1,$s2,1
1100 nr $t1,$maskfe
1101 xr $s3,$t1 # tp8
1102
1103 xr $s1,$s0 # tp2^tp1
1104 xr $s2,$s0 # tp4^tp1
1105 rll $s0,$s0,24 # = ROTATE(tp1,8)
1106 xr $s2,$s3 # ^=tp8
1107 xr $s0,$s1 # ^=tp2^tp1
1108 xr $s1,$s3 # tp2^tp1^tp8
1109 xr $s0,$s2 # ^=tp4^tp1^tp8
1110 rll $s1,$s1,8
1111 rll $s2,$s2,16
1112 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1113 rll $s3,$s3,24
1114 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1115 xr $s0,$s3 # ^= ROTATE(tp8,8)
1116
1117 st $s0,16($key)
1118 la $key,4($key)
1119 brct $rounds,.Lmix
1120
1121 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1122 lghi %r2,0
1123 br $ra
1124.size AES_set_decrypt_key,.-AES_set_decrypt_key
1125___
1126
# AES_cbc_encrypt: CBC-mode encryption or decryption of a byte buffer.
1127#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1128# size_t length, const AES_KEY *key,
1129# unsigned char *ivec, const int enc)
1130{
# Register roles inside this block.  The prologue swaps %r3 and %r4, so from
# then on $len lives in %r3 and $out in %r4; $key and $ivp are %r5 and %r6.
1131my $inp="%r2";
1132my $out="%r4"; # length and out are swapped
1133my $len="%r3";
1134my $key="%r5";
1135my $ivp="%r6";
1136
1137$code.=<<___;
1138.globl AES_cbc_encrypt
1139.type AES_cbc_encrypt,\@function
1140.align 16
1141AES_cbc_encrypt:
1142 xgr %r3,%r4 # flip %r3 and %r4, out and len
1143 xgr %r4,%r3
1144 xgr %r3,%r4
1145___
# Hardware path (omitted when $softonly is set).  It is taken unless 16 is
# logically greater than the word at 240($key); that word is then loaded
# into %r0 as the kmc function code.  A parameter block made of the ivec
# followed by 32 bytes of key is assembled at 16($sp), the whole 16-byte
# blocks are handed to kmc (re-issued while it reports partial completion),
# and .Lkmc_done copies the updated ivec back to the caller.
# A trailing partial block of len%16 bytes is handled at .Lkmc_truncated:
#  - function code bit 0x80 clear: the tail is copied into a zeroed 16-byte
#    buffer at 128($sp) and run through kmc as one full block;
#  - bit 0x80 set (.Lkmc_truncated_dec): one block is processed into that
#    buffer and only the len%16 leading bytes are copied to the output.
# Both copies are an mvc executed through ex with length-1 held in $key.
1146$code.=<<___ if (!$softonly);
1147 lhi %r0,16
1148 cl %r0,240($key)
1149 jh .Lcbc_software
1150
1151 lg %r0,0($ivp) # copy ivec
1152 lg %r1,8($ivp)
1153 stmg %r0,%r1,16($sp)
1154 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1155 stmg %r0,%r1,32($sp)
1156 lmg %r0,%r1,16($key)
1157 stmg %r0,%r1,48($sp)
1158 l %r0,240($key) # load kmc code
1159 lghi $key,15 # res=len%16, len-=res;
1160 ngr $key,$len
1161 slgr $len,$key
1162 la %r1,16($sp) # parameter block - ivec || key
1163 jz .Lkmc_truncated
1164 .long 0xb92f0042 # kmc %r4,%r2
1165 brc 1,.-4 # pay attention to "partial completion"
1166 ltr $key,$key
1167 jnz .Lkmc_truncated
1168.Lkmc_done:
1169 lmg %r0,%r1,16($sp) # copy ivec to caller
1170 stg %r0,0($ivp)
1171 stg %r1,8($ivp)
1172 br $ra
1173.align 16
1174.Lkmc_truncated:
1175 ahi $key,-1 # it's the way it's encoded in mvc
1176 tmll %r0,0x80
1177 jnz .Lkmc_truncated_dec
1178 lghi %r1,0
1179 stg %r1,128($sp)
1180 stg %r1,136($sp)
1181 bras %r1,1f
1182 mvc 128(1,$sp),0($inp)
11831: ex $key,0(%r1)
1184 la %r1,16($sp) # restore parameter block
1185 la $inp,128($sp)
1186 lghi $len,16
1187 .long 0xb92f0042 # kmc %r4,%r2
1188 j .Lkmc_done
1189.align 16
1190.Lkmc_truncated_dec:
1191 stg $out,64($sp)
1192 la $out,128($sp)
1193 lghi $len,16
1194 .long 0xb92f0042 # kmc %r4,%r2
1195 lg $out,64($sp)
1196 bras %r1,2f
1197 mvc 0(1,$out),128($sp)
11982: ex $key,0(%r1)
1199 j .Lkmc_done
1200.align 16
1201.Lcbc_software:
1202___
# Software path.  Registers $key through $ra are spilled from 40($sp)
# upwards, then the word at 164($sp) selects the direction: zero means
# decrypt (presumably the low half of the stack-passed "enc" argument --
# confirm against the s390x calling convention).
#
# Encrypt: the chaining value lives in $s0..$s3 (initially the ivec).  Each
# input block is XORed into it, _s390x_AES_encrypt is called, and the result
# is stored to the output while staying in $s0..$s3 for the next block.  A
# short final block is zero-padded into 128($sp) by .Lcbc_enc_tail and
# encrypted as a full block.  .Lcbc_enc_done writes the last ciphertext
# block back to ivec.
#
# Decrypt: the previous ciphertext block (initially the ivec) is kept at
# 128($sp).  Each input block goes through _s390x_AES_decrypt, the result is
# XORed with that saved block, and the raw input block ($t0/$t1) becomes the
# next chaining value.  For a short final block .Lcbc_dec_tail copies only
# the remaining bytes to the output.  .Lcbc_dec_exit stores the last input
# block read into ivec.
1203$code.=<<___;
1204 stmg $key,$ra,40($sp)
1205 lhi %r0,0
1206 cl %r0,164($sp)
1207 je .Lcbc_decrypt
1208
1209 larl $tbl,AES_Te
1210
1211 llgf $s0,0($ivp)
1212 llgf $s1,4($ivp)
1213 llgf $s2,8($ivp)
1214 llgf $s3,12($ivp)
1215
1216 lghi $t0,16
1217 slgr $len,$t0
1218 brc 4,.Lcbc_enc_tail # if borrow
1219.Lcbc_enc_loop:
1220 stmg $inp,$out,16($sp)
1221 x $s0,0($inp)
1222 x $s1,4($inp)
1223 x $s2,8($inp)
1224 x $s3,12($inp)
1225 lgr %r4,$key
1226
1227 bras $ra,_s390x_AES_encrypt
1228
1229 lmg $inp,$key,16($sp)
1230 st $s0,0($out)
1231 st $s1,4($out)
1232 st $s2,8($out)
1233 st $s3,12($out)
1234
1235 la $inp,16($inp)
1236 la $out,16($out)
1237 lghi $t0,16
1238 ltgr $len,$len
1239 jz .Lcbc_enc_done
1240 slgr $len,$t0
1241 brc 4,.Lcbc_enc_tail # if borrow
1242 j .Lcbc_enc_loop
1243.align 16
1244.Lcbc_enc_done:
1245 lg $ivp,48($sp)
1246 st $s0,0($ivp)
1247 st $s1,4($ivp)
1248 st $s2,8($ivp)
1249 st $s3,12($ivp)
1250
1251 lmg %r7,$ra,56($sp)
1252 br $ra
1253
1254.align 16
1255.Lcbc_enc_tail:
1256 aghi $len,15
1257 lghi $t0,0
1258 stg $t0,128($sp)
1259 stg $t0,136($sp)
1260 bras $t1,3f
1261 mvc 128(1,$sp),0($inp)
12623: ex $len,0($t1)
1263 lghi $len,0
1264 la $inp,128($sp)
1265 j .Lcbc_enc_loop
1266
1267.align 16
1268.Lcbc_decrypt:
1269 larl $tbl,AES_Td
1270
1271 lg $t0,0($ivp)
1272 lg $t1,8($ivp)
1273 stmg $t0,$t1,128($sp)
1274
1275.Lcbc_dec_loop:
1276 stmg $inp,$out,16($sp)
1277 llgf $s0,0($inp)
1278 llgf $s1,4($inp)
1279 llgf $s2,8($inp)
1280 llgf $s3,12($inp)
1281 lgr %r4,$key
1282
1283 bras $ra,_s390x_AES_decrypt
1284
1285 lmg $inp,$key,16($sp)
1286 sllg $s0,$s0,32
1287 sllg $s2,$s2,32
1288 lr $s0,$s1
1289 lr $s2,$s3
1290
1291 lg $t0,0($inp)
1292 lg $t1,8($inp)
1293 xg $s0,128($sp)
1294 xg $s2,136($sp)
1295 lghi $s1,16
1296 slgr $len,$s1
1297 brc 4,.Lcbc_dec_tail # if borrow
1298 brc 2,.Lcbc_dec_done # if zero
1299 stg $s0,0($out)
1300 stg $s2,8($out)
1301 stmg $t0,$t1,128($sp)
1302
1303 la $inp,16($inp)
1304 la $out,16($out)
1305 j .Lcbc_dec_loop
1306
1307.Lcbc_dec_done:
1308 stg $s0,0($out)
1309 stg $s2,8($out)
1310.Lcbc_dec_exit:
1311 lmg $ivp,$ra,48($sp)
1312 stmg $t0,$t1,0($ivp)
1313
1314 br $ra
1315
1316.align 16
1317.Lcbc_dec_tail:
1318 aghi $len,15
1319 stg $s0,128($sp)
1320 stg $s2,136($sp)
1321 bras $s1,4f
1322 mvc 0(1,$out),128($sp)
13234: ex $len,0($s1)
1324 j .Lcbc_dec_exit
1325.size AES_cbc_encrypt,.-AES_cbc_encrypt
1326___
1327}
# Append the identification string to the generated assembly.
1328$code.=<<___;
1329.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1330___
1331
# Post-process and emit: every back-quoted span in the accumulated text is
# evaluated as Perl and replaced by its value, then the finished assembly is
# written to standard output.
1332$code =~ s/\`([^\`]*)\`/eval $1/gem;
1333print $code;
diff --git a/src/lib/libcrypto/aes/asm/aes-sparcv9.pl b/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
deleted file mode 100755
index c57b3a2d6d..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
+++ /dev/null
@@ -1,1181 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.1
10#
11# The major reason for undertaking this effort was to mitigate the hazard of
12# cache-timing attack. This is [currently and initially!] addressed in
13# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14# 2. References to them are scheduled for L2 cache latency, meaning
15# that the tables don't have to reside in L1 cache. Once again, this
16# is an initial draft and one should expect more countermeasures to
17# be implemented...
18#
19# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
20# round.
21#
22# Even though performance was not the primary goal [on the contrary,
23# extra shifts "induced" by compressed S-box and longer loop epilogue
24# "induced" by scheduling for L2 have negative effect on performance],
25# the code turned out to run in ~23 cycles per processed byte en-/
26# decrypted with 128-bit key. This is pretty good result for code
27# with mentioned qualities and UltraSPARC core. Compared to Sun C
28# generated code my encrypt procedure runs just a few percent faster,
29# while decrypt one - whole 50% faster [yes, Sun C failed to generate
30# optimal decrypt procedure]. Compared to GNU C generated code both
31# procedures are more than 60% faster:-)
32
# Target ABI selection: generate 32-bit code unless -m64 or -xarch=v9 is
# present on the command line.
33$bits=32;
34for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
# $bias is added to %sp whenever the stack is addressed and $frame is the
# frame size handed to "save"; both follow from the ABI chosen above.
35if ($bits==64) { $bias=2047; $frame=192; }
36else { $bias=0; $frame=112; }
# Extra bytes the internal routines reserve on top of $frame; the return
# address is parked there, at [%sp+$bias+$frame+0].
37$locals=16;
38
# Sixteen scratch registers, one per table lookup in a round (four lookups
# feed each output word).
39$acc0="%l0";
40$acc1="%o0";
41$acc2="%o1";
42$acc3="%o2";
43
44$acc4="%l1";
45$acc5="%o3";
46$acc6="%o4";
47$acc7="%o5";
48
49$acc8="%l2";
50$acc9="%o7";
51$acc10="%g1";
52$acc11="%g2";
53
54$acc12="%l3";
55$acc13="%g3";
56$acc14="%g4";
57$acc15="%g5";
58
# $t0-$t3 receive round-key words and alternate with $s0-$s3 as the state
# of successive rounds.
59$t0="%l4";
60$t1="%l5";
61$t2="%l6";
62$t3="%l7";
63
# State words as seen by the internal routines after "save", followed by the
# table base and key-schedule pointers.
64$s0="%i0";
65$s1="%i1";
66$s2="%i2";
67$s3="%i3";
68$tbl="%i4";
69$key="%i5";
70$rounds="%i7"; # aliases with return address, which is off-loaded to stack
71
# Append one ".long" directive per argument to the global $code buffer.
# Each 32-bit value is written twice on the same line, so every table entry
# occupies 8 bytes in the generated assembly.
#
# Arguments: a list of integers; processing stops at the first undefined
# element, exactly as the original shift-based loop did.
# Returns:   nothing useful; the result is the side effect on $code.
#
# The sub deliberately has no prototype: it takes a variable-length list,
# and the former empty prototype "()" only worked because every caller
# used the prototype-bypassing &name(...) form.
sub _data_word
{
    for my $word (@_) {
        last unless defined $word;
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}
76
# 64-bit builds only: declare %g2 and %g3 as scratch registers; the round
# code uses them as $acc11 and $acc13.
77$code.=<<___ if ($bits==64);
78.register %g2,#scratch
79.register %g3,#scratch
80___
# Open the text section and place the AES_Te label on a 256-byte boundary.
81$code.=<<___;
82.section ".text",#alloc,#execinstr
83
84.align 256
85AES_Te:
86___
# The &_data_word call that follows supplies the table body: 256 words, each
# written twice by _data_word, i.e. 8 bytes per entry and 2048 bytes in all.
87&_data_word(
88 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
89 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
90 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
91 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
92 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
93 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
94 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
95 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
96 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
97 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
98 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
99 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
100 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
101 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
102 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
103 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
104 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
105 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
106 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
107 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
108 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
109 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
110 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
111 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
112 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
113 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
114 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
115 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
116 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
117 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
118 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
119 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
120 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
121 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
122 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
123 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
124 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
125 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
126 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
127 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
128 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
129 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
130 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
131 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
132 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
133 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
134 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
135 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
136 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
137 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
138 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
139 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
140 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
141 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
142 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
143 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
144 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
145 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
146 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
147 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
148 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
149 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
150 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
151 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# Remainder of AES_Te plus the two encryption routines, emitted as a single
# heredoc:
#  - 256 single-byte entries placed directly after the 2048-byte word table;
#    the last round reads them with ldub from table+2048.
#  - _sparcv9_AES_encrypt, the internal core.  It opens its own register
#    window, so the caller's %o0-%o3 (state words), %o4 (table address) and
#    %o5 (key schedule) are seen as $s0-$s3, $tbl and $key.  The round count
#    is read from [$key+240] and halved; each pass of .Lenc_loop advances
#    $key by 32 bytes, computing $t0-$t3 from $s0-$s3 and then $s0-$s3 from
#    $t0-$t3.  Table entries are fetched with 64-bit ldx at byte*8 offsets
#    (mask 2040); because every word is stored twice, srlx by 8/16/24 leaves
#    a right-rotated copy of the word in the low 32 bits.  $rounds shares
#    %i7 with the return address, which is therefore stored on the stack at
#    entry and reloaded before ret; in .Lenc_last the same register holds
#    the address of the byte table.
#  - AES_encrypt, the public entry point.  It loads the 16-byte input as
#    four words (byte by byte through .Lunaligned_enc when the input or
#    output pointer is not 4-byte aligned), forms the address of AES_Te
#    relative to the program counter, calls the core and stores the four
#    result words.
152$code.=<<___;
153 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
154 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
155 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
156 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
157 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
158 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
159 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
160 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
161 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
162 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
163 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
164 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
165 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
166 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
167 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
168 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
169 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
170 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
171 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
172 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
173 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
174 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
175 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
176 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
177 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
178 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
179 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
180 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
181 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
182 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
183 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
184 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
185.type AES_Te,#object
186.size AES_Te,(.-AES_Te)
187
188.align 64
189.skip 16
190_sparcv9_AES_encrypt:
191 save %sp,-$frame-$locals,%sp
192 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
193 ld [$key+240],$rounds
194 ld [$key+0],$t0
195 ld [$key+4],$t1 !
196 ld [$key+8],$t2
197 srl $rounds,1,$rounds
198 xor $t0,$s0,$s0
199 ld [$key+12],$t3
200 srl $s0,21,$acc0
201 xor $t1,$s1,$s1
202 ld [$key+16],$t0
203 srl $s1,13,$acc1 !
204 xor $t2,$s2,$s2
205 ld [$key+20],$t1
206 xor $t3,$s3,$s3
207 ld [$key+24],$t2
208 and $acc0,2040,$acc0
209 ld [$key+28],$t3
210 nop
211.Lenc_loop:
212 srl $s2,5,$acc2 !
213 and $acc1,2040,$acc1
214 ldx [$tbl+$acc0],$acc0
215 sll $s3,3,$acc3
216 and $acc2,2040,$acc2
217 ldx [$tbl+$acc1],$acc1
218 srl $s1,21,$acc4
219 and $acc3,2040,$acc3
220 ldx [$tbl+$acc2],$acc2 !
221 srl $s2,13,$acc5
222 and $acc4,2040,$acc4
223 ldx [$tbl+$acc3],$acc3
224 srl $s3,5,$acc6
225 and $acc5,2040,$acc5
226 ldx [$tbl+$acc4],$acc4
227 fmovs %f0,%f0
228 sll $s0,3,$acc7 !
229 and $acc6,2040,$acc6
230 ldx [$tbl+$acc5],$acc5
231 srl $s2,21,$acc8
232 and $acc7,2040,$acc7
233 ldx [$tbl+$acc6],$acc6
234 srl $s3,13,$acc9
235 and $acc8,2040,$acc8
236 ldx [$tbl+$acc7],$acc7 !
237 srl $s0,5,$acc10
238 and $acc9,2040,$acc9
239 ldx [$tbl+$acc8],$acc8
240 sll $s1,3,$acc11
241 and $acc10,2040,$acc10
242 ldx [$tbl+$acc9],$acc9
243 fmovs %f0,%f0
244 srl $s3,21,$acc12 !
245 and $acc11,2040,$acc11
246 ldx [$tbl+$acc10],$acc10
247 srl $s0,13,$acc13
248 and $acc12,2040,$acc12
249 ldx [$tbl+$acc11],$acc11
250 srl $s1,5,$acc14
251 and $acc13,2040,$acc13
252 ldx [$tbl+$acc12],$acc12 !
253 sll $s2,3,$acc15
254 and $acc14,2040,$acc14
255 ldx [$tbl+$acc13],$acc13
256 and $acc15,2040,$acc15
257 add $key,32,$key
258 ldx [$tbl+$acc14],$acc14
259 fmovs %f0,%f0
260 subcc $rounds,1,$rounds !
261 ldx [$tbl+$acc15],$acc15
262 bz,a,pn %icc,.Lenc_last
263 add $tbl,2048,$rounds
264
265 srlx $acc1,8,$acc1
266 xor $acc0,$t0,$t0
267 ld [$key+0],$s0
268 fmovs %f0,%f0
269 srlx $acc2,16,$acc2 !
270 xor $acc1,$t0,$t0
271 ld [$key+4],$s1
272 srlx $acc3,24,$acc3
273 xor $acc2,$t0,$t0
274 ld [$key+8],$s2
275 srlx $acc5,8,$acc5
276 xor $acc3,$t0,$t0
277 ld [$key+12],$s3 !
278 srlx $acc6,16,$acc6
279 xor $acc4,$t1,$t1
280 fmovs %f0,%f0
281 srlx $acc7,24,$acc7
282 xor $acc5,$t1,$t1
283 srlx $acc9,8,$acc9
284 xor $acc6,$t1,$t1
285 srlx $acc10,16,$acc10 !
286 xor $acc7,$t1,$t1
287 srlx $acc11,24,$acc11
288 xor $acc8,$t2,$t2
289 srlx $acc13,8,$acc13
290 xor $acc9,$t2,$t2
291 srlx $acc14,16,$acc14
292 xor $acc10,$t2,$t2
293 srlx $acc15,24,$acc15 !
294 xor $acc11,$t2,$t2
295 xor $acc12,$acc14,$acc14
296 xor $acc13,$t3,$t3
297 srl $t0,21,$acc0
298 xor $acc14,$t3,$t3
299 srl $t1,13,$acc1
300 xor $acc15,$t3,$t3
301
302 and $acc0,2040,$acc0 !
303 srl $t2,5,$acc2
304 and $acc1,2040,$acc1
305 ldx [$tbl+$acc0],$acc0
306 sll $t3,3,$acc3
307 and $acc2,2040,$acc2
308 ldx [$tbl+$acc1],$acc1
309 fmovs %f0,%f0
310 srl $t1,21,$acc4 !
311 and $acc3,2040,$acc3
312 ldx [$tbl+$acc2],$acc2
313 srl $t2,13,$acc5
314 and $acc4,2040,$acc4
315 ldx [$tbl+$acc3],$acc3
316 srl $t3,5,$acc6
317 and $acc5,2040,$acc5
318 ldx [$tbl+$acc4],$acc4 !
319 sll $t0,3,$acc7
320 and $acc6,2040,$acc6
321 ldx [$tbl+$acc5],$acc5
322 srl $t2,21,$acc8
323 and $acc7,2040,$acc7
324 ldx [$tbl+$acc6],$acc6
325 fmovs %f0,%f0
326 srl $t3,13,$acc9 !
327 and $acc8,2040,$acc8
328 ldx [$tbl+$acc7],$acc7
329 srl $t0,5,$acc10
330 and $acc9,2040,$acc9
331 ldx [$tbl+$acc8],$acc8
332 sll $t1,3,$acc11
333 and $acc10,2040,$acc10
334 ldx [$tbl+$acc9],$acc9 !
335 srl $t3,21,$acc12
336 and $acc11,2040,$acc11
337 ldx [$tbl+$acc10],$acc10
338 srl $t0,13,$acc13
339 and $acc12,2040,$acc12
340 ldx [$tbl+$acc11],$acc11
341 fmovs %f0,%f0
342 srl $t1,5,$acc14 !
343 and $acc13,2040,$acc13
344 ldx [$tbl+$acc12],$acc12
345 sll $t2,3,$acc15
346 and $acc14,2040,$acc14
347 ldx [$tbl+$acc13],$acc13
348 srlx $acc1,8,$acc1
349 and $acc15,2040,$acc15
350 ldx [$tbl+$acc14],$acc14 !
351
352 srlx $acc2,16,$acc2
353 xor $acc0,$s0,$s0
354 ldx [$tbl+$acc15],$acc15
355 srlx $acc3,24,$acc3
356 xor $acc1,$s0,$s0
357 ld [$key+16],$t0
358 fmovs %f0,%f0
359 srlx $acc5,8,$acc5 !
360 xor $acc2,$s0,$s0
361 ld [$key+20],$t1
362 srlx $acc6,16,$acc6
363 xor $acc3,$s0,$s0
364 ld [$key+24],$t2
365 srlx $acc7,24,$acc7
366 xor $acc4,$s1,$s1
367 ld [$key+28],$t3 !
368 srlx $acc9,8,$acc9
369 xor $acc5,$s1,$s1
370 ldx [$tbl+2048+0],%g0 ! prefetch te4
371 srlx $acc10,16,$acc10
372 xor $acc6,$s1,$s1
373 ldx [$tbl+2048+32],%g0 ! prefetch te4
374 srlx $acc11,24,$acc11
375 xor $acc7,$s1,$s1
376 ldx [$tbl+2048+64],%g0 ! prefetch te4
377 srlx $acc13,8,$acc13
378 xor $acc8,$s2,$s2
379 ldx [$tbl+2048+96],%g0 ! prefetch te4
380 srlx $acc14,16,$acc14 !
381 xor $acc9,$s2,$s2
382 ldx [$tbl+2048+128],%g0 ! prefetch te4
383 srlx $acc15,24,$acc15
384 xor $acc10,$s2,$s2
385 ldx [$tbl+2048+160],%g0 ! prefetch te4
386 srl $s0,21,$acc0
387 xor $acc11,$s2,$s2
388 ldx [$tbl+2048+192],%g0 ! prefetch te4
389 xor $acc12,$acc14,$acc14
390 xor $acc13,$s3,$s3
391 ldx [$tbl+2048+224],%g0 ! prefetch te4
392 srl $s1,13,$acc1 !
393 xor $acc14,$s3,$s3
394 xor $acc15,$s3,$s3
395 ba .Lenc_loop
396 and $acc0,2040,$acc0
397
398.align 32
399.Lenc_last:
400 srlx $acc1,8,$acc1 !
401 xor $acc0,$t0,$t0
402 ld [$key+0],$s0
403 srlx $acc2,16,$acc2
404 xor $acc1,$t0,$t0
405 ld [$key+4],$s1
406 srlx $acc3,24,$acc3
407 xor $acc2,$t0,$t0
408 ld [$key+8],$s2 !
409 srlx $acc5,8,$acc5
410 xor $acc3,$t0,$t0
411 ld [$key+12],$s3
412 srlx $acc6,16,$acc6
413 xor $acc4,$t1,$t1
414 srlx $acc7,24,$acc7
415 xor $acc5,$t1,$t1
416 srlx $acc9,8,$acc9 !
417 xor $acc6,$t1,$t1
418 srlx $acc10,16,$acc10
419 xor $acc7,$t1,$t1
420 srlx $acc11,24,$acc11
421 xor $acc8,$t2,$t2
422 srlx $acc13,8,$acc13
423 xor $acc9,$t2,$t2
424 srlx $acc14,16,$acc14 !
425 xor $acc10,$t2,$t2
426 srlx $acc15,24,$acc15
427 xor $acc11,$t2,$t2
428 xor $acc12,$acc14,$acc14
429 xor $acc13,$t3,$t3
430 srl $t0,24,$acc0
431 xor $acc14,$t3,$t3
432 srl $t1,16,$acc1 !
433 xor $acc15,$t3,$t3
434
435 srl $t2,8,$acc2
436 and $acc1,255,$acc1
437 ldub [$rounds+$acc0],$acc0
438 srl $t1,24,$acc4
439 and $acc2,255,$acc2
440 ldub [$rounds+$acc1],$acc1
441 srl $t2,16,$acc5 !
442 and $t3,255,$acc3
443 ldub [$rounds+$acc2],$acc2
444 ldub [$rounds+$acc3],$acc3
445 srl $t3,8,$acc6
446 and $acc5,255,$acc5
447 ldub [$rounds+$acc4],$acc4
448 fmovs %f0,%f0
449 srl $t2,24,$acc8 !
450 and $acc6,255,$acc6
451 ldub [$rounds+$acc5],$acc5
452 srl $t3,16,$acc9
453 and $t0,255,$acc7
454 ldub [$rounds+$acc6],$acc6
455 ldub [$rounds+$acc7],$acc7
456 fmovs %f0,%f0
457 srl $t0,8,$acc10 !
458 and $acc9,255,$acc9
459 ldub [$rounds+$acc8],$acc8
460 srl $t3,24,$acc12
461 and $acc10,255,$acc10
462 ldub [$rounds+$acc9],$acc9
463 srl $t0,16,$acc13
464 and $t1,255,$acc11
465 ldub [$rounds+$acc10],$acc10 !
466 srl $t1,8,$acc14
467 and $acc13,255,$acc13
468 ldub [$rounds+$acc11],$acc11
469 ldub [$rounds+$acc12],$acc12
470 and $acc14,255,$acc14
471 ldub [$rounds+$acc13],$acc13
472 and $t2,255,$acc15
473 ldub [$rounds+$acc14],$acc14 !
474
475 sll $acc0,24,$acc0
476 xor $acc3,$s0,$s0
477 ldub [$rounds+$acc15],$acc15
478 sll $acc1,16,$acc1
479 xor $acc0,$s0,$s0
480 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
481 fmovs %f0,%f0
482 sll $acc2,8,$acc2 !
483 xor $acc1,$s0,$s0
484 sll $acc4,24,$acc4
485 xor $acc2,$s0,$s0
486 sll $acc5,16,$acc5
487 xor $acc7,$s1,$s1
488 sll $acc6,8,$acc6
489 xor $acc4,$s1,$s1
490 sll $acc8,24,$acc8 !
491 xor $acc5,$s1,$s1
492 sll $acc9,16,$acc9
493 xor $acc11,$s2,$s2
494 sll $acc10,8,$acc10
495 xor $acc6,$s1,$s1
496 sll $acc12,24,$acc12
497 xor $acc8,$s2,$s2
498 sll $acc13,16,$acc13 !
499 xor $acc9,$s2,$s2
500 sll $acc14,8,$acc14
501 xor $acc10,$s2,$s2
502 xor $acc12,$acc14,$acc14
503 xor $acc13,$s3,$s3
504 xor $acc14,$s3,$s3
505 xor $acc15,$s3,$s3
506
507 ret
508 restore
509.type _sparcv9_AES_encrypt,#function
510.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
511
512.align 32
513.globl AES_encrypt
514AES_encrypt:
515 or %o0,%o1,%g1
516 andcc %g1,3,%g0
517 bnz,pn %xcc,.Lunaligned_enc
518 save %sp,-$frame,%sp
519
520 ld [%i0+0],%o0
521 ld [%i0+4],%o1
522 ld [%i0+8],%o2
523 ld [%i0+12],%o3
524
5251: call .+8
526 add %o7,AES_Te-1b,%o4
527 call _sparcv9_AES_encrypt
528 mov %i2,%o5
529
530 st %o0,[%i1+0]
531 st %o1,[%i1+4]
532 st %o2,[%i1+8]
533 st %o3,[%i1+12]
534
535 ret
536 restore
537
538.align 32
539.Lunaligned_enc:
540 ldub [%i0+0],%l0
541 ldub [%i0+1],%l1
542 ldub [%i0+2],%l2
543
544 sll %l0,24,%l0
545 ldub [%i0+3],%l3
546 sll %l1,16,%l1
547 ldub [%i0+4],%l4
548 sll %l2,8,%l2
549 or %l1,%l0,%l0
550 ldub [%i0+5],%l5
551 sll %l4,24,%l4
552 or %l3,%l2,%l2
553 ldub [%i0+6],%l6
554 sll %l5,16,%l5
555 or %l0,%l2,%o0
556 ldub [%i0+7],%l7
557
558 sll %l6,8,%l6
559 or %l5,%l4,%l4
560 ldub [%i0+8],%l0
561 or %l7,%l6,%l6
562 ldub [%i0+9],%l1
563 or %l4,%l6,%o1
564 ldub [%i0+10],%l2
565
566 sll %l0,24,%l0
567 ldub [%i0+11],%l3
568 sll %l1,16,%l1
569 ldub [%i0+12],%l4
570 sll %l2,8,%l2
571 or %l1,%l0,%l0
572 ldub [%i0+13],%l5
573 sll %l4,24,%l4
574 or %l3,%l2,%l2
575 ldub [%i0+14],%l6
576 sll %l5,16,%l5
577 or %l0,%l2,%o2
578 ldub [%i0+15],%l7
579
580 sll %l6,8,%l6
581 or %l5,%l4,%l4
582 or %l7,%l6,%l6
583 or %l4,%l6,%o3
584
5851: call .+8
586 add %o7,AES_Te-1b,%o4
587 call _sparcv9_AES_encrypt
588 mov %i2,%o5
589
590 srl %o0,24,%l0
591 srl %o0,16,%l1
592 stb %l0,[%i1+0]
593 srl %o0,8,%l2
594 stb %l1,[%i1+1]
595 stb %l2,[%i1+2]
596 srl %o1,24,%l4
597 stb %o0,[%i1+3]
598
599 srl %o1,16,%l5
600 stb %l4,[%i1+4]
601 srl %o1,8,%l6
602 stb %l5,[%i1+5]
603 stb %l6,[%i1+6]
604 srl %o2,24,%l0
605 stb %o1,[%i1+7]
606
607 srl %o2,16,%l1
608 stb %l0,[%i1+8]
609 srl %o2,8,%l2
610 stb %l1,[%i1+9]
611 stb %l2,[%i1+10]
612 srl %o3,24,%l4
613 stb %o2,[%i1+11]
614
615 srl %o3,16,%l5
616 stb %l4,[%i1+12]
617 srl %o3,8,%l6
618 stb %l5,[%i1+13]
619 stb %l6,[%i1+14]
620 stb %o3,[%i1+15]
621
622 ret
623 restore
624.type AES_encrypt,#function
625.size AES_encrypt,(.-AES_encrypt)
626
627___
628
# Start of the decryption table: place the AES_Td label on a 256-byte
# boundary.  The &_data_word call that follows supplies its 256 word
# entries, each written twice.
629$code.=<<___;
630.align 256
631AES_Td:
632___
633&_data_word(
634 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
635 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
636 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
637 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
638 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
639 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
640 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
641 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
642 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
643 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
644 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
645 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
646 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
647 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
648 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
649 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
650 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
651 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
652 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
653 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
654 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
655 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
656 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
657 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
658 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
659 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
660 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
661 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
662 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
663 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
664 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
665 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
666 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
667 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
668 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
669 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
670 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
671 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
672 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
673 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
674 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
675 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
676 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
677 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
678 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
679 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
680 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
681 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
682 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
683 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
684 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
685 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
686 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
687 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
688 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
689 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
690 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
691 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
692 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
693 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
694 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
695 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
696 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
697 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Append the remaining SPARC decryption material to $code:
#  - 256 single-byte table entries (32 rows of 8), closed by the
#    .type/.size directives for AES_Td.  The code below addresses them
#    at $tbl+2048 and its prefetch comments call them "td4".
#  - _sparcv9_AES_decrypt, the internal block routine.  It works on the
#    state in $s0..$s3 with the key schedule in $key and the table base
#    in $tbl, reads the round count from [$key+240] and halves it, so
#    each .Ldec_loop iteration covers two rounds.  On the final pass
#    $rounds is reused as the pointer to the byte table.
#  - AES_decrypt, the public entry point.  It uses word loads/stores
#    only when both %o0 and %o1 are 4-byte aligned; otherwise
#    .Lunaligned_dec assembles and stores the block a byte at a time.
# Everything between the heredoc markers is emitted verbatim, so it is
# deliberately left untouched.
698$code.=<<___;
699 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
700 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
701 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
702 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
703 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
704 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
705 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
706 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
707 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
708 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
709 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
710 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
711 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
712 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
713 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
714 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
715 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
716 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
717 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
718 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
719 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
720 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
721 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
722 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
723 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
724 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
725 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
726 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
727 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
728 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
729 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
730 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
731.type AES_Td,#object
732.size AES_Td,(.-AES_Td)
733
734.align 64
735.skip 16
736_sparcv9_AES_decrypt:
737 save %sp,-$frame-$locals,%sp
738 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
739 ld [$key+240],$rounds
740 ld [$key+0],$t0
741 ld [$key+4],$t1 !
742 ld [$key+8],$t2
743 ld [$key+12],$t3
744 srl $rounds,1,$rounds
745 xor $t0,$s0,$s0
746 ld [$key+16],$t0
747 xor $t1,$s1,$s1
748 ld [$key+20],$t1
749 srl $s0,21,$acc0 !
750 xor $t2,$s2,$s2
751 ld [$key+24],$t2
752 xor $t3,$s3,$s3
753 and $acc0,2040,$acc0
754 ld [$key+28],$t3
755 srl $s3,13,$acc1
756 nop
757.Ldec_loop:
758 srl $s2,5,$acc2 !
759 and $acc1,2040,$acc1
760 ldx [$tbl+$acc0],$acc0
761 sll $s1,3,$acc3
762 and $acc2,2040,$acc2
763 ldx [$tbl+$acc1],$acc1
764 srl $s1,21,$acc4
765 and $acc3,2040,$acc3
766 ldx [$tbl+$acc2],$acc2 !
767 srl $s0,13,$acc5
768 and $acc4,2040,$acc4
769 ldx [$tbl+$acc3],$acc3
770 srl $s3,5,$acc6
771 and $acc5,2040,$acc5
772 ldx [$tbl+$acc4],$acc4
773 fmovs %f0,%f0
774 sll $s2,3,$acc7 !
775 and $acc6,2040,$acc6
776 ldx [$tbl+$acc5],$acc5
777 srl $s2,21,$acc8
778 and $acc7,2040,$acc7
779 ldx [$tbl+$acc6],$acc6
780 srl $s1,13,$acc9
781 and $acc8,2040,$acc8
782 ldx [$tbl+$acc7],$acc7 !
783 srl $s0,5,$acc10
784 and $acc9,2040,$acc9
785 ldx [$tbl+$acc8],$acc8
786 sll $s3,3,$acc11
787 and $acc10,2040,$acc10
788 ldx [$tbl+$acc9],$acc9
789 fmovs %f0,%f0
790 srl $s3,21,$acc12 !
791 and $acc11,2040,$acc11
792 ldx [$tbl+$acc10],$acc10
793 srl $s2,13,$acc13
794 and $acc12,2040,$acc12
795 ldx [$tbl+$acc11],$acc11
796 srl $s1,5,$acc14
797 and $acc13,2040,$acc13
798 ldx [$tbl+$acc12],$acc12 !
799 sll $s0,3,$acc15
800 and $acc14,2040,$acc14
801 ldx [$tbl+$acc13],$acc13
802 and $acc15,2040,$acc15
803 add $key,32,$key
804 ldx [$tbl+$acc14],$acc14
805 fmovs %f0,%f0
806 subcc $rounds,1,$rounds !
807 ldx [$tbl+$acc15],$acc15
808 bz,a,pn %icc,.Ldec_last
809 add $tbl,2048,$rounds
810
811 srlx $acc1,8,$acc1
812 xor $acc0,$t0,$t0
813 ld [$key+0],$s0
814 fmovs %f0,%f0
815 srlx $acc2,16,$acc2 !
816 xor $acc1,$t0,$t0
817 ld [$key+4],$s1
818 srlx $acc3,24,$acc3
819 xor $acc2,$t0,$t0
820 ld [$key+8],$s2
821 srlx $acc5,8,$acc5
822 xor $acc3,$t0,$t0
823 ld [$key+12],$s3 !
824 srlx $acc6,16,$acc6
825 xor $acc4,$t1,$t1
826 fmovs %f0,%f0
827 srlx $acc7,24,$acc7
828 xor $acc5,$t1,$t1
829 srlx $acc9,8,$acc9
830 xor $acc6,$t1,$t1
831 srlx $acc10,16,$acc10 !
832 xor $acc7,$t1,$t1
833 srlx $acc11,24,$acc11
834 xor $acc8,$t2,$t2
835 srlx $acc13,8,$acc13
836 xor $acc9,$t2,$t2
837 srlx $acc14,16,$acc14
838 xor $acc10,$t2,$t2
839 srlx $acc15,24,$acc15 !
840 xor $acc11,$t2,$t2
841 xor $acc12,$acc14,$acc14
842 xor $acc13,$t3,$t3
843 srl $t0,21,$acc0
844 xor $acc14,$t3,$t3
845 xor $acc15,$t3,$t3
846 srl $t3,13,$acc1
847
848 and $acc0,2040,$acc0 !
849 srl $t2,5,$acc2
850 and $acc1,2040,$acc1
851 ldx [$tbl+$acc0],$acc0
852 sll $t1,3,$acc3
853 and $acc2,2040,$acc2
854 ldx [$tbl+$acc1],$acc1
855 fmovs %f0,%f0
856 srl $t1,21,$acc4 !
857 and $acc3,2040,$acc3
858 ldx [$tbl+$acc2],$acc2
859 srl $t0,13,$acc5
860 and $acc4,2040,$acc4
861 ldx [$tbl+$acc3],$acc3
862 srl $t3,5,$acc6
863 and $acc5,2040,$acc5
864 ldx [$tbl+$acc4],$acc4 !
865 sll $t2,3,$acc7
866 and $acc6,2040,$acc6
867 ldx [$tbl+$acc5],$acc5
868 srl $t2,21,$acc8
869 and $acc7,2040,$acc7
870 ldx [$tbl+$acc6],$acc6
871 fmovs %f0,%f0
872 srl $t1,13,$acc9 !
873 and $acc8,2040,$acc8
874 ldx [$tbl+$acc7],$acc7
875 srl $t0,5,$acc10
876 and $acc9,2040,$acc9
877 ldx [$tbl+$acc8],$acc8
878 sll $t3,3,$acc11
879 and $acc10,2040,$acc10
880 ldx [$tbl+$acc9],$acc9 !
881 srl $t3,21,$acc12
882 and $acc11,2040,$acc11
883 ldx [$tbl+$acc10],$acc10
884 srl $t2,13,$acc13
885 and $acc12,2040,$acc12
886 ldx [$tbl+$acc11],$acc11
887 fmovs %f0,%f0
888 srl $t1,5,$acc14 !
889 and $acc13,2040,$acc13
890 ldx [$tbl+$acc12],$acc12
891 sll $t0,3,$acc15
892 and $acc14,2040,$acc14
893 ldx [$tbl+$acc13],$acc13
894 srlx $acc1,8,$acc1
895 and $acc15,2040,$acc15
896 ldx [$tbl+$acc14],$acc14 !
897
898 srlx $acc2,16,$acc2
899 xor $acc0,$s0,$s0
900 ldx [$tbl+$acc15],$acc15
901 srlx $acc3,24,$acc3
902 xor $acc1,$s0,$s0
903 ld [$key+16],$t0
904 fmovs %f0,%f0
905 srlx $acc5,8,$acc5 !
906 xor $acc2,$s0,$s0
907 ld [$key+20],$t1
908 srlx $acc6,16,$acc6
909 xor $acc3,$s0,$s0
910 ld [$key+24],$t2
911 srlx $acc7,24,$acc7
912 xor $acc4,$s1,$s1
913 ld [$key+28],$t3 !
914 srlx $acc9,8,$acc9
915 xor $acc5,$s1,$s1
916 ldx [$tbl+2048+0],%g0 ! prefetch td4
917 srlx $acc10,16,$acc10
918 xor $acc6,$s1,$s1
919 ldx [$tbl+2048+32],%g0 ! prefetch td4
920 srlx $acc11,24,$acc11
921 xor $acc7,$s1,$s1
922 ldx [$tbl+2048+64],%g0 ! prefetch td4
923 srlx $acc13,8,$acc13
924 xor $acc8,$s2,$s2
925 ldx [$tbl+2048+96],%g0 ! prefetch td4
926 srlx $acc14,16,$acc14 !
927 xor $acc9,$s2,$s2
928 ldx [$tbl+2048+128],%g0 ! prefetch td4
929 srlx $acc15,24,$acc15
930 xor $acc10,$s2,$s2
931 ldx [$tbl+2048+160],%g0 ! prefetch td4
932 srl $s0,21,$acc0
933 xor $acc11,$s2,$s2
934 ldx [$tbl+2048+192],%g0 ! prefetch td4
935 xor $acc12,$acc14,$acc14
936 xor $acc13,$s3,$s3
937 ldx [$tbl+2048+224],%g0 ! prefetch td4
938 and $acc0,2040,$acc0 !
939 xor $acc14,$s3,$s3
940 xor $acc15,$s3,$s3
941 ba .Ldec_loop
942 srl $s3,13,$acc1
943
944.align 32
945.Ldec_last:
946 srlx $acc1,8,$acc1 !
947 xor $acc0,$t0,$t0
948 ld [$key+0],$s0
949 srlx $acc2,16,$acc2
950 xor $acc1,$t0,$t0
951 ld [$key+4],$s1
952 srlx $acc3,24,$acc3
953 xor $acc2,$t0,$t0
954 ld [$key+8],$s2 !
955 srlx $acc5,8,$acc5
956 xor $acc3,$t0,$t0
957 ld [$key+12],$s3
958 srlx $acc6,16,$acc6
959 xor $acc4,$t1,$t1
960 srlx $acc7,24,$acc7
961 xor $acc5,$t1,$t1
962 srlx $acc9,8,$acc9 !
963 xor $acc6,$t1,$t1
964 srlx $acc10,16,$acc10
965 xor $acc7,$t1,$t1
966 srlx $acc11,24,$acc11
967 xor $acc8,$t2,$t2
968 srlx $acc13,8,$acc13
969 xor $acc9,$t2,$t2
970 srlx $acc14,16,$acc14 !
971 xor $acc10,$t2,$t2
972 srlx $acc15,24,$acc15
973 xor $acc11,$t2,$t2
974 xor $acc12,$acc14,$acc14
975 xor $acc13,$t3,$t3
976 srl $t0,24,$acc0
977 xor $acc14,$t3,$t3
978 xor $acc15,$t3,$t3 !
979 srl $t3,16,$acc1
980
981 srl $t2,8,$acc2
982 and $acc1,255,$acc1
983 ldub [$rounds+$acc0],$acc0
984 srl $t1,24,$acc4
985 and $acc2,255,$acc2
986 ldub [$rounds+$acc1],$acc1
987 srl $t0,16,$acc5 !
988 and $t1,255,$acc3
989 ldub [$rounds+$acc2],$acc2
990 ldub [$rounds+$acc3],$acc3
991 srl $t3,8,$acc6
992 and $acc5,255,$acc5
993 ldub [$rounds+$acc4],$acc4
994 fmovs %f0,%f0
995 srl $t2,24,$acc8 !
996 and $acc6,255,$acc6
997 ldub [$rounds+$acc5],$acc5
998 srl $t1,16,$acc9
999 and $t2,255,$acc7
1000 ldub [$rounds+$acc6],$acc6
1001 ldub [$rounds+$acc7],$acc7
1002 fmovs %f0,%f0
1003 srl $t0,8,$acc10 !
1004 and $acc9,255,$acc9
1005 ldub [$rounds+$acc8],$acc8
1006 srl $t3,24,$acc12
1007 and $acc10,255,$acc10
1008 ldub [$rounds+$acc9],$acc9
1009 srl $t2,16,$acc13
1010 and $t3,255,$acc11
1011 ldub [$rounds+$acc10],$acc10 !
1012 srl $t1,8,$acc14
1013 and $acc13,255,$acc13
1014 ldub [$rounds+$acc11],$acc11
1015 ldub [$rounds+$acc12],$acc12
1016 and $acc14,255,$acc14
1017 ldub [$rounds+$acc13],$acc13
1018 and $t0,255,$acc15
1019 ldub [$rounds+$acc14],$acc14 !
1020
1021 sll $acc0,24,$acc0
1022 xor $acc3,$s0,$s0
1023 ldub [$rounds+$acc15],$acc15
1024 sll $acc1,16,$acc1
1025 xor $acc0,$s0,$s0
1026 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1027 fmovs %f0,%f0
1028 sll $acc2,8,$acc2 !
1029 xor $acc1,$s0,$s0
1030 sll $acc4,24,$acc4
1031 xor $acc2,$s0,$s0
1032 sll $acc5,16,$acc5
1033 xor $acc7,$s1,$s1
1034 sll $acc6,8,$acc6
1035 xor $acc4,$s1,$s1
1036 sll $acc8,24,$acc8 !
1037 xor $acc5,$s1,$s1
1038 sll $acc9,16,$acc9
1039 xor $acc11,$s2,$s2
1040 sll $acc10,8,$acc10
1041 xor $acc6,$s1,$s1
1042 sll $acc12,24,$acc12
1043 xor $acc8,$s2,$s2
1044 sll $acc13,16,$acc13 !
1045 xor $acc9,$s2,$s2
1046 sll $acc14,8,$acc14
1047 xor $acc10,$s2,$s2
1048 xor $acc12,$acc14,$acc14
1049 xor $acc13,$s3,$s3
1050 xor $acc14,$s3,$s3
1051 xor $acc15,$s3,$s3
1052
1053 ret
1054 restore
1055.type _sparcv9_AES_decrypt,#function
1056.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1057
1058.align 32
1059.globl AES_decrypt
1060AES_decrypt:
1061 or %o0,%o1,%g1
1062 andcc %g1,3,%g0
1063 bnz,pn %xcc,.Lunaligned_dec
1064 save %sp,-$frame,%sp
1065
1066 ld [%i0+0],%o0
1067 ld [%i0+4],%o1
1068 ld [%i0+8],%o2
1069 ld [%i0+12],%o3
1070
10711: call .+8
1072 add %o7,AES_Td-1b,%o4
1073 call _sparcv9_AES_decrypt
1074 mov %i2,%o5
1075
1076 st %o0,[%i1+0]
1077 st %o1,[%i1+4]
1078 st %o2,[%i1+8]
1079 st %o3,[%i1+12]
1080
1081 ret
1082 restore
1083
1084.align 32
1085.Lunaligned_dec:
1086 ldub [%i0+0],%l0
1087 ldub [%i0+1],%l1
1088 ldub [%i0+2],%l2
1089
1090 sll %l0,24,%l0
1091 ldub [%i0+3],%l3
1092 sll %l1,16,%l1
1093 ldub [%i0+4],%l4
1094 sll %l2,8,%l2
1095 or %l1,%l0,%l0
1096 ldub [%i0+5],%l5
1097 sll %l4,24,%l4
1098 or %l3,%l2,%l2
1099 ldub [%i0+6],%l6
1100 sll %l5,16,%l5
1101 or %l0,%l2,%o0
1102 ldub [%i0+7],%l7
1103
1104 sll %l6,8,%l6
1105 or %l5,%l4,%l4
1106 ldub [%i0+8],%l0
1107 or %l7,%l6,%l6
1108 ldub [%i0+9],%l1
1109 or %l4,%l6,%o1
1110 ldub [%i0+10],%l2
1111
1112 sll %l0,24,%l0
1113 ldub [%i0+11],%l3
1114 sll %l1,16,%l1
1115 ldub [%i0+12],%l4
1116 sll %l2,8,%l2
1117 or %l1,%l0,%l0
1118 ldub [%i0+13],%l5
1119 sll %l4,24,%l4
1120 or %l3,%l2,%l2
1121 ldub [%i0+14],%l6
1122 sll %l5,16,%l5
1123 or %l0,%l2,%o2
1124 ldub [%i0+15],%l7
1125
1126 sll %l6,8,%l6
1127 or %l5,%l4,%l4
1128 or %l7,%l6,%l6
1129 or %l4,%l6,%o3
1130
11311: call .+8
1132 add %o7,AES_Td-1b,%o4
1133 call _sparcv9_AES_decrypt
1134 mov %i2,%o5
1135
1136 srl %o0,24,%l0
1137 srl %o0,16,%l1
1138 stb %l0,[%i1+0]
1139 srl %o0,8,%l2
1140 stb %l1,[%i1+1]
1141 stb %l2,[%i1+2]
1142 srl %o1,24,%l4
1143 stb %o0,[%i1+3]
1144
1145 srl %o1,16,%l5
1146 stb %l4,[%i1+4]
1147 srl %o1,8,%l6
1148 stb %l5,[%i1+5]
1149 stb %l6,[%i1+6]
1150 srl %o2,24,%l0
1151 stb %o1,[%i1+7]
1152
1153 srl %o2,16,%l1
1154 stb %l0,[%i1+8]
1155 srl %o2,8,%l2
1156 stb %l1,[%i1+9]
1157 stb %l2,[%i1+10]
1158 srl %o3,24,%l4
1159 stb %o2,[%i1+11]
1160
1161 srl %o3,16,%l5
1162 stb %l4,[%i1+12]
1163 srl %o3,8,%l6
1164 stb %l5,[%i1+13]
1165 stb %l6,[%i1+14]
1166 stb %o3,[%i1+15]
1167
1168 ret
1169 restore
1170.type AES_decrypt,#function
1171.size AES_decrypt,(.-AES_decrypt)
1172___
1173
1174# fmovs instructions, which stand in for FP nops, were originally added
1175# to meet specific instruction alignment requirements and maximize ILP.
1176# As UltraSPARC T1, a.k.a. Niagara, has a shared FPU, FP nops can have
1177# an undesired effect, so just omit them and sacrifice a fraction of a
1178# percent in performance...
# Remove every fmovs filler, from the mnemonic to the end of its line,
# from the generated assembly.  /m lets $ match at each line end and /g
# repeats the edit for every occurrence.  The replacement is a literal
# empty string, so no /e (evaluate-as-code) modifier is used: evaluating
# an empty expression only risks an "uninitialized value" warning.
$code =~ s/fmovs.*$//gm;

# Hand the finished assembly text to STDOUT.
print $code;
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl
deleted file mode 100755
index f616f1751f..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl
+++ /dev/null
@@ -1,1579 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.2.
10#
11# aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
12# Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
13# [you'll notice a lot of resemblance], such as compressed S-boxes
14# in little-endian byte order, prefetch of these tables in CBC mode,
15# as well as avoiding L1 cache aliasing between stack frame and key
16# schedule and already mentioned tables, compressed Td4...
17#
18# Performance in number of cycles per processed byte for 128-bit key:
19#
20# ECB CBC encrypt
21# AMD64 13.7 13.0(*)
22# EM64T 20.2 18.6(*)
23#
24# (*) CBC benchmarks are better than ECB thanks to custom ABI used
25# by the private block encryption function.
26
$verticalspin=1;	# unlike 32-bit version $verticalspin performs
			# ~15% better on both AMD and Intel cores

# First command-line argument: passed through to the translator below.
$output=shift;

# Everything this script prints goes through the perlasm translator.
# Fail loudly if the pipe cannot be set up: with an unchecked open a
# failure would leave STDOUT unusable and silently produce no output.
open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output"
	or die "can't run ../perlasm/x86_64-xlate.pl: $!";

# Accumulator for the generated assembly text.
$code=".text\n";

# Register allocation shared by all code generators in this file.
$s0="%eax";		# AES state words 0..3
$s1="%ebx";
$s2="%ecx";
$s3="%edx";
$acc0="%esi";		# scratch / table indices
$acc1="%edi";
$acc2="%ebp";
$inp="%r8";		# input block pointer
$out="%r9";		# output block pointer
$t0="%r10d";		# temporaries holding the next state
$t1="%r11d";
$t2="%r12d";
$rnds="%r13d";		# remaining-rounds counter
$sbox="%r14";		# base of the lookup table in use
$key="%r15";		# current position in the key schedule
49
# hi(REG) - return the high-byte alias of a legacy register operand:
# "%eax"/"%rax" .. "%edx"/"%rdx" become "%ah" .. "%dh".  Any operand that
# does not match is returned unchanged.
#
# No prototype is declared: the sub takes one argument, and the former
# empty "()" prototype only worked because every caller used the &hi(...)
# form, which bypasses prototypes.  The capture is referenced as ${1}
# rather than the deprecated \1 on the replacement side.
sub hi
{
    my $r = shift;
    $r =~ s/%[er]([a-d])x/%${1}h/;
    return $r;
}
# lo(REG) - return the low-byte alias of a register operand:
#   %eax/%rax .. %edx/%rdx  ->  %al .. %dl
#   %esi/%rsi, %edi/%rdi    ->  %sil, %dil
#   %rN or %rNd             ->  %rNb
# Operands matching none of these (for example "%ebp") are returned
# unchanged.
#
# No prototype is declared: the sub takes one argument, and the former
# empty "()" prototype only worked because callers used &lo(...).  The
# captures are referenced as ${1} rather than the deprecated \1.
sub lo
{
    my $r = shift;
    $r =~ s/%[er]([a-d])x/%${1}l/;
    $r =~ s/%[er]([sd]i)/%${1}l/;
    $r =~ s/%(r[0-9]+)[d]?/%${1}b/;
    return $r;
}
# _data_word(LIST) - append one ".long" line per value to $code, with the
# value written twice on that line (so each entry occupies 8 bytes).
# Processing stops at the first undefined value, as it always has.
#
# No prototype is declared: the sub consumes its argument list, and the
# former empty "()" prototype only worked for &_data_word(...) calls.
sub _data_word
{
    for my $word (@_) {
        last unless defined $word;
        $code .= sprintf ".long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}
# data_word(LIST) - append a single ".long" line holding all values of
# LIST as comma-separated 32-bit hex constants to $code.
#
# An empty LIST now emits nothing; previously it produced a spurious
# ".long 0x00000000".  As before, an undefined value among the leading
# elements ends the list early (the last element is still written).
#
# No prototype is declared: the sub consumes its argument list, and the
# former empty "()" prototype only worked for &data_word(...) calls.
sub data_word
{
    return unless @_;

    my @words = @_;
    my $last  = pop @words;
    my $line  = ".long\t";
    for my $word (@words) {
        last unless defined $word;
        $line .= sprintf "0x%08x,", $word;
    }
    $code .= $line . sprintf("0x%08x\n", $last);
    return;
}
65
# data_byte(LIST) - append a single ".byte" line holding all values of
# LIST as comma-separated hex constants to $code.  Each value is reduced
# to its low 8 bits.
#
# An empty LIST now emits nothing; previously it produced a spurious
# ".byte 0x00".  As before, an undefined value among the leading elements
# ends the list early (the last element is still written).
#
# No prototype is declared: the sub consumes its argument list, and the
# former empty "()" prototype only worked for &data_byte(...) calls.
sub data_byte
{
    return unless @_;

    my @bytes = @_;
    my $last  = pop @bytes;
    my $line  = ".byte\t";
    for my $byte (@bytes) {
        last unless defined $byte;
        $line .= sprintf "0x%02x,", $byte & 0xff;
    }
    $code .= $line . sprintf("0x%02x\n", $last & 0xff);
    return;
}
73
# encvert() - append one inner encryption round covering all four state
# words, scheduled "vertically": look-ups that feed different output
# words are interleaved in groups of three.
#
# Each state byte indexes an 8-byte entry of the table at $sbox; the
# 0/3/2/1 byte displacement picks where inside that entry the 32-bit load
# starts.  The generated code advances $key by 16, loads the four words
# found there into $s0..$s3 and xors the accumulated results into them.
# $t0..$t2, $acc0..$acc2 and %r8d are scratch; since %r8 is also $inp,
# the input pointer does not survive.
74sub encvert()
75{ my $t3="%r8d"; # zaps $inp!
76
77$code.=<<___;
78 # favor 3-way issue Opteron pipeline...
79 movzb `&lo("$s0")`,$acc0
80 movzb `&lo("$s1")`,$acc1
81 movzb `&lo("$s2")`,$acc2
82 mov 0($sbox,$acc0,8),$t0
83 mov 0($sbox,$acc1,8),$t1
84 mov 0($sbox,$acc2,8),$t2
85
86 movzb `&hi("$s1")`,$acc0
87 movzb `&hi("$s2")`,$acc1
88 movzb `&lo("$s3")`,$acc2
89 xor 3($sbox,$acc0,8),$t0
90 xor 3($sbox,$acc1,8),$t1
91 mov 0($sbox,$acc2,8),$t3
92
93 movzb `&hi("$s3")`,$acc0
94 shr \$16,$s2
95 movzb `&hi("$s0")`,$acc2
96 xor 3($sbox,$acc0,8),$t2
97 shr \$16,$s3
98 xor 3($sbox,$acc2,8),$t3
99
100 shr \$16,$s1
101 lea 16($key),$key
102 shr \$16,$s0
103
104 movzb `&lo("$s2")`,$acc0
105 movzb `&lo("$s3")`,$acc1
106 movzb `&lo("$s0")`,$acc2
107 xor 2($sbox,$acc0,8),$t0
108 xor 2($sbox,$acc1,8),$t1
109 xor 2($sbox,$acc2,8),$t2
110
111 movzb `&hi("$s3")`,$acc0
112 movzb `&hi("$s0")`,$acc1
113 movzb `&lo("$s1")`,$acc2
114 xor 1($sbox,$acc0,8),$t0
115 xor 1($sbox,$acc1,8),$t1
116 xor 2($sbox,$acc2,8),$t3
117
118 mov 12($key),$s3
119 movzb `&hi("$s1")`,$acc1
120 movzb `&hi("$s2")`,$acc2
121 mov 0($key),$s0
122 xor 1($sbox,$acc1,8),$t2
123 xor 1($sbox,$acc2,8),$t3
124
125 mov 4($key),$s1
126 mov 8($key),$s2
127 xor $t0,$s0
128 xor $t1,$s1
129 xor $t2,$s2
130 xor $t3,$s3
131___
132}
133
# enclastvert() - append the final encryption round for all four state
# words, scheduled "vertically" like encvert.
#
# Every table look-up is masked down to a single byte lane (0x000000ff,
# 0x0000ff00, 0x00ff0000 or 0xff000000) before it is merged into
# $t0..$t3.  The four words at 16($key) are then loaded into $s0..$s3
# and xored with the results; $key itself is not advanced.
# $acc0..$acc2 and %r8d are scratch; since %r8 is also $inp, the input
# pointer does not survive.
134sub enclastvert()
135{ my $t3="%r8d"; # zaps $inp!
136
137$code.=<<___;
138 movzb `&lo("$s0")`,$acc0
139 movzb `&lo("$s1")`,$acc1
140 movzb `&lo("$s2")`,$acc2
141 mov 2($sbox,$acc0,8),$t0
142 mov 2($sbox,$acc1,8),$t1
143 mov 2($sbox,$acc2,8),$t2
144
145 and \$0x000000ff,$t0
146 and \$0x000000ff,$t1
147 and \$0x000000ff,$t2
148
149 movzb `&lo("$s3")`,$acc0
150 movzb `&hi("$s1")`,$acc1
151 movzb `&hi("$s2")`,$acc2
152 mov 2($sbox,$acc0,8),$t3
153 mov 0($sbox,$acc1,8),$acc1 #$t0
154 mov 0($sbox,$acc2,8),$acc2 #$t1
155
156 and \$0x000000ff,$t3
157 and \$0x0000ff00,$acc1
158 and \$0x0000ff00,$acc2
159
160 xor $acc1,$t0
161 xor $acc2,$t1
162 shr \$16,$s2
163
164 movzb `&hi("$s3")`,$acc0
165 movzb `&hi("$s0")`,$acc1
166 shr \$16,$s3
167 mov 0($sbox,$acc0,8),$acc0 #$t2
168 mov 0($sbox,$acc1,8),$acc1 #$t3
169
170 and \$0x0000ff00,$acc0
171 and \$0x0000ff00,$acc1
172 shr \$16,$s1
173 xor $acc0,$t2
174 xor $acc1,$t3
175 shr \$16,$s0
176
177 movzb `&lo("$s2")`,$acc0
178 movzb `&lo("$s3")`,$acc1
179 movzb `&lo("$s0")`,$acc2
180 mov 0($sbox,$acc0,8),$acc0 #$t0
181 mov 0($sbox,$acc1,8),$acc1 #$t1
182 mov 0($sbox,$acc2,8),$acc2 #$t2
183
184 and \$0x00ff0000,$acc0
185 and \$0x00ff0000,$acc1
186 and \$0x00ff0000,$acc2
187
188 xor $acc0,$t0
189 xor $acc1,$t1
190 xor $acc2,$t2
191
192 movzb `&lo("$s1")`,$acc0
193 movzb `&hi("$s3")`,$acc1
194 movzb `&hi("$s0")`,$acc2
195 mov 0($sbox,$acc0,8),$acc0 #$t3
196 mov 2($sbox,$acc1,8),$acc1 #$t0
197 mov 2($sbox,$acc2,8),$acc2 #$t1
198
199 and \$0x00ff0000,$acc0
200 and \$0xff000000,$acc1
201 and \$0xff000000,$acc2
202
203 xor $acc0,$t3
204 xor $acc1,$t0
205 xor $acc2,$t1
206
207 movzb `&hi("$s1")`,$acc0
208 movzb `&hi("$s2")`,$acc1
209 mov 16+12($key),$s3
210 mov 2($sbox,$acc0,8),$acc0 #$t2
211 mov 2($sbox,$acc1,8),$acc1 #$t3
212 mov 16+0($key),$s0
213
214 and \$0xff000000,$acc0
215 and \$0xff000000,$acc1
216
217 xor $acc0,$t2
218 xor $acc1,$t3
219
220 mov 16+4($key),$s1
221 mov 16+8($key),$s2
222 xor $t0,$s0
223 xor $t1,$s1
224 xor $t2,$s2
225 xor $t3,$s3
226___
227}
228
# encstep(I, S0, S1, S2, S3) - "horizontal" counterpart of encvert, used
# only when $verticalspin is false.  Appends the code that computes
# output word I (0..3) of one inner encryption round.
#
#   I        index of the word being produced
#   S0..S3   the state registers, rotated by the caller so that $s[0]
#            supplies the low byte, $s[1] the second byte, $s[2] the
#            third byte and $s[3] the top byte of the look-ups
#
# Words 0..2 are built in $t0..$t2.  Word 3 is built in place in $s[0],
# with $s[1..3] serving as scratch; afterwards $t0..$t2 are copied back
# into $s[1..3].  The round-key word at 4*I($key) is xored in, and the
# step for I==0 also advances $key by 16.
229sub encstep()
230{ my ($i,@s) = @_;
231 my $tmp0=$acc0;
232 my $tmp1=$acc1;
233 my $tmp2=$acc2;
234 my $out=($t0,$t1,$t2,$s[0])[$i];
235
 # For the last word the other state registers are no longer needed as
 # inputs to later steps, so they are used as scratch directly.
236 if ($i==3) {
237 $tmp0=$s[1];
238 $tmp1=$s[2];
239 $tmp2=$s[3];
240 }
 # Byte 0 of $s[0] indexes the table at displacement 0.
241 $code.=" movzb ".&lo($s[0]).",$out\n";
242 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
243 $code.=" lea 16($key),$key\n" if ($i==0);
244
 # Byte 1 of $s[1] indexes the table at displacement 3.
245 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
246 $code.=" mov 0($sbox,$out,8),$out\n";
247
248 $code.=" shr \$16,$tmp1\n";
249 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
250 $code.=" xor 3($sbox,$tmp0,8),$out\n";
251
 # Byte 2 of $s[2] and byte 3 of $s[3] use displacements 2 and 1.
252 $code.=" movzb ".&lo($tmp1).",$tmp1\n";
253 $code.=" shr \$24,$tmp2\n";
254 $code.=" xor 4*$i($key),$out\n";
255
256 $code.=" xor 2($sbox,$tmp1,8),$out\n";
257 $code.=" xor 1($sbox,$tmp2,8),$out\n";
258
 # After the last word, move words 0..2 back into the state registers.
259 $code.=" mov $t0,$s[1]\n" if ($i==3);
260 $code.=" mov $t1,$s[2]\n" if ($i==3);
261 $code.=" mov $t2,$s[3]\n" if ($i==3);
262 $code.="\n";
263}
264
# enclast(I, S0, S1, S2, S3) - "horizontal" counterpart of enclastvert,
# used only when $verticalspin is false.  Appends the code that computes
# output word I (0..3) of the final encryption round.
#
# Arguments and register usage are the same as for encstep: words 0..2
# go to $t0..$t2, word 3 is built in $s[0] with $s[1..3] as scratch, and
# $t0..$t2 are copied back into $s[1..3] at the end.  Each look-up is
# masked to one byte lane before being merged.  No round key is applied
# and $key is not advanced here.
265sub enclast()
266{ my ($i,@s)=@_;
267 my $tmp0=$acc0;
268 my $tmp1=$acc1;
269 my $tmp2=$acc2;
270 my $out=($t0,$t1,$t2,$s[0])[$i];
271
 # For the last word the other state registers double as scratch.
272 if ($i==3) {
273 $tmp0=$s[1];
274 $tmp1=$s[2];
275 $tmp2=$s[3];
276 }
277 $code.=" movzb ".&lo($s[0]).",$out\n";
278 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
279
280 $code.=" mov 2($sbox,$out,8),$out\n";
281 $code.=" shr \$16,$tmp1\n";
282 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
283
284 $code.=" and \$0x000000ff,$out\n";
285 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
286 $code.=" movzb ".&lo($tmp1).",$tmp1\n";
287 $code.=" shr \$24,$tmp2\n";
288
289 $code.=" mov 0($sbox,$tmp0,8),$tmp0\n";
290 $code.=" mov 0($sbox,$tmp1,8),$tmp1\n";
291 $code.=" mov 2($sbox,$tmp2,8),$tmp2\n";
292
 # Keep one byte lane from each look-up: bits 8-15, 16-23 and 24-31.
293 $code.=" and \$0x0000ff00,$tmp0\n";
294 $code.=" and \$0x00ff0000,$tmp1\n";
295 $code.=" and \$0xff000000,$tmp2\n";
296
 # Merge the lanes; for the last word the copies of $t0..$t2 back into
 # the state registers are interleaved with the xors.
297 $code.=" xor $tmp0,$out\n";
298 $code.=" mov $t0,$s[1]\n" if ($i==3);
299 $code.=" xor $tmp1,$out\n";
300 $code.=" mov $t1,$s[2]\n" if ($i==3);
301 $code.=" xor $tmp2,$out\n";
302 $code.=" mov $t2,$s[3]\n" if ($i==3);
303 $code.="\n";
304}
305
# _x86_64_AES_encrypt - internal block-encryption routine.  It is declared
# "abi-omnipotent": it expects the state in $s0..$s3, the key schedule
# pointer in $key and the table base in $sbox, and returns the result in
# $s0..$s3.
#
# The generated code xors in the first round key, loads the round count
# from 240($key), runs .Lenc_loop rounds-1 times and then performs the
# final round.
306$code.=<<___;
307.type _x86_64_AES_encrypt,\@abi-omnipotent
308.align 16
309_x86_64_AES_encrypt:
310 xor 0($key),$s0 # xor with key
311 xor 4($key),$s1
312 xor 8($key),$s2
313 xor 12($key),$s3
314
315 mov 240($key),$rnds # load key->rounds
316 sub \$1,$rnds
317 jmp .Lenc_loop
318.align 16
319.Lenc_loop:
320___
 # Loop body: one inner round, either as a single interleaved block
 # or as four per-word steps with the state registers rotated.
321 if ($verticalspin) { &encvert(); }
322 else { &encstep(0,$s0,$s1,$s2,$s3);
323 &encstep(1,$s1,$s2,$s3,$s0);
324 &encstep(2,$s2,$s3,$s0,$s1);
325 &encstep(3,$s3,$s0,$s1,$s2);
326 }
327$code.=<<___;
328 sub \$1,$rnds
329 jnz .Lenc_loop
330___
 # Final round.  enclastvert applies the last round key itself; the
 # per-word enclast steps do not, so the xor is emitted separately.
331 if ($verticalspin) { &enclastvert(); }
332 else { &enclast(0,$s0,$s1,$s2,$s3);
333 &enclast(1,$s1,$s2,$s3,$s0);
334 &enclast(2,$s2,$s3,$s0,$s1);
335 &enclast(3,$s3,$s0,$s1,$s2);
336 $code.=<<___;
337 xor 16+0($key),$s0 # xor with key
338 xor 16+4($key),$s1
339 xor 16+8($key),$s2
340 xor 16+12($key),$s3
341___
342 }
343$code.=<<___;
344 .byte 0xf3,0xc3 # rep ret
345.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
346___
347
348# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public entry point.  The generated code saves %rbx, %rbp and %r12-%r15,
# moves the three arguments into $inp, $out and $key, points $sbox at
# AES_Te, loads the 16-byte block at $inp into $s0..$s3, calls
# _x86_64_AES_encrypt and stores $s0..$s3 to $out before restoring the
# saved registers.
349$code.=<<___;
350.globl AES_encrypt
351.type AES_encrypt,\@function,3
352.align 16
353AES_encrypt:
354 push %rbx
355 push %rbp
356 push %r12
357 push %r13
358 push %r14
359 push %r15
360
361 mov %rdx,$key
362 mov %rdi,$inp
363 mov %rsi,$out
364
365 .picmeup $sbox
366 lea AES_Te-.($sbox),$sbox
367
368 mov 0($inp),$s0
369 mov 4($inp),$s1
370 mov 8($inp),$s2
371 mov 12($inp),$s3
372
373 call _x86_64_AES_encrypt
374
375 mov $s0,0($out)
376 mov $s1,4($out)
377 mov $s2,8($out)
378 mov $s3,12($out)
379
380 pop %r15
381 pop %r14
382 pop %r13
383 pop %r12
384 pop %rbp
385 pop %rbx
386 ret
387.size AES_encrypt,.-AES_encrypt
388___
389
390#------------------------------------------------------------------#
391
# decvert() - append one inner decryption round covering all four state
# words, scheduled "vertically": look-ups that feed different output
# words are interleaved in groups of three.
#
# Each state byte indexes an 8-byte entry of the table at $sbox; the
# 0/3/2/1 byte displacement picks where inside that entry the 32-bit load
# starts.  Compared with encvert, the state words that supply the second
# and fourth bytes are swapped.  The generated code advances $key by 16,
# loads the four words found there into $s0..$s3 and xors the
# accumulated results into them.
# $t0..$t2, $acc0..$acc2 and %r8d are scratch; since %r8 is also $inp,
# the input pointer does not survive.
392sub decvert()
393{ my $t3="%r8d"; # zaps $inp!
394
395$code.=<<___;
396 # favor 3-way issue Opteron pipeline...
397 movzb `&lo("$s0")`,$acc0
398 movzb `&lo("$s1")`,$acc1
399 movzb `&lo("$s2")`,$acc2
400 mov 0($sbox,$acc0,8),$t0
401 mov 0($sbox,$acc1,8),$t1
402 mov 0($sbox,$acc2,8),$t2
403
404 movzb `&hi("$s3")`,$acc0
405 movzb `&hi("$s0")`,$acc1
406 movzb `&lo("$s3")`,$acc2
407 xor 3($sbox,$acc0,8),$t0
408 xor 3($sbox,$acc1,8),$t1
409 mov 0($sbox,$acc2,8),$t3
410
411 movzb `&hi("$s1")`,$acc0
412 shr \$16,$s0
413 movzb `&hi("$s2")`,$acc2
414 xor 3($sbox,$acc0,8),$t2
415 shr \$16,$s3
416 xor 3($sbox,$acc2,8),$t3
417
418 shr \$16,$s1
419 lea 16($key),$key
420 shr \$16,$s2
421
422 movzb `&lo("$s2")`,$acc0
423 movzb `&lo("$s3")`,$acc1
424 movzb `&lo("$s0")`,$acc2
425 xor 2($sbox,$acc0,8),$t0
426 xor 2($sbox,$acc1,8),$t1
427 xor 2($sbox,$acc2,8),$t2
428
429 movzb `&hi("$s1")`,$acc0
430 movzb `&hi("$s2")`,$acc1
431 movzb `&lo("$s1")`,$acc2
432 xor 1($sbox,$acc0,8),$t0
433 xor 1($sbox,$acc1,8),$t1
434 xor 2($sbox,$acc2,8),$t3
435
436 movzb `&hi("$s3")`,$acc0
437 mov 12($key),$s3
438 movzb `&hi("$s0")`,$acc2
439 xor 1($sbox,$acc0,8),$t2
440 mov 0($key),$s0
441 xor 1($sbox,$acc2,8),$t3
442
443 xor $t0,$s0
444 mov 4($key),$s1
445 mov 8($key),$s2
446 xor $t2,$s2
447 xor $t1,$s1
448 xor $t3,$s3
449___
450}
451
# declastvert() - append the final decryption round for all four state
# words, scheduled "vertically" like decvert.
#
# All look-ups are single-byte loads (movzb) from the byte table that
# starts 2048 bytes past $sbox; each loaded byte is shifted left by 0, 8,
# 16 or 24 bits into its lane and merged into $t0..$t3.  The four words
# at 16($key) are then loaded into $s0..$s3 and xored with the results;
# $key itself is not advanced.
# $acc0..$acc2 and %r8d are scratch; since %r8 is also $inp, the input
# pointer does not survive.
452sub declastvert()
453{ my $t3="%r8d"; # zaps $inp!
454
455$code.=<<___;
456 movzb `&lo("$s0")`,$acc0
457 movzb `&lo("$s1")`,$acc1
458 movzb `&lo("$s2")`,$acc2
459 movzb 2048($sbox,$acc0,1),$t0
460 movzb 2048($sbox,$acc1,1),$t1
461 movzb 2048($sbox,$acc2,1),$t2
462
463 movzb `&lo("$s3")`,$acc0
464 movzb `&hi("$s3")`,$acc1
465 movzb `&hi("$s0")`,$acc2
466 movzb 2048($sbox,$acc0,1),$t3
467 movzb 2048($sbox,$acc1,1),$acc1 #$t0
468 movzb 2048($sbox,$acc2,1),$acc2 #$t1
469
470 shl \$8,$acc1
471 shl \$8,$acc2
472
473 xor $acc1,$t0
474 xor $acc2,$t1
475 shr \$16,$s3
476
477 movzb `&hi("$s1")`,$acc0
478 movzb `&hi("$s2")`,$acc1
479 shr \$16,$s0
480 movzb 2048($sbox,$acc0,1),$acc0 #$t2
481 movzb 2048($sbox,$acc1,1),$acc1 #$t3
482
483 shl \$8,$acc0
484 shl \$8,$acc1
485 shr \$16,$s1
486 xor $acc0,$t2
487 xor $acc1,$t3
488 shr \$16,$s2
489
490 movzb `&lo("$s2")`,$acc0
491 movzb `&lo("$s3")`,$acc1
492 movzb `&lo("$s0")`,$acc2
493 movzb 2048($sbox,$acc0,1),$acc0 #$t0
494 movzb 2048($sbox,$acc1,1),$acc1 #$t1
495 movzb 2048($sbox,$acc2,1),$acc2 #$t2
496
497 shl \$16,$acc0
498 shl \$16,$acc1
499 shl \$16,$acc2
500
501 xor $acc0,$t0
502 xor $acc1,$t1
503 xor $acc2,$t2
504
505 movzb `&lo("$s1")`,$acc0
506 movzb `&hi("$s1")`,$acc1
507 movzb `&hi("$s2")`,$acc2
508 movzb 2048($sbox,$acc0,1),$acc0 #$t3
509 movzb 2048($sbox,$acc1,1),$acc1 #$t0
510 movzb 2048($sbox,$acc2,1),$acc2 #$t1
511
512 shl \$16,$acc0
513 shl \$24,$acc1
514 shl \$24,$acc2
515
516 xor $acc0,$t3
517 xor $acc1,$t0
518 xor $acc2,$t1
519
520 movzb `&hi("$s3")`,$acc0
521 movzb `&hi("$s0")`,$acc1
522 mov 16+12($key),$s3
523 movzb 2048($sbox,$acc0,1),$acc0 #$t2
524 movzb 2048($sbox,$acc1,1),$acc1 #$t3
525 mov 16+0($key),$s0
526
527 shl \$24,$acc0
528 shl \$24,$acc1
529
530 xor $acc0,$t2
531 xor $acc1,$t3
532
533 mov 16+4($key),$s1
534 mov 16+8($key),$s2
535 xor $t0,$s0
536 xor $t1,$s1
537 xor $t2,$s2
538 xor $t3,$s3
539___
540}
541
# decstep(I, S0, S1, S2, S3) - "horizontal" counterpart of decvert, used
# only when $verticalspin is false.  Appends the code that computes
# output word I (0..3) of one inner decryption round.
#
#   I        index of the word being produced
#   S0..S3   the state registers, ordered by the caller so that $s[0]
#            supplies the low byte, $s[1] the second byte, $s[2] the
#            third byte and $s[3] the top byte of the look-ups
#
# Words 0..2 are built in $t0..$t2.  Word 3 is built in place in $s[0],
# with $s[1..3] serving as scratch; afterwards $t2, $t1 and $t0 are
# copied back into $s[1], $s[2] and $s[3] respectively.  No round key is
# applied and $key is not advanced here.
542sub decstep()
543{ my ($i,@s) = @_;
544 my $tmp0=$acc0;
545 my $tmp1=$acc1;
546 my $tmp2=$acc2;
547 my $out=($t0,$t1,$t2,$s[0])[$i];
548
 # For the last word ($i==3) the state registers themselves are used
 # in place, so the copies into $out/$tmp1/$tmp2 are skipped.
549 $code.=" mov $s[0],$out\n" if ($i!=3);
550 $tmp1=$s[2] if ($i==3);
551 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
552 $code.=" and \$0xFF,$out\n";
553
554 $code.=" mov 0($sbox,$out,8),$out\n";
555 $code.=" shr \$16,$tmp1\n";
556 $tmp2=$s[3] if ($i==3);
557 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
558
559 $tmp0=$s[1] if ($i==3);
560 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
561 $code.=" and \$0xFF,$tmp1\n";
562 $code.=" shr \$24,$tmp2\n";
563
 # Bytes 1, 2 and 3 use table displacements 3, 2 and 1.
564 $code.=" xor 3($sbox,$tmp0,8),$out\n";
565 $code.=" xor 2($sbox,$tmp1,8),$out\n";
566 $code.=" xor 1($sbox,$tmp2,8),$out\n";
567
 # After the last word, move words 0..2 back into the state registers.
568 $code.=" mov $t2,$s[1]\n" if ($i==3);
569 $code.=" mov $t1,$s[2]\n" if ($i==3);
570 $code.=" mov $t0,$s[3]\n" if ($i==3);
571 $code.="\n";
572}
573
# declast(I, S0, S1, S2, S3) - "horizontal" counterpart of declastvert,
# used only when $verticalspin is false.  Appends the code that computes
# output word I (0..3) of the final decryption round.
#
# Arguments and register usage are the same as for decstep.  All four
# look-ups are single-byte loads from the byte table that starts 2048
# bytes past $sbox; the bytes are shifted left by 0, 8, 16 and 24 bits
# and merged.  No round key is applied and $key is not advanced here.
574sub declast()
575{ my ($i,@s)=@_;
576 my $tmp0=$acc0;
577 my $tmp1=$acc1;
578 my $tmp2=$acc2;
579 my $out=($t0,$t1,$t2,$s[0])[$i];
580
 # For the last word ($i==3) the state registers themselves are used
 # in place, so the copies into $out/$tmp1/$tmp2 are skipped.
581 $code.=" mov $s[0],$out\n" if ($i!=3);
582 $tmp1=$s[2] if ($i==3);
583 $code.=" mov $s[2],$tmp1\n" if ($i!=3);
584 $code.=" and \$0xFF,$out\n";
585
586 $code.=" movzb 2048($sbox,$out,1),$out\n";
587 $code.=" shr \$16,$tmp1\n";
588 $tmp2=$s[3] if ($i==3);
589 $code.=" mov $s[3],$tmp2\n" if ($i!=3);
590
591 $tmp0=$s[1] if ($i==3);
592 $code.=" movzb ".&hi($s[1]).",$tmp0\n";
593 $code.=" and \$0xFF,$tmp1\n";
594 $code.=" shr \$24,$tmp2\n";
595
596 $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n";
597 $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n";
598 $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n";
599
 # Position each looked-up byte in its lane.
600 $code.=" shl \$8,$tmp0\n";
601 $code.=" shl \$16,$tmp1\n";
602 $code.=" shl \$24,$tmp2\n";
603
 # Merge the lanes; for the last word the copies of $t2/$t1/$t0 back
 # into the state registers are interleaved with the xors.
604 $code.=" xor $tmp0,$out\n";
605 $code.=" mov $t2,$s[1]\n" if ($i==3);
606 $code.=" xor $tmp1,$out\n";
607 $code.=" mov $t1,$s[2]\n" if ($i==3);
608 $code.=" xor $tmp2,$out\n";
609 $code.=" mov $t0,$s[3]\n" if ($i==3);
610 $code.="\n";
611}
612
# _x86_64_AES_decrypt - internal block-decryption routine.  It is declared
# "abi-omnipotent": it expects the state in $s0..$s3, the key schedule
# pointer in $key and the table base in $sbox, and returns the result in
# $s0..$s3.
#
# The generated code xors in the first round key, loads the round count
# from 240($key), runs .Ldec_loop rounds-1 times and then performs the
# final round.
613$code.=<<___;
614.type _x86_64_AES_decrypt,\@abi-omnipotent
615.align 16
616_x86_64_AES_decrypt:
617 xor 0($key),$s0 # xor with key
618 xor 4($key),$s1
619 xor 8($key),$s2
620 xor 12($key),$s3
621
622 mov 240($key),$rnds # load key->rounds
623 sub \$1,$rnds
624 jmp .Ldec_loop
625.align 16
626.Ldec_loop:
627___
 # Loop body: one inner round.  decvert advances $key and applies the
 # round key itself; the per-word decstep calls do neither, so that
 # code is emitted separately after them.
628 if ($verticalspin) { &decvert(); }
629 else { &decstep(0,$s0,$s3,$s2,$s1);
630 &decstep(1,$s1,$s0,$s3,$s2);
631 &decstep(2,$s2,$s1,$s0,$s3);
632 &decstep(3,$s3,$s2,$s1,$s0);
633 $code.=<<___;
634 lea 16($key),$key
635 xor 0($key),$s0 # xor with key
636 xor 4($key),$s1
637 xor 8($key),$s2
638 xor 12($key),$s3
639___
640 }
641$code.=<<___;
642 sub \$1,$rnds
643 jnz .Ldec_loop
644___
 # Final round.  declastvert applies the last round key itself; the
 # per-word declast steps do not, so the xor is emitted separately.
645 if ($verticalspin) { &declastvert(); }
646 else { &declast(0,$s0,$s3,$s2,$s1);
647 &declast(1,$s1,$s0,$s3,$s2);
648 &declast(2,$s2,$s1,$s0,$s3);
649 &declast(3,$s3,$s2,$s1,$s0);
650 $code.=<<___;
651 xor 16+0($key),$s0 # xor with key
652 xor 16+4($key),$s1
653 xor 16+8($key),$s2
654 xor 16+12($key),$s3
655___
656 }
657$code.=<<___;
658 .byte 0xf3,0xc3 # rep ret
659.size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
660___
661
662# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
#
# Public entry point.  The generated code saves %rbx, %rbp and %r12-%r15,
# moves the three arguments into $inp, $out and $key and points $sbox at
# AES_Td.  Before the real work it touches eight locations 32 bytes
# apart, starting 2048 bytes past AES_Td, to prefetch the byte table
# ("Td4"); the values loaded into $s0..$s3 there are discarded.  It then
# loads the 16-byte block at $inp into $s0..$s3, calls
# _x86_64_AES_decrypt and stores $s0..$s3 to $out before restoring the
# saved registers.
663$code.=<<___;
664.globl AES_decrypt
665.type AES_decrypt,\@function,3
666.align 16
667AES_decrypt:
668 push %rbx
669 push %rbp
670 push %r12
671 push %r13
672 push %r14
673 push %r15
674
675 mov %rdx,$key
676 mov %rdi,$inp
677 mov %rsi,$out
678
679 .picmeup $sbox
680 lea AES_Td-.($sbox),$sbox
681
682 # prefetch Td4
683 lea 2048+128($sbox),$sbox;
684 mov 0-128($sbox),$s0
685 mov 32-128($sbox),$s1
686 mov 64-128($sbox),$s2
687 mov 96-128($sbox),$s3
688 mov 128-128($sbox),$s0
689 mov 160-128($sbox),$s1
690 mov 192-128($sbox),$s2
691 mov 224-128($sbox),$s3
692 lea -2048-128($sbox),$sbox;
693
694 mov 0($inp),$s0
695 mov 4($inp),$s1
696 mov 8($inp),$s2
697 mov 12($inp),$s3
698
699 call _x86_64_AES_decrypt
700
701 mov $s0,0($out)
702 mov $s1,4($out)
703 mov $s2,8($out)
704 mov $s3,12($out)
705
706 pop %r15
707 pop %r14
708 pop %r13
709 pop %r12
710 pop %rbp
711 pop %rbx
712 ret
713.size AES_decrypt,.-AES_decrypt
714___
715#------------------------------------------------------------------#
716
# enckey() - append the core of one key-expansion step.
#
# Register contract of the generated code:
#   %edx  input word; each of its four bytes indexes the table at %rbp
#         (the word is shifted right by 16 along the way)
#   %eax  accumulator; the four masked look-ups are xored into it,
#         followed by the 32-bit constant at 2048(%rbp,%rcx,4) ("rcon")
#   %rcx  index of the round constant to use
#   %ebx, %esi  scratch
# Each look-up is masked to a single byte lane, and the lanes are chosen
# so that byte k of %edx lands in lane (k+3) mod 4 of the result.
717sub enckey()
718{
719$code.=<<___;
720 movz %dl,%esi # rk[i]>>0
721 mov 2(%rbp,%rsi,8),%ebx
722 movz %dh,%esi # rk[i]>>8
723 and \$0xFF000000,%ebx
724 xor %ebx,%eax
725
726 mov 2(%rbp,%rsi,8),%ebx
727 shr \$16,%edx
728 and \$0x000000FF,%ebx
729 movz %dl,%esi # rk[i]>>16
730 xor %ebx,%eax
731
732 mov 0(%rbp,%rsi,8),%ebx
733 movz %dh,%esi # rk[i]>>24
734 and \$0x0000FF00,%ebx
735 xor %ebx,%eax
736
737 mov 0(%rbp,%rsi,8),%ebx
738 and \$0x00FF0000,%ebx
739 xor %ebx,%eax
740
741 xor 2048(%rbp,%rcx,4),%eax # rcon
742___
743}
744
745# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
746# AES_KEY *key)
#
# Return value of the generated routine (in %rax):
#    0   success
#   -1   userKey or key is NULL
#   -2   bits is not 128, 192 or 256
#
# The user key is copied to the start of the schedule and expanded in
# place, with %rdi walking forward through the schedule, %ecx counting
# iterations (it doubles as the round-constant index for enckey) and
# %rbp holding the AES_Te base.  Each variant finishes by storing the
# round count (10, 12 or 14) at byte offset 240 from the start of the
# schedule; the differing displacements (80, 72, 48) account for how far
# %rdi has advanced by then.
747$code.=<<___;
748.globl AES_set_encrypt_key
749.type AES_set_encrypt_key,\@function,3
750.align 16
751AES_set_encrypt_key:
752 push %rbx
753 push %rbp
754
755 mov %esi,%ecx # %ecx=bits
756 mov %rdi,%rsi # %rsi=userKey
757 mov %rdx,%rdi # %rdi=key
758
759 test \$-1,%rsi
760 jz .Lbadpointer
761 test \$-1,%rdi
762 jz .Lbadpointer
763
764 .picmeup %rbp
765 lea AES_Te-.(%rbp),%rbp
766
767 cmp \$128,%ecx
768 je .L10rounds
769 cmp \$192,%ecx
770 je .L12rounds
771 cmp \$256,%ecx
772 je .L14rounds
773 mov \$-2,%rax # invalid number of bits
774 jmp .Lexit
775
776.L10rounds:
777 mov 0(%rsi),%eax # copy first 4 dwords
778 mov 4(%rsi),%ebx
779 mov 8(%rsi),%ecx
780 mov 12(%rsi),%edx
781 mov %eax,0(%rdi)
782 mov %ebx,4(%rdi)
783 mov %ecx,8(%rdi)
784 mov %edx,12(%rdi)
785
786 xor %ecx,%ecx
787 jmp .L10shortcut
788.align 4
789.L10loop:
790 mov 0(%rdi),%eax # rk[0]
791 mov 12(%rdi),%edx # rk[3]
792.L10shortcut:
793___
 # 128-bit keys: %eax = rk[0], %edx = rk[3] on entry to the step.
794 &enckey ();
795$code.=<<___;
796 mov %eax,16(%rdi) # rk[4]
797 xor 4(%rdi),%eax
798 mov %eax,20(%rdi) # rk[5]
799 xor 8(%rdi),%eax
800 mov %eax,24(%rdi) # rk[6]
801 xor 12(%rdi),%eax
802 mov %eax,28(%rdi) # rk[7]
803 add \$1,%ecx
804 lea 16(%rdi),%rdi
805 cmp \$10,%ecx
806 jl .L10loop
807
808 movl \$10,80(%rdi) # setup number of rounds
809 xor %rax,%rax
810 jmp .Lexit
811
812.L12rounds:
813 mov 0(%rsi),%eax # copy first 6 dwords
814 mov 4(%rsi),%ebx
815 mov 8(%rsi),%ecx
816 mov 12(%rsi),%edx
817 mov %eax,0(%rdi)
818 mov %ebx,4(%rdi)
819 mov %ecx,8(%rdi)
820 mov %edx,12(%rdi)
821 mov 16(%rsi),%ecx
822 mov 20(%rsi),%edx
823 mov %ecx,16(%rdi)
824 mov %edx,20(%rdi)
825
826 xor %ecx,%ecx
827 jmp .L12shortcut
828.align 4
829.L12loop:
830 mov 0(%rdi),%eax # rk[0]
831 mov 20(%rdi),%edx # rk[5]
832.L12shortcut:
833___
 # 192-bit keys: %eax = rk[0], %edx = rk[5] on entry to the step.
834 &enckey ();
835$code.=<<___;
836 mov %eax,24(%rdi) # rk[6]
837 xor 4(%rdi),%eax
838 mov %eax,28(%rdi) # rk[7]
839 xor 8(%rdi),%eax
840 mov %eax,32(%rdi) # rk[8]
841 xor 12(%rdi),%eax
842 mov %eax,36(%rdi) # rk[9]
843
844 cmp \$7,%ecx
845 je .L12break
846 add \$1,%ecx
847
848 xor 16(%rdi),%eax
849 mov %eax,40(%rdi) # rk[10]
850 xor 20(%rdi),%eax
851 mov %eax,44(%rdi) # rk[11]
852
853 lea 24(%rdi),%rdi
854 jmp .L12loop
855.L12break:
856 movl \$12,72(%rdi) # setup number of rounds
857 xor %rax,%rax
858 jmp .Lexit
859
860.L14rounds:
861 mov 0(%rsi),%eax # copy first 8 dwords
862 mov 4(%rsi),%ebx
863 mov 8(%rsi),%ecx
864 mov 12(%rsi),%edx
865 mov %eax,0(%rdi)
866 mov %ebx,4(%rdi)
867 mov %ecx,8(%rdi)
868 mov %edx,12(%rdi)
869 mov 16(%rsi),%eax
870 mov 20(%rsi),%ebx
871 mov 24(%rsi),%ecx
872 mov 28(%rsi),%edx
873 mov %eax,16(%rdi)
874 mov %ebx,20(%rdi)
875 mov %ecx,24(%rdi)
876 mov %edx,28(%rdi)
877
878 xor %ecx,%ecx
879 jmp .L14shortcut
880.align 4
881.L14loop:
882 mov 28(%rdi),%edx # rk[4]
883.L14shortcut:
884 mov 0(%rdi),%eax # rk[0]
885___
 # 256-bit keys: %eax = rk[0], %edx = the word at 28(%rdi) on entry to
 # the step.  NOTE(review): byte offset 28 is word index 7, so the
 # in-line "rk[4]" label on that load looks inaccurate (rk[7]).
 # The second half of each iteration repeats the four look-ups with
 # the byte lanes unrotated and without a round constant.
886 &enckey ();
887$code.=<<___;
888 mov %eax,32(%rdi) # rk[8]
889 xor 4(%rdi),%eax
890 mov %eax,36(%rdi) # rk[9]
891 xor 8(%rdi),%eax
892 mov %eax,40(%rdi) # rk[10]
893 xor 12(%rdi),%eax
894 mov %eax,44(%rdi) # rk[11]
895
896 cmp \$6,%ecx
897 je .L14break
898 add \$1,%ecx
899
900 mov %eax,%edx
901 mov 16(%rdi),%eax # rk[4]
902 movz %dl,%esi # rk[11]>>0
903 mov 2(%rbp,%rsi,8),%ebx
904 movz %dh,%esi # rk[11]>>8
905 and \$0x000000FF,%ebx
906 xor %ebx,%eax
907
908 mov 0(%rbp,%rsi,8),%ebx
909 shr \$16,%edx
910 and \$0x0000FF00,%ebx
911 movz %dl,%esi # rk[11]>>16
912 xor %ebx,%eax
913
914 mov 0(%rbp,%rsi,8),%ebx
915 movz %dh,%esi # rk[11]>>24
916 and \$0x00FF0000,%ebx
917 xor %ebx,%eax
918
919 mov 2(%rbp,%rsi,8),%ebx
920 and \$0xFF000000,%ebx
921 xor %ebx,%eax
922
923 mov %eax,48(%rdi) # rk[12]
924 xor 20(%rdi),%eax
925 mov %eax,52(%rdi) # rk[13]
926 xor 24(%rdi),%eax
927 mov %eax,56(%rdi) # rk[14]
928 xor 28(%rdi),%eax
929 mov %eax,60(%rdi) # rk[15]
930
931 lea 32(%rdi),%rdi
932 jmp .L14loop
933.L14break:
934 movl \$14,48(%rdi) # setup number of rounds
935 xor %rax,%rax
936 jmp .Lexit
937
938.Lbadpointer:
939 mov \$-1,%rax
940.Lexit:
941 pop %rbp
942 pop %rbx
943 ret
944.size AES_set_encrypt_key,.-AES_set_encrypt_key
945___
946
# deckey($i, $ptr, $te, $td)
#
# Appends to $code the instruction sequence that rewrites, in place, the
# 32-bit word stored at byte offset $i from the address in register $ptr.
#
#   $i    displacement of the word, emitted verbatim in front of ($ptr)
#   $ptr  register pointing at the current 16-byte round key
#   $te   register holding the base of the table indexed first
#   $td   register holding the base of the table indexed second
#
# Each of the four bytes of the word is first replaced by the byte found at
# offset 2 of the 8-byte-stride entry $te[byte]; that result then indexes
# $td (also 8-byte stride), read as a dword at byte offset 0, 3, 2 and 1
# respectively.  The four dwords are XORed together and stored back.
# NOTE(review): with Te/Td laid out as the lookups assume this presumably
# amounts to Td[Sbox[x]], i.e. InvMixColumns on the word - confirm against
# the table emitters.
#
# The emitted code clobbers %rax, %rbx and %rdx.
#
# No prototype on purpose: the sub takes four arguments, so an empty "()"
# prototype would reject a plain deckey(...) call once the sub is declared.
# Existing &deckey(...) call sites keep working unchanged.
sub deckey
{ my ($i,$ptr,$te,$td) = @_;
$code.=<<___;
 mov $i($ptr),%eax
 mov %eax,%edx
 movz %ah,%ebx
 shr \$16,%edx
 and \$0xFF,%eax
 movzb 2($te,%rax,8),%rax
 movzb 2($te,%rbx,8),%rbx
 mov 0($td,%rax,8),%eax
 xor 3($td,%rbx,8),%eax
 movzb %dh,%ebx
 and \$0xFF,%edx
 movzb 2($te,%rdx,8),%rdx
 movzb 2($te,%rbx,8),%rbx
 xor 2($td,%rdx,8),%eax
 xor 1($td,%rbx,8),%eax
 mov %eax,$i($ptr)
___
}
968
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
#                         AES_KEY *key)
#
# Emits AES_set_decrypt_key.  The generated routine:
#   1. saves the key-schedule pointer (%rdx) on the stack and calls
#      AES_set_encrypt_key with the caller's arguments untouched;
#   2. if that call returns non-zero, drops the saved pointer and returns
#      the same value to the caller;
#   3. otherwise swaps the 16-byte round keys end for end (.Linvert), using
#      the round count stored at offset 240 of the schedule;
#   4. runs the deckey transform over the four words of every round key
#      except the first and the last (.Lpermute, rounds-1 iterations
#      starting at key+16);
#   5. returns 0.
#
# Stack discipline: exactly one 8-byte slot is pushed on entry.  On the
# success path that slot is recycled to preserve %rbx and released by the
# final "pop %rbx".  On the failure path it therefore has to be released
# with an 8-byte adjustment; the previous "lea 24(%rsp),%rsp" skipped past
# the return address and made "ret" jump to whatever followed it.
$code.=<<___;
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,\@function,3
.align 16
AES_set_decrypt_key:
 push %rdx
 call AES_set_encrypt_key
 cmp \$0,%eax
 je .Lproceed
 lea 8(%rsp),%rsp # drop saved key schedule pointer
 ret
.Lproceed:
 mov (%rsp),%r8 # restore key schedule
 mov %rbx,(%rsp)

 mov 240(%r8),%ecx # pull number of rounds
 xor %rdi,%rdi
 lea (%rdi,%rcx,4),%rcx
 mov %r8,%rsi
 lea (%r8,%rcx,4),%rdi # pointer to last chunk
.align 4
.Linvert:
 mov 0(%rsi),%rax
 mov 8(%rsi),%rbx
 mov 0(%rdi),%rcx
 mov 8(%rdi),%rdx
 mov %rax,0(%rdi)
 mov %rbx,8(%rdi)
 mov %rcx,0(%rsi)
 mov %rdx,8(%rsi)
 lea 16(%rsi),%rsi
 lea -16(%rdi),%rdi
 cmp %rsi,%rdi
 jne .Linvert

 .picmeup %r9
 lea AES_Td-.(%r9),%rdi
 lea AES_Te-AES_Td(%rdi),%r9

 mov %r8,%rsi
 mov 240(%r8),%ecx # pull number of rounds
 sub \$1,%ecx
.align 4
.Lpermute:
 lea 16(%rsi),%rsi
___
 # one deckey expansion per 32-bit word of the current round key;
 # %rsi = round key, %r9 = AES_Te, %rdi = AES_Td
 &deckey (0,"%rsi","%r9","%rdi");
 &deckey (4,"%rsi","%r9","%rdi");
 &deckey (8,"%rsi","%r9","%rdi");
 &deckey (12,"%rsi","%r9","%rdi");
$code.=<<___;
 sub \$1,%ecx
 jnz .Lpermute

 xor %rax,%rax
 pop %rbx
 ret
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___
1030
# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
#                       size_t length, const AES_KEY *key,
#                       unsigned char *ivp,const int enc);
#
# Emits AES_cbc_encrypt.  Arguments arrive as %rdi=inp, %rsi=out,
# %rdx=length, %rcx=key, %r8=ivp, %r9=enc (only the low 32 bits of enc are
# honoured).  A zero length returns immediately.  Otherwise the routine
#   - picks the encrypt (enc != 0) or .LDECRYPT path;
#   - carves a 64-byte aligned frame below %rsp and nudges it so that it
#     does not overlap the lookup table modulo 4096;
#   - copies the key schedule into the frame when the caller's copy would
#     overlap the table modulo 4096, and wipes that copy in .Lcbc_cleanup
#     (the copied "rounds" word at $mark doubles as the "was copied" flag);
#   - touches the whole table once before the main loop;
#   - processes 16 bytes per iteration through _x86_64_AES_encrypt or
#     _x86_64_AES_decrypt, and writes the final chaining value back to ivp.
#
# Encryption zero-pads a trailing partial block in the output buffer and
# encrypts it from there.  Decryption has separate loops for out != inp and
# for in-place operation.
#
# In-place partial tail: the loop has already stored a full 16-byte block
# and advanced $out by 16 when it notices that only len < 16 bytes were
# requested, with %rcx = len-16.  The bytes at out[len..15] are put back
# from the copy of the input block kept in the user's iv buffer, so the
# source must be iv+16+%rcx (== iv+len), mirroring $out+%rcx (== out+len)
# for the destination.  The source used to be computed as iv+%rcx, which
# reads 16-len bytes in front of the iv buffer instead.
{
# stack frame layout
# -8(%rsp) return address
my $_rsp="0(%rsp)"; # saved %rsp
my $_len="8(%rsp)"; # copy of 3rd parameter, length
my $_key="16(%rsp)"; # copy of 4th parameter, key
my $_ivp="24(%rsp)"; # copy of 5th parameter, ivp
my $keyp="32(%rsp)"; # one to pass as $key
my $ivec="40(%rsp)"; # ivec[16]
my $aes_key="56(%rsp)"; # copy of aes_key
my $mark="56+240(%rsp)"; # copy of aes_key->rounds

$code.=<<___;
.globl AES_cbc_encrypt
.type AES_cbc_encrypt,\@function,6
.align 16
AES_cbc_encrypt:
 cmp \$0,%rdx # check length
 je .Lcbc_just_ret
 push %rbx
 push %rbp
 push %r12
 push %r13
 push %r14
 push %r15
 pushfq
 cld
 mov %r9d,%r9d # clear upper half of enc

 .picmeup $sbox
.Lcbc_pic_point:

 cmp \$0,%r9
 je .LDECRYPT

 lea AES_Te-.Lcbc_pic_point($sbox),$sbox

 # allocate aligned stack frame...
 lea -64-248(%rsp),$key
 and \$-64,$key

 # ... and make it doesn't alias with AES_Te modulo 4096
 mov $sbox,%r10
 lea 2048($sbox),%r11
 mov $key,%r12
 and \$0xFFF,%r10 # s = $sbox&0xfff
 and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff
 and \$0xFFF,%r12 # p = %rsp&0xfff

 cmp %r11,%r12 # if (p=>e) %rsp =- (p-e);
 jb .Lcbc_te_break_out
 sub %r11,%r12
 sub %r12,$key
 jmp .Lcbc_te_ok
.Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz
 sub %r10,%r12
 and \$0xFFF,%r12
 add \$320,%r12
 sub %r12,$key
.align 4
.Lcbc_te_ok:

 xchg %rsp,$key
 add \$8,%rsp # reserve for return address!
 mov $key,$_rsp # save %rsp
 mov %rdx,$_len # save copy of len
 mov %rcx,$_key # save copy of key
 mov %r8,$_ivp # save copy of ivp
 movl \$0,$mark # copy of aes_key->rounds = 0;
 mov %r8,%rbp # rearrange input arguments
 mov %rsi,$out
 mov %rdi,$inp
 mov %rcx,$key

 # do we copy key schedule to stack?
 mov $key,%r10
 sub $sbox,%r10
 and \$0xfff,%r10
 cmp \$2048,%r10
 jb .Lcbc_do_ecopy
 cmp \$4096-248,%r10
 jb .Lcbc_skip_ecopy
.align 4
.Lcbc_do_ecopy:
 mov $key,%rsi
 lea $aes_key,%rdi
 lea $aes_key,$key
 mov \$240/8,%ecx
 .long 0x90A548F3 # rep movsq
 mov (%rsi),%eax # copy aes_key->rounds
 mov %eax,(%rdi)
.Lcbc_skip_ecopy:
 mov $key,$keyp # save key pointer

 mov \$16,%ecx
.align 4
.Lcbc_prefetch_te:
 mov 0($sbox),%r10
 mov 32($sbox),%r11
 mov 64($sbox),%r12
 mov 96($sbox),%r13
 lea 128($sbox),$sbox
 sub \$1,%ecx
 jnz .Lcbc_prefetch_te
 sub \$2048,$sbox

 test \$-16,%rdx # check upon length
 mov %rdx,%r10
 mov 0(%rbp),$s0 # load iv
 mov 4(%rbp),$s1
 mov 8(%rbp),$s2
 mov 12(%rbp),$s3
 jz .Lcbc_enc_tail # short input...

.align 4
.Lcbc_enc_loop:
 xor 0($inp),$s0
 xor 4($inp),$s1
 xor 8($inp),$s2
 xor 12($inp),$s3
 mov $inp,$ivec # if ($verticalspin) save inp

 mov $keyp,$key # restore key
 call _x86_64_AES_encrypt

 mov $ivec,$inp # if ($verticalspin) restore inp
 mov $s0,0($out)
 mov $s1,4($out)
 mov $s2,8($out)
 mov $s3,12($out)

 mov $_len,%r10
 lea 16($inp),$inp
 lea 16($out),$out
 sub \$16,%r10
 test \$-16,%r10
 mov %r10,$_len
 jnz .Lcbc_enc_loop
 test \$15,%r10
 jnz .Lcbc_enc_tail
 mov $_ivp,%rbp # restore ivp
 mov $s0,0(%rbp) # save ivec
 mov $s1,4(%rbp)
 mov $s2,8(%rbp)
 mov $s3,12(%rbp)

.align 4
.Lcbc_cleanup:
 cmpl \$0,$mark # was the key schedule copied?
 lea $aes_key,%rdi
 mov $_rsp,%rsp
 je .Lcbc_exit
 mov \$240/8,%ecx
 xor %rax,%rax
 .long 0x90AB48F3 # rep stosq
.Lcbc_exit:
 popfq
 pop %r15
 pop %r14
 pop %r13
 pop %r12
 pop %rbp
 pop %rbx
.Lcbc_just_ret:
 ret
.align 4
.Lcbc_enc_tail:
 mov %rax,%r11
 mov %rcx,%r12
 mov %r10,%rcx
 mov $inp,%rsi
 mov $out,%rdi
 .long 0xF689A4F3 # rep movsb
 mov \$16,%rcx # zero tail
 sub %r10,%rcx
 xor %rax,%rax
 .long 0xF689AAF3 # rep stosb
 mov $out,$inp # this is not a mistake!
 movq \$16,$_len # len=16
 mov %r11,%rax
 mov %r12,%rcx
 jmp .Lcbc_enc_loop # one more spin...
#----------------------------- DECRYPT -----------------------------#
.align 16
.LDECRYPT:
 lea AES_Td-.Lcbc_pic_point($sbox),$sbox

 # allocate aligned stack frame...
 lea -64-248(%rsp),$key
 and \$-64,$key

 # ... and make it doesn't alias with AES_Td modulo 4096
 mov $sbox,%r10
 lea 2304($sbox),%r11
 mov $key,%r12
 and \$0xFFF,%r10 # s = $sbox&0xfff
 and \$0xFFF,%r11 # e = ($sbox+2048+256)&0xfff
 and \$0xFFF,%r12 # p = %rsp&0xfff

 cmp %r11,%r12 # if (p=>e) %rsp =- (p-e);
 jb .Lcbc_td_break_out
 sub %r11,%r12
 sub %r12,$key
 jmp .Lcbc_td_ok
.Lcbc_td_break_out: # else %rsp -= (p-s)&0xfff + framesz
 sub %r10,%r12
 and \$0xFFF,%r12
 add \$320,%r12
 sub %r12,$key
.align 4
.Lcbc_td_ok:

 xchg %rsp,$key
 add \$8,%rsp # reserve for return address!
 mov $key,$_rsp # save %rsp
 mov %rdx,$_len # save copy of len
 mov %rcx,$_key # save copy of key
 mov %r8,$_ivp # save copy of ivp
 movl \$0,$mark # copy of aes_key->rounds = 0;
 mov %r8,%rbp # rearrange input arguments
 mov %rsi,$out
 mov %rdi,$inp
 mov %rcx,$key

 # do we copy key schedule to stack?
 mov $key,%r10
 sub $sbox,%r10
 and \$0xfff,%r10
 cmp \$2304,%r10
 jb .Lcbc_do_dcopy
 cmp \$4096-248,%r10
 jb .Lcbc_skip_dcopy
.align 4
.Lcbc_do_dcopy:
 mov $key,%rsi
 lea $aes_key,%rdi
 lea $aes_key,$key
 mov \$240/8,%ecx
 .long 0x90A548F3 # rep movsq
 mov (%rsi),%eax # copy aes_key->rounds
 mov %eax,(%rdi)
.Lcbc_skip_dcopy:
 mov $key,$keyp # save key pointer

 mov \$18,%ecx
.align 4
.Lcbc_prefetch_td:
 mov 0($sbox),%r10
 mov 32($sbox),%r11
 mov 64($sbox),%r12
 mov 96($sbox),%r13
 lea 128($sbox),$sbox
 sub \$1,%ecx
 jnz .Lcbc_prefetch_td
 sub \$2304,$sbox

 cmp $inp,$out
 je .Lcbc_dec_in_place

 mov %rbp,$ivec
.align 4
.Lcbc_dec_loop:
 mov 0($inp),$s0 # read input
 mov 4($inp),$s1
 mov 8($inp),$s2
 mov 12($inp),$s3
 mov $inp,8+$ivec # if ($verticalspin) save inp

 mov $keyp,$key # restore key
 call _x86_64_AES_decrypt

 mov $ivec,%rbp # load ivp
 mov 8+$ivec,$inp # if ($verticalspin) restore inp
 xor 0(%rbp),$s0 # xor iv
 xor 4(%rbp),$s1
 xor 8(%rbp),$s2
 xor 12(%rbp),$s3
 mov $inp,%rbp # current input, next iv

 mov $_len,%r10 # load len
 sub \$16,%r10
 jc .Lcbc_dec_partial
 mov %r10,$_len # update len
 mov %rbp,$ivec # update ivp

 mov $s0,0($out) # write output
 mov $s1,4($out)
 mov $s2,8($out)
 mov $s3,12($out)

 lea 16($inp),$inp
 lea 16($out),$out
 jnz .Lcbc_dec_loop
.Lcbc_dec_end:
 mov $_ivp,%r12 # load user ivp
 mov 0(%rbp),%r10 # load iv
 mov 8(%rbp),%r11
 mov %r10,0(%r12) # copy back to user
 mov %r11,8(%r12)
 jmp .Lcbc_cleanup

.align 4
.Lcbc_dec_partial:
 mov $s0,0+$ivec # dump output to stack
 mov $s1,4+$ivec
 mov $s2,8+$ivec
 mov $s3,12+$ivec
 mov $out,%rdi
 lea $ivec,%rsi
 mov \$16,%rcx
 add %r10,%rcx # number of bytes to copy
 .long 0xF689A4F3 # rep movsb
 jmp .Lcbc_dec_end

.align 16
.Lcbc_dec_in_place:
 mov 0($inp),$s0 # load input
 mov 4($inp),$s1
 mov 8($inp),$s2
 mov 12($inp),$s3

 mov $inp,$ivec # if ($verticalspin) save inp
 mov $keyp,$key
 call _x86_64_AES_decrypt

 mov $ivec,$inp # if ($verticalspin) restore inp
 mov $_ivp,%rbp
 xor 0(%rbp),$s0
 xor 4(%rbp),$s1
 xor 8(%rbp),$s2
 xor 12(%rbp),$s3

 mov 0($inp),%r10 # copy input to iv
 mov 8($inp),%r11
 mov %r10,0(%rbp)
 mov %r11,8(%rbp)

 mov $s0,0($out) # save output [zaps input]
 mov $s1,4($out)
 mov $s2,8($out)
 mov $s3,12($out)

 mov $_len,%rcx
 lea 16($inp),$inp
 lea 16($out),$out
 sub \$16,%rcx
 jc .Lcbc_dec_in_place_partial
 mov %rcx,$_len
 jnz .Lcbc_dec_in_place
 jmp .Lcbc_cleanup

.align 4
.Lcbc_dec_in_place_partial:
 # one can argue if this is actually required
 lea ($out,%rcx),%rdi
 lea 16(%rbp,%rcx),%rsi # saved input at iv+len, %rcx=len-16
 neg %rcx
 .long 0xF689A4F3 # rep movsb # restore tail
 jmp .Lcbc_cleanup
.size AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
1396
# AES_Te: exported, 64-byte aligned lookup table with 256 entries, used by
# the encryption and key-setup code.  The rows below are handed to
# _data_word four values at a time, in table order.
$code.=<<___;
.globl AES_Te
.align 64
AES_Te:
___
foreach my $te_row (
	[0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6],
	[0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591],
	[0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56],
	[0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec],
	[0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa],
	[0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb],
	[0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45],
	[0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b],
	[0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c],
	[0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83],
	[0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9],
	[0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a],
	[0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d],
	[0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f],
	[0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df],
	[0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea],
	[0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34],
	[0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b],
	[0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d],
	[0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413],
	[0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1],
	[0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6],
	[0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972],
	[0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85],
	[0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed],
	[0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511],
	[0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe],
	[0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b],
	[0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05],
	[0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1],
	[0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142],
	[0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf],
	[0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3],
	[0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e],
	[0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a],
	[0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6],
	[0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3],
	[0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b],
	[0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428],
	[0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad],
	[0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14],
	[0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8],
	[0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4],
	[0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2],
	[0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda],
	[0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949],
	[0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf],
	[0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810],
	[0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c],
	[0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697],
	[0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e],
	[0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f],
	[0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc],
	[0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c],
	[0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969],
	[0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27],
	[0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122],
	[0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433],
	[0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9],
	[0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5],
	[0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a],
	[0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0],
	[0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e],
	[0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c],
) {
	# keep the &-form: the emitter is declared elsewhere in this file
	&_data_word(@$te_row);
}
#rcon:
# Three rows of 32-bit constants emitted straight after the AES_Te rows:
# 0x01 .. 0x80, then 0x1b, 0x36 and six zero words.
$code .= " .long 0x00000001, 0x00000002, 0x00000004, 0x00000008\n"
       . " .long 0x00000010, 0x00000020, 0x00000040, 0x00000080\n"
       . " .long 0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0\n";
# AES_Td: exported, 64-byte aligned lookup table with 256 entries, used by
# the decryption code and by AES_set_decrypt_key.  The rows below are handed
# to _data_word four values at a time, in table order.
$code.=<<___;
.globl AES_Td
.align 64
AES_Td:
___
foreach my $td_row (
	[0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a],
	[0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b],
	[0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5],
	[0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5],
	[0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d],
	[0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b],
	[0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295],
	[0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e],
	[0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927],
	[0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d],
	[0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362],
	[0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9],
	[0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52],
	[0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566],
	[0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3],
	[0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed],
	[0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e],
	[0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4],
	[0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4],
	[0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd],
	[0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d],
	[0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060],
	[0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967],
	[0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879],
	[0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000],
	[0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c],
	[0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36],
	[0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624],
	[0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b],
	[0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c],
	[0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12],
	[0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14],
	[0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3],
	[0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b],
	[0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8],
	[0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684],
	[0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7],
	[0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177],
	[0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947],
	[0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322],
	[0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498],
	[0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f],
	[0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54],
	[0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382],
	[0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf],
	[0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb],
	[0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83],
	[0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef],
	[0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029],
	[0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235],
	[0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733],
	[0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117],
	[0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4],
	[0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546],
	[0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb],
	[0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d],
	[0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb],
	[0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a],
	[0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773],
	[0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478],
	[0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2],
	[0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff],
	[0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664],
	[0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0],
) {
	# keep the &-form: the emitter is declared elsewhere in this file
	&_data_word(@$td_row);
}
#Td4:
# 256 single bytes emitted right after the AES_Td rows, eight per
# data_byte call, in table order.
foreach my $td4_row (
	[0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38],
	[0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb],
	[0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87],
	[0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb],
	[0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d],
	[0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e],
	[0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2],
	[0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25],
	[0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16],
	[0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92],
	[0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda],
	[0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84],
	[0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a],
	[0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06],
	[0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02],
	[0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b],
	[0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea],
	[0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73],
	[0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85],
	[0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e],
	[0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89],
	[0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b],
	[0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20],
	[0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4],
	[0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31],
	[0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f],
	[0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d],
	[0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef],
	[0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0],
	[0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61],
	[0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26],
	[0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d],
) {
	# keep the &-form: the emitter is declared elsewhere in this file
	&data_byte(@$td4_row);
}
1574
# Final pass: every `...` span in the accumulated assembly is replaced by
# the result of evaluating its contents as Perl, then the text is written
# to STDOUT.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code or die "error writing generated code: $!";

# Buffered write errors are only reported when the handle is closed, so an
# unchecked close would let a truncated output file pass as success.
close STDOUT or die "error closing STDOUT: $!";