diff options
Diffstat (limited to 'src/lib/libcrypto/aes/asm/aes-x86_64.pl')
-rwxr-xr-x | src/lib/libcrypto/aes/asm/aes-x86_64.pl | 1578 |
1 files changed, 1578 insertions, 0 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl new file mode 100755 index 0000000000..44e0bf8cae --- /dev/null +++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl | |||
@@ -0,0 +1,1578 @@ | |||
1 | #!/usr/bin/env perl | ||
2 | # | ||
3 | # ==================================================================== | ||
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
5 | # project. Rights for redistribution and usage in source and binary | ||
6 | # forms are granted according to the OpenSSL license. | ||
7 | # ==================================================================== | ||
8 | # | ||
9 | # Version 1.2. | ||
10 | # | ||
11 | # aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on | ||
12 | # Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version | ||
13 | # [you'll notice a lot of resemblance], such as compressed S-boxes | ||
14 | # in little-endian byte order, prefetch of these tables in CBC mode, | ||
15 | # as well as avoiding L1 cache aliasing between stack frame and key | ||
16 | # schedule and already mentioned tables, compressed Td4... | ||
17 | # | ||
18 | # Performance in number of cycles per processed byte for 128-bit key: | ||
19 | # | ||
20 | # ECB CBC encrypt | ||
21 | # AMD64 13.7 13.0(*) | ||
22 | # EM64T 20.2 18.6(*) | ||
23 | # | ||
24 | # (*) CBC benchmarks are better than ECB thanks to custom ABI used | ||
25 | # by the private block encryption function. | ||
26 | |||
$verticalspin=1;	# unlike 32-bit version $verticalspin performs
			# ~15% better on both AMD and Intel cores
$output=shift;		# first command-line argument, handed to the translator

# From here on everything printed to STDOUT is piped into the perlasm
# translator.  A piped open returns false when the child process cannot
# be started; check it so that a broken build environment fails loudly
# instead of producing no output.
open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output"
	or die "can't call ../perlasm/x86_64-xlate.pl: $!";

# Accumulator for the generated assembly text.
$code=".text\n";

# Register assignment shared by all code emitters below.
$s0="%eax";		# $s0..$s3: the four 32-bit words of the block state
$s1="%ebx";
$s2="%ecx";
$s3="%edx";
$acc0="%esi";		# $acc0..$acc2: scratch, mostly table indices
$acc1="%edi";
$acc2="%ebp";
$inp="%r8";		# input pointer (reused as a temporary by *vert subs)
$out="%r9";		# output pointer
$t0="%r10d";		# $t0..$t2: temporaries holding the next state words
$t1="%r11d";
$t2="%r12d";
$rnds="%r13d";		# round counter, loaded from 240($key)
$sbox="%r14";		# base address of the lookup table
$key="%r15";		# pointer into the key schedule
49 | |||
# hi(REG): return the name of the "high byte" sub-register (%ah, %bh, %ch,
# %dh) for a 32/64-bit register name of the form %eXx / %rXx with X in a-d.
# Any other string is returned unchanged.
# The sub takes one argument, so the former empty "()" prototype was
# dropped; every caller uses the &hi(...) form, which ignores prototypes.
# The replacement side uses $1: \1 is only a back-reference inside the
# pattern itself.
sub hi
{   my $r=shift;
    $r =~ s/%[er]([a-d])x/%$1h/;
    $r;
}
# lo(REG): return the name of the low-byte sub-register of REG.
#   %eXx / %rXx  (X in a-d)      -> %Xl     e.g. %eax  -> %al
#   %esi/%edi/%rsi/%rdi          -> %sil/%dil
#   %rN with optional "d" suffix -> %rNb    e.g. %r10d -> %r10b
# Any other string is returned unchanged.
# The sub takes one argument, so the former empty "()" prototype was
# dropped; every caller uses the &lo(...) form, which ignores prototypes.
# The replacement sides use $1 instead of the pattern-only \1.
sub lo
{   my $r=shift;
    $r =~ s/%[er]([a-d])x/%$1l/;
    $r =~ s/%[er]([sd]i)/%$1l/;
    $r =~ s/%(r[0-9]+)[d]?/%$1b/;
    $r;
}
# _data_word(LIST): for each value in LIST append one ".long" line to $code
# that contains the value twice, formatted as 8-digit hex.  Processing stops
# at the first undefined element.
sub _data_word()
{   foreach my $word (@_) {
	last unless defined($word);
	$code .= sprintf(".long\t0x%08x,0x%08x\n", $word, $word);
    }
}
# data_word(LIST): append a single ".long" directive to $code holding all
# values of LIST as comma-separated 8-digit hex words.
# An empty LIST emits nothing; previously the popped "last" element was
# undef and a spurious ".long 0x00000000" was written.
sub data_word()
{ my $i;
    return unless @_;			# nothing to emit
    my $last=pop(@_);			# final word is printed without a comma
    $code.=".long\t";
    while(defined($i=shift)) { $code.=sprintf"0x%08x,",$i; }
    $code.=sprintf"0x%08x\n",$last;
}
65 | |||
# data_byte(LIST): append a single ".byte" directive to $code holding the
# low 8 bits of every value of LIST as comma-separated 2-digit hex.
# An empty LIST emits nothing; previously the popped "last" element was
# undef and a spurious ".byte 0x00" was written.
sub data_byte()
{ my $i;
    return unless @_;			# nothing to emit
    my $last=pop(@_);			# final byte is printed without a comma
    $code.=".byte\t";
    while(defined($i=shift)) { $code.=sprintf"0x%02x,",$i&0xff; }
    $code.=sprintf"0x%02x\n",$last&0xff;
}
73 | |||
# encvert(): append to $code the assembly for one table-driven encryption
# round that works on all four state words ($s0..$s3) in an interleaved
# ("vertical") order.  Takes no arguments.
# Each new word is accumulated in $t0..$t3 from four lookups into the table
# at $sbox (index scaled by 8, byte offsets 0, 3, 2 and 1); $key is advanced
# by 16 and the results are XORed with the four words at the new $key into
# $s0..$s3.  The fourth temporary is %r8d, i.e. the low half of $inp, so the
# emitted code destroys $inp.
# The `&lo(...)`/`&hi(...)` back-tick expressions are left as literal text
# here; NOTE(review): presumably expanded by a later pass over $code that is
# outside this view.
74 | sub encvert() | ||
75 | { my $t3="%r8d"; # zaps $inp! | ||
76 | |||
77 | $code.=<<___; | ||
78 | # favor 3-way issue Opteron pipeline... | ||
79 | movzb `&lo("$s0")`,$acc0 | ||
80 | movzb `&lo("$s1")`,$acc1 | ||
81 | movzb `&lo("$s2")`,$acc2 | ||
82 | mov 0($sbox,$acc0,8),$t0 | ||
83 | mov 0($sbox,$acc1,8),$t1 | ||
84 | mov 0($sbox,$acc2,8),$t2 | ||
85 | |||
86 | movzb `&hi("$s1")`,$acc0 | ||
87 | movzb `&hi("$s2")`,$acc1 | ||
88 | movzb `&lo("$s3")`,$acc2 | ||
89 | xor 3($sbox,$acc0,8),$t0 | ||
90 | xor 3($sbox,$acc1,8),$t1 | ||
91 | mov 0($sbox,$acc2,8),$t3 | ||
92 | |||
93 | movzb `&hi("$s3")`,$acc0 | ||
94 | shr \$16,$s2 | ||
95 | movzb `&hi("$s0")`,$acc2 | ||
96 | xor 3($sbox,$acc0,8),$t2 | ||
97 | shr \$16,$s3 | ||
98 | xor 3($sbox,$acc2,8),$t3 | ||
99 | |||
100 | shr \$16,$s1 | ||
101 | lea 16($key),$key | ||
102 | shr \$16,$s0 | ||
103 | |||
104 | movzb `&lo("$s2")`,$acc0 | ||
105 | movzb `&lo("$s3")`,$acc1 | ||
106 | movzb `&lo("$s0")`,$acc2 | ||
107 | xor 2($sbox,$acc0,8),$t0 | ||
108 | xor 2($sbox,$acc1,8),$t1 | ||
109 | xor 2($sbox,$acc2,8),$t2 | ||
110 | |||
111 | movzb `&hi("$s3")`,$acc0 | ||
112 | movzb `&hi("$s0")`,$acc1 | ||
113 | movzb `&lo("$s1")`,$acc2 | ||
114 | xor 1($sbox,$acc0,8),$t0 | ||
115 | xor 1($sbox,$acc1,8),$t1 | ||
116 | xor 2($sbox,$acc2,8),$t3 | ||
117 | |||
118 | mov 12($key),$s3 | ||
119 | movzb `&hi("$s1")`,$acc1 | ||
120 | movzb `&hi("$s2")`,$acc2 | ||
121 | mov 0($key),$s0 | ||
122 | xor 1($sbox,$acc1,8),$t2 | ||
123 | xor 1($sbox,$acc2,8),$t3 | ||
124 | |||
125 | mov 4($key),$s1 | ||
126 | mov 8($key),$s2 | ||
127 | xor $t0,$s0 | ||
128 | xor $t1,$s1 | ||
129 | xor $t2,$s2 | ||
130 | xor $t3,$s3 | ||
131 | ___ | ||
132 | } | ||
133 | |||
# enclastvert(): append to $code the assembly for the final encryption
# round in the interleaved ("vertical") form.  Takes no arguments.
# Every output byte is taken from a 32-bit load at byte offset 0 or 2 of the
# table entry at $sbox (index scaled by 8) and isolated with one of the masks
# 0x000000ff / 0x0000ff00 / 0x00ff0000 / 0xff000000.  The four result words
# in $t0..$t3 are XORed with the round-key words at 16+0..16+12($key); $key
# itself is not advanced.  %r8d (low half of $inp) serves as $t3, so the
# emitted code destroys $inp.
134 | sub enclastvert() | ||
135 | { my $t3="%r8d"; # zaps $inp! | ||
136 | |||
137 | $code.=<<___; | ||
138 | movzb `&lo("$s0")`,$acc0 | ||
139 | movzb `&lo("$s1")`,$acc1 | ||
140 | movzb `&lo("$s2")`,$acc2 | ||
141 | mov 2($sbox,$acc0,8),$t0 | ||
142 | mov 2($sbox,$acc1,8),$t1 | ||
143 | mov 2($sbox,$acc2,8),$t2 | ||
144 | |||
145 | and \$0x000000ff,$t0 | ||
146 | and \$0x000000ff,$t1 | ||
147 | and \$0x000000ff,$t2 | ||
148 | |||
149 | movzb `&lo("$s3")`,$acc0 | ||
150 | movzb `&hi("$s1")`,$acc1 | ||
151 | movzb `&hi("$s2")`,$acc2 | ||
152 | mov 2($sbox,$acc0,8),$t3 | ||
153 | mov 0($sbox,$acc1,8),$acc1 #$t0 | ||
154 | mov 0($sbox,$acc2,8),$acc2 #$t1 | ||
155 | |||
156 | and \$0x000000ff,$t3 | ||
157 | and \$0x0000ff00,$acc1 | ||
158 | and \$0x0000ff00,$acc2 | ||
159 | |||
160 | xor $acc1,$t0 | ||
161 | xor $acc2,$t1 | ||
162 | shr \$16,$s2 | ||
163 | |||
164 | movzb `&hi("$s3")`,$acc0 | ||
165 | movzb `&hi("$s0")`,$acc1 | ||
166 | shr \$16,$s3 | ||
167 | mov 0($sbox,$acc0,8),$acc0 #$t2 | ||
168 | mov 0($sbox,$acc1,8),$acc1 #$t3 | ||
169 | |||
170 | and \$0x0000ff00,$acc0 | ||
171 | and \$0x0000ff00,$acc1 | ||
172 | shr \$16,$s1 | ||
173 | xor $acc0,$t2 | ||
174 | xor $acc1,$t3 | ||
175 | shr \$16,$s0 | ||
176 | |||
177 | movzb `&lo("$s2")`,$acc0 | ||
178 | movzb `&lo("$s3")`,$acc1 | ||
179 | movzb `&lo("$s0")`,$acc2 | ||
180 | mov 0($sbox,$acc0,8),$acc0 #$t0 | ||
181 | mov 0($sbox,$acc1,8),$acc1 #$t1 | ||
182 | mov 0($sbox,$acc2,8),$acc2 #$t2 | ||
183 | |||
184 | and \$0x00ff0000,$acc0 | ||
185 | and \$0x00ff0000,$acc1 | ||
186 | and \$0x00ff0000,$acc2 | ||
187 | |||
188 | xor $acc0,$t0 | ||
189 | xor $acc1,$t1 | ||
190 | xor $acc2,$t2 | ||
191 | |||
192 | movzb `&lo("$s1")`,$acc0 | ||
193 | movzb `&hi("$s3")`,$acc1 | ||
194 | movzb `&hi("$s0")`,$acc2 | ||
195 | mov 0($sbox,$acc0,8),$acc0 #$t3 | ||
196 | mov 2($sbox,$acc1,8),$acc1 #$t0 | ||
197 | mov 2($sbox,$acc2,8),$acc2 #$t1 | ||
198 | |||
199 | and \$0x00ff0000,$acc0 | ||
200 | and \$0xff000000,$acc1 | ||
201 | and \$0xff000000,$acc2 | ||
202 | |||
203 | xor $acc0,$t3 | ||
204 | xor $acc1,$t0 | ||
205 | xor $acc2,$t1 | ||
206 | |||
207 | movzb `&hi("$s1")`,$acc0 | ||
208 | movzb `&hi("$s2")`,$acc1 | ||
209 | mov 16+12($key),$s3 | ||
210 | mov 2($sbox,$acc0,8),$acc0 #$t2 | ||
211 | mov 2($sbox,$acc1,8),$acc1 #$t3 | ||
212 | mov 16+0($key),$s0 | ||
213 | |||
214 | and \$0xff000000,$acc0 | ||
215 | and \$0xff000000,$acc1 | ||
216 | |||
217 | xor $acc0,$t2 | ||
218 | xor $acc1,$t3 | ||
219 | |||
220 | mov 16+4($key),$s1 | ||
221 | mov 16+8($key),$s2 | ||
222 | xor $t0,$s0 | ||
223 | xor $t1,$s1 | ||
224 | xor $t2,$s2 | ||
225 | xor $t3,$s3 | ||
226 | ___ | ||
227 | } | ||
228 | |||
# encstep(I, W0, W1, W2, W3): append to $code the instructions that compute
# output word I (0..3) of one encryption round from the four state registers
# W0..W3 (passed already rotated by the caller).
# For I = 0..2 the result goes to $t0/$t1/$t2 and $acc0..$acc2 are scratch.
# For I = 3 the result is built in W0 itself, W1..W3 double as scratch and
# are finally reloaded from $t0..$t2.  The step with I = 0 also advances
# $key by 16; every step XORs in the round-key word at 4*I($key).
sub encstep()
{ my ($i,@s) = @_;
  my ($w0,$w1,$w2,$w3) = @s;
  my $last = ($i==3);
  my ($tmp0,$tmp1,$tmp2) = $last ? ($w1,$w2,$w3) : ($acc0,$acc1,$acc2);
  my $out = ($t0,$t1,$t2,$w0)[$i];
  my @insn;			# instructions in emission order

  push(@insn, "movzb ".&lo($w0).",$out");
  push(@insn, "mov $w2,$tmp1")			unless ($last);
  push(@insn, "lea 16($key),$key")		if ($i==0);

  push(@insn, "movzb ".&hi($w1).",$tmp0");
  push(@insn, "mov 0($sbox,$out,8),$out");

  push(@insn, "shr \$16,$tmp1");
  push(@insn, "mov $w3,$tmp2")			unless ($last);
  push(@insn, "xor 3($sbox,$tmp0,8),$out");

  push(@insn, "movzb ".&lo($tmp1).",$tmp1");
  push(@insn, "shr \$24,$tmp2");
  push(@insn, "xor 4*$i($key),$out");

  push(@insn, "xor 2($sbox,$tmp1,8),$out");
  push(@insn, "xor 1($sbox,$tmp2,8),$out");

  # last step: move the three parked words back into the state registers
  push(@insn, "mov $t0,$w1", "mov $t1,$w2", "mov $t2,$w3")	if ($last);

  $code .= " $_\n" foreach (@insn);
  $code .= "\n";
}
264 | |||
# enclast(I, W0, W1, W2, W3): append to $code the instructions that compute
# output word I (0..3) of the final encryption round from the state
# registers W0..W3 (passed already rotated by the caller).
# Each byte comes from a 32-bit load at offset 0 or 2 of a table entry at
# $sbox and is isolated with a byte mask.  For I = 0..2 the result goes to
# $t0/$t1/$t2; for I = 3 it is built in W0, W1..W3 double as scratch and are
# finally reloaded from $t0..$t2.  No round key is applied here.
sub enclast()
{ my ($i,@s) = @_;
  my ($w0,$w1,$w2,$w3) = @s;
  my $last = ($i==3);
  my ($tmp0,$tmp1,$tmp2) = $last ? ($w1,$w2,$w3) : ($acc0,$acc1,$acc2);
  my $out = ($t0,$t1,$t2,$w0)[$i];
  my @insn;			# instructions in emission order

  push(@insn, "movzb ".&lo($w0).",$out");
  push(@insn, "mov $w2,$tmp1")			unless ($last);

  push(@insn, "mov 2($sbox,$out,8),$out");
  push(@insn, "shr \$16,$tmp1");
  push(@insn, "mov $w3,$tmp2")			unless ($last);

  push(@insn, "and \$0x000000ff,$out");
  push(@insn, "movzb ".&hi($w1).",$tmp0");
  push(@insn, "movzb ".&lo($tmp1).",$tmp1");
  push(@insn, "shr \$24,$tmp2");

  push(@insn, "mov 0($sbox,$tmp0,8),$tmp0");
  push(@insn, "mov 0($sbox,$tmp1,8),$tmp1");
  push(@insn, "mov 2($sbox,$tmp2,8),$tmp2");

  push(@insn, "and \$0x0000ff00,$tmp0");
  push(@insn, "and \$0x00ff0000,$tmp1");
  push(@insn, "and \$0xff000000,$tmp2");

  # combine the bytes; on the last step the reloads of the parked words
  # are interleaved with the XORs exactly as in the original schedule
  push(@insn, "xor $tmp0,$out");
  push(@insn, "mov $t0,$w1")			if ($last);
  push(@insn, "xor $tmp1,$out");
  push(@insn, "mov $t1,$w2")			if ($last);
  push(@insn, "xor $tmp2,$out");
  push(@insn, "mov $t2,$w3")			if ($last);

  $code .= " $_\n" foreach (@insn);
  $code .= "\n";
}
305 | |||
# Emit the private routine _x86_64_AES_encrypt.  It expects the state in
# $s0..$s3, the key schedule pointer in $key and the table base in $sbox
# (see the caller emitted for AES_encrypt).  The state is first XORed with
# the 16 bytes at $key; the round count is read from 240($key) and
# decremented once before the loop, so the loop body runs rounds-1 times and
# the final round is emitted separately after it.
306 | $code.=<<___; | ||
307 | .type _x86_64_AES_encrypt,\@abi-omnipotent | ||
308 | .align 16 | ||
309 | _x86_64_AES_encrypt: | ||
310 | xor 0($key),$s0 # xor with key | ||
311 | xor 4($key),$s1 | ||
312 | xor 8($key),$s2 | ||
313 | xor 12($key),$s3 | ||
314 | |||
315 | mov 240($key),$rnds # load key->rounds | ||
316 | sub \$1,$rnds | ||
317 | jmp .Lenc_loop | ||
318 | .align 16 | ||
319 | .Lenc_loop: | ||
320 | ___ | ||
# Loop body: one round, either the interleaved version or four per-word
# steps with the state registers rotated by one position each time.
321 | if ($verticalspin) { &encvert(); } | ||
322 | else { &encstep(0,$s0,$s1,$s2,$s3); | ||
323 | &encstep(1,$s1,$s2,$s3,$s0); | ||
324 | &encstep(2,$s2,$s3,$s0,$s1); | ||
325 | &encstep(3,$s3,$s0,$s1,$s2); | ||
326 | } | ||
327 | $code.=<<___; | ||
328 | sub \$1,$rnds | ||
329 | jnz .Lenc_loop | ||
330 | ___ | ||
# Final round.  enclastvert() applies the last round key itself; the
# per-word enclast() steps do not, so the XOR is emitted here for them.
331 | if ($verticalspin) { &enclastvert(); } | ||
332 | else { &enclast(0,$s0,$s1,$s2,$s3); | ||
333 | &enclast(1,$s1,$s2,$s3,$s0); | ||
334 | &enclast(2,$s2,$s3,$s0,$s1); | ||
335 | &enclast(3,$s3,$s0,$s1,$s2); | ||
336 | $code.=<<___; | ||
337 | xor 16+0($key),$s0 # xor with key | ||
338 | xor 16+4($key),$s1 | ||
339 | xor 16+8($key),$s2 | ||
340 | xor 16+12($key),$s3 | ||
341 | ___ | ||
342 | } | ||
343 | $code.=<<___; | ||
344 | .byte 0xf3,0xc3 # rep ret | ||
345 | .size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt | ||
346 | ___ | ||
347 | |||
348 | # void AES_encrypt (const void *inp,void *out,const AES_KEY *key); | ||
# Emit the public entry point AES_encrypt.  The generated code saves %rbx,
# %rbp and %r12-%r15, moves the three arguments (%rdi, %rsi, %rdx) into
# $inp, $out and $key, points $sbox at AES_Te, loads the 16 input bytes into
# $s0..$s3, calls _x86_64_AES_encrypt, stores $s0..$s3 to $out and restores
# the saved registers.
# $out is read again after the call, so the emitted core must leave %r9
# intact; $inp is not needed after the initial loads.
349 | $code.=<<___; | ||
350 | .globl AES_encrypt | ||
351 | .type AES_encrypt,\@function,3 | ||
352 | .align 16 | ||
353 | AES_encrypt: | ||
354 | push %rbx | ||
355 | push %rbp | ||
356 | push %r12 | ||
357 | push %r13 | ||
358 | push %r14 | ||
359 | push %r15 | ||
360 | |||
361 | mov %rdx,$key | ||
362 | mov %rdi,$inp | ||
363 | mov %rsi,$out | ||
364 | |||
365 | .picmeup $sbox | ||
366 | lea AES_Te-.($sbox),$sbox | ||
367 | |||
368 | mov 0($inp),$s0 | ||
369 | mov 4($inp),$s1 | ||
370 | mov 8($inp),$s2 | ||
371 | mov 12($inp),$s3 | ||
372 | |||
373 | call _x86_64_AES_encrypt | ||
374 | |||
375 | mov $s0,0($out) | ||
376 | mov $s1,4($out) | ||
377 | mov $s2,8($out) | ||
378 | mov $s3,12($out) | ||
379 | |||
380 | pop %r15 | ||
381 | pop %r14 | ||
382 | pop %r13 | ||
383 | pop %r12 | ||
384 | pop %rbp | ||
385 | pop %rbx | ||
386 | ret | ||
387 | .size AES_encrypt,.-AES_encrypt | ||
388 | ___ | ||
389 | |||
390 | #------------------------------------------------------------------# | ||
391 | |||
# decvert(): append to $code the assembly for one table-driven decryption
# round that works on all four state words ($s0..$s3) in an interleaved
# ("vertical") order.  Takes no arguments.
# Structure mirrors encvert(): each new word is accumulated in $t0..$t3 from
# four lookups into the table at $sbox (index scaled by 8, byte offsets 0,
# 3, 2 and 1), but the source word for each byte position is chosen in the
# opposite rotation.  $key is advanced by 16 and the results are XORed with
# the four words at the new $key into $s0..$s3.  %r8d (low half of $inp)
# serves as $t3, so the emitted code destroys $inp.
392 | sub decvert() | ||
393 | { my $t3="%r8d"; # zaps $inp! | ||
394 | |||
395 | $code.=<<___; | ||
396 | # favor 3-way issue Opteron pipeline... | ||
397 | movzb `&lo("$s0")`,$acc0 | ||
398 | movzb `&lo("$s1")`,$acc1 | ||
399 | movzb `&lo("$s2")`,$acc2 | ||
400 | mov 0($sbox,$acc0,8),$t0 | ||
401 | mov 0($sbox,$acc1,8),$t1 | ||
402 | mov 0($sbox,$acc2,8),$t2 | ||
403 | |||
404 | movzb `&hi("$s3")`,$acc0 | ||
405 | movzb `&hi("$s0")`,$acc1 | ||
406 | movzb `&lo("$s3")`,$acc2 | ||
407 | xor 3($sbox,$acc0,8),$t0 | ||
408 | xor 3($sbox,$acc1,8),$t1 | ||
409 | mov 0($sbox,$acc2,8),$t3 | ||
410 | |||
411 | movzb `&hi("$s1")`,$acc0 | ||
412 | shr \$16,$s0 | ||
413 | movzb `&hi("$s2")`,$acc2 | ||
414 | xor 3($sbox,$acc0,8),$t2 | ||
415 | shr \$16,$s3 | ||
416 | xor 3($sbox,$acc2,8),$t3 | ||
417 | |||
418 | shr \$16,$s1 | ||
419 | lea 16($key),$key | ||
420 | shr \$16,$s2 | ||
421 | |||
422 | movzb `&lo("$s2")`,$acc0 | ||
423 | movzb `&lo("$s3")`,$acc1 | ||
424 | movzb `&lo("$s0")`,$acc2 | ||
425 | xor 2($sbox,$acc0,8),$t0 | ||
426 | xor 2($sbox,$acc1,8),$t1 | ||
427 | xor 2($sbox,$acc2,8),$t2 | ||
428 | |||
429 | movzb `&hi("$s1")`,$acc0 | ||
430 | movzb `&hi("$s2")`,$acc1 | ||
431 | movzb `&lo("$s1")`,$acc2 | ||
432 | xor 1($sbox,$acc0,8),$t0 | ||
433 | xor 1($sbox,$acc1,8),$t1 | ||
434 | xor 2($sbox,$acc2,8),$t3 | ||
435 | |||
436 | movzb `&hi("$s3")`,$acc0 | ||
437 | mov 12($key),$s3 | ||
438 | movzb `&hi("$s0")`,$acc2 | ||
439 | xor 1($sbox,$acc0,8),$t2 | ||
440 | mov 0($key),$s0 | ||
441 | xor 1($sbox,$acc2,8),$t3 | ||
442 | |||
443 | xor $t0,$s0 | ||
444 | mov 4($key),$s1 | ||
445 | mov 8($key),$s2 | ||
446 | xor $t2,$s2 | ||
447 | xor $t1,$s1 | ||
448 | xor $t3,$s3 | ||
449 | ___ | ||
450 | } | ||
451 | |||
# declastvert(): append to $code the assembly for the final decryption
# round in the interleaved ("vertical") form.  Takes no arguments.
# Every output byte is fetched with a byte load from the table that starts
# 2048 bytes past $sbox (index scaled by 1) and moved into position with
# shl 8/16/24.  The four result words in $t0..$t3 are XORed with the
# round-key words at 16+0..16+12($key); $key itself is not advanced.
# %r8d (low half of $inp) serves as $t3, so the emitted code destroys $inp.
452 | sub declastvert() | ||
453 | { my $t3="%r8d"; # zaps $inp! | ||
454 | |||
455 | $code.=<<___; | ||
456 | movzb `&lo("$s0")`,$acc0 | ||
457 | movzb `&lo("$s1")`,$acc1 | ||
458 | movzb `&lo("$s2")`,$acc2 | ||
459 | movzb 2048($sbox,$acc0,1),$t0 | ||
460 | movzb 2048($sbox,$acc1,1),$t1 | ||
461 | movzb 2048($sbox,$acc2,1),$t2 | ||
462 | |||
463 | movzb `&lo("$s3")`,$acc0 | ||
464 | movzb `&hi("$s3")`,$acc1 | ||
465 | movzb `&hi("$s0")`,$acc2 | ||
466 | movzb 2048($sbox,$acc0,1),$t3 | ||
467 | movzb 2048($sbox,$acc1,1),$acc1 #$t0 | ||
468 | movzb 2048($sbox,$acc2,1),$acc2 #$t1 | ||
469 | |||
470 | shl \$8,$acc1 | ||
471 | shl \$8,$acc2 | ||
472 | |||
473 | xor $acc1,$t0 | ||
474 | xor $acc2,$t1 | ||
475 | shr \$16,$s3 | ||
476 | |||
477 | movzb `&hi("$s1")`,$acc0 | ||
478 | movzb `&hi("$s2")`,$acc1 | ||
479 | shr \$16,$s0 | ||
480 | movzb 2048($sbox,$acc0,1),$acc0 #$t2 | ||
481 | movzb 2048($sbox,$acc1,1),$acc1 #$t3 | ||
482 | |||
483 | shl \$8,$acc0 | ||
484 | shl \$8,$acc1 | ||
485 | shr \$16,$s1 | ||
486 | xor $acc0,$t2 | ||
487 | xor $acc1,$t3 | ||
488 | shr \$16,$s2 | ||
489 | |||
490 | movzb `&lo("$s2")`,$acc0 | ||
491 | movzb `&lo("$s3")`,$acc1 | ||
492 | movzb `&lo("$s0")`,$acc2 | ||
493 | movzb 2048($sbox,$acc0,1),$acc0 #$t0 | ||
494 | movzb 2048($sbox,$acc1,1),$acc1 #$t1 | ||
495 | movzb 2048($sbox,$acc2,1),$acc2 #$t2 | ||
496 | |||
497 | shl \$16,$acc0 | ||
498 | shl \$16,$acc1 | ||
499 | shl \$16,$acc2 | ||
500 | |||
501 | xor $acc0,$t0 | ||
502 | xor $acc1,$t1 | ||
503 | xor $acc2,$t2 | ||
504 | |||
505 | movzb `&lo("$s1")`,$acc0 | ||
506 | movzb `&hi("$s1")`,$acc1 | ||
507 | movzb `&hi("$s2")`,$acc2 | ||
508 | movzb 2048($sbox,$acc0,1),$acc0 #$t3 | ||
509 | movzb 2048($sbox,$acc1,1),$acc1 #$t0 | ||
510 | movzb 2048($sbox,$acc2,1),$acc2 #$t1 | ||
511 | |||
512 | shl \$16,$acc0 | ||
513 | shl \$24,$acc1 | ||
514 | shl \$24,$acc2 | ||
515 | |||
516 | xor $acc0,$t3 | ||
517 | xor $acc1,$t0 | ||
518 | xor $acc2,$t1 | ||
519 | |||
520 | movzb `&hi("$s3")`,$acc0 | ||
521 | movzb `&hi("$s0")`,$acc1 | ||
522 | mov 16+12($key),$s3 | ||
523 | movzb 2048($sbox,$acc0,1),$acc0 #$t2 | ||
524 | movzb 2048($sbox,$acc1,1),$acc1 #$t3 | ||
525 | mov 16+0($key),$s0 | ||
526 | |||
527 | shl \$24,$acc0 | ||
528 | shl \$24,$acc1 | ||
529 | |||
530 | xor $acc0,$t2 | ||
531 | xor $acc1,$t3 | ||
532 | |||
533 | mov 16+4($key),$s1 | ||
534 | mov 16+8($key),$s2 | ||
535 | xor $t0,$s0 | ||
536 | xor $t1,$s1 | ||
537 | xor $t2,$s2 | ||
538 | xor $t3,$s3 | ||
539 | ___ | ||
540 | } | ||
541 | |||
# decstep(I, W0, W1, W2, W3): append to $code the instructions that compute
# output word I (0..3) of one decryption round from the state registers
# W0..W3 (passed already rotated by the caller).
# For I = 0..2 the result goes to $t0/$t1/$t2 and $acc0..$acc2 are scratch.
# For I = 3 the result is built in W0 itself, W1..W3 double as scratch and
# are finally reloaded from $t2, $t1 and $t0 (in that order).  No round key
# is applied here; the caller emits that separately.
sub decstep()
{ my ($i,@s) = @_;
  my ($w0,$w1,$w2,$w3) = @s;
  my $last = ($i==3);
  my ($tmp0,$tmp1,$tmp2) = $last ? ($w1,$w2,$w3) : ($acc0,$acc1,$acc2);
  my $out = ($t0,$t1,$t2,$w0)[$i];
  my @insn;			# instructions in emission order

  push(@insn, "mov $w0,$out", "mov $w2,$tmp1")	unless ($last);
  push(@insn, "and \$0xFF,$out");

  push(@insn, "mov 0($sbox,$out,8),$out");
  push(@insn, "shr \$16,$tmp1");
  push(@insn, "mov $w3,$tmp2")			unless ($last);

  push(@insn, "movzb ".&hi($w1).",$tmp0");
  push(@insn, "and \$0xFF,$tmp1");
  push(@insn, "shr \$24,$tmp2");

  push(@insn, "xor 3($sbox,$tmp0,8),$out");
  push(@insn, "xor 2($sbox,$tmp1,8),$out");
  push(@insn, "xor 1($sbox,$tmp2,8),$out");

  # last step: move the three parked words back into the state registers
  push(@insn, "mov $t2,$w1", "mov $t1,$w2", "mov $t0,$w3")	if ($last);

  $code .= " $_\n" foreach (@insn);
  $code .= "\n";
}
573 | |||
# declast(I, W0, W1, W2, W3): append to $code the instructions that compute
# output word I (0..3) of the final decryption round from the state
# registers W0..W3 (passed already rotated by the caller).
# Each byte is fetched with a byte load from the table 2048 bytes past $sbox
# and shifted into place.  For I = 0..2 the result goes to $t0/$t1/$t2; for
# I = 3 it is built in W0, W1..W3 double as scratch and are finally reloaded
# from $t2, $t1 and $t0.  No round key is applied here.
sub declast()
{ my ($i,@s) = @_;
  my ($w0,$w1,$w2,$w3) = @s;
  my $last = ($i==3);
  my ($tmp0,$tmp1,$tmp2) = $last ? ($w1,$w2,$w3) : ($acc0,$acc1,$acc2);
  my $out = ($t0,$t1,$t2,$w0)[$i];
  my @insn;			# instructions in emission order

  push(@insn, "mov $w0,$out", "mov $w2,$tmp1")	unless ($last);
  push(@insn, "and \$0xFF,$out");

  push(@insn, "movzb 2048($sbox,$out,1),$out");
  push(@insn, "shr \$16,$tmp1");
  push(@insn, "mov $w3,$tmp2")			unless ($last);

  push(@insn, "movzb ".&hi($w1).",$tmp0");
  push(@insn, "and \$0xFF,$tmp1");
  push(@insn, "shr \$24,$tmp2");

  push(@insn, "movzb 2048($sbox,$tmp0,1),$tmp0");
  push(@insn, "movzb 2048($sbox,$tmp1,1),$tmp1");
  push(@insn, "movzb 2048($sbox,$tmp2,1),$tmp2");

  push(@insn, "shl \$8,$tmp0");
  push(@insn, "shl \$16,$tmp1");
  push(@insn, "shl \$24,$tmp2");

  # combine the bytes; on the last step the reloads of the parked words
  # are interleaved with the XORs exactly as in the original schedule
  push(@insn, "xor $tmp0,$out");
  push(@insn, "mov $t2,$w1")			if ($last);
  push(@insn, "xor $tmp1,$out");
  push(@insn, "mov $t1,$w2")			if ($last);
  push(@insn, "xor $tmp2,$out");
  push(@insn, "mov $t0,$w3")			if ($last);

  $code .= " $_\n" foreach (@insn);
  $code .= "\n";
}
612 | |||
# Emit the private routine _x86_64_AES_decrypt.  It expects the state in
# $s0..$s3, the key schedule pointer in $key and the table base in $sbox
# (see the caller emitted for AES_decrypt).  The state is first XORed with
# the 16 bytes at $key; the round count is read from 240($key) and
# decremented once before the loop, so the loop body runs rounds-1 times and
# the final round is emitted separately after it.
613 | $code.=<<___; | ||
614 | .type _x86_64_AES_decrypt,\@abi-omnipotent | ||
615 | .align 16 | ||
616 | _x86_64_AES_decrypt: | ||
617 | xor 0($key),$s0 # xor with key | ||
618 | xor 4($key),$s1 | ||
619 | xor 8($key),$s2 | ||
620 | xor 12($key),$s3 | ||
621 | |||
622 | mov 240($key),$rnds # load key->rounds | ||
623 | sub \$1,$rnds | ||
624 | jmp .Ldec_loop | ||
625 | .align 16 | ||
626 | .Ldec_loop: | ||
627 | ___ | ||
# Loop body: one round.  decvert() advances $key and applies the round key
# itself; the per-word decstep() calls do neither, so that part is emitted
# explicitly after them.
628 | if ($verticalspin) { &decvert(); } | ||
629 | else { &decstep(0,$s0,$s3,$s2,$s1); | ||
630 | &decstep(1,$s1,$s0,$s3,$s2); | ||
631 | &decstep(2,$s2,$s1,$s0,$s3); | ||
632 | &decstep(3,$s3,$s2,$s1,$s0); | ||
633 | $code.=<<___; | ||
634 | lea 16($key),$key | ||
635 | xor 0($key),$s0 # xor with key | ||
636 | xor 4($key),$s1 | ||
637 | xor 8($key),$s2 | ||
638 | xor 12($key),$s3 | ||
639 | ___ | ||
640 | } | ||
641 | $code.=<<___; | ||
642 | sub \$1,$rnds | ||
643 | jnz .Ldec_loop | ||
644 | ___ | ||
# Final round.  declastvert() applies the last round key itself; the
# per-word declast() steps do not, so the XOR is emitted here for them.
645 | if ($verticalspin) { &declastvert(); } | ||
646 | else { &declast(0,$s0,$s3,$s2,$s1); | ||
647 | &declast(1,$s1,$s0,$s3,$s2); | ||
648 | &declast(2,$s2,$s1,$s0,$s3); | ||
649 | &declast(3,$s3,$s2,$s1,$s0); | ||
650 | $code.=<<___; | ||
651 | xor 16+0($key),$s0 # xor with key | ||
652 | xor 16+4($key),$s1 | ||
653 | xor 16+8($key),$s2 | ||
654 | xor 16+12($key),$s3 | ||
655 | ___ | ||
656 | } | ||
657 | $code.=<<___; | ||
658 | .byte 0xf3,0xc3 # rep ret | ||
659 | .size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt | ||
660 | ___ | ||
661 | |||
662 | # void AES_decrypt (const void *inp,void *out,const AES_KEY *key); | ||
# Emit the public entry point AES_decrypt.  The generated code saves %rbx,
# %rbp and %r12-%r15, moves the three arguments (%rdi, %rsi, %rdx) into
# $inp, $out and $key and points $sbox at AES_Td.  Before the real work it
# reads eight words, 32 bytes apart, from the 256 bytes that start 2048
# bytes past $sbox (the "prefetch Td4" step; the loaded values are
# discarded) and then restores $sbox.  Finally it loads the 16 input bytes
# into $s0..$s3, calls _x86_64_AES_decrypt, stores $s0..$s3 to $out and
# restores the saved registers.
663 | $code.=<<___; | ||
664 | .globl AES_decrypt | ||
665 | .type AES_decrypt,\@function,3 | ||
666 | .align 16 | ||
667 | AES_decrypt: | ||
668 | push %rbx | ||
669 | push %rbp | ||
670 | push %r12 | ||
671 | push %r13 | ||
672 | push %r14 | ||
673 | push %r15 | ||
674 | |||
675 | mov %rdx,$key | ||
676 | mov %rdi,$inp | ||
677 | mov %rsi,$out | ||
678 | |||
679 | .picmeup $sbox | ||
680 | lea AES_Td-.($sbox),$sbox | ||
681 | |||
682 | # prefetch Td4 | ||
683 | lea 2048+128($sbox),$sbox; | ||
684 | mov 0-128($sbox),$s0 | ||
685 | mov 32-128($sbox),$s1 | ||
686 | mov 64-128($sbox),$s2 | ||
687 | mov 96-128($sbox),$s3 | ||
688 | mov 128-128($sbox),$s0 | ||
689 | mov 160-128($sbox),$s1 | ||
690 | mov 192-128($sbox),$s2 | ||
691 | mov 224-128($sbox),$s3 | ||
692 | lea -2048-128($sbox),$sbox; | ||
693 | |||
694 | mov 0($inp),$s0 | ||
695 | mov 4($inp),$s1 | ||
696 | mov 8($inp),$s2 | ||
697 | mov 12($inp),$s3 | ||
698 | |||
699 | call _x86_64_AES_decrypt | ||
700 | |||
701 | mov $s0,0($out) | ||
702 | mov $s1,4($out) | ||
703 | mov $s2,8($out) | ||
704 | mov $s3,12($out) | ||
705 | |||
706 | pop %r15 | ||
707 | pop %r14 | ||
708 | pop %r13 | ||
709 | pop %r12 | ||
710 | pop %rbp | ||
711 | pop %rbx | ||
712 | ret | ||
713 | .size AES_decrypt,.-AES_decrypt | ||
714 | ___ | ||
715 | #------------------------------------------------------------------# | ||
716 | |||
# enckey(): append to $code the shared core of one key-expansion step.
# Takes no arguments; the emitted code works on fixed registers:
#   in:  %edx = source word (annotated rk[i] below), %eax = word to update,
#        %rbp = table base, %rcx = index of the round constant
#   out: %eax ^= four masked table lookups selected by the bytes of %edx,
#        then ^= the 32-bit "rcon" entry at 2048(%rbp,%rcx,4)
# %ebx and %esi are used as scratch and %edx is shifted right by 16.
717 | sub enckey() | ||
718 | { | ||
719 | $code.=<<___; | ||
720 | movz %dl,%esi # rk[i]>>0 | ||
721 | mov 2(%rbp,%rsi,8),%ebx | ||
722 | movz %dh,%esi # rk[i]>>8 | ||
723 | and \$0xFF000000,%ebx | ||
724 | xor %ebx,%eax | ||
725 | |||
726 | mov 2(%rbp,%rsi,8),%ebx | ||
727 | shr \$16,%edx | ||
728 | and \$0x000000FF,%ebx | ||
729 | movz %dl,%esi # rk[i]>>16 | ||
730 | xor %ebx,%eax | ||
731 | |||
732 | mov 0(%rbp,%rsi,8),%ebx | ||
733 | movz %dh,%esi # rk[i]>>24 | ||
734 | and \$0x0000FF00,%ebx | ||
735 | xor %ebx,%eax | ||
736 | |||
737 | mov 0(%rbp,%rsi,8),%ebx | ||
738 | and \$0x00FF0000,%ebx | ||
739 | xor %ebx,%eax | ||
740 | |||
741 | xor 2048(%rbp,%rcx,4),%eax # rcon | ||
742 | ___ | ||
743 | } | ||
744 | |||
745 | # int AES_set_encrypt_key(const unsigned char *userKey, const int bits, | ||
746 | # AES_KEY *key) | ||
# Emit the public entry point AES_set_encrypt_key.
# Return value of the generated code (in %rax):
#    0  success
#   -1  userKey or key is NULL
#   -2  bits is not 128, 192 or 256
# %rbx and %rbp are saved and restored on every path through .Lexit.  The
# user key is copied to the start of the schedule, then the schedule is
# extended in a loop whose core is emitted by enckey(); %rbp points at
# AES_Te and %ecx counts iterations (it also selects the round constant).
# The round count (10, 12 or 14) is stored relative to the advanced %rdi;
# in each case that is byte offset 240 from the start of the schedule
# (10*16+80, 7*24+72, 6*32+48).
# NOTE(review): the emitted annotation "# rk[4]" on the load at .L14loop
# does not match the operand 28(%rdi), which is word index 7.
747 | $code.=<<___; | ||
748 | .globl AES_set_encrypt_key | ||
749 | .type AES_set_encrypt_key,\@function,3 | ||
750 | .align 16 | ||
751 | AES_set_encrypt_key: | ||
752 | push %rbx | ||
753 | push %rbp | ||
754 | |||
755 | mov %esi,%ecx # %ecx=bits | ||
756 | mov %rdi,%rsi # %rsi=userKey | ||
757 | mov %rdx,%rdi # %rdi=key | ||
758 | |||
759 | test \$-1,%rsi | ||
760 | jz .Lbadpointer | ||
761 | test \$-1,%rdi | ||
762 | jz .Lbadpointer | ||
763 | |||
764 | .picmeup %rbp | ||
765 | lea AES_Te-.(%rbp),%rbp | ||
766 | |||
767 | cmp \$128,%ecx | ||
768 | je .L10rounds | ||
769 | cmp \$192,%ecx | ||
770 | je .L12rounds | ||
771 | cmp \$256,%ecx | ||
772 | je .L14rounds | ||
773 | mov \$-2,%rax # invalid number of bits | ||
774 | jmp .Lexit | ||
775 | |||
776 | .L10rounds: | ||
777 | mov 0(%rsi),%eax # copy first 4 dwords | ||
778 | mov 4(%rsi),%ebx | ||
779 | mov 8(%rsi),%ecx | ||
780 | mov 12(%rsi),%edx | ||
781 | mov %eax,0(%rdi) | ||
782 | mov %ebx,4(%rdi) | ||
783 | mov %ecx,8(%rdi) | ||
784 | mov %edx,12(%rdi) | ||
785 | |||
786 | xor %ecx,%ecx | ||
787 | jmp .L10shortcut | ||
788 | .align 4 | ||
789 | .L10loop: | ||
790 | mov 0(%rdi),%eax # rk[0] | ||
791 | mov 12(%rdi),%edx # rk[3] | ||
792 | .L10shortcut: | ||
793 | ___ | ||
# 128-bit key: shared expansion core, then the four new words are chained.
794 | &enckey (); | ||
795 | $code.=<<___; | ||
796 | mov %eax,16(%rdi) # rk[4] | ||
797 | xor 4(%rdi),%eax | ||
798 | mov %eax,20(%rdi) # rk[5] | ||
799 | xor 8(%rdi),%eax | ||
800 | mov %eax,24(%rdi) # rk[6] | ||
801 | xor 12(%rdi),%eax | ||
802 | mov %eax,28(%rdi) # rk[7] | ||
803 | add \$1,%ecx | ||
804 | lea 16(%rdi),%rdi | ||
805 | cmp \$10,%ecx | ||
806 | jl .L10loop | ||
807 | |||
808 | movl \$10,80(%rdi) # setup number of rounds | ||
809 | xor %rax,%rax | ||
810 | jmp .Lexit | ||
811 | |||
812 | .L12rounds: | ||
813 | mov 0(%rsi),%eax # copy first 6 dwords | ||
814 | mov 4(%rsi),%ebx | ||
815 | mov 8(%rsi),%ecx | ||
816 | mov 12(%rsi),%edx | ||
817 | mov %eax,0(%rdi) | ||
818 | mov %ebx,4(%rdi) | ||
819 | mov %ecx,8(%rdi) | ||
820 | mov %edx,12(%rdi) | ||
821 | mov 16(%rsi),%ecx | ||
822 | mov 20(%rsi),%edx | ||
823 | mov %ecx,16(%rdi) | ||
824 | mov %edx,20(%rdi) | ||
825 | |||
826 | xor %ecx,%ecx | ||
827 | jmp .L12shortcut | ||
828 | .align 4 | ||
829 | .L12loop: | ||
830 | mov 0(%rdi),%eax # rk[0] | ||
831 | mov 20(%rdi),%edx # rk[5] | ||
832 | .L12shortcut: | ||
833 | ___ | ||
# 192-bit key: shared expansion core; the loop leaves early at %ecx == 7
# after writing only the first four words of the last group.
834 | &enckey (); | ||
835 | $code.=<<___; | ||
836 | mov %eax,24(%rdi) # rk[6] | ||
837 | xor 4(%rdi),%eax | ||
838 | mov %eax,28(%rdi) # rk[7] | ||
839 | xor 8(%rdi),%eax | ||
840 | mov %eax,32(%rdi) # rk[8] | ||
841 | xor 12(%rdi),%eax | ||
842 | mov %eax,36(%rdi) # rk[9] | ||
843 | |||
844 | cmp \$7,%ecx | ||
845 | je .L12break | ||
846 | add \$1,%ecx | ||
847 | |||
848 | xor 16(%rdi),%eax | ||
849 | mov %eax,40(%rdi) # rk[10] | ||
850 | xor 20(%rdi),%eax | ||
851 | mov %eax,44(%rdi) # rk[11] | ||
852 | |||
853 | lea 24(%rdi),%rdi | ||
854 | jmp .L12loop | ||
855 | .L12break: | ||
856 | movl \$12,72(%rdi) # setup number of rounds | ||
857 | xor %rax,%rax | ||
858 | jmp .Lexit | ||
859 | |||
860 | .L14rounds: | ||
861 | mov 0(%rsi),%eax # copy first 8 dwords | ||
862 | mov 4(%rsi),%ebx | ||
863 | mov 8(%rsi),%ecx | ||
864 | mov 12(%rsi),%edx | ||
865 | mov %eax,0(%rdi) | ||
866 | mov %ebx,4(%rdi) | ||
867 | mov %ecx,8(%rdi) | ||
868 | mov %edx,12(%rdi) | ||
869 | mov 16(%rsi),%eax | ||
870 | mov 20(%rsi),%ebx | ||
871 | mov 24(%rsi),%ecx | ||
872 | mov 28(%rsi),%edx | ||
873 | mov %eax,16(%rdi) | ||
874 | mov %ebx,20(%rdi) | ||
875 | mov %ecx,24(%rdi) | ||
876 | mov %edx,28(%rdi) | ||
877 | |||
878 | xor %ecx,%ecx | ||
879 | jmp .L14shortcut | ||
880 | .align 4 | ||
881 | .L14loop: | ||
882 | mov 28(%rdi),%edx # rk[4] | ||
883 | .L14shortcut: | ||
884 | mov 0(%rdi),%eax # rk[0] | ||
885 | ___ | ||
# 256-bit key: shared expansion core for the first four words of each
# group; the second four use an inline lookup sequence without the rcon
# XOR.  The loop leaves early at %ecx == 6.
886 | &enckey (); | ||
887 | $code.=<<___; | ||
888 | mov %eax,32(%rdi) # rk[8] | ||
889 | xor 4(%rdi),%eax | ||
890 | mov %eax,36(%rdi) # rk[9] | ||
891 | xor 8(%rdi),%eax | ||
892 | mov %eax,40(%rdi) # rk[10] | ||
893 | xor 12(%rdi),%eax | ||
894 | mov %eax,44(%rdi) # rk[11] | ||
895 | |||
896 | cmp \$6,%ecx | ||
897 | je .L14break | ||
898 | add \$1,%ecx | ||
899 | |||
900 | mov %eax,%edx | ||
901 | mov 16(%rdi),%eax # rk[4] | ||
902 | movz %dl,%esi # rk[11]>>0 | ||
903 | mov 2(%rbp,%rsi,8),%ebx | ||
904 | movz %dh,%esi # rk[11]>>8 | ||
905 | and \$0x000000FF,%ebx | ||
906 | xor %ebx,%eax | ||
907 | |||
908 | mov 0(%rbp,%rsi,8),%ebx | ||
909 | shr \$16,%edx | ||
910 | and \$0x0000FF00,%ebx | ||
911 | movz %dl,%esi # rk[11]>>16 | ||
912 | xor %ebx,%eax | ||
913 | |||
914 | mov 0(%rbp,%rsi,8),%ebx | ||
915 | movz %dh,%esi # rk[11]>>24 | ||
916 | and \$0x00FF0000,%ebx | ||
917 | xor %ebx,%eax | ||
918 | |||
919 | mov 2(%rbp,%rsi,8),%ebx | ||
920 | and \$0xFF000000,%ebx | ||
921 | xor %ebx,%eax | ||
922 | |||
923 | mov %eax,48(%rdi) # rk[12] | ||
924 | xor 20(%rdi),%eax | ||
925 | mov %eax,52(%rdi) # rk[13] | ||
926 | xor 24(%rdi),%eax | ||
927 | mov %eax,56(%rdi) # rk[14] | ||
928 | xor 28(%rdi),%eax | ||
929 | mov %eax,60(%rdi) # rk[15] | ||
930 | |||
931 | lea 32(%rdi),%rdi | ||
932 | jmp .L14loop | ||
933 | .L14break: | ||
934 | movl \$14,48(%rdi) # setup number of rounds | ||
935 | xor %rax,%rax | ||
936 | jmp .Lexit | ||
937 | |||
938 | .Lbadpointer: | ||
939 | mov \$-1,%rax | ||
940 | .Lexit: | ||
941 | pop %rbp | ||
942 | pop %rbx | ||
943 | ret | ||
944 | .size AES_set_encrypt_key,.-AES_set_encrypt_key | ||
945 | ___ | ||
946 | |||
# deckey(I, PTR, TE, TD): append to $code the instructions that transform
# one 32-bit key-schedule word in place.
#   I    byte offset of the word relative to PTR
#   PTR  register holding the address of the current round key
#   TE   register holding the base of the table used for the byte loads
#   TD   register holding the base of the table used for the word loads
# The emitted code loads the word at I(PTR), replaces each of its four bytes
# b by the byte at 2(TE,b,8), uses the resulting bytes as indices into TD
# (scale 8, byte offsets 0, 3, 2 and 1), XORs the four 32-bit loads together
# and stores the result back to I(PTR).  %rax, %rbx and %rdx are clobbered.
947 | sub deckey() | ||
948 | { my ($i,$ptr,$te,$td) = @_; | ||
949 | $code.=<<___; | ||
950 | mov $i($ptr),%eax | ||
951 | mov %eax,%edx | ||
952 | movz %ah,%ebx | ||
953 | shr \$16,%edx | ||
954 | and \$0xFF,%eax | ||
955 | movzb 2($te,%rax,8),%rax | ||
956 | movzb 2($te,%rbx,8),%rbx | ||
957 | mov 0($td,%rax,8),%eax | ||
958 | xor 3($td,%rbx,8),%eax | ||
959 | movzb %dh,%ebx | ||
960 | and \$0xFF,%edx | ||
961 | movzb 2($te,%rdx,8),%rdx | ||
962 | movzb 2($te,%rbx,8),%rbx | ||
963 | xor 2($td,%rdx,8),%eax | ||
964 | xor 1($td,%rbx,8),%eax | ||
965 | mov %eax,$i($ptr) | ||
966 | ___ | ||
967 | } | ||
968 | |||
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
#                        AES_KEY *key)
#
# Emit the public entry point AES_set_decrypt_key.  The generated code
#   1. pushes the key pointer (%rdx) and calls AES_set_encrypt_key with the
#      caller's arguments untouched; a non-zero result is returned as is;
#   2. reuses the pushed slot to save %rbx, then swaps the 16-byte round
#      keys end for end (.Linvert);
#   3. runs the deckey() transform over every word of the rounds-1 round
#      keys that follow the first one (.Lpermute);
#   4. returns 0.
#
# Error path: when AES_set_encrypt_key fails, the only thing on the stack
# above the return address is the single 8-byte key pointer pushed at
# entry (AES_set_encrypt_key pops its own %rbx/%rbp before returning), so
# exactly 8 bytes must be dropped before "ret".  The previous "lea 24"
# skipped past the return address and made "ret" jump to an arbitrary
# stack value.
$code.=<<___;
.globl AES_set_decrypt_key
.type AES_set_decrypt_key,\@function,3
.align 16
AES_set_decrypt_key:
	push %rdx
	call AES_set_encrypt_key
	cmp \$0,%eax
	je .Lproceed
	lea 8(%rsp),%rsp # discard saved key schedule pointer
	ret
.Lproceed:
	mov (%rsp),%r8 # restore key schedule
	mov %rbx,(%rsp)

	mov 240(%r8),%ecx # pull number of rounds
	xor %rdi,%rdi
	lea (%rdi,%rcx,4),%rcx
	mov %r8,%rsi
	lea (%r8,%rcx,4),%rdi # pointer to last chunk
.align 4
.Linvert:
	mov 0(%rsi),%rax
	mov 8(%rsi),%rbx
	mov 0(%rdi),%rcx
	mov 8(%rdi),%rdx
	mov %rax,0(%rdi)
	mov %rbx,8(%rdi)
	mov %rcx,0(%rsi)
	mov %rdx,8(%rsi)
	lea 16(%rsi),%rsi
	lea -16(%rdi),%rdi
	cmp %rsi,%rdi
	jne .Linvert

	.picmeup %r9
	lea AES_Td-.(%r9),%rdi
	lea AES_Te-AES_Td(%rdi),%r9

	mov %r8,%rsi
	mov 240(%r8),%ecx # pull number of rounds
	sub \$1,%ecx
.align 4
.Lpermute:
	lea 16(%rsi),%rsi
___
	# transform the four words of the current round key in place
	&deckey (0,"%rsi","%r9","%rdi");
	&deckey (4,"%rsi","%r9","%rdi");
	&deckey (8,"%rsi","%r9","%rdi");
	&deckey (12,"%rsi","%r9","%rdi");
$code.=<<___;
	sub \$1,%ecx
	jnz .Lpermute

	xor %rax,%rax
	pop %rbx
	ret
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___
1030 | |||
1031 | # void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out, | ||
1032 | # size_t length, const AES_KEY *key, | ||
1033 | # unsigned char *ivp,const int enc); | ||
1034 | { | ||
1035 | # stack frame layout | ||
1036 | # -8(%rsp) return address | ||
1037 | my $_rsp="0(%rsp)"; # saved %rsp | ||
1038 | my $_len="8(%rsp)"; # copy of 3rd parameter, length | ||
1039 | my $_key="16(%rsp)"; # copy of 4th parameter, key | ||
1040 | my $_ivp="24(%rsp)"; # copy of 5th parameter, ivp | ||
1041 | my $keyp="32(%rsp)"; # one to pass as $key | ||
1042 | my $ivec="40(%rsp)"; # ivec[16] | ||
1043 | my $aes_key="56(%rsp)"; # copy of aes_key | ||
1044 | my $mark="56+240(%rsp)"; # copy of aes_key->rounds | ||
1045 | |||
1046 | $code.=<<___; | ||
1047 | .globl AES_cbc_encrypt | ||
1048 | .type AES_cbc_encrypt,\@function,6 | ||
1049 | .align 16 | ||
1050 | AES_cbc_encrypt: | ||
1051 | cmp \$0,%rdx # check length | ||
1052 | je .Lcbc_just_ret | ||
1053 | push %rbx | ||
1054 | push %rbp | ||
1055 | push %r12 | ||
1056 | push %r13 | ||
1057 | push %r14 | ||
1058 | push %r15 | ||
1059 | pushfq | ||
1060 | cld | ||
1061 | mov %r9d,%r9d # clear upper half of enc | ||
1062 | |||
1063 | .picmeup $sbox | ||
1064 | .Lcbc_pic_point: | ||
1065 | |||
1066 | cmp \$0,%r9 | ||
1067 | je .LDECRYPT | ||
1068 | |||
1069 | lea AES_Te-.Lcbc_pic_point($sbox),$sbox | ||
1070 | |||
1071 | # allocate aligned stack frame... | ||
1072 | lea -64-248(%rsp),$key | ||
1073 | and \$-64,$key | ||
1074 | |||
1075 | # ... and make it doesn't alias with AES_Te modulo 4096 | ||
1076 | mov $sbox,%r10 | ||
1077 | lea 2048($sbox),%r11 | ||
1078 | mov $key,%r12 | ||
1079 | and \$0xFFF,%r10 # s = $sbox&0xfff | ||
1080 | and \$0xFFF,%r11 # e = ($sbox+2048)&0xfff | ||
1081 | and \$0xFFF,%r12 # p = %rsp&0xfff | ||
1082 | |||
1083 | cmp %r11,%r12 # if (p=>e) %rsp =- (p-e); | ||
1084 | jb .Lcbc_te_break_out | ||
1085 | sub %r11,%r12 | ||
1086 | sub %r12,$key | ||
1087 | jmp .Lcbc_te_ok | ||
1088 | .Lcbc_te_break_out: # else %rsp -= (p-s)&0xfff + framesz | ||
1089 | sub %r10,%r12 | ||
1090 | and \$0xFFF,%r12 | ||
1091 | add \$320,%r12 | ||
1092 | sub %r12,$key | ||
1093 | .align 4 | ||
1094 | .Lcbc_te_ok: | ||
1095 | |||
1096 | xchg %rsp,$key | ||
1097 | add \$8,%rsp # reserve for return address! | ||
1098 | mov $key,$_rsp # save %rsp | ||
1099 | mov %rdx,$_len # save copy of len | ||
1100 | mov %rcx,$_key # save copy of key | ||
1101 | mov %r8,$_ivp # save copy of ivp | ||
1102 | movl \$0,$mark # copy of aes_key->rounds = 0; | ||
1103 | mov %r8,%rbp # rearrange input arguments | ||
1104 | mov %rsi,$out | ||
1105 | mov %rdi,$inp | ||
1106 | mov %rcx,$key | ||
1107 | |||
1108 | # do we copy key schedule to stack? | ||
1109 | mov $key,%r10 | ||
1110 | sub $sbox,%r10 | ||
1111 | and \$0xfff,%r10 | ||
1112 | cmp \$2048,%r10 | ||
1113 | jb .Lcbc_do_ecopy | ||
1114 | cmp \$4096-248,%r10 | ||
1115 | jb .Lcbc_skip_ecopy | ||
1116 | .align 4 | ||
1117 | .Lcbc_do_ecopy: | ||
1118 | mov $key,%rsi | ||
1119 | lea $aes_key,%rdi | ||
1120 | lea $aes_key,$key | ||
1121 | mov \$240/8,%ecx | ||
1122 | .long 0x90A548F3 # rep movsq | ||
1123 | mov (%rsi),%eax # copy aes_key->rounds | ||
1124 | mov %eax,(%rdi) | ||
1125 | .Lcbc_skip_ecopy: | ||
1126 | mov $key,$keyp # save key pointer | ||
1127 | |||
1128 | mov \$16,%ecx | ||
1129 | .align 4 | ||
1130 | .Lcbc_prefetch_te: | ||
1131 | mov 0($sbox),%r10 | ||
1132 | mov 32($sbox),%r11 | ||
1133 | mov 64($sbox),%r12 | ||
1134 | mov 96($sbox),%r13 | ||
1135 | lea 128($sbox),$sbox | ||
1136 | sub \$1,%ecx | ||
1137 | jnz .Lcbc_prefetch_te | ||
1138 | sub \$2048,$sbox | ||
1139 | |||
1140 | test \$-16,%rdx # check upon length | ||
1141 | mov %rdx,%r10 | ||
1142 | mov 0(%rbp),$s0 # load iv | ||
1143 | mov 4(%rbp),$s1 | ||
1144 | mov 8(%rbp),$s2 | ||
1145 | mov 12(%rbp),$s3 | ||
1146 | jz .Lcbc_enc_tail # short input... | ||
1147 | |||
1148 | .align 4 | ||
1149 | .Lcbc_enc_loop: | ||
1150 | xor 0($inp),$s0 | ||
1151 | xor 4($inp),$s1 | ||
1152 | xor 8($inp),$s2 | ||
1153 | xor 12($inp),$s3 | ||
1154 | mov $inp,$ivec # if ($verticalspin) save inp | ||
1155 | |||
1156 | mov $keyp,$key # restore key | ||
1157 | call _x86_64_AES_encrypt | ||
1158 | |||
1159 | mov $ivec,$inp # if ($verticalspin) restore inp | ||
1160 | mov $s0,0($out) | ||
1161 | mov $s1,4($out) | ||
1162 | mov $s2,8($out) | ||
1163 | mov $s3,12($out) | ||
1164 | |||
1165 | mov $_len,%r10 | ||
1166 | lea 16($inp),$inp | ||
1167 | lea 16($out),$out | ||
1168 | sub \$16,%r10 | ||
1169 | test \$-16,%r10 | ||
1170 | mov %r10,$_len | ||
1171 | jnz .Lcbc_enc_loop | ||
1172 | test \$15,%r10 | ||
1173 | jnz .Lcbc_enc_tail | ||
1174 | mov $_ivp,%rbp # restore ivp | ||
1175 | mov $s0,0(%rbp) # save ivec | ||
1176 | mov $s1,4(%rbp) | ||
1177 | mov $s2,8(%rbp) | ||
1178 | mov $s3,12(%rbp) | ||
1179 | |||
1180 | .align 4 | ||
1181 | .Lcbc_cleanup: | ||
1182 | cmpl \$0,$mark # was the key schedule copied? | ||
1183 | lea $aes_key,%rdi | ||
1184 | mov $_rsp,%rsp | ||
1185 | je .Lcbc_exit | ||
1186 | mov \$240/8,%ecx | ||
1187 | xor %rax,%rax | ||
1188 | .long 0x90AB48F3 # rep stosq | ||
1189 | .Lcbc_exit: | ||
1190 | popfq | ||
1191 | pop %r15 | ||
1192 | pop %r14 | ||
1193 | pop %r13 | ||
1194 | pop %r12 | ||
1195 | pop %rbp | ||
1196 | pop %rbx | ||
1197 | .Lcbc_just_ret: | ||
1198 | ret | ||
1199 | .align 4 | ||
1200 | .Lcbc_enc_tail: | ||
1201 | cmp $inp,$out | ||
1202 | je .Lcbc_enc_in_place | ||
1203 | mov %r10,%rcx | ||
1204 | mov $inp,%rsi | ||
1205 | mov $out,%rdi | ||
1206 | .long 0xF689A4F3 # rep movsb | ||
1207 | .Lcbc_enc_in_place: | ||
1208 | mov \$16,%rcx # zero tail | ||
1209 | sub %r10,%rcx | ||
1210 | xor %rax,%rax | ||
1211 | .long 0xF689AAF3 # rep stosb | ||
1212 | mov $out,$inp # this is not a mistake! | ||
1213 | movq \$16,$_len # len=16 | ||
1214 | jmp .Lcbc_enc_loop # one more spin... | ||
1215 | #----------------------------- DECRYPT -----------------------------# | ||
1216 | .align 16 | ||
1217 | .LDECRYPT: | ||
1218 | lea AES_Td-.Lcbc_pic_point($sbox),$sbox | ||
1219 | |||
1220 | # allocate aligned stack frame... | ||
1221 | lea -64-248(%rsp),$key | ||
1222 | and \$-64,$key | ||
1223 | |||
1224 | # ... and make it doesn't alias with AES_Td modulo 4096 | ||
1225 | mov $sbox,%r10 | ||
1226 | lea 2304($sbox),%r11 | ||
1227 | mov $key,%r12 | ||
1228 | and \$0xFFF,%r10 # s = $sbox&0xfff | ||
1229 | and \$0xFFF,%r11 # e = ($sbox+2048+256)&0xfff | ||
1230 | and \$0xFFF,%r12 # p = %rsp&0xfff | ||
1231 | |||
1232 | cmp %r11,%r12 # if (p=>e) %rsp =- (p-e); | ||
1233 | jb .Lcbc_td_break_out | ||
1234 | sub %r11,%r12 | ||
1235 | sub %r12,$key | ||
1236 | jmp .Lcbc_td_ok | ||
1237 | .Lcbc_td_break_out: # else %rsp -= (p-s)&0xfff + framesz | ||
1238 | sub %r10,%r12 | ||
1239 | and \$0xFFF,%r12 | ||
1240 | add \$320,%r12 | ||
1241 | sub %r12,$key | ||
1242 | .align 4 | ||
1243 | .Lcbc_td_ok: | ||
1244 | |||
1245 | xchg %rsp,$key | ||
1246 | add \$8,%rsp # reserve for return address! | ||
1247 | mov $key,$_rsp # save %rsp | ||
1248 | mov %rdx,$_len # save copy of len | ||
1249 | mov %rcx,$_key # save copy of key | ||
1250 | mov %r8,$_ivp # save copy of ivp | ||
1251 | movl \$0,$mark # copy of aes_key->rounds = 0; | ||
1252 | mov %r8,%rbp # rearrange input arguments | ||
1253 | mov %rsi,$out | ||
1254 | mov %rdi,$inp | ||
1255 | mov %rcx,$key | ||
1256 | |||
1257 | # do we copy key schedule to stack? | ||
1258 | mov $key,%r10 | ||
1259 | sub $sbox,%r10 | ||
1260 | and \$0xfff,%r10 | ||
1261 | cmp \$2304,%r10 | ||
1262 | jb .Lcbc_do_dcopy | ||
1263 | cmp \$4096-248,%r10 | ||
1264 | jb .Lcbc_skip_dcopy | ||
1265 | .align 4 | ||
1266 | .Lcbc_do_dcopy: | ||
1267 | mov $key,%rsi | ||
1268 | lea $aes_key,%rdi | ||
1269 | lea $aes_key,$key | ||
1270 | mov \$240/8,%ecx | ||
1271 | .long 0x90A548F3 # rep movsq | ||
1272 | mov (%rsi),%eax # copy aes_key->rounds | ||
1273 | mov %eax,(%rdi) | ||
1274 | .Lcbc_skip_dcopy: | ||
1275 | mov $key,$keyp # save key pointer | ||
1276 | |||
1277 | mov \$18,%ecx | ||
1278 | .align 4 | ||
1279 | .Lcbc_prefetch_td: | ||
1280 | mov 0($sbox),%r10 | ||
1281 | mov 32($sbox),%r11 | ||
1282 | mov 64($sbox),%r12 | ||
1283 | mov 96($sbox),%r13 | ||
1284 | lea 128($sbox),$sbox | ||
1285 | sub \$1,%ecx | ||
1286 | jnz .Lcbc_prefetch_td | ||
1287 | sub \$2304,$sbox | ||
1288 | |||
1289 | cmp $inp,$out | ||
1290 | je .Lcbc_dec_in_place | ||
1291 | |||
1292 | mov %rbp,$ivec | ||
1293 | .align 4 | ||
1294 | .Lcbc_dec_loop: | ||
1295 | mov 0($inp),$s0 # read input | ||
1296 | mov 4($inp),$s1 | ||
1297 | mov 8($inp),$s2 | ||
1298 | mov 12($inp),$s3 | ||
1299 | mov $inp,8+$ivec # if ($verticalspin) save inp | ||
1300 | |||
1301 | mov $keyp,$key # restore key | ||
1302 | call _x86_64_AES_decrypt | ||
1303 | |||
1304 | mov $ivec,%rbp # load ivp | ||
1305 | mov 8+$ivec,$inp # if ($verticalspin) restore inp | ||
1306 | xor 0(%rbp),$s0 # xor iv | ||
1307 | xor 4(%rbp),$s1 | ||
1308 | xor 8(%rbp),$s2 | ||
1309 | xor 12(%rbp),$s3 | ||
1310 | mov $inp,%rbp # current input, next iv | ||
1311 | |||
1312 | mov $_len,%r10 # load len | ||
1313 | sub \$16,%r10 | ||
1314 | jc .Lcbc_dec_partial | ||
1315 | mov %r10,$_len # update len | ||
1316 | mov %rbp,$ivec # update ivp | ||
1317 | |||
1318 | mov $s0,0($out) # write output | ||
1319 | mov $s1,4($out) | ||
1320 | mov $s2,8($out) | ||
1321 | mov $s3,12($out) | ||
1322 | |||
1323 | lea 16($inp),$inp | ||
1324 | lea 16($out),$out | ||
1325 | jnz .Lcbc_dec_loop | ||
1326 | .Lcbc_dec_end: | ||
1327 | mov $_ivp,%r12 # load user ivp | ||
1328 | mov 0(%rbp),%r10 # load iv | ||
1329 | mov 8(%rbp),%r11 | ||
1330 | mov %r10,0(%r12) # copy back to user | ||
1331 | mov %r11,8(%r12) | ||
1332 | jmp .Lcbc_cleanup | ||
1333 | |||
1334 | .align 4 | ||
1335 | .Lcbc_dec_partial: | ||
1336 | mov $s0,0+$ivec # dump output to stack | ||
1337 | mov $s1,4+$ivec | ||
1338 | mov $s2,8+$ivec | ||
1339 | mov $s3,12+$ivec | ||
1340 | mov $out,%rdi | ||
1341 | lea $ivec,%rsi | ||
1342 | mov \$16,%rcx | ||
1343 | add %r10,%rcx # number of bytes to copy | ||
1344 | .long 0xF689A4F3 # rep movsb | ||
1345 | jmp .Lcbc_dec_end | ||
1346 | |||
1347 | .align 16 | ||
1348 | .Lcbc_dec_in_place: | ||
1349 | mov 0($inp),$s0 # load input | ||
1350 | mov 4($inp),$s1 | ||
1351 | mov 8($inp),$s2 | ||
1352 | mov 12($inp),$s3 | ||
1353 | |||
1354 | mov $inp,$ivec # if ($verticalspin) save inp | ||
1355 | mov $keyp,$key | ||
1356 | call _x86_64_AES_decrypt | ||
1357 | |||
1358 | mov $ivec,$inp # if ($verticalspin) restore inp | ||
1359 | mov $_ivp,%rbp | ||
1360 | xor 0(%rbp),$s0 | ||
1361 | xor 4(%rbp),$s1 | ||
1362 | xor 8(%rbp),$s2 | ||
1363 | xor 12(%rbp),$s3 | ||
1364 | |||
1365 | mov 0($inp),%r10 # copy input to iv | ||
1366 | mov 8($inp),%r11 | ||
1367 | mov %r10,0(%rbp) | ||
1368 | mov %r11,8(%rbp) | ||
1369 | |||
1370 | mov $s0,0($out) # save output [zaps input] | ||
1371 | mov $s1,4($out) | ||
1372 | mov $s2,8($out) | ||
1373 | mov $s3,12($out) | ||
1374 | |||
1375 | mov $_len,%rcx | ||
1376 | lea 16($inp),$inp | ||
1377 | lea 16($out),$out | ||
1378 | sub \$16,%rcx | ||
1379 | jc .Lcbc_dec_in_place_partial | ||
1380 | mov %rcx,$_len | ||
1381 | jnz .Lcbc_dec_in_place | ||
1382 | jmp .Lcbc_cleanup | ||
1383 | |||
1384 | .align 4 | ||
1385 | .Lcbc_dec_in_place_partial: | ||
1386 | # one can argue if this is actually required | ||
1387 | lea ($out,%rcx),%rdi | ||
1388 | lea (%rbp,%rcx),%rsi | ||
1389 | neg %rcx | ||
1390 | .long 0xF689A4F3 # rep movsb # restore tail | ||
1391 | jmp .Lcbc_cleanup | ||
1392 | .size AES_cbc_encrypt,.-AES_cbc_encrypt | ||
1393 | ___ | ||
1394 | } | ||
1395 | |||
# AES_Te: encryption lookup table, 256 entries (64 rows of four words)
# emitted through _data_word.  The CBC code in this file prefetches and
# bounds the table as 2048 bytes, i.e. 8 bytes per entry -- presumably
# _data_word stores every word twice; the helper is defined outside this
# section, confirm there.  The rcon words follow directly after the table.
1396 | $code.=<<___; | ||
1397 | .globl AES_Te | ||
1398 | .align 64 | ||
1399 | AES_Te: | ||
1400 | ___ | ||
1401 | &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); | ||
1402 | &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); | ||
1403 | &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); | ||
1404 | &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec); | ||
1405 | &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa); | ||
1406 | &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb); | ||
1407 | &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45); | ||
1408 | &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b); | ||
1409 | &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c); | ||
1410 | &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83); | ||
1411 | &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9); | ||
1412 | &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a); | ||
1413 | &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d); | ||
1414 | &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f); | ||
1415 | &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df); | ||
1416 | &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea); | ||
1417 | &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34); | ||
1418 | &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b); | ||
1419 | &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d); | ||
1420 | &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413); | ||
1421 | &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1); | ||
1422 | &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6); | ||
1423 | &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972); | ||
1424 | &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85); | ||
1425 | &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed); | ||
1426 | &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511); | ||
1427 | &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe); | ||
1428 | &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b); | ||
1429 | &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05); | ||
1430 | &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1); | ||
1431 | &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142); | ||
1432 | &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf); | ||
1433 | &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3); | ||
1434 | &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e); | ||
1435 | &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a); | ||
1436 | &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6); | ||
1437 | &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3); | ||
1438 | &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b); | ||
1439 | &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428); | ||
1440 | &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad); | ||
1441 | &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14); | ||
1442 | &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8); | ||
1443 | &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4); | ||
1444 | &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2); | ||
1445 | &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda); | ||
1446 | &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949); | ||
1447 | &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf); | ||
1448 | &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810); | ||
1449 | &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c); | ||
1450 | &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697); | ||
1451 | &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e); | ||
1452 | &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f); | ||
1453 | &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc); | ||
1454 | &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c); | ||
1455 | &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969); | ||
1456 | &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27); | ||
1457 | &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122); | ||
1458 | &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433); | ||
1459 | &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9); | ||
1460 | &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5); | ||
1461 | &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a); | ||
1462 | &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0); | ||
1463 | &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e); | ||
1464 | &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c); | ||
1465 | #rcon: ten key-schedule round constants (0x01..0x36), zero-padded to sixteen longs | ||
1466 | $code.=<<___; | ||
1467 | .long 0x00000001, 0x00000002, 0x00000004, 0x00000008 | ||
1468 | .long 0x00000010, 0x00000020, 0x00000040, 0x00000080 | ||
1469 | .long 0x0000001b, 0x00000036, 0, 0, 0, 0, 0, 0 | ||
1470 | ___ | ||
# AES_Td: decryption lookup table, 256 entries (64 rows of four words)
# emitted through _data_word, followed by the 256-byte Td4 table emitted
# through data_byte.  The CBC decrypt code in this file prefetches and
# bounds the whole thing as 2304 = 2048+256 bytes, i.e. 8 bytes per Td
# entry -- presumably _data_word stores every word twice; both helpers are
# defined outside this section, confirm there.
1471 | $code.=<<___; | ||
1472 | .globl AES_Td | ||
1473 | .align 64 | ||
1474 | AES_Td: | ||
1475 | ___ | ||
1476 | &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); | ||
1477 | &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); | ||
1478 | &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); | ||
1479 | &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5); | ||
1480 | &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d); | ||
1481 | &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b); | ||
1482 | &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295); | ||
1483 | &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e); | ||
1484 | &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927); | ||
1485 | &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d); | ||
1486 | &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362); | ||
1487 | &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9); | ||
1488 | &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52); | ||
1489 | &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566); | ||
1490 | &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3); | ||
1491 | &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed); | ||
1492 | &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e); | ||
1493 | &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4); | ||
1494 | &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4); | ||
1495 | &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd); | ||
1496 | &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d); | ||
1497 | &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060); | ||
1498 | &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967); | ||
1499 | &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879); | ||
1500 | &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000); | ||
1501 | &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c); | ||
1502 | &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36); | ||
1503 | &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624); | ||
1504 | &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b); | ||
1505 | &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c); | ||
1506 | &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12); | ||
1507 | &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14); | ||
1508 | &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3); | ||
1509 | &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b); | ||
1510 | &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8); | ||
1511 | &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684); | ||
1512 | &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7); | ||
1513 | &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177); | ||
1514 | &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947); | ||
1515 | &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322); | ||
1516 | &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498); | ||
1517 | &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f); | ||
1518 | &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54); | ||
1519 | &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382); | ||
1520 | &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf); | ||
1521 | &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb); | ||
1522 | &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83); | ||
1523 | &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef); | ||
1524 | &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029); | ||
1525 | &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235); | ||
1526 | &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733); | ||
1527 | &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117); | ||
1528 | &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4); | ||
1529 | &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546); | ||
1530 | &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb); | ||
1531 | &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d); | ||
1532 | &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb); | ||
1533 | &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a); | ||
1534 | &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773); | ||
1535 | &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478); | ||
1536 | &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2); | ||
1537 | &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff); | ||
1538 | &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664); | ||
1539 | &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0); | ||
1540 | #Td4: 256-byte inverse S-box, the extra 256 bytes the CBC decrypt path counts on top of the 2048-byte Td | ||
1541 | &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38); | ||
1542 | &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb); | ||
1543 | &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87); | ||
1544 | &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb); | ||
1545 | &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d); | ||
1546 | &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e); | ||
1547 | &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2); | ||
1548 | &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25); | ||
1549 | &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16); | ||
1550 | &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92); | ||
1551 | &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda); | ||
1552 | &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84); | ||
1553 | &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a); | ||
1554 | &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06); | ||
1555 | &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02); | ||
1556 | &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b); | ||
1557 | &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea); | ||
1558 | &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73); | ||
1559 | &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85); | ||
1560 | &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e); | ||
1561 | &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89); | ||
1562 | &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b); | ||
1563 | &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20); | ||
1564 | &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4); | ||
1565 | &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31); | ||
1566 | &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f); | ||
1567 | &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d); | ||
1568 | &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef); | ||
1569 | &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0); | ||
1570 | &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); | ||
1571 | &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); | ||
1572 | &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); | ||
1573 | |||
# Final stage: evaluate every backtick-quoted Perl expression embedded in
# the generated text, then hand the result to the translator that STDOUT
# was piped into at the top of the script.
1574 | $code =~ s/\`([^\`]*)\`/eval($1)/gem; | ||
1575 | |||
1576 | print $code; | ||
1577 | |||
# STDOUT is a pipe: close() is what reports a failed write or a non-zero
# exit of the translator, so a broken run must not pass silently.
1578 | close STDOUT or die "error closing STDOUT: $! (wait status $?)"; | ||