summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/libcrypto/aes/asm/vpaes-x86_64.pl1222
1 files changed, 0 insertions, 1222 deletions
diff --git a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl b/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl
deleted file mode 100644
index 7d92e8d8ca..0000000000
--- a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl
+++ /dev/null
@@ -1,1222 +0,0 @@
1#!/usr/bin/env perl
2
3######################################################################
4## Constant-time SSSE3 AES core implementation.
5## version 0.1
6##
7## By Mike Hamburg (Stanford University), 2009
8## Public domain.
9##
10## For details see http://shiftleft.org/papers/vector_aes/ and
11## http://crypto.stanford.edu/vpaes/.
12
13######################################################################
14# September 2011.
15#
16# Interface to OpenSSL as "almost" drop-in replacement for
17# aes-x86_64.pl. "Almost" refers to the fact that AES_cbc_encrypt
18# doesn't handle partial vectors (doesn't have to if called from
19# EVP only). "Drop-in" implies that this module doesn't share key
20# schedule structure with the original nor does it make assumption
21# about its alignment...
22#
23# Performance summary. aes-x86_64.pl column lists large-block CBC
24# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
25# byte processed with 128-bit key, and vpaes-x86_64.pl column -
26# [also large-block CBC] encrypt/decrypt.
27#
28# aes-x86_64.pl vpaes-x86_64.pl
29#
30# Core 2(**) 30.5/43.7/14.3 21.8/25.7(***)
31# Nehalem 30.5/42.2/14.6 9.8/11.8
32# Atom 63.9/79.0/32.1 64.0/84.8(***)
33#
34# (*) "Hyper-threading" in the context refers rather to cache shared
35# among multiple cores, than to specifically Intel HTT. As vast
36# majority of contemporary cores share cache, slower code path
37# is common place. In other words "with-hyper-threading-off"
38# results are presented mostly for reference purposes.
39#
40# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
41#
42# (***) Less impressive improvement on Core 2 and Atom is due to slow
43# pshufb, yet it's respectable +40%/78% improvement on Core 2
44# (as implied, over "hyper-threading-safe" code path).
45#
46# <appro@openssl.org>
47
48$flavour = shift;
49$output = shift;
50if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
51
52$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
53
54$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
55( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
56( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
57die "can't locate x86_64-xlate.pl";
58
59open OUT,"| \"$^X\" $xlate $flavour $output";
60*STDOUT=*OUT;
61
62$PREFIX="vpaes";
63
64$code.=<<___;
65.text
66
67##
68## _aes_encrypt_core
69##
70## AES-encrypt %xmm0.
71##
72## Inputs:
73## %xmm0 = input
74## %xmm9-%xmm15 as in _vpaes_preheat
75## (%rdx) = scheduled keys
76##
77## Output in %xmm0
78## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax
79## Preserves %xmm6 - %xmm8 so you get some local vectors
80##
81##
82.type _vpaes_encrypt_core,\@abi-omnipotent
83.align 16
84_vpaes_encrypt_core:
85 _CET_ENDBR
86 mov %rdx, %r9
87 mov \$16, %r11
88 mov 240(%rdx),%eax
89 movdqa %xmm9, %xmm1
90 movdqa .Lk_ipt(%rip), %xmm2 # iptlo
91 pandn %xmm0, %xmm1
92 movdqu (%r9), %xmm5 # round0 key
93 psrld \$4, %xmm1
94 pand %xmm9, %xmm0
95 pshufb %xmm0, %xmm2
96 movdqa .Lk_ipt+16(%rip), %xmm0 # ipthi
97 pshufb %xmm1, %xmm0
98 pxor %xmm5, %xmm2
99 pxor %xmm2, %xmm0
100 add \$16, %r9
101 lea .Lk_mc_backward(%rip),%r10
102 jmp .Lenc_entry
103
104.align 16
105.Lenc_loop:
106 # middle of middle round
107 movdqa %xmm13, %xmm4 # 4 : sb1u
108 pshufb %xmm2, %xmm4 # 4 = sb1u
109 pxor %xmm5, %xmm4 # 4 = sb1u + k
110 movdqa %xmm12, %xmm0 # 0 : sb1t
111 pshufb %xmm3, %xmm0 # 0 = sb1t
112 pxor %xmm4, %xmm0 # 0 = A
113 movdqa %xmm15, %xmm5 # 4 : sb2u
114 pshufb %xmm2, %xmm5 # 4 = sb2u
115 movdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
116 movdqa %xmm14, %xmm2 # 2 : sb2t
117 pshufb %xmm3, %xmm2 # 2 = sb2t
118 pxor %xmm5, %xmm2 # 2 = 2A
119 movdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
120 movdqa %xmm0, %xmm3 # 3 = A
121 pshufb %xmm1, %xmm0 # 0 = B
122 add \$16, %r9 # next key
123 pxor %xmm2, %xmm0 # 0 = 2A+B
124 pshufb %xmm4, %xmm3 # 3 = D
125 add \$16, %r11 # next mc
126 pxor %xmm0, %xmm3 # 3 = 2A+B+D
127 pshufb %xmm1, %xmm0 # 0 = 2B+C
128 and \$0x30, %r11 # ... mod 4
129 pxor %xmm3, %xmm0 # 0 = 2A+3B+C+D
130 sub \$1,%rax # nr--
131
132.Lenc_entry:
133 # top of round
134 movdqa %xmm9, %xmm1 # 1 : i
135 pandn %xmm0, %xmm1 # 1 = i<<4
136 psrld \$4, %xmm1 # 1 = i
137 pand %xmm9, %xmm0 # 0 = k
138 movdqa %xmm11, %xmm5 # 2 : a/k
139 pshufb %xmm0, %xmm5 # 2 = a/k
140 pxor %xmm1, %xmm0 # 0 = j
141 movdqa %xmm10, %xmm3 # 3 : 1/i
142 pshufb %xmm1, %xmm3 # 3 = 1/i
143 pxor %xmm5, %xmm3 # 3 = iak = 1/i + a/k
144 movdqa %xmm10, %xmm4 # 4 : 1/j
145 pshufb %xmm0, %xmm4 # 4 = 1/j
146 pxor %xmm5, %xmm4 # 4 = jak = 1/j + a/k
147 movdqa %xmm10, %xmm2 # 2 : 1/iak
148 pshufb %xmm3, %xmm2 # 2 = 1/iak
149 pxor %xmm0, %xmm2 # 2 = io
150 movdqa %xmm10, %xmm3 # 3 : 1/jak
151 movdqu (%r9), %xmm5
152 pshufb %xmm4, %xmm3 # 3 = 1/jak
153 pxor %xmm1, %xmm3 # 3 = jo
154 jnz .Lenc_loop
155
156 # middle of last round
157 movdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
158 movdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
159 pshufb %xmm2, %xmm4 # 4 = sbou
160 pxor %xmm5, %xmm4 # 4 = sb1u + k
161 pshufb %xmm3, %xmm0 # 0 = sb1t
162 movdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
163 pxor %xmm4, %xmm0 # 0 = A
164 pshufb %xmm1, %xmm0
165 ret
166.size _vpaes_encrypt_core,.-_vpaes_encrypt_core
167
168##
169## Decryption core
170##
171## Same API as encryption core.
172##
173.type _vpaes_decrypt_core,\@abi-omnipotent
174.align 16
175_vpaes_decrypt_core:
176 _CET_ENDBR
177 mov %rdx, %r9 # load key
178 mov 240(%rdx),%eax
179 movdqa %xmm9, %xmm1
180 movdqa .Lk_dipt(%rip), %xmm2 # iptlo
181 pandn %xmm0, %xmm1
182 mov %rax, %r11
183 psrld \$4, %xmm1
184 movdqu (%r9), %xmm5 # round0 key
185 shl \$4, %r11
186 pand %xmm9, %xmm0
187 pshufb %xmm0, %xmm2
188 movdqa .Lk_dipt+16(%rip), %xmm0 # ipthi
189 xor \$0x30, %r11
190 lea .Lk_dsbd(%rip),%r10
191 pshufb %xmm1, %xmm0
192 and \$0x30, %r11
193 pxor %xmm5, %xmm2
194 movdqa .Lk_mc_forward+48(%rip), %xmm5
195 pxor %xmm2, %xmm0
196 add \$16, %r9
197 add %r10, %r11
198 jmp .Ldec_entry
199
200.align 16
201.Ldec_loop:
202##
203## Inverse mix columns
204##
205 movdqa -0x20(%r10),%xmm4 # 4 : sb9u
206 pshufb %xmm2, %xmm4 # 4 = sb9u
207 pxor %xmm0, %xmm4
208 movdqa -0x10(%r10),%xmm0 # 0 : sb9t
209 pshufb %xmm3, %xmm0 # 0 = sb9t
210 pxor %xmm4, %xmm0 # 0 = ch
211 add \$16, %r9 # next round key
212
213 pshufb %xmm5, %xmm0 # MC ch
214 movdqa 0x00(%r10),%xmm4 # 4 : sbdu
215 pshufb %xmm2, %xmm4 # 4 = sbdu
216 pxor %xmm0, %xmm4 # 4 = ch
217 movdqa 0x10(%r10),%xmm0 # 0 : sbdt
218 pshufb %xmm3, %xmm0 # 0 = sbdt
219 pxor %xmm4, %xmm0 # 0 = ch
220 sub \$1,%rax # nr--
221
222 pshufb %xmm5, %xmm0 # MC ch
223 movdqa 0x20(%r10),%xmm4 # 4 : sbbu
224 pshufb %xmm2, %xmm4 # 4 = sbbu
225 pxor %xmm0, %xmm4 # 4 = ch
226 movdqa 0x30(%r10),%xmm0 # 0 : sbbt
227 pshufb %xmm3, %xmm0 # 0 = sbbt
228 pxor %xmm4, %xmm0 # 0 = ch
229
230 pshufb %xmm5, %xmm0 # MC ch
231 movdqa 0x40(%r10),%xmm4 # 4 : sbeu
232 pshufb %xmm2, %xmm4 # 4 = sbeu
233 pxor %xmm0, %xmm4 # 4 = ch
234 movdqa 0x50(%r10),%xmm0 # 0 : sbet
235 pshufb %xmm3, %xmm0 # 0 = sbet
236 pxor %xmm4, %xmm0 # 0 = ch
237
238 palignr \$12, %xmm5, %xmm5
239
240.Ldec_entry:
241 # top of round
242 movdqa %xmm9, %xmm1 # 1 : i
243 pandn %xmm0, %xmm1 # 1 = i<<4
244 psrld \$4, %xmm1 # 1 = i
245 pand %xmm9, %xmm0 # 0 = k
246 movdqa %xmm11, %xmm2 # 2 : a/k
247 pshufb %xmm0, %xmm2 # 2 = a/k
248 pxor %xmm1, %xmm0 # 0 = j
249 movdqa %xmm10, %xmm3 # 3 : 1/i
250 pshufb %xmm1, %xmm3 # 3 = 1/i
251 pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k
252 movdqa %xmm10, %xmm4 # 4 : 1/j
253 pshufb %xmm0, %xmm4 # 4 = 1/j
254 pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k
255 movdqa %xmm10, %xmm2 # 2 : 1/iak
256 pshufb %xmm3, %xmm2 # 2 = 1/iak
257 pxor %xmm0, %xmm2 # 2 = io
258 movdqa %xmm10, %xmm3 # 3 : 1/jak
259 pshufb %xmm4, %xmm3 # 3 = 1/jak
260 pxor %xmm1, %xmm3 # 3 = jo
261 movdqu (%r9), %xmm0
262 jnz .Ldec_loop
263
264 # middle of last round
265 movdqa 0x60(%r10), %xmm4 # 3 : sbou
266 pshufb %xmm2, %xmm4 # 4 = sbou
267 pxor %xmm0, %xmm4 # 4 = sb1u + k
268 movdqa 0x70(%r10), %xmm0 # 0 : sbot
269 movdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
270 pshufb %xmm3, %xmm0 # 0 = sb1t
271 pxor %xmm4, %xmm0 # 0 = A
272 pshufb %xmm2, %xmm0
273 ret
274.size _vpaes_decrypt_core,.-_vpaes_decrypt_core
275
276########################################################
277## ##
278## AES key schedule ##
279## ##
280########################################################
281.type _vpaes_schedule_core,\@abi-omnipotent
282.align 16
283_vpaes_schedule_core:
284 _CET_ENDBR
285 # rdi = key
286 # rsi = size in bits
287 # rdx = buffer
288 # rcx = direction. 0=encrypt, 1=decrypt
289
290 call _vpaes_preheat # load the tables
291 movdqa .Lk_rcon(%rip), %xmm8 # load rcon
292 movdqu (%rdi), %xmm0 # load key (unaligned)
293
294 # input transform
295 movdqa %xmm0, %xmm3
296 lea .Lk_ipt(%rip), %r11
297 call _vpaes_schedule_transform
298 movdqa %xmm0, %xmm7
299
300 lea .Lk_sr(%rip),%r10
301 test %rcx, %rcx
302 jnz .Lschedule_am_decrypting
303
304 # encrypting, output zeroth round key after transform
305 movdqu %xmm0, (%rdx)
306 jmp .Lschedule_go
307
308.Lschedule_am_decrypting:
309 # decrypting, output zeroth round key after shiftrows
310 movdqa (%r8,%r10),%xmm1
311 pshufb %xmm1, %xmm3
312 movdqu %xmm3, (%rdx)
313 xor \$0x30, %r8
314
315.Lschedule_go:
316 cmp \$192, %esi
317 ja .Lschedule_256
318 je .Lschedule_192
319 # 128: fall though
320
321##
322## .schedule_128
323##
324## 128-bit specific part of key schedule.
325##
326## This schedule is really simple, because all its parts
327## are accomplished by the subroutines.
328##
329.Lschedule_128:
330 mov \$10, %esi
331
332.Loop_schedule_128:
333 call _vpaes_schedule_round
334 dec %rsi
335 jz .Lschedule_mangle_last
336 call _vpaes_schedule_mangle # write output
337 jmp .Loop_schedule_128
338
339##
340## .aes_schedule_192
341##
342## 192-bit specific part of key schedule.
343##
344## The main body of this schedule is the same as the 128-bit
345## schedule, but with more smearing. The long, high side is
346## stored in %xmm7 as before, and the short, low side is in
347## the high bits of %xmm6.
348##
349## This schedule is somewhat nastier, however, because each
350## round produces 192 bits of key material, or 1.5 round keys.
351## Therefore, on each cycle we do 2 rounds and produce 3 round
352## keys.
353##
354.align 16
355.Lschedule_192:
356 movdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
357 call _vpaes_schedule_transform # input transform
358 movdqa %xmm0, %xmm6 # save short part
359 pxor %xmm4, %xmm4 # clear 4
360 movhlps %xmm4, %xmm6 # clobber low side with zeros
361 mov \$4, %esi
362
363.Loop_schedule_192:
364 call _vpaes_schedule_round
365 palignr \$8,%xmm6,%xmm0
366 call _vpaes_schedule_mangle # save key n
367 call _vpaes_schedule_192_smear
368 call _vpaes_schedule_mangle # save key n+1
369 call _vpaes_schedule_round
370 dec %rsi
371 jz .Lschedule_mangle_last
372 call _vpaes_schedule_mangle # save key n+2
373 call _vpaes_schedule_192_smear
374 jmp .Loop_schedule_192
375
376##
377## .aes_schedule_256
378##
379## 256-bit specific part of key schedule.
380##
381## The structure here is very similar to the 128-bit
382## schedule, but with an additional "low side" in
383## %xmm6. The low side's rounds are the same as the
384## high side's, except no rcon and no rotation.
385##
386.align 16
387.Lschedule_256:
388 movdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
389 call _vpaes_schedule_transform # input transform
390 mov \$7, %esi
391
392.Loop_schedule_256:
393 call _vpaes_schedule_mangle # output low result
394 movdqa %xmm0, %xmm6 # save cur_lo in xmm6
395
396 # high round
397 call _vpaes_schedule_round
398 dec %rsi
399 jz .Lschedule_mangle_last
400 call _vpaes_schedule_mangle
401
402 # low round. swap xmm7 and xmm6
403 pshufd \$0xFF, %xmm0, %xmm0
404 movdqa %xmm7, %xmm5
405 movdqa %xmm6, %xmm7
406 call _vpaes_schedule_low_round
407 movdqa %xmm5, %xmm7
408
409 jmp .Loop_schedule_256
410
411
412##
413## .aes_schedule_mangle_last
414##
415## Mangler for last round of key schedule
416## Mangles %xmm0
417## when encrypting, outputs out(%xmm0) ^ 63
418## when decrypting, outputs unskew(%xmm0)
419##
420## Always called right before return... jumps to cleanup and exits
421##
422.align 16
423.Lschedule_mangle_last:
424 # schedule last round key from xmm0
425 lea .Lk_deskew(%rip),%r11 # prepare to deskew
426 test %rcx, %rcx
427 jnz .Lschedule_mangle_last_dec
428
429 # encrypting
430 movdqa (%r8,%r10),%xmm1
431 pshufb %xmm1, %xmm0 # output permute
432 lea .Lk_opt(%rip), %r11 # prepare to output transform
433 add \$32, %rdx
434
435.Lschedule_mangle_last_dec:
436 add \$-16, %rdx
437 pxor .Lk_s63(%rip), %xmm0
438 call _vpaes_schedule_transform # output transform
439 movdqu %xmm0, (%rdx) # save last key
440
441 # cleanup
442 pxor %xmm0, %xmm0
443 pxor %xmm1, %xmm1
444 pxor %xmm2, %xmm2
445 pxor %xmm3, %xmm3
446 pxor %xmm4, %xmm4
447 pxor %xmm5, %xmm5
448 pxor %xmm6, %xmm6
449 pxor %xmm7, %xmm7
450 ret
451.size _vpaes_schedule_core,.-_vpaes_schedule_core
452
453##
454## .aes_schedule_192_smear
455##
456## Smear the short, low side in the 192-bit key schedule.
457##
458## Inputs:
459## %xmm7: high side, b a x y
460## %xmm6: low side, d c 0 0
461## %xmm13: 0
462##
463## Outputs:
464## %xmm6: b+c+d b+c 0 0
465## %xmm0: b+c+d b+c b a
466##
467.type _vpaes_schedule_192_smear,\@abi-omnipotent
468.align 16
469_vpaes_schedule_192_smear:
470 _CET_ENDBR
471 pshufd \$0x80, %xmm6, %xmm0 # d c 0 0 -> c 0 0 0
472 pxor %xmm0, %xmm6 # -> c+d c 0 0
473 pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
474 pxor %xmm0, %xmm6 # -> b+c+d b+c b a
475 movdqa %xmm6, %xmm0
476 pxor %xmm1, %xmm1
477 movhlps %xmm1, %xmm6 # clobber low side with zeros
478 ret
479.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
480
481##
482## .aes_schedule_round
483##
484## Runs one main round of the key schedule on %xmm0, %xmm7
485##
486## Specifically, runs subbytes on the high dword of %xmm0
487## then rotates it by one byte and xors into the low dword of
488## %xmm7.
489##
490## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
491## next rcon.
492##
493## Smears the dwords of %xmm7 by xoring the low into the
494## second low, result into third, result into highest.
495##
496## Returns results in %xmm7 = %xmm0.
497## Clobbers %xmm1-%xmm4, %r11.
498##
499.type _vpaes_schedule_round,\@abi-omnipotent
500.align 16
501_vpaes_schedule_round:
502 _CET_ENDBR
503 # extract rcon from xmm8
504 pxor %xmm1, %xmm1
505 palignr \$15, %xmm8, %xmm1
506 palignr \$15, %xmm8, %xmm8
507 pxor %xmm1, %xmm7
508
509 # rotate
510 pshufd \$0xFF, %xmm0, %xmm0
511 palignr \$1, %xmm0, %xmm0
512
513 # fall through...
514
515 # low round: same as high round, but no rotation and no rcon.
516_vpaes_schedule_low_round:
517 # smear xmm7
518 movdqa %xmm7, %xmm1
519 pslldq \$4, %xmm7
520 pxor %xmm1, %xmm7
521 movdqa %xmm7, %xmm1
522 pslldq \$8, %xmm7
523 pxor %xmm1, %xmm7
524 pxor .Lk_s63(%rip), %xmm7
525
526 # subbytes
527 movdqa %xmm9, %xmm1
528 pandn %xmm0, %xmm1
529 psrld \$4, %xmm1 # 1 = i
530 pand %xmm9, %xmm0 # 0 = k
531 movdqa %xmm11, %xmm2 # 2 : a/k
532 pshufb %xmm0, %xmm2 # 2 = a/k
533 pxor %xmm1, %xmm0 # 0 = j
534 movdqa %xmm10, %xmm3 # 3 : 1/i
535 pshufb %xmm1, %xmm3 # 3 = 1/i
536 pxor %xmm2, %xmm3 # 3 = iak = 1/i + a/k
537 movdqa %xmm10, %xmm4 # 4 : 1/j
538 pshufb %xmm0, %xmm4 # 4 = 1/j
539 pxor %xmm2, %xmm4 # 4 = jak = 1/j + a/k
540 movdqa %xmm10, %xmm2 # 2 : 1/iak
541 pshufb %xmm3, %xmm2 # 2 = 1/iak
542 pxor %xmm0, %xmm2 # 2 = io
543 movdqa %xmm10, %xmm3 # 3 : 1/jak
544 pshufb %xmm4, %xmm3 # 3 = 1/jak
545 pxor %xmm1, %xmm3 # 3 = jo
546 movdqa %xmm13, %xmm4 # 4 : sbou
547 pshufb %xmm2, %xmm4 # 4 = sbou
548 movdqa %xmm12, %xmm0 # 0 : sbot
549 pshufb %xmm3, %xmm0 # 0 = sb1t
550 pxor %xmm4, %xmm0 # 0 = sbox output
551
552 # add in smeared stuff
553 pxor %xmm7, %xmm0
554 movdqa %xmm0, %xmm7
555 ret
556.size _vpaes_schedule_round,.-_vpaes_schedule_round
557
558##
559## .aes_schedule_transform
560##
561## Linear-transform %xmm0 according to tables at (%r11)
562##
563## Requires that %xmm9 = 0x0F0F... as in preheat
564## Output in %xmm0
565## Clobbers %xmm1, %xmm2
566##
567.type _vpaes_schedule_transform,\@abi-omnipotent
568.align 16
569_vpaes_schedule_transform:
570 _CET_ENDBR
571 movdqa %xmm9, %xmm1
572 pandn %xmm0, %xmm1
573 psrld \$4, %xmm1
574 pand %xmm9, %xmm0
575 movdqa (%r11), %xmm2 # lo
576 pshufb %xmm0, %xmm2
577 movdqa 16(%r11), %xmm0 # hi
578 pshufb %xmm1, %xmm0
579 pxor %xmm2, %xmm0
580 ret
581.size _vpaes_schedule_transform,.-_vpaes_schedule_transform
582
583##
584## .aes_schedule_mangle
585##
586## Mangle xmm0 from (basis-transformed) standard version
587## to our version.
588##
589## On encrypt,
590## xor with 0x63
591## multiply by circulant 0,1,1,1
592## apply shiftrows transform
593##
594## On decrypt,
595## xor with 0x63
596## multiply by "inverse mixcolumns" circulant E,B,D,9
597## deskew
598## apply shiftrows transform
599##
600##
601## Writes out to (%rdx), and increments or decrements it
602## Keeps track of round number mod 4 in %r8
603## Preserves xmm0
604## Clobbers xmm1-xmm5
605##
606.type _vpaes_schedule_mangle,\@abi-omnipotent
607.align 16
608_vpaes_schedule_mangle:
609 _CET_ENDBR
610 movdqa %xmm0, %xmm4 # save xmm0 for later
611 movdqa .Lk_mc_forward(%rip),%xmm5
612 test %rcx, %rcx
613 jnz .Lschedule_mangle_dec
614
615 # encrypting
616 add \$16, %rdx
617 pxor .Lk_s63(%rip),%xmm4
618 pshufb %xmm5, %xmm4
619 movdqa %xmm4, %xmm3
620 pshufb %xmm5, %xmm4
621 pxor %xmm4, %xmm3
622 pshufb %xmm5, %xmm4
623 pxor %xmm4, %xmm3
624
625 jmp .Lschedule_mangle_both
626.align 16
627.Lschedule_mangle_dec:
628 # inverse mix columns
629 lea .Lk_dksd(%rip),%r11
630 movdqa %xmm9, %xmm1
631 pandn %xmm4, %xmm1
632 psrld \$4, %xmm1 # 1 = hi
633 pand %xmm9, %xmm4 # 4 = lo
634
635 movdqa 0x00(%r11), %xmm2
636 pshufb %xmm4, %xmm2
637 movdqa 0x10(%r11), %xmm3
638 pshufb %xmm1, %xmm3
639 pxor %xmm2, %xmm3
640 pshufb %xmm5, %xmm3
641
642 movdqa 0x20(%r11), %xmm2
643 pshufb %xmm4, %xmm2
644 pxor %xmm3, %xmm2
645 movdqa 0x30(%r11), %xmm3
646 pshufb %xmm1, %xmm3
647 pxor %xmm2, %xmm3
648 pshufb %xmm5, %xmm3
649
650 movdqa 0x40(%r11), %xmm2
651 pshufb %xmm4, %xmm2
652 pxor %xmm3, %xmm2
653 movdqa 0x50(%r11), %xmm3
654 pshufb %xmm1, %xmm3
655 pxor %xmm2, %xmm3
656 pshufb %xmm5, %xmm3
657
658 movdqa 0x60(%r11), %xmm2
659 pshufb %xmm4, %xmm2
660 pxor %xmm3, %xmm2
661 movdqa 0x70(%r11), %xmm3
662 pshufb %xmm1, %xmm3
663 pxor %xmm2, %xmm3
664
665 add \$-16, %rdx
666
667.Lschedule_mangle_both:
668 movdqa (%r8,%r10),%xmm1
669 pshufb %xmm1,%xmm3
670 add \$-16, %r8
671 and \$0x30, %r8
672 movdqu %xmm3, (%rdx)
673 ret
674.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
675
676#
677# Interface to OpenSSL
678#
679.globl ${PREFIX}_set_encrypt_key
680.type ${PREFIX}_set_encrypt_key,\@function,3
681.align 16
682${PREFIX}_set_encrypt_key:
683 _CET_ENDBR
684___
685$code.=<<___ if ($win64);
686 lea -0xb8(%rsp),%rsp
687 movaps %xmm6,0x10(%rsp)
688 movaps %xmm7,0x20(%rsp)
689 movaps %xmm8,0x30(%rsp)
690 movaps %xmm9,0x40(%rsp)
691 movaps %xmm10,0x50(%rsp)
692 movaps %xmm11,0x60(%rsp)
693 movaps %xmm12,0x70(%rsp)
694 movaps %xmm13,0x80(%rsp)
695 movaps %xmm14,0x90(%rsp)
696 movaps %xmm15,0xa0(%rsp)
697.Lenc_key_body:
698___
699$code.=<<___;
700 mov %esi,%eax
701 shr \$5,%eax
702 add \$5,%eax
703 mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
704
705 mov \$0,%ecx
706 mov \$0x30,%r8d
707 call _vpaes_schedule_core
708___
709$code.=<<___ if ($win64);
710 movaps 0x10(%rsp),%xmm6
711 movaps 0x20(%rsp),%xmm7
712 movaps 0x30(%rsp),%xmm8
713 movaps 0x40(%rsp),%xmm9
714 movaps 0x50(%rsp),%xmm10
715 movaps 0x60(%rsp),%xmm11
716 movaps 0x70(%rsp),%xmm12
717 movaps 0x80(%rsp),%xmm13
718 movaps 0x90(%rsp),%xmm14
719 movaps 0xa0(%rsp),%xmm15
720 lea 0xb8(%rsp),%rsp
721.Lenc_key_epilogue:
722___
723$code.=<<___;
724 xor %eax,%eax
725 ret
726.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key
727
728.globl ${PREFIX}_set_decrypt_key
729.type ${PREFIX}_set_decrypt_key,\@function,3
730.align 16
731${PREFIX}_set_decrypt_key:
732 _CET_ENDBR
733___
734$code.=<<___ if ($win64);
735 lea -0xb8(%rsp),%rsp
736 movaps %xmm6,0x10(%rsp)
737 movaps %xmm7,0x20(%rsp)
738 movaps %xmm8,0x30(%rsp)
739 movaps %xmm9,0x40(%rsp)
740 movaps %xmm10,0x50(%rsp)
741 movaps %xmm11,0x60(%rsp)
742 movaps %xmm12,0x70(%rsp)
743 movaps %xmm13,0x80(%rsp)
744 movaps %xmm14,0x90(%rsp)
745 movaps %xmm15,0xa0(%rsp)
746.Ldec_key_body:
747___
748$code.=<<___;
749 mov %esi,%eax
750 shr \$5,%eax
751 add \$5,%eax
752 mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
753 shl \$4,%eax
754 lea 16(%rdx,%rax),%rdx
755
756 mov \$1,%ecx
757 mov %esi,%r8d
758 shr \$1,%r8d
759 and \$32,%r8d
760 xor \$32,%r8d # nbits==192?0:32
761 call _vpaes_schedule_core
762___
763$code.=<<___ if ($win64);
764 movaps 0x10(%rsp),%xmm6
765 movaps 0x20(%rsp),%xmm7
766 movaps 0x30(%rsp),%xmm8
767 movaps 0x40(%rsp),%xmm9
768 movaps 0x50(%rsp),%xmm10
769 movaps 0x60(%rsp),%xmm11
770 movaps 0x70(%rsp),%xmm12
771 movaps 0x80(%rsp),%xmm13
772 movaps 0x90(%rsp),%xmm14
773 movaps 0xa0(%rsp),%xmm15
774 lea 0xb8(%rsp),%rsp
775.Ldec_key_epilogue:
776___
777$code.=<<___;
778 xor %eax,%eax
779 ret
780.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key
781
782.globl ${PREFIX}_encrypt
783.type ${PREFIX}_encrypt,\@function,3
784.align 16
785${PREFIX}_encrypt:
786 _CET_ENDBR
787___
788$code.=<<___ if ($win64);
789 lea -0xb8(%rsp),%rsp
790 movaps %xmm6,0x10(%rsp)
791 movaps %xmm7,0x20(%rsp)
792 movaps %xmm8,0x30(%rsp)
793 movaps %xmm9,0x40(%rsp)
794 movaps %xmm10,0x50(%rsp)
795 movaps %xmm11,0x60(%rsp)
796 movaps %xmm12,0x70(%rsp)
797 movaps %xmm13,0x80(%rsp)
798 movaps %xmm14,0x90(%rsp)
799 movaps %xmm15,0xa0(%rsp)
800.Lenc_body:
801___
802$code.=<<___;
803 movdqu (%rdi),%xmm0
804 call _vpaes_preheat
805 call _vpaes_encrypt_core
806 movdqu %xmm0,(%rsi)
807___
808$code.=<<___ if ($win64);
809 movaps 0x10(%rsp),%xmm6
810 movaps 0x20(%rsp),%xmm7
811 movaps 0x30(%rsp),%xmm8
812 movaps 0x40(%rsp),%xmm9
813 movaps 0x50(%rsp),%xmm10
814 movaps 0x60(%rsp),%xmm11
815 movaps 0x70(%rsp),%xmm12
816 movaps 0x80(%rsp),%xmm13
817 movaps 0x90(%rsp),%xmm14
818 movaps 0xa0(%rsp),%xmm15
819 lea 0xb8(%rsp),%rsp
820.Lenc_epilogue:
821___
822$code.=<<___;
823 ret
824.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt
825
826.globl ${PREFIX}_decrypt
827.type ${PREFIX}_decrypt,\@function,3
828.align 16
829${PREFIX}_decrypt:
830 _CET_ENDBR
831___
832$code.=<<___ if ($win64);
833 lea -0xb8(%rsp),%rsp
834 movaps %xmm6,0x10(%rsp)
835 movaps %xmm7,0x20(%rsp)
836 movaps %xmm8,0x30(%rsp)
837 movaps %xmm9,0x40(%rsp)
838 movaps %xmm10,0x50(%rsp)
839 movaps %xmm11,0x60(%rsp)
840 movaps %xmm12,0x70(%rsp)
841 movaps %xmm13,0x80(%rsp)
842 movaps %xmm14,0x90(%rsp)
843 movaps %xmm15,0xa0(%rsp)
844.Ldec_body:
845___
846$code.=<<___;
847 movdqu (%rdi),%xmm0
848 call _vpaes_preheat
849 call _vpaes_decrypt_core
850 movdqu %xmm0,(%rsi)
851___
852$code.=<<___ if ($win64);
853 movaps 0x10(%rsp),%xmm6
854 movaps 0x20(%rsp),%xmm7
855 movaps 0x30(%rsp),%xmm8
856 movaps 0x40(%rsp),%xmm9
857 movaps 0x50(%rsp),%xmm10
858 movaps 0x60(%rsp),%xmm11
859 movaps 0x70(%rsp),%xmm12
860 movaps 0x80(%rsp),%xmm13
861 movaps 0x90(%rsp),%xmm14
862 movaps 0xa0(%rsp),%xmm15
863 lea 0xb8(%rsp),%rsp
864.Ldec_epilogue:
865___
866$code.=<<___;
867 ret
868.size ${PREFIX}_decrypt,.-${PREFIX}_decrypt
869___
870{
871my ($inp,$out,$len,$key,$ivp,$enc)=("%rdi","%rsi","%rdx","%rcx","%r8","%r9");
872# void AES_cbc_encrypt (const void char *inp, unsigned char *out,
873# size_t length, const AES_KEY *key,
874# unsigned char *ivp,const int enc);
875$code.=<<___;
876.globl ${PREFIX}_cbc_encrypt
877.type ${PREFIX}_cbc_encrypt,\@function,6
878.align 16
879${PREFIX}_cbc_encrypt:
880 _CET_ENDBR
881 xchg $key,$len
882___
883($len,$key)=($key,$len);
884$code.=<<___;
885 sub \$16,$len
886 jc .Lcbc_abort
887___
888$code.=<<___ if ($win64);
889 lea -0xb8(%rsp),%rsp
890 movaps %xmm6,0x10(%rsp)
891 movaps %xmm7,0x20(%rsp)
892 movaps %xmm8,0x30(%rsp)
893 movaps %xmm9,0x40(%rsp)
894 movaps %xmm10,0x50(%rsp)
895 movaps %xmm11,0x60(%rsp)
896 movaps %xmm12,0x70(%rsp)
897 movaps %xmm13,0x80(%rsp)
898 movaps %xmm14,0x90(%rsp)
899 movaps %xmm15,0xa0(%rsp)
900.Lcbc_body:
901___
902$code.=<<___;
903 movdqu ($ivp),%xmm6 # load IV
904 sub $inp,$out
905 call _vpaes_preheat
906 cmp \$0,${enc}d
907 je .Lcbc_dec_loop
908 jmp .Lcbc_enc_loop
909.align 16
910.Lcbc_enc_loop:
911 movdqu ($inp),%xmm0
912 pxor %xmm6,%xmm0
913 call _vpaes_encrypt_core
914 movdqa %xmm0,%xmm6
915 movdqu %xmm0,($out,$inp)
916 lea 16($inp),$inp
917 sub \$16,$len
918 jnc .Lcbc_enc_loop
919 jmp .Lcbc_done
920.align 16
921.Lcbc_dec_loop:
922 movdqu ($inp),%xmm0
923 movdqa %xmm0,%xmm7
924 call _vpaes_decrypt_core
925 pxor %xmm6,%xmm0
926 movdqa %xmm7,%xmm6
927 movdqu %xmm0,($out,$inp)
928 lea 16($inp),$inp
929 sub \$16,$len
930 jnc .Lcbc_dec_loop
931.Lcbc_done:
932 movdqu %xmm6,($ivp) # save IV
933___
934$code.=<<___ if ($win64);
935 movaps 0x10(%rsp),%xmm6
936 movaps 0x20(%rsp),%xmm7
937 movaps 0x30(%rsp),%xmm8
938 movaps 0x40(%rsp),%xmm9
939 movaps 0x50(%rsp),%xmm10
940 movaps 0x60(%rsp),%xmm11
941 movaps 0x70(%rsp),%xmm12
942 movaps 0x80(%rsp),%xmm13
943 movaps 0x90(%rsp),%xmm14
944 movaps 0xa0(%rsp),%xmm15
945 lea 0xb8(%rsp),%rsp
946.Lcbc_epilogue:
947___
948$code.=<<___;
949.Lcbc_abort:
950 ret
951.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
952___
953}
954$code.=<<___;
955##
956## _aes_preheat
957##
958## Fills register %r10 -> .aes_consts (so you can -fPIC)
959## and %xmm9-%xmm15 as specified below.
960##
961.type _vpaes_preheat,\@abi-omnipotent
962.align 16
963_vpaes_preheat:
964 _CET_ENDBR
965 lea .Lk_s0F(%rip), %r10
966 movdqa -0x20(%r10), %xmm10 # .Lk_inv
967 movdqa -0x10(%r10), %xmm11 # .Lk_inv+16
968 movdqa 0x00(%r10), %xmm9 # .Lk_s0F
969 movdqa 0x30(%r10), %xmm13 # .Lk_sb1
970 movdqa 0x40(%r10), %xmm12 # .Lk_sb1+16
971 movdqa 0x50(%r10), %xmm15 # .Lk_sb2
972 movdqa 0x60(%r10), %xmm14 # .Lk_sb2+16
973 ret
974.size _vpaes_preheat,.-_vpaes_preheat
975########################################################
976## ##
977## Constants ##
978## ##
979########################################################
980.section .rodata
981.type _vpaes_consts,\@object
982.align 64
983_vpaes_consts:
984.Lk_inv: # inv, inva
985 .quad 0x0E05060F0D080180, 0x040703090A0B0C02
986 .quad 0x01040A060F0B0780, 0x030D0E0C02050809
987
988.Lk_s0F: # s0F
989 .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
990
991.Lk_ipt: # input transform (lo, hi)
992 .quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
993 .quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
994
995.Lk_sb1: # sb1u, sb1t
996 .quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
997 .quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
998.Lk_sb2: # sb2u, sb2t
999 .quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
1000 .quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
1001.Lk_sbo: # sbou, sbot
1002 .quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
1003 .quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
1004
1005.Lk_mc_forward: # mc_forward
1006 .quad 0x0407060500030201, 0x0C0F0E0D080B0A09
1007 .quad 0x080B0A0904070605, 0x000302010C0F0E0D
1008 .quad 0x0C0F0E0D080B0A09, 0x0407060500030201
1009 .quad 0x000302010C0F0E0D, 0x080B0A0904070605
1010
1011.Lk_mc_backward:# mc_backward
1012 .quad 0x0605040702010003, 0x0E0D0C0F0A09080B
1013 .quad 0x020100030E0D0C0F, 0x0A09080B06050407
1014 .quad 0x0E0D0C0F0A09080B, 0x0605040702010003
1015 .quad 0x0A09080B06050407, 0x020100030E0D0C0F
1016
1017.Lk_sr: # sr
1018 .quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
1019 .quad 0x030E09040F0A0500, 0x0B06010C07020D08
1020 .quad 0x0F060D040B020900, 0x070E050C030A0108
1021 .quad 0x0B0E0104070A0D00, 0x0306090C0F020508
1022
1023.Lk_rcon: # rcon
1024 .quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
1025
1026.Lk_s63: # s63: all equal to 0x63 transformed
1027 .quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B
1028
1029.Lk_opt: # output transform
1030 .quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
1031 .quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
1032
1033.Lk_deskew: # deskew tables: inverts the sbox's "skew"
1034 .quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
1035 .quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
1036
1037##
1038## Decryption stuff
1039## Key schedule constants
1040##
1041.Lk_dksd: # decryption key schedule: invskew x*D
1042 .quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
1043 .quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
1044.Lk_dksb: # decryption key schedule: invskew x*B
1045 .quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
1046 .quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
1047.Lk_dkse: # decryption key schedule: invskew x*E + 0x63
1048 .quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
1049 .quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
1050.Lk_dks9: # decryption key schedule: invskew x*9
1051 .quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
1052 .quad 0x4576516227143300, 0x8BB89FACE9DAFDCE
1053
1054##
1055## Decryption stuff
1056## Round function constants
1057##
1058.Lk_dipt: # decryption input transform
1059 .quad 0x0F505B040B545F00, 0x154A411E114E451A
1060 .quad 0x86E383E660056500, 0x12771772F491F194
1061
1062.Lk_dsb9: # decryption sbox output *9*u, *9*t
1063 .quad 0x851C03539A86D600, 0xCAD51F504F994CC9
1064 .quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
1065.Lk_dsbd: # decryption sbox output *D*u, *D*t
1066 .quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
1067 .quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
1068.Lk_dsbb: # decryption sbox output *B*u, *B*t
1069 .quad 0xD022649296B44200, 0x602646F6B0F2D404
1070 .quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
1071.Lk_dsbe: # decryption sbox output *E*u, *E*t
1072 .quad 0x46F2929626D4D000, 0x2242600464B4F6B0
1073 .quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
1074.Lk_dsbo: # decryption sbox final output
1075 .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
1076 .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
1077.align 64
1078.size _vpaes_consts,.-_vpaes_consts
1079.text
1080___
1081
1082if ($win64) {
1083# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
1084# CONTEXT *context,DISPATCHER_CONTEXT *disp)
1085$rec="%rcx";
1086$frame="%rdx";
1087$context="%r8";
1088$disp="%r9";
1089
1090$code.=<<___;
1091.extern __imp_RtlVirtualUnwind
1092.type se_handler,\@abi-omnipotent
1093.align 16
1094se_handler:
1095 _CET_ENDBR
1096 push %rsi
1097 push %rdi
1098 push %rbx
1099 push %rbp
1100 push %r12
1101 push %r13
1102 push %r14
1103 push %r15
1104 pushfq
1105 sub \$64,%rsp
1106
1107 mov 120($context),%rax # pull context->Rax
1108 mov 248($context),%rbx # pull context->Rip
1109
1110 mov 8($disp),%rsi # disp->ImageBase
1111 mov 56($disp),%r11 # disp->HandlerData
1112
1113 mov 0(%r11),%r10d # HandlerData[0]
1114 lea (%rsi,%r10),%r10 # prologue label
1115 cmp %r10,%rbx # context->Rip<prologue label
1116 jb .Lin_prologue
1117
1118 mov 152($context),%rax # pull context->Rsp
1119
1120 mov 4(%r11),%r10d # HandlerData[1]
1121 lea (%rsi,%r10),%r10 # epilogue label
1122 cmp %r10,%rbx # context->Rip>=epilogue label
1123 jae .Lin_prologue
1124
1125 lea 16(%rax),%rsi # %xmm save area
1126 lea 512($context),%rdi # &context.Xmm6
1127 mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
1128 .long 0xa548f3fc # cld; rep movsq
1129 lea 0xb8(%rax),%rax # adjust stack pointer
1130
1131.Lin_prologue:
1132 mov 8(%rax),%rdi
1133 mov 16(%rax),%rsi
1134 mov %rax,152($context) # restore context->Rsp
1135 mov %rsi,168($context) # restore context->Rsi
1136 mov %rdi,176($context) # restore context->Rdi
1137
1138 mov 40($disp),%rdi # disp->ContextRecord
1139 mov $context,%rsi # context
1140 mov \$`1232/8`,%ecx # sizeof(CONTEXT)
1141 .long 0xa548f3fc # cld; rep movsq
1142
1143 mov $disp,%rsi
1144 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
1145 mov 8(%rsi),%rdx # arg2, disp->ImageBase
1146 mov 0(%rsi),%r8 # arg3, disp->ControlPc
1147 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
1148 mov 40(%rsi),%r10 # disp->ContextRecord
1149 lea 56(%rsi),%r11 # &disp->HandlerData
1150 lea 24(%rsi),%r12 # &disp->EstablisherFrame
1151 mov %r10,32(%rsp) # arg5
1152 mov %r11,40(%rsp) # arg6
1153 mov %r12,48(%rsp) # arg7
1154 mov %rcx,56(%rsp) # arg8, (NULL)
1155 call *__imp_RtlVirtualUnwind(%rip)
1156
1157 mov \$1,%eax # ExceptionContinueSearch
1158 add \$64,%rsp
1159 popfq
1160 pop %r15
1161 pop %r14
1162 pop %r13
1163 pop %r12
1164 pop %rbp
1165 pop %rbx
1166 pop %rdi
1167 pop %rsi
1168 ret
1169.size se_handler,.-se_handler
1170
1171.section .pdata
1172.align 4
1173 .rva .LSEH_begin_${PREFIX}_set_encrypt_key
1174 .rva .LSEH_end_${PREFIX}_set_encrypt_key
1175 .rva .LSEH_info_${PREFIX}_set_encrypt_key
1176
1177 .rva .LSEH_begin_${PREFIX}_set_decrypt_key
1178 .rva .LSEH_end_${PREFIX}_set_decrypt_key
1179 .rva .LSEH_info_${PREFIX}_set_decrypt_key
1180
1181 .rva .LSEH_begin_${PREFIX}_encrypt
1182 .rva .LSEH_end_${PREFIX}_encrypt
1183 .rva .LSEH_info_${PREFIX}_encrypt
1184
1185 .rva .LSEH_begin_${PREFIX}_decrypt
1186 .rva .LSEH_end_${PREFIX}_decrypt
1187 .rva .LSEH_info_${PREFIX}_decrypt
1188
1189 .rva .LSEH_begin_${PREFIX}_cbc_encrypt
1190 .rva .LSEH_end_${PREFIX}_cbc_encrypt
1191 .rva .LSEH_info_${PREFIX}_cbc_encrypt
1192
1193.section .xdata
1194.align 8
1195.LSEH_info_${PREFIX}_set_encrypt_key:
1196 .byte 9,0,0,0
1197 .rva se_handler
1198 .rva .Lenc_key_body,.Lenc_key_epilogue # HandlerData[]
1199.LSEH_info_${PREFIX}_set_decrypt_key:
1200 .byte 9,0,0,0
1201 .rva se_handler
1202 .rva .Ldec_key_body,.Ldec_key_epilogue # HandlerData[]
1203.LSEH_info_${PREFIX}_encrypt:
1204 .byte 9,0,0,0
1205 .rva se_handler
1206 .rva .Lenc_body,.Lenc_epilogue # HandlerData[]
1207.LSEH_info_${PREFIX}_decrypt:
1208 .byte 9,0,0,0
1209 .rva se_handler
1210 .rva .Ldec_body,.Ldec_epilogue # HandlerData[]
1211.LSEH_info_${PREFIX}_cbc_encrypt:
1212 .byte 9,0,0,0
1213 .rva se_handler
1214 .rva .Lcbc_body,.Lcbc_epilogue # HandlerData[]
1215___
1216}
1217
1218$code =~ s/\`([^\`]*)\`/eval($1)/gem;
1219
1220print $code;
1221
1222close STDOUT;