summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes/asm
diff options
context:
space:
mode:
authordjm <>2009-04-06 06:30:10 +0000
committerdjm <>2009-04-06 06:30:10 +0000
commit2b6e09b39ef1d803b50ee024a06d1c250fde442d (patch)
treef116109c359f26a2b149bbc752be39c16099bae1 /src/lib/libcrypto/aes/asm
parenta0fdc9ec41594852f67ec77dfad9cb06bacc4186 (diff)
downloadopenbsd-2b6e09b39ef1d803b50ee024a06d1c250fde442d.tar.gz
openbsd-2b6e09b39ef1d803b50ee024a06d1c250fde442d.tar.bz2
openbsd-2b6e09b39ef1d803b50ee024a06d1c250fde442d.zip
import of OpenSSL 0.9.8k
Diffstat (limited to 'src/lib/libcrypto/aes/asm')
-rw-r--r--src/lib/libcrypto/aes/asm/aes-armv4.pl1030
-rw-r--r--src/lib/libcrypto/aes/asm/aes-ppc.pl1176
-rw-r--r--src/lib/libcrypto/aes/asm/aes-s390x.pl1333
-rwxr-xr-xsrc/lib/libcrypto/aes/asm/aes-sparcv9.pl1181
4 files changed, 4720 insertions, 0 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-armv4.pl b/src/lib/libcrypto/aes/asm/aes-armv4.pl
new file mode 100644
index 0000000000..15742c1ec5
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-armv4.pl
@@ -0,0 +1,1030 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for ARMv4
11
12# January 2007.
13#
14# Code uses a single 1K S-box and is >2 times faster than code generated
15# by gcc-3.4.1. This is thanks to a unique feature of the ARMv4 ISA, which
16# allows merging a logical or arithmetic operation with a shift or rotate
17# in one instruction and emitting the combined result every cycle. The module
18# is endian-neutral. The performance is ~42 cycles/byte for a 128-bit
19# key.
20
21# May 2007.
22#
23# AES_set_[en|de]crypt_key is added.
24
# Symbolic register names; they are interpolated into the assembly template below.
25$s0="r0"; # s0-s3: the four 32-bit words of the AES state
26$s1="r1";
27$s2="r2";
28$s3="r3";
29$t1="r4"; # t1-t3: temporaries
30$t2="r5";
31$t3="r6";
32$i1="r7"; # i1-i3: mostly byte indexes extracted from the state
33$i2="r8";
34$i3="r9";
35
36$tbl="r10"; # base address of the lookup table in use
37$key="r11"; # pointer into the key schedule
38$rounds="r12"; # round counter; also carries the in/out pointer during byte I/O
39
40$code=<<___;
41.text
42.code 32
43
44.type AES_Te,%object
45.align 5
46AES_Te:
47.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
48.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
49.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
50.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
51.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
52.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
53.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
54.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
55.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
56.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
57.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
58.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
59.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
60.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
61.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
62.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
63.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
64.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
65.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
66.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
67.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
68.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
69.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
70.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
71.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
72.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
73.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
74.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
75.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
76.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
77.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
78.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
79.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
80.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
81.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
82.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
83.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
84.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
85.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
86.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
87.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
88.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
89.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
90.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
91.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
92.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
93.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
94.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
95.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
96.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
97.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
98.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
99.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
100.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
101.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
102.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
103.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
104.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
105.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
106.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
107.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
108.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
109.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
110.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
111@ Te4[256]
112.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
113.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
114.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
115.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
116.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
117.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
118.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
119.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
120.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
121.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
122.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
123.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
124.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
125.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
126.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
127.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
128.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
129.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
130.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
131.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
132.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
133.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
134.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
135.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
136.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
137.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
138.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
139.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
140.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
141.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
142.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
143.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
144@ rcon[]
145.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
146.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
147.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
148.size AES_Te,.-AES_Te
149
@ AES_encrypt: encrypt one 16-byte block using an expanded key.
@ r0 = in, r1 = out, r2 = key, as in the C prototype below.
150@ void AES_encrypt(const unsigned char *in, unsigned char *out,
151@ const AES_KEY *key) {
152.global AES_encrypt
153.type AES_encrypt,%function
154.align 5
155AES_encrypt:
156 sub r3,pc,#8 @ r3 = address of AES_encrypt (pc reads 8 bytes ahead)
157 stmdb sp!,{r1,r4-r12,lr} @ r1 (out) ends up on top of the stack, popped after the call
158 mov $rounds,r0 @ inp
159 mov $key,r2 @ key
160 sub $tbl,r3,#AES_encrypt-AES_Te @ Te, addressed relative to pc
161
162 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
163 ldrb $t1,[$rounds,#2] @ manner...
164 ldrb $t2,[$rounds,#1]
165 ldrb $t3,[$rounds,#0]
166 orr $s0,$s0,$t1,lsl#8
167 orr $s0,$s0,$t2,lsl#16
168 orr $s0,$s0,$t3,lsl#24 @ s0 = in[0]<<24 | in[1]<<16 | in[2]<<8 | in[3]
169 ldrb $s1,[$rounds,#7]
170 ldrb $t1,[$rounds,#6]
171 ldrb $t2,[$rounds,#5]
172 ldrb $t3,[$rounds,#4]
173 orr $s1,$s1,$t1,lsl#8
174 orr $s1,$s1,$t2,lsl#16
175 orr $s1,$s1,$t3,lsl#24 @ s1 built the same way from in[4..7]
176 ldrb $s2,[$rounds,#11]
177 ldrb $t1,[$rounds,#10]
178 ldrb $t2,[$rounds,#9]
179 ldrb $t3,[$rounds,#8]
180 orr $s2,$s2,$t1,lsl#8
181 orr $s2,$s2,$t2,lsl#16
182 orr $s2,$s2,$t3,lsl#24 @ s2 from in[8..11]
183 ldrb $s3,[$rounds,#15]
184 ldrb $t1,[$rounds,#14]
185 ldrb $t2,[$rounds,#13]
186 ldrb $t3,[$rounds,#12]
187 orr $s3,$s3,$t1,lsl#8
188 orr $s3,$s3,$t2,lsl#16
189 orr $s3,$s3,$t3,lsl#24 @ s3 from in[12..15]
190
191 bl _armv4_AES_encrypt @ transforms s0-s3 in place
192
193 ldr $rounds,[sp],#4 @ pop out
194 mov $t1,$s0,lsr#24 @ write output in endian-neutral
195 mov $t2,$s0,lsr#16 @ manner...
196 mov $t3,$s0,lsr#8
197 strb $t1,[$rounds,#0]
198 strb $t2,[$rounds,#1]
199 strb $t3,[$rounds,#2]
200 strb $s0,[$rounds,#3] @ strb keeps only the low byte
201 mov $t1,$s1,lsr#24
202 mov $t2,$s1,lsr#16
203 mov $t3,$s1,lsr#8
204 strb $t1,[$rounds,#4]
205 strb $t2,[$rounds,#5]
206 strb $t3,[$rounds,#6]
207 strb $s1,[$rounds,#7]
208 mov $t1,$s2,lsr#24
209 mov $t2,$s2,lsr#16
210 mov $t3,$s2,lsr#8
211 strb $t1,[$rounds,#8]
212 strb $t2,[$rounds,#9]
213 strb $t3,[$rounds,#10]
214 strb $s2,[$rounds,#11]
215 mov $t1,$s3,lsr#24
216 mov $t2,$s3,lsr#16
217 mov $t3,$s3,lsr#8
218 strb $t1,[$rounds,#12]
219 strb $t2,[$rounds,#13]
220 strb $t3,[$rounds,#14]
221 strb $s3,[$rounds,#15]
222
223 ldmia sp!,{r4-r12,lr} @ restore the registers saved on entry
224 tst lr,#1 @ bit 0 of the return address selects the return form
225 moveq pc,lr @ be binary compatible with V4, yet
226 bx lr @ interoperable with Thumb ISA:-)
227.size AES_encrypt,.-AES_encrypt
228
@ _armv4_AES_encrypt: encryption core called by AES_encrypt.
@ In/out: s0-s3 hold the state. In: key = key schedule, tbl = AES_Te.
@ t1-t3, i1-i3 and rounds are overwritten. lr is saved on the stack
@ because it is reused as the 0xff byte mask.
229.type _armv4_AES_encrypt,%function
230.align 2
231_armv4_AES_encrypt:
232 str lr,[sp,#-4]! @ push lr
233 ldr $t1,[$key],#16 @ first round key, key advances by 16
234 ldr $t2,[$key,#-12]
235 ldr $t3,[$key,#-8]
236 ldr $i1,[$key,#-4]
237 ldr $rounds,[$key,#240-16] @ round count stored at offset 240 of the schedule
238 eor $s0,$s0,$t1
239 eor $s1,$s1,$t2
240 eor $s2,$s2,$t3
241 eor $s3,$s3,$i1
242 sub $rounds,$rounds,#1 @ the last round is handled after the loop
243 mov lr,#255 @ byte mask
244
@ One full round per pass. Only one table is stored: the Te1/Te2/Te3
@ values named in the comments are Te entries rotated by the eor ...,ror#n.
245.Lenc_loop:
246 and $i2,lr,$s0,lsr#8
247 and $i3,lr,$s0,lsr#16
248 and $i1,lr,$s0
249 mov $s0,$s0,lsr#24
250 ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0]
251 ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24]
252 ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8]
253 ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16]
254
255 and $i1,lr,$s1,lsr#16 @ i0
256 and $i2,lr,$s1
257 and $i3,lr,$s1,lsr#8
258 mov $s1,$s1,lsr#24
259 ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16]
260 ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24]
261 ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0]
262 ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8]
263 eor $s0,$s0,$i1,ror#8
264 eor $s1,$s1,$t1,ror#24
265 eor $t2,$t2,$i2,ror#8 @ t2 and t3 accumulate the new s2 and s3
266 eor $t3,$t3,$i3,ror#8
267
268 and $i1,lr,$s2,lsr#8 @ i0
269 and $i2,lr,$s2,lsr#16 @ i1
270 and $i3,lr,$s2
271 mov $s2,$s2,lsr#24
272 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8]
273 ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
274 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24]
275 ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0]
276 eor $s0,$s0,$i1,ror#16
277 eor $s1,$s1,$i2,ror#8
278 eor $s2,$s2,$t2,ror#16
279 eor $t3,$t3,$i3,ror#16
280
281 and $i1,lr,$s3 @ i0
282 and $i2,lr,$s3,lsr#8 @ i1
283 and $i3,lr,$s3,lsr#16 @ i2
284 mov $s3,$s3,lsr#24
285 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0]
286 ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
287 ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16]
288 ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
289 eor $s0,$s0,$i1,ror#24
290 eor $s1,$s1,$i2,ror#16
291 eor $s2,$s2,$i3,ror#8
292 eor $s3,$s3,$t3,ror#8
293
294 ldr $t1,[$key],#16 @ next round key
295 ldr $t2,[$key,#-12]
296 ldr $t3,[$key,#-8]
297 ldr $i1,[$key,#-4]
298 eor $s0,$s0,$t1
299 eor $s1,$s1,$t2
300 eor $s2,$s2,$t3
301 eor $s3,$s3,$i1
302
303 subs $rounds,$rounds,#1
304 bne .Lenc_loop
305
306 add $tbl,$tbl,#2 @ ldrb at tbl+4*i now reads byte 2 of Te[i], used as Te4[i] below
307
308 and $i1,lr,$s0
309 and $i2,lr,$s0,lsr#8
310 and $i3,lr,$s0,lsr#16
311 mov $s0,$s0,lsr#24
312 ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0]
313 ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24]
314 ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8]
315 ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16]
316
317 and $i1,lr,$s1,lsr#16 @ i0
318 and $i2,lr,$s1
319 and $i3,lr,$s1,lsr#8
320 mov $s1,$s1,lsr#24
321 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16]
322 ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24]
323 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0]
324 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8]
325 eor $s0,$i1,$s0,lsl#8 @ output words are assembled a byte at a time by shifting
326 eor $s1,$t1,$s1,lsl#24
327 eor $t2,$i2,$t2,lsl#8
328 eor $t3,$i3,$t3,lsl#8
329
330 and $i1,lr,$s2,lsr#8 @ i0
331 and $i2,lr,$s2,lsr#16 @ i1
332 and $i3,lr,$s2
333 mov $s2,$s2,lsr#24
334 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8]
335 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
336 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24]
337 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0]
338 eor $s0,$i1,$s0,lsl#8
339 eor $s1,$s1,$i2,lsl#16
340 eor $s2,$t2,$s2,lsl#24
341 eor $t3,$i3,$t3,lsl#8
342
343 and $i1,lr,$s3 @ i0
344 and $i2,lr,$s3,lsr#8 @ i1
345 and $i3,lr,$s3,lsr#16 @ i2
346 mov $s3,$s3,lsr#24
347 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0]
348 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
349 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16]
350 ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
351 eor $s0,$i1,$s0,lsl#8
352 eor $s1,$s1,$i2,lsl#8
353 eor $s2,$s2,$i3,lsl#16
354 eor $s3,$t3,$s3,lsl#24
355
356 ldr lr,[sp],#4 @ pop lr
357 ldr $t1,[$key,#0] @ last round key
358 ldr $t2,[$key,#4]
359 ldr $t3,[$key,#8]
360 ldr $i1,[$key,#12]
361 eor $s0,$s0,$t1
362 eor $s1,$s1,$t2
363 eor $s2,$s2,$t3
364 eor $s3,$s3,$i1
365
366 sub $tbl,$tbl,#2 @ undo the +2 applied before the last round
367 mov pc,lr @ return
368.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
369
@ AES_set_encrypt_key: expand a 128-, 192- or 256-bit user key.
@ In:  r0 = user key bytes, r1 = key length in bits, r2 = key schedule to fill.
@ Out: r0 = 0 on success, or -1 when r0 or r2 is zero or r1 is not 128/192/256.
@      On success r2 points at the start of the schedule again and the round
@      count (10/12/14) has been stored at offset 240 of it.
370.global AES_set_encrypt_key
371.type AES_set_encrypt_key,%function
372.align 5
373AES_set_encrypt_key:
374 sub r3,pc,#8 @ AES_set_encrypt_key
375 teq r0,#0 @ reject a zero user-key pointer
376 moveq r0,#-1
377 beq .Labrt
378 teq r2,#0 @ reject a zero schedule pointer
379 moveq r0,#-1
380 beq .Labrt
381
382 teq r1,#128
383 beq .Lok
384 teq r1,#192
385 beq .Lok
386 teq r1,#256
387 movne r0,#-1
388 bne .Labrt
389
390.Lok: stmdb sp!,{r4-r12,lr}
391 sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4
392
393 mov $rounds,r0 @ inp
394 mov lr,r1 @ bits
395 mov $key,r2 @ key
396
397 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
398 ldrb $t1,[$rounds,#2] @ manner...
399 ldrb $t2,[$rounds,#1]
400 ldrb $t3,[$rounds,#0]
401 orr $s0,$s0,$t1,lsl#8
402 orr $s0,$s0,$t2,lsl#16
403 orr $s0,$s0,$t3,lsl#24
404 ldrb $s1,[$rounds,#7]
405 ldrb $t1,[$rounds,#6]
406 ldrb $t2,[$rounds,#5]
407 ldrb $t3,[$rounds,#4]
408 orr $s1,$s1,$t1,lsl#8
409 orr $s1,$s1,$t2,lsl#16
410 orr $s1,$s1,$t3,lsl#24
411 ldrb $s2,[$rounds,#11]
412 ldrb $t1,[$rounds,#10]
413 ldrb $t2,[$rounds,#9]
414 ldrb $t3,[$rounds,#8]
415 orr $s2,$s2,$t1,lsl#8
416 orr $s2,$s2,$t2,lsl#16
417 orr $s2,$s2,$t3,lsl#24
418 ldrb $s3,[$rounds,#15]
419 ldrb $t1,[$rounds,#14]
420 ldrb $t2,[$rounds,#13]
421 ldrb $t3,[$rounds,#12]
422 orr $s3,$s3,$t1,lsl#8
423 orr $s3,$s3,$t2,lsl#16
424 orr $s3,$s3,$t3,lsl#24
425 str $s0,[$key],#16 @ rk[0..3] = first 16 key bytes, key advances by 16
426 str $s1,[$key,#-12]
427 str $s2,[$key,#-8]
428 str $s3,[$key,#-4]
429
430 teq lr,#128
431 bne .Lnot128
432 mov $rounds,#10
433 str $rounds,[$key,#240-16] @ key is 16 bytes in, so this is offset 240 of the schedule
434 add $t3,$tbl,#256 @ rcon
435 mov lr,#255
436
437.L128_loop:
438 and $t2,lr,$s3,lsr#24
439 and $i1,lr,$s3,lsr#16
440 and $i2,lr,$s3,lsr#8
441 and $i3,lr,$s3
442 ldrb $t2,[$tbl,$t2]
443 ldrb $i1,[$tbl,$i1]
444 ldrb $i2,[$tbl,$i2]
445 ldrb $i3,[$tbl,$i3]
446 ldr $t1,[$t3],#4 @ rcon[i++]
447 orr $t2,$t2,$i1,lsl#24 @ S-box bytes of rk[3] recombined one byte position to the left
448 orr $t2,$t2,$i2,lsl#16
449 orr $t2,$t2,$i3,lsl#8
450 eor $t2,$t2,$t1
451 eor $s0,$s0,$t2 @ rk[4]=rk[0]^...
452 eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4]
453 eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5]
454 eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6]
455 str $s0,[$key],#16
456 str $s1,[$key,#-12]
457 str $s2,[$key,#-8]
458 str $s3,[$key,#-4]
459
460 subs $rounds,$rounds,#1
461 bne .L128_loop
462 sub r2,$key,#176 @ rewind to the schedule start: 16 + 10*16 bytes were written
463 b .Ldone
464
465.Lnot128:
466 ldrb $i2,[$rounds,#19] @ key bytes 16..23 become rk[4], rk[5]
467 ldrb $t1,[$rounds,#18]
468 ldrb $t2,[$rounds,#17]
469 ldrb $t3,[$rounds,#16]
470 orr $i2,$i2,$t1,lsl#8
471 orr $i2,$i2,$t2,lsl#16
472 orr $i2,$i2,$t3,lsl#24
473 ldrb $i3,[$rounds,#23]
474 ldrb $t1,[$rounds,#22]
475 ldrb $t2,[$rounds,#21]
476 ldrb $t3,[$rounds,#20]
477 orr $i3,$i3,$t1,lsl#8
478 orr $i3,$i3,$t2,lsl#16
479 orr $i3,$i3,$t3,lsl#24
480 str $i2,[$key],#8
481 str $i3,[$key,#-4]
482
483 teq lr,#192
484 bne .Lnot192
485 mov $rounds,#12
486 str $rounds,[$key,#240-24] @ key is 24 bytes in here
487 add $t3,$tbl,#256 @ rcon
488 mov lr,#255
489 mov $rounds,#8 @ loop counter: 8 passes
490
491.L192_loop:
492 and $t2,lr,$i3,lsr#24 @ i3 holds the most recently produced word
493 and $i1,lr,$i3,lsr#16
494 and $i2,lr,$i3,lsr#8
495 and $i3,lr,$i3
496 ldrb $t2,[$tbl,$t2]
497 ldrb $i1,[$tbl,$i1]
498 ldrb $i2,[$tbl,$i2]
499 ldrb $i3,[$tbl,$i3]
500 ldr $t1,[$t3],#4 @ rcon[i++]
501 orr $t2,$t2,$i1,lsl#24
502 orr $t2,$t2,$i2,lsl#16
503 orr $t2,$t2,$i3,lsl#8
504 eor $i3,$t2,$t1
505 eor $s0,$s0,$i3 @ rk[6]=rk[0]^...
506 eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6]
507 eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7]
508 eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8]
509 str $s0,[$key],#24
510 str $s1,[$key,#-20]
511 str $s2,[$key,#-16]
512 str $s3,[$key,#-12]
513
514 subs $rounds,$rounds,#1
515 subeq r2,$key,#216 @ rewind to the schedule start: 24 + 8*24 bytes
516 beq .Ldone
517
518 ldr $i1,[$key,#-32]
519 ldr $i2,[$key,#-28]
520 eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9]
521 eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10]
522 str $i1,[$key,#-8]
523 str $i3,[$key,#-4]
524 b .L192_loop
525
526.Lnot192:
527 ldrb $i2,[$rounds,#27] @ key bytes 24..31 become rk[6], rk[7]
528 ldrb $t1,[$rounds,#26]
529 ldrb $t2,[$rounds,#25]
530 ldrb $t3,[$rounds,#24]
531 orr $i2,$i2,$t1,lsl#8
532 orr $i2,$i2,$t2,lsl#16
533 orr $i2,$i2,$t3,lsl#24
534 ldrb $i3,[$rounds,#31]
535 ldrb $t1,[$rounds,#30]
536 ldrb $t2,[$rounds,#29]
537 ldrb $t3,[$rounds,#28]
538 orr $i3,$i3,$t1,lsl#8
539 orr $i3,$i3,$t2,lsl#16
540 orr $i3,$i3,$t3,lsl#24
541 str $i2,[$key],#8
542 str $i3,[$key,#-4]
543
544 mov $rounds,#14
545 str $rounds,[$key,#240-32] @ key is 32 bytes in here
546 add $t3,$tbl,#256 @ rcon
547 mov lr,#255
548 mov $rounds,#7 @ loop counter: 7 passes
549
550.L256_loop:
551 and $t2,lr,$i3,lsr#24
552 and $i1,lr,$i3,lsr#16
553 and $i2,lr,$i3,lsr#8
554 and $i3,lr,$i3
555 ldrb $t2,[$tbl,$t2]
556 ldrb $i1,[$tbl,$i1]
557 ldrb $i2,[$tbl,$i2]
558 ldrb $i3,[$tbl,$i3]
559 ldr $t1,[$t3],#4 @ rcon[i++]
560 orr $t2,$t2,$i1,lsl#24
561 orr $t2,$t2,$i2,lsl#16
562 orr $t2,$t2,$i3,lsl#8
563 eor $i3,$t2,$t1
564 eor $s0,$s0,$i3 @ rk[8]=rk[0]^...
565 eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8]
566 eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9]
567 eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10]
568 str $s0,[$key],#32
569 str $s1,[$key,#-28]
570 str $s2,[$key,#-24]
571 str $s3,[$key,#-20]
572
573 subs $rounds,$rounds,#1
574 subeq r2,$key,#256 @ rewind to the schedule start: 32 + 7*32 bytes
575 beq .Ldone
576
577 and $t2,lr,$s3 @ second half: S-box of rk[11], bytes kept in place, no rcon
578 and $i1,lr,$s3,lsr#8
579 and $i2,lr,$s3,lsr#16
580 and $i3,lr,$s3,lsr#24
581 ldrb $t2,[$tbl,$t2]
582 ldrb $i1,[$tbl,$i1]
583 ldrb $i2,[$tbl,$i2]
584 ldrb $i3,[$tbl,$i3]
585 orr $t2,$t2,$i1,lsl#8
586 orr $t2,$t2,$i2,lsl#16
587 orr $t2,$t2,$i3,lsl#24
588
589 ldr $t1,[$key,#-48]
590 ldr $i1,[$key,#-44]
591 ldr $i2,[$key,#-40]
592 ldr $i3,[$key,#-36]
593 eor $t1,$t1,$t2 @ rk[12]=rk[4]^...
594 eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12]
595 eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13]
596 eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14]
597 str $t1,[$key,#-16]
598 str $i1,[$key,#-12]
599 str $i2,[$key,#-8]
600 str $i3,[$key,#-4]
601 b .L256_loop
602
603.Ldone: mov r0,#0
604 ldmia sp!,{r4-r12,lr}
605.Labrt: tst lr,#1 @ common return point, failure paths arrive with r0 already set
606 moveq pc,lr @ be binary compatible with V4, yet
607 bx lr @ interoperable with Thumb ISA:-)
608.size AES_set_encrypt_key,.-AES_set_encrypt_key
609
@ AES_set_decrypt_key: build the key schedule used by AES_decrypt.
@ Arguments are passed through unchanged to AES_set_encrypt_key, which runs
@ first. A non-zero result from it is returned as is. Otherwise the round keys
@ are reversed in place, the inner ones are transformed, and r0 = 0 is returned.
610.global AES_set_decrypt_key
611.type AES_set_decrypt_key,%function
612.align 5
613AES_set_decrypt_key:
614 str lr,[sp,#-4]! @ push lr
615 bl AES_set_encrypt_key
616 teq r0,#0
617 ldrne lr,[sp],#4 @ pop lr
618 bne .Labrt @ reuse the return sequence inside AES_set_encrypt_key
619
620 stmdb sp!,{r4-r12} @ sits just below the lr pushed on entry
621
622 ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2,
623 mov $key,r2 @ which is AES_KEY *key
624 mov $i1,r2 @ i1 walks up from the first round key
625 add $i2,r2,$rounds,lsl#4 @ i2 walks down from the last round key
626
627.Linv: ldr $s0,[$i1] @ swap the 16-byte round keys at i1 and i2
628 ldr $s1,[$i1,#4]
629 ldr $s2,[$i1,#8]
630 ldr $s3,[$i1,#12]
631 ldr $t1,[$i2]
632 ldr $t2,[$i2,#4]
633 ldr $t3,[$i2,#8]
634 ldr $i3,[$i2,#12]
635 str $s0,[$i2],#-16
636 str $s1,[$i2,#16+4] @ +16 compensates for the post-decrement just applied
637 str $s2,[$i2,#16+8]
638 str $s3,[$i2,#16+12]
639 str $t1,[$i1],#16
640 str $t2,[$i1,#-12]
641 str $t3,[$i1,#-8]
642 str $i3,[$i1,#-4]
643 teq $i1,$i2 @ stop when the two pointers meet
644 bne .Linv
645___
646$mask80=$i1;
647$mask1b=$i2;
648$mask7f=$i3;
649$code.=<<___;
@ mask80, mask1b and mask7f reuse the i1, i2 and i3 registers from here on.
650 ldr $s0,[$key,#16]! @ prefetch tp1
651 mov $mask80,#0x80
652 mov $mask1b,#0x1b
653 orr $mask80,$mask80,#0x8000
654 orr $mask1b,$mask1b,#0x1b00
655 orr $mask80,$mask80,$mask80,lsl#16 @ 0x80808080
656 orr $mask1b,$mask1b,$mask1b,lsl#16 @ 0x1b1b1b1b
657 sub $rounds,$rounds,#1
658 mvn $mask7f,$mask80 @ 0x7f7f7f7f
659 mov $rounds,$rounds,lsl#2 @ (rounds-1)*4
660
@ One schedule word per pass, starting 16 bytes into the schedule.
@ Each and/and/sub/and/eor group doubles all four packed bytes at once:
@ shift each byte left by one and fold in 0x1b where the top bit was set.
661.Lmix: and $t1,$s0,$mask80
662 and $s1,$s0,$mask7f
663 sub $t1,$t1,$t1,lsr#7 @ 0x80 becomes 0x7f in every byte whose top bit was set
664 and $t1,$t1,$mask1b
665 eor $s1,$t1,$s1,lsl#1 @ tp2
666
667 and $t1,$s1,$mask80
668 and $s2,$s1,$mask7f
669 sub $t1,$t1,$t1,lsr#7
670 and $t1,$t1,$mask1b
671 eor $s2,$t1,$s2,lsl#1 @ tp4
672
673 and $t1,$s2,$mask80
674 and $s3,$s2,$mask7f
675 sub $t1,$t1,$t1,lsr#7
676 and $t1,$t1,$mask1b
677 eor $s3,$t1,$s3,lsl#1 @ tp8
678
679 eor $t1,$s1,$s2
680 eor $t2,$s0,$s3 @ tp9
681 eor $t1,$t1,$s3 @ tpe
682 eor $t1,$t1,$s1,ror#24
683 eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
684 eor $t1,$t1,$s2,ror#16
685 eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
686 eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24)
687
688 ldr $s0,[$key,#4] @ prefetch tp1
689 str $t1,[$key],#4 @ overwrite the word just processed, advance by one word
690 subs $rounds,$rounds,#1
691 bne .Lmix
692
693 mov r0,#0
694 ldmia sp!,{r4-r12,lr} @ also pops the lr pushed on entry
695 tst lr,#1
696 moveq pc,lr @ be binary compatible with V4, yet
697 bx lr @ interoperable with Thumb ISA:-)
698.size AES_set_decrypt_key,.-AES_set_decrypt_key
699
700.type AES_Td,%object
701.align 5
702AES_Td:
703.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
704.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
705.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
706.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
707.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
708.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
709.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
710.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
711.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
712.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
713.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
714.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
715.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
716.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
717.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
718.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
719.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
720.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
721.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
722.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
723.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
724.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
725.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
726.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
727.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
728.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
729.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
730.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
731.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
732.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
733.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
734.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
735.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
736.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
737.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
738.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
739.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
740.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
741.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
742.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
743.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
744.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
745.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
746.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
747.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
748.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
749.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
750.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
751.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
752.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
753.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
754.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
755.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
756.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
757.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
758.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
759.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
760.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
761.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
762.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
763.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
764.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
765.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
766.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
767@ Td4[256]
768.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
769.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
770.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
771.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
772.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
773.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
774.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
775.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
776.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
777.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
778.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
779.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
780.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
781.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
782.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
783.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
784.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
785.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
786.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
787.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
788.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
789.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
790.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
791.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
792.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
793.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
794.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
795.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
796.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
797.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
798.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
799.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
800.size AES_Td,.-AES_Td
801
@ AES_decrypt: decrypt one 16-byte block using an expanded decryption key.
@ r0 = in, r1 = out, r2 = key, as in the C prototype below.
802@ void AES_decrypt(const unsigned char *in, unsigned char *out,
803@ const AES_KEY *key) {
804.global AES_decrypt
805.type AES_decrypt,%function
806.align 5
807AES_decrypt:
808 sub r3,pc,#8 @ r3 = address of AES_decrypt (pc reads 8 bytes ahead)
809 stmdb sp!,{r1,r4-r12,lr} @ r1 (out) ends up on top of the stack, popped after the call
810 mov $rounds,r0 @ inp
811 mov $key,r2 @ key
812 sub $tbl,r3,#AES_decrypt-AES_Td @ Td, addressed relative to pc
813
814 ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
815 ldrb $t1,[$rounds,#2] @ manner...
816 ldrb $t2,[$rounds,#1]
817 ldrb $t3,[$rounds,#0]
818 orr $s0,$s0,$t1,lsl#8
819 orr $s0,$s0,$t2,lsl#16
820 orr $s0,$s0,$t3,lsl#24 @ s0 = in[0]<<24 | in[1]<<16 | in[2]<<8 | in[3]
821 ldrb $s1,[$rounds,#7]
822 ldrb $t1,[$rounds,#6]
823 ldrb $t2,[$rounds,#5]
824 ldrb $t3,[$rounds,#4]
825 orr $s1,$s1,$t1,lsl#8
826 orr $s1,$s1,$t2,lsl#16
827 orr $s1,$s1,$t3,lsl#24 @ s1 built the same way from in[4..7]
828 ldrb $s2,[$rounds,#11]
829 ldrb $t1,[$rounds,#10]
830 ldrb $t2,[$rounds,#9]
831 ldrb $t3,[$rounds,#8]
832 orr $s2,$s2,$t1,lsl#8
833 orr $s2,$s2,$t2,lsl#16
834 orr $s2,$s2,$t3,lsl#24 @ s2 from in[8..11]
835 ldrb $s3,[$rounds,#15]
836 ldrb $t1,[$rounds,#14]
837 ldrb $t2,[$rounds,#13]
838 ldrb $t3,[$rounds,#12]
839 orr $s3,$s3,$t1,lsl#8
840 orr $s3,$s3,$t2,lsl#16
841 orr $s3,$s3,$t3,lsl#24 @ s3 from in[12..15]
842
843 bl _armv4_AES_decrypt @ transforms s0-s3 in place
844
845 ldr $rounds,[sp],#4 @ pop out
846 mov $t1,$s0,lsr#24 @ write output in endian-neutral
847 mov $t2,$s0,lsr#16 @ manner...
848 mov $t3,$s0,lsr#8
849 strb $t1,[$rounds,#0]
850 strb $t2,[$rounds,#1]
851 strb $t3,[$rounds,#2]
852 strb $s0,[$rounds,#3] @ strb keeps only the low byte
853 mov $t1,$s1,lsr#24
854 mov $t2,$s1,lsr#16
855 mov $t3,$s1,lsr#8
856 strb $t1,[$rounds,#4]
857 strb $t2,[$rounds,#5]
858 strb $t3,[$rounds,#6]
859 strb $s1,[$rounds,#7]
860 mov $t1,$s2,lsr#24
861 mov $t2,$s2,lsr#16
862 mov $t3,$s2,lsr#8
863 strb $t1,[$rounds,#8]
864 strb $t2,[$rounds,#9]
865 strb $t3,[$rounds,#10]
866 strb $s2,[$rounds,#11]
867 mov $t1,$s3,lsr#24
868 mov $t2,$s3,lsr#16
869 mov $t3,$s3,lsr#8
870 strb $t1,[$rounds,#12]
871 strb $t2,[$rounds,#13]
872 strb $t3,[$rounds,#14]
873 strb $s3,[$rounds,#15]
874
875 ldmia sp!,{r4-r12,lr} @ restore the registers saved on entry
876 tst lr,#1 @ bit 0 of the return address selects the return form
877 moveq pc,lr @ be binary compatible with V4, yet
878 bx lr @ interoperable with Thumb ISA:-)
879.size AES_decrypt,.-AES_decrypt
880
@ _armv4_AES_decrypt: decryption core called by AES_decrypt.
@ In/out: s0-s3 hold the state. In: key = key schedule, tbl = AES_Td.
@ t1-t3, i1-i3 and rounds are overwritten. lr is saved on the stack
@ because it is reused as the 0xff byte mask.
881.type _armv4_AES_decrypt,%function
882.align 2
883_armv4_AES_decrypt:
884 str lr,[sp,#-4]! @ push lr
885 ldr $t1,[$key],#16 @ first round key, key advances by 16
886 ldr $t2,[$key,#-12]
887 ldr $t3,[$key,#-8]
888 ldr $i1,[$key,#-4]
889 ldr $rounds,[$key,#240-16] @ round count stored at offset 240 of the schedule
890 eor $s0,$s0,$t1
891 eor $s1,$s1,$t2
892 eor $s2,$s2,$t3
893 eor $s3,$s3,$i1
894 sub $rounds,$rounds,#1 @ the last round is handled after the loop
895 mov lr,#255 @ byte mask
896
@ One full round per pass. Only one table is stored: the Td1/Td2/Td3
@ values named in the comments are Td entries rotated by the eor ...,ror#n.
897.Ldec_loop:
898 and $i1,lr,$s0,lsr#16
899 and $i2,lr,$s0,lsr#8
900 and $i3,lr,$s0
901 mov $s0,$s0,lsr#24
902 ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16]
903 ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24]
904 ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8]
905 ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0]
906
907 and $i1,lr,$s1 @ i0
908 and $i2,lr,$s1,lsr#16
909 and $i3,lr,$s1,lsr#8
910 mov $s1,$s1,lsr#24
911 ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0]
912 ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24]
913 ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16]
914 ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8]
915 eor $s0,$s0,$i1,ror#24
916 eor $s1,$s1,$t1,ror#8
917 eor $t2,$i2,$t2,ror#8 @ t2 and t3 accumulate the new s2 and s3
918 eor $t3,$i3,$t3,ror#8
919
920 and $i1,lr,$s2,lsr#8 @ i0
921 and $i2,lr,$s2 @ i1
922 and $i3,lr,$s2,lsr#16
923 mov $s2,$s2,lsr#24
924 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8]
925 ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
926 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24]
927 ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16]
928 eor $s0,$s0,$i1,ror#16
929 eor $s1,$s1,$i2,ror#24
930 eor $s2,$s2,$t2,ror#8
931 eor $t3,$i3,$t3,ror#8
932
933 and $i1,lr,$s3,lsr#16 @ i0
934 and $i2,lr,$s3,lsr#8 @ i1
935 and $i3,lr,$s3 @ i2
936 mov $s3,$s3,lsr#24
937 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16]
938 ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
939 ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0]
940 ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24]
941 eor $s0,$s0,$i1,ror#8
942 eor $s1,$s1,$i2,ror#16
943 eor $s2,$s2,$i3,ror#24
944 eor $s3,$s3,$t3,ror#8
945
946 ldr $t1,[$key],#16 @ next round key
947 ldr $t2,[$key,#-12]
948 ldr $t3,[$key,#-8]
949 ldr $i1,[$key,#-4]
950 eor $s0,$s0,$t1
951 eor $s1,$s1,$t2
952 eor $s2,$s2,$t3
953 eor $s3,$s3,$i1
954
955 subs $rounds,$rounds,#1
956 bne .Ldec_loop
957
958 add $tbl,$tbl,#1024 @ tbl now points at the Td4 byte table that follows the 256 Td words
959
960 ldr $t1,[$tbl,#0] @ prefetch Td4
961 ldr $t2,[$tbl,#32] @ the loaded values are not used, every target register
962 ldr $t3,[$tbl,#64] @ is overwritten again below
963 ldr $i1,[$tbl,#96]
964 ldr $i2,[$tbl,#128]
965 ldr $i3,[$tbl,#160]
966 ldr $t1,[$tbl,#192]
967 ldr $t2,[$tbl,#224]
968
969 and $i1,lr,$s0,lsr#16
970 and $i2,lr,$s0,lsr#8
971 and $i3,lr,$s0
972 ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24]
973 ldrb $t1,[$tbl,$i1] @ Td4[s0>>16]
974 ldrb $t2,[$tbl,$i2] @ Td4[s0>>8]
975 ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
976
977 and $i1,lr,$s1 @ i0
978 and $i2,lr,$s1,lsr#16
979 and $i3,lr,$s1,lsr#8
980 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
981 ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24]
982 ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
983 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
984 eor $s0,$i1,$s0,lsl#24 @ output words are assembled a byte at a time by shifting
985 eor $s1,$t1,$s1,lsl#8
986 eor $t2,$t2,$i2,lsl#8
987 eor $t3,$t3,$i3,lsl#8
988
989 and $i1,lr,$s2,lsr#8 @ i0
990 and $i2,lr,$s2 @ i1
991 and $i3,lr,$s2,lsr#16
992 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8]
993 ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
994 ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
995 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
996 eor $s0,$s0,$i1,lsl#8
997 eor $s1,$i2,$s1,lsl#16
998 eor $s2,$t2,$s2,lsl#16
999 eor $t3,$t3,$i3,lsl#16
1000
1001 and $i1,lr,$s3,lsr#16 @ i0
1002 and $i2,lr,$s3,lsr#8 @ i1
1003 and $i3,lr,$s3 @ i2
1004 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16]
1005 ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
1006 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
1007 ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
1008 eor $s0,$s0,$i1,lsl#16
1009 eor $s1,$s1,$i2,lsl#8
1010 eor $s2,$i3,$s2,lsl#8
1011 eor $s3,$t3,$s3,lsl#24
1012
1013 ldr lr,[sp],#4 @ pop lr
1014 ldr $t1,[$key,#0] @ last round key
1015 ldr $t2,[$key,#4]
1016 ldr $t3,[$key,#8]
1017 ldr $i1,[$key,#12]
1018 eor $s0,$s0,$t1
1019 eor $s1,$s1,$t2
1020 eor $s2,$s2,$t3
1021 eor $s3,$s3,$i1
1022
1023 sub $tbl,$tbl,#1024 @ undo the +1024 applied before the last round
1024 mov pc,lr @ return
1025.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
1026.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1027___
1028
# Post-process the generated text: every "bx lr" is replaced by the literal
# word 0xe12fff1e, so an assembler restricted to ARMv4 never sees the mnemonic.
1029$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
1030print $code; # the finished assembly goes to standard output
diff --git a/src/lib/libcrypto/aes/asm/aes-ppc.pl b/src/lib/libcrypto/aes/asm/aes-ppc.pl
new file mode 100644
index 0000000000..ce427655ef
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-ppc.pl
@@ -0,0 +1,1176 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, page boundaries, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
15# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt.
18
# The first command-line argument selects the target flavour.  A value
# containing "64" selects 8-byte register slots and doubleword load/store
# mnemonics; a value containing "32" selects the 4-byte/word equivalents.
# Any other value aborts the script.
19$flavour = shift;
20
21if ($flavour =~ /64/) {
22 $SIZE_T =8; # bytes per saved-register slot in the stack frame
23 $STU ="stdu"; # store-with-update used to allocate the frame
24 $POP ="ld"; # reload a saved register
25 $PUSH ="std"; # save a register
26} elsif ($flavour =~ /32/) {
27 $SIZE_T =4;
28 $STU ="stwu";
29 $POP ="lwz";
30 $PUSH ="stw";
31} else { die "nonsense $flavour"; }
32
# Locate ppc-xlate.pl: first in the directory this script was started from,
# then in ../../perlasm relative to that directory.  Abort if neither exists.
33$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
34( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
35( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
36die "can't locate ppc-xlate.pl";
37
# Pipe everything written to STDOUT through the translator; the next
# command-line argument is passed on to it.  The low-precedence "or" is
# required: with "||" the die was bound to the (always true) command string
# instead of to open(), so a failed open was silently ignored.
38open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
39
# Size of the stack frame allocated by the exported entry points:
# 32 register-sized slots.
40$FRAME=32*$SIZE_T;
41
# Append every argument to $code as a ".long" directive that holds the value
# twice, so each table entry occupies two identical 32-bit words (8 bytes).
# NOTE(review): the sub is declared with an empty prototype although it
# consumes its arguments; the callers visible here use the &name(...) form.
42sub _data_word()
43{ my $i;
44 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
45}
46
# Symbolic register names that are interpolated into the assembly templates.
47$sp="r1";
48$toc="r2";
# Pointer registers used by the exported entry points: input block, output
# block and key structure.
49$inp="r3";
50$out="r4";
51$key="r5";
52
# Table pointers.  $Tbl0 shares r3 with $inp and $Tbl3 initially shares r2
# with $toc.
53$Tbl0="r3";
54$Tbl1="r6";
55$Tbl2="r7";
56$Tbl3="r2";
57
# Four 32-bit words of cipher state.
58$s0="r8";
59$s1="r9";
60$s2="r10";
61$s3="r11";
62
# Temporaries; the routines load round-key words into them.
63$t0="r12";
64$t1="r13";
65$t2="r14";
66$t3="r15";
67
# Sixteen scratch registers for table indices and looked-up values.
68$acc00="r16";
69$acc01="r17";
70$acc02="r18";
71$acc03="r19";
72
73$acc04="r20";
74$acc05="r21";
75$acc06="r22";
76$acc07="r23";
77
78$acc08="r24";
79$acc09="r25";
80$acc10="r26";
81$acc11="r27";
82
83$acc12="r28";
84$acc13="r29";
85$acc14="r30";
86$acc15="r31";
87
88# stay away from TLS pointer
# 64-bit: $t1 is moved from r13 to r0.  32-bit: $Tbl3 is moved from r2 to the
# register previously named $t0 (r12), and $t0 is moved to r0.  The die
# checks guard against the assignments above being changed inconsistently.
89if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
90else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
# The compact routines reuse two table-pointer registers for byte masks.
91$mask80=$Tbl2;
92$mask1b=$Tbl3;
93
# Emit two position-independent helpers that leave a table address in $Tbl0:
# LAES_Te for the data emitted right after this code, LAES_Td for the data
# 2048+256 bytes further on.  Each helper saves LR in r0, executes
# "bcl 20,31,$+4" so that LR holds the address of the following instruction,
# adds the distance from there to the table, restores LR from r0 and returns.
# Both helpers therefore clobber r0.  Each is 6 instructions (24 bytes); the
# .space directives pad the pair to 32 and then 128 bytes in total.
94$code.=<<___;
95.machine "any"
96.text
97
98.align 7
99LAES_Te:
100 mflr r0
101 bcl 20,31,\$+4
102 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
103 addi $Tbl0,$Tbl0,`128-8`
104 mtlr r0
105 blr
106 .space `32-24`
107LAES_Td:
108 mflr r0
109 bcl 20,31,\$+4
110 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
111 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
112 mtlr r0
113 blr
114 .space `128-32-24`
115___
# Encryption word table: 256 entries.  _data_word emits every entry twice,
# so the table occupies 2048 bytes in the output.
116&_data_word(
117 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-byte table emitted directly after the 2048-byte encryption word table.
# The encrypt routines address it at table base + 2048 and index it with
# single-byte loads (it is called Te4 in the prefetch comment further down).
181$code.=<<___;
182.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214___
# Decryption word table: 256 entries.  _data_word emits every entry twice,
# so the table occupies 2048 bytes in the output.  LAES_Td returns its address.
215&_data_word(
216 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# 256-byte table emitted directly after the 2048-byte decryption word table.
# The decrypt routines address it at table base + 2048 and index it with
# single-byte loads (it is called Td4 in the prefetch comment further down).
280$code.=<<___;
281.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
# Exported entry point .AES_encrypt.
# Loads four 32-bit words from the input pointer, obtains the encryption
# table address via LAES_Te, runs Lppc_AES_encrypt_compact and stores the
# four result words through the output pointer.
# A stack frame is allocated and LR (via r0), the TOC register and r13-r31
# are saved in it; all are restored before the frame is released.
315.globl .AES_encrypt
316.align 7
317.AES_encrypt:
318 mflr r0
319 $STU $sp,-$FRAME($sp)
320
321 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
322 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
323 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
324 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
325 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
326 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
327 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
328 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
329 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
330 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
331 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
332 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
333 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
334 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
335 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
336 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
337 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
338 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
339 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
340 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
341 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
342
# The input words must be loaded before LAES_Te is called: the table
# pointer it returns lives in the same register as the input pointer.
343 lwz $s0,0($inp)
344 lwz $s1,4($inp)
345 lwz $s2,8($inp)
346 lwz $s3,12($inp)
347 bl LAES_Te
348 bl Lppc_AES_encrypt_compact
349 stw $s0,0($out)
350 stw $s1,4($out)
351 stw $s2,8($out)
352 stw $s3,12($out)
353
354 $POP r0,`$FRAME-$SIZE_T*21`($sp)
355 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
356 $POP r13,`$FRAME-$SIZE_T*19`($sp)
357 $POP r14,`$FRAME-$SIZE_T*18`($sp)
358 $POP r15,`$FRAME-$SIZE_T*17`($sp)
359 $POP r16,`$FRAME-$SIZE_T*16`($sp)
360 $POP r17,`$FRAME-$SIZE_T*15`($sp)
361 $POP r18,`$FRAME-$SIZE_T*14`($sp)
362 $POP r19,`$FRAME-$SIZE_T*13`($sp)
363 $POP r20,`$FRAME-$SIZE_T*12`($sp)
364 $POP r21,`$FRAME-$SIZE_T*11`($sp)
365 $POP r22,`$FRAME-$SIZE_T*10`($sp)
366 $POP r23,`$FRAME-$SIZE_T*9`($sp)
367 $POP r24,`$FRAME-$SIZE_T*8`($sp)
368 $POP r25,`$FRAME-$SIZE_T*7`($sp)
369 $POP r26,`$FRAME-$SIZE_T*6`($sp)
370 $POP r27,`$FRAME-$SIZE_T*5`($sp)
371 $POP r28,`$FRAME-$SIZE_T*4`($sp)
372 $POP r29,`$FRAME-$SIZE_T*3`($sp)
373 $POP r30,`$FRAME-$SIZE_T*2`($sp)
374 $POP r31,`$FRAME-$SIZE_T*1`($sp)
375 mtlr r0
376 addi $sp,$sp,$FRAME
377 blr
378
# Lppc_AES_encrypt: table-driven encryption of the four state words.
# On entry the state words are in the s0-s3 registers, the key pointer is
# set and the first table register holds the address returned by LAES_Te.
# The result is left in the state registers.  Clobbers CTR, the temporaries,
# all sixteen scratch registers and the other three table registers, and
# advances the key pointer.
# The exported entry point visible in this file calls the compact variant
# instead of this routine.
379.align 4
380Lppc_AES_encrypt:
# Loop count comes from offset 240 of the key structure; one is subtracted
# because the last round is handled separately after the loop.
381 lwz $acc00,240($key)
382 lwz $t0,0($key)
383 lwz $t1,4($key)
384 lwz $t2,8($key)
385 lwz $t3,12($key)
# Three more table pointers at byte offsets 3, 2 and 1 into the table of
# duplicated words.
386 addi $Tbl1,$Tbl0,3
387 addi $Tbl2,$Tbl0,2
388 addi $Tbl3,$Tbl0,1
389 addi $acc00,$acc00,-1
390 addi $key,$key,16
391 xor $s0,$s0,$t0
392 xor $s1,$s1,$t1
393 xor $s2,$s2,$t2
394 xor $s3,$s3,$t3
395 mtctr $acc00
396.align 4
# Each rlwinm in the loop extracts one state byte already multiplied by 8
# (the size of one table entry); loads of the next round key are interleaved
# with the sixteen table lookups.
397Lenc_loop:
398 rlwinm $acc00,$s0,`32-24+3`,21,28
399 rlwinm $acc01,$s1,`32-24+3`,21,28
400 lwz $t0,0($key)
401 lwz $t1,4($key)
402 rlwinm $acc02,$s2,`32-24+3`,21,28
403 rlwinm $acc03,$s3,`32-24+3`,21,28
404 lwz $t2,8($key)
405 lwz $t3,12($key)
406 rlwinm $acc04,$s1,`32-16+3`,21,28
407 rlwinm $acc05,$s2,`32-16+3`,21,28
408 lwzx $acc00,$Tbl0,$acc00
409 lwzx $acc01,$Tbl0,$acc01
410 rlwinm $acc06,$s3,`32-16+3`,21,28
411 rlwinm $acc07,$s0,`32-16+3`,21,28
412 lwzx $acc02,$Tbl0,$acc02
413 lwzx $acc03,$Tbl0,$acc03
414 rlwinm $acc08,$s2,`32-8+3`,21,28
415 rlwinm $acc09,$s3,`32-8+3`,21,28
416 lwzx $acc04,$Tbl1,$acc04
417 lwzx $acc05,$Tbl1,$acc05
418 rlwinm $acc10,$s0,`32-8+3`,21,28
419 rlwinm $acc11,$s1,`32-8+3`,21,28
420 lwzx $acc06,$Tbl1,$acc06
421 lwzx $acc07,$Tbl1,$acc07
422 rlwinm $acc12,$s3,`0+3`,21,28
423 rlwinm $acc13,$s0,`0+3`,21,28
424 lwzx $acc08,$Tbl2,$acc08
425 lwzx $acc09,$Tbl2,$acc09
426 rlwinm $acc14,$s1,`0+3`,21,28
427 rlwinm $acc15,$s2,`0+3`,21,28
428 lwzx $acc10,$Tbl2,$acc10
429 lwzx $acc11,$Tbl2,$acc11
430 xor $t0,$t0,$acc00
431 xor $t1,$t1,$acc01
432 lwzx $acc12,$Tbl3,$acc12
433 lwzx $acc13,$Tbl3,$acc13
434 xor $t2,$t2,$acc02
435 xor $t3,$t3,$acc03
436 lwzx $acc14,$Tbl3,$acc14
437 lwzx $acc15,$Tbl3,$acc15
438 xor $t0,$t0,$acc04
439 xor $t1,$t1,$acc05
440 xor $t2,$t2,$acc06
441 xor $t3,$t3,$acc07
442 xor $t0,$t0,$acc08
443 xor $t1,$t1,$acc09
444 xor $t2,$t2,$acc10
445 xor $t3,$t3,$acc11
446 xor $s0,$t0,$acc12
447 xor $s1,$t1,$acc13
448 xor $s2,$t2,$acc14
449 xor $s3,$t3,$acc15
450 addi $key,$key,16
451 bdnz- Lenc_loop
452
# Last round: byte lookups in the 256-byte table that follows the 2048-byte
# word table.  The eight word loads below touch that table at 32-byte
# intervals; their results are overwritten before being used.
453 addi $Tbl2,$Tbl0,2048
454 nop
455 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
456 lwz $acc09,`2048+32`($Tbl0)
457 lwz $acc10,`2048+64`($Tbl0)
458 lwz $acc11,`2048+96`($Tbl0)
459 lwz $acc08,`2048+128`($Tbl0)
460 lwz $acc09,`2048+160`($Tbl0)
461 lwz $acc10,`2048+192`($Tbl0)
462 lwz $acc11,`2048+224`($Tbl0)
463 rlwinm $acc00,$s0,`32-24`,24,31
464 rlwinm $acc01,$s1,`32-24`,24,31
465 lwz $t0,0($key)
466 lwz $t1,4($key)
467 rlwinm $acc02,$s2,`32-24`,24,31
468 rlwinm $acc03,$s3,`32-24`,24,31
469 lwz $t2,8($key)
470 lwz $t3,12($key)
471 rlwinm $acc04,$s1,`32-16`,24,31
472 rlwinm $acc05,$s2,`32-16`,24,31
473 lbzx $acc00,$Tbl2,$acc00
474 lbzx $acc01,$Tbl2,$acc01
475 rlwinm $acc06,$s3,`32-16`,24,31
476 rlwinm $acc07,$s0,`32-16`,24,31
477 lbzx $acc02,$Tbl2,$acc02
478 lbzx $acc03,$Tbl2,$acc03
479 rlwinm $acc08,$s2,`32-8`,24,31
480 rlwinm $acc09,$s3,`32-8`,24,31
481 lbzx $acc04,$Tbl2,$acc04
482 lbzx $acc05,$Tbl2,$acc05
483 rlwinm $acc10,$s0,`32-8`,24,31
484 rlwinm $acc11,$s1,`32-8`,24,31
485 lbzx $acc06,$Tbl2,$acc06
486 lbzx $acc07,$Tbl2,$acc07
487 rlwinm $acc12,$s3,`0`,24,31
488 rlwinm $acc13,$s0,`0`,24,31
489 lbzx $acc08,$Tbl2,$acc08
490 lbzx $acc09,$Tbl2,$acc09
491 rlwinm $acc14,$s1,`0`,24,31
492 rlwinm $acc15,$s2,`0`,24,31
493 lbzx $acc10,$Tbl2,$acc10
494 lbzx $acc11,$Tbl2,$acc11
# Reassemble each output word from four looked-up bytes, most significant
# byte first, then add the final round key.
495 rlwinm $s0,$acc00,24,0,7
496 rlwinm $s1,$acc01,24,0,7
497 lbzx $acc12,$Tbl2,$acc12
498 lbzx $acc13,$Tbl2,$acc13
499 rlwinm $s2,$acc02,24,0,7
500 rlwinm $s3,$acc03,24,0,7
501 lbzx $acc14,$Tbl2,$acc14
502 lbzx $acc15,$Tbl2,$acc15
503 rlwimi $s0,$acc04,16,8,15
504 rlwimi $s1,$acc05,16,8,15
505 rlwimi $s2,$acc06,16,8,15
506 rlwimi $s3,$acc07,16,8,15
507 rlwimi $s0,$acc08,8,16,23
508 rlwimi $s1,$acc09,8,16,23
509 rlwimi $s2,$acc10,8,16,23
510 rlwimi $s3,$acc11,8,16,23
511 or $s0,$s0,$acc12
512 or $s1,$s1,$acc13
513 or $s2,$s2,$acc14
514 or $s3,$s3,$acc15
515 xor $s0,$s0,$t0
516 xor $s1,$s1,$t1
517 xor $s2,$s2,$t2
518 xor $s3,$s3,$t3
519 blr
520
# Lppc_AES_encrypt_compact: encryption that uses only the 256-byte table
# located 2048 bytes past the address returned by LAES_Te; the column mixing
# is computed arithmetically on packed bytes instead of by word-table lookup.
# On entry the state words are in the s0-s3 registers and the key pointer is
# set; the result is left in the state registers.  Clobbers CTR, the
# temporaries, all sixteen scratch registers, the second table register and
# the two mask registers, and advances the key pointer.
521.align 4
522Lppc_AES_encrypt_compact:
# Loop count is read from offset 240 of the key structure.
523 lwz $acc00,240($key)
524 lwz $t0,0($key)
525 lwz $t1,4($key)
526 lwz $t2,8($key)
527 lwz $t3,12($key)
528 addi $Tbl1,$Tbl0,2048
# Build the 32-bit constants 0x80808080 and 0x1b1b1b1b.
529 lis $mask80,0x8080
530 lis $mask1b,0x1b1b
531 addi $key,$key,16
532 ori $mask80,$mask80,0x8080
533 ori $mask1b,$mask1b,0x1b1b
534 mtctr $acc00
535.align 4
# Per iteration: add the current round key, substitute every state byte
# through the byte table while regrouping the bytes into new words, and
# fetch the next round key.
536Lenc_compact_loop:
537 xor $s0,$s0,$t0
538 xor $s1,$s1,$t1
539 xor $s2,$s2,$t2
540 xor $s3,$s3,$t3
541 rlwinm $acc00,$s0,`32-24`,24,31
542 rlwinm $acc01,$s1,`32-24`,24,31
543 rlwinm $acc02,$s2,`32-24`,24,31
544 rlwinm $acc03,$s3,`32-24`,24,31
545 lbzx $acc00,$Tbl1,$acc00
546 lbzx $acc01,$Tbl1,$acc01
547 rlwinm $acc04,$s1,`32-16`,24,31
548 rlwinm $acc05,$s2,`32-16`,24,31
549 lbzx $acc02,$Tbl1,$acc02
550 lbzx $acc03,$Tbl1,$acc03
551 rlwinm $acc06,$s3,`32-16`,24,31
552 rlwinm $acc07,$s0,`32-16`,24,31
553 lbzx $acc04,$Tbl1,$acc04
554 lbzx $acc05,$Tbl1,$acc05
555 rlwinm $acc08,$s2,`32-8`,24,31
556 rlwinm $acc09,$s3,`32-8`,24,31
557 lbzx $acc06,$Tbl1,$acc06
558 lbzx $acc07,$Tbl1,$acc07
559 rlwinm $acc10,$s0,`32-8`,24,31
560 rlwinm $acc11,$s1,`32-8`,24,31
561 lbzx $acc08,$Tbl1,$acc08
562 lbzx $acc09,$Tbl1,$acc09
563 rlwinm $acc12,$s3,`0`,24,31
564 rlwinm $acc13,$s0,`0`,24,31
565 lbzx $acc10,$Tbl1,$acc10
566 lbzx $acc11,$Tbl1,$acc11
567 rlwinm $acc14,$s1,`0`,24,31
568 rlwinm $acc15,$s2,`0`,24,31
569 lbzx $acc12,$Tbl1,$acc12
570 lbzx $acc13,$Tbl1,$acc13
571 rlwinm $s0,$acc00,24,0,7
572 rlwinm $s1,$acc01,24,0,7
573 lbzx $acc14,$Tbl1,$acc14
574 lbzx $acc15,$Tbl1,$acc15
575 rlwinm $s2,$acc02,24,0,7
576 rlwinm $s3,$acc03,24,0,7
577 rlwimi $s0,$acc04,16,8,15
578 rlwimi $s1,$acc05,16,8,15
579 rlwimi $s2,$acc06,16,8,15
580 rlwimi $s3,$acc07,16,8,15
581 rlwimi $s0,$acc08,8,16,23
582 rlwimi $s1,$acc09,8,16,23
583 rlwimi $s2,$acc10,8,16,23
584 rlwimi $s3,$acc11,8,16,23
585 lwz $t0,0($key)
586 lwz $t1,4($key)
587 or $s0,$s0,$acc12
588 or $s1,$s1,$acc13
589 lwz $t2,8($key)
590 lwz $t3,12($key)
591 or $s2,$s2,$acc14
592 or $s3,$s3,$acc15
593
# The final iteration leaves the loop here, skipping the column mixing below.
594 addi $key,$key,16
595 bdz Lenc_compact_done
596
# Column mixing on four packed bytes per register; in the comments r0 is the
# word after substitution and r2 is the value built by the steps up to the
# line marked r2.
597 and $acc00,$s0,$mask80 # r1=r0&0x80808080
598 and $acc01,$s1,$mask80
599 and $acc02,$s2,$mask80
600 and $acc03,$s3,$mask80
601 srwi $acc04,$acc00,7 # r1>>7
602 srwi $acc05,$acc01,7
603 srwi $acc06,$acc02,7
604 srwi $acc07,$acc03,7
605 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
606 andc $acc09,$s1,$mask80
607 andc $acc10,$s2,$mask80
608 andc $acc11,$s3,$mask80
609 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
610 sub $acc01,$acc01,$acc05
611 sub $acc02,$acc02,$acc06
612 sub $acc03,$acc03,$acc07
613 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
614 add $acc09,$acc09,$acc09
615 add $acc10,$acc10,$acc10
616 add $acc11,$acc11,$acc11
617 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
618 and $acc01,$acc01,$mask1b
619 and $acc02,$acc02,$mask1b
620 and $acc03,$acc03,$mask1b
621 xor $acc00,$acc00,$acc08 # r2
622 xor $acc01,$acc01,$acc09
623 xor $acc02,$acc02,$acc10
624 xor $acc03,$acc03,$acc11
625
626 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
627 rotlwi $acc13,$s1,16
628 rotlwi $acc14,$s2,16
629 rotlwi $acc15,$s3,16
630 xor $s0,$s0,$acc00 # r0^r2
631 xor $s1,$s1,$acc01
632 xor $s2,$s2,$acc02
633 xor $s3,$s3,$acc03
634 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
635 rotrwi $s1,$s1,24
636 rotrwi $s2,$s2,24
637 rotrwi $s3,$s3,24
638 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
639 xor $s1,$s1,$acc01
640 xor $s2,$s2,$acc02
641 xor $s3,$s3,$acc03
642 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
643 rotlwi $acc09,$acc13,8
644 rotlwi $acc10,$acc14,8
645 rotlwi $acc11,$acc15,8
646 xor $s0,$s0,$acc12 # ...^ROTATE(r0,16)
647 xor $s1,$s1,$acc13
648 xor $s2,$s2,$acc14
649 xor $s3,$s3,$acc15
650 xor $s0,$s0,$acc08 # ...^ROTATE(r0,24)
651 xor $s1,$s1,$acc09
652 xor $s2,$s2,$acc10
653 xor $s3,$s3,$acc11
654
655 b Lenc_compact_loop
656.align 4
# Add the last round key loaded by the final loop iteration and return.
657Lenc_compact_done:
658 xor $s0,$s0,$t0
659 xor $s1,$s1,$t1
660 xor $s2,$s2,$t2
661 xor $s3,$s3,$t3
662 blr
663
# Exported entry point .AES_decrypt.
# Loads four 32-bit words from the input pointer, obtains the decryption
# table address via LAES_Td, runs Lppc_AES_decrypt_compact and stores the
# four result words through the output pointer.
# A stack frame is allocated and LR (via r0), the TOC register and r13-r31
# are saved in it; all are restored before the frame is released.
664.globl .AES_decrypt
665.align 7
666.AES_decrypt:
667 mflr r0
668 $STU $sp,-$FRAME($sp)
669
670 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
671 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
672 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
673 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
674 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
675 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
676 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
677 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
678 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
679 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
680 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
681 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
682 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
683 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
684 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
685 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
686 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
687 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
688 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
689 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
690 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
691
# The input words must be loaded before LAES_Td is called: the table
# pointer it returns lives in the same register as the input pointer.
692 lwz $s0,0($inp)
693 lwz $s1,4($inp)
694 lwz $s2,8($inp)
695 lwz $s3,12($inp)
696 bl LAES_Td
697 bl Lppc_AES_decrypt_compact
698 stw $s0,0($out)
699 stw $s1,4($out)
700 stw $s2,8($out)
701 stw $s3,12($out)
702
703 $POP r0,`$FRAME-$SIZE_T*21`($sp)
704 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
705 $POP r13,`$FRAME-$SIZE_T*19`($sp)
706 $POP r14,`$FRAME-$SIZE_T*18`($sp)
707 $POP r15,`$FRAME-$SIZE_T*17`($sp)
708 $POP r16,`$FRAME-$SIZE_T*16`($sp)
709 $POP r17,`$FRAME-$SIZE_T*15`($sp)
710 $POP r18,`$FRAME-$SIZE_T*14`($sp)
711 $POP r19,`$FRAME-$SIZE_T*13`($sp)
712 $POP r20,`$FRAME-$SIZE_T*12`($sp)
713 $POP r21,`$FRAME-$SIZE_T*11`($sp)
714 $POP r22,`$FRAME-$SIZE_T*10`($sp)
715 $POP r23,`$FRAME-$SIZE_T*9`($sp)
716 $POP r24,`$FRAME-$SIZE_T*8`($sp)
717 $POP r25,`$FRAME-$SIZE_T*7`($sp)
718 $POP r26,`$FRAME-$SIZE_T*6`($sp)
719 $POP r27,`$FRAME-$SIZE_T*5`($sp)
720 $POP r28,`$FRAME-$SIZE_T*4`($sp)
721 $POP r29,`$FRAME-$SIZE_T*3`($sp)
722 $POP r30,`$FRAME-$SIZE_T*2`($sp)
723 $POP r31,`$FRAME-$SIZE_T*1`($sp)
724 mtlr r0
725 addi $sp,$sp,$FRAME
726 blr
727
# Lppc_AES_decrypt: table-driven decryption of the four state words.
# On entry the state words are in the s0-s3 registers, the key pointer is
# set and the first table register holds the address returned by LAES_Td.
# The result is left in the state registers.  Clobbers CTR, the temporaries,
# all sixteen scratch registers and the other three table registers, and
# advances the key pointer.
# The exported entry point visible in this file calls the compact variant
# instead of this routine.
728.align 4
729Lppc_AES_decrypt:
# Loop count comes from offset 240 of the key structure; one is subtracted
# because the last round is handled separately after the loop.
730 lwz $acc00,240($key)
731 lwz $t0,0($key)
732 lwz $t1,4($key)
733 lwz $t2,8($key)
734 lwz $t3,12($key)
# Three more table pointers at byte offsets 3, 2 and 1 into the table of
# duplicated words.
735 addi $Tbl1,$Tbl0,3
736 addi $Tbl2,$Tbl0,2
737 addi $Tbl3,$Tbl0,1
738 addi $acc00,$acc00,-1
739 addi $key,$key,16
740 xor $s0,$s0,$t0
741 xor $s1,$s1,$t1
742 xor $s2,$s2,$t2
743 xor $s3,$s3,$t3
744 mtctr $acc00
745.align 4
# Same structure as the encrypt loop; only the choice of source state word
# for the second and fourth byte positions differs.
746Ldec_loop:
747 rlwinm $acc00,$s0,`32-24+3`,21,28
748 rlwinm $acc01,$s1,`32-24+3`,21,28
749 lwz $t0,0($key)
750 lwz $t1,4($key)
751 rlwinm $acc02,$s2,`32-24+3`,21,28
752 rlwinm $acc03,$s3,`32-24+3`,21,28
753 lwz $t2,8($key)
754 lwz $t3,12($key)
755 rlwinm $acc04,$s3,`32-16+3`,21,28
756 rlwinm $acc05,$s0,`32-16+3`,21,28
757 lwzx $acc00,$Tbl0,$acc00
758 lwzx $acc01,$Tbl0,$acc01
759 rlwinm $acc06,$s1,`32-16+3`,21,28
760 rlwinm $acc07,$s2,`32-16+3`,21,28
761 lwzx $acc02,$Tbl0,$acc02
762 lwzx $acc03,$Tbl0,$acc03
763 rlwinm $acc08,$s2,`32-8+3`,21,28
764 rlwinm $acc09,$s3,`32-8+3`,21,28
765 lwzx $acc04,$Tbl1,$acc04
766 lwzx $acc05,$Tbl1,$acc05
767 rlwinm $acc10,$s0,`32-8+3`,21,28
768 rlwinm $acc11,$s1,`32-8+3`,21,28
769 lwzx $acc06,$Tbl1,$acc06
770 lwzx $acc07,$Tbl1,$acc07
771 rlwinm $acc12,$s1,`0+3`,21,28
772 rlwinm $acc13,$s2,`0+3`,21,28
773 lwzx $acc08,$Tbl2,$acc08
774 lwzx $acc09,$Tbl2,$acc09
775 rlwinm $acc14,$s3,`0+3`,21,28
776 rlwinm $acc15,$s0,`0+3`,21,28
777 lwzx $acc10,$Tbl2,$acc10
778 lwzx $acc11,$Tbl2,$acc11
779 xor $t0,$t0,$acc00
780 xor $t1,$t1,$acc01
781 lwzx $acc12,$Tbl3,$acc12
782 lwzx $acc13,$Tbl3,$acc13
783 xor $t2,$t2,$acc02
784 xor $t3,$t3,$acc03
785 lwzx $acc14,$Tbl3,$acc14
786 lwzx $acc15,$Tbl3,$acc15
787 xor $t0,$t0,$acc04
788 xor $t1,$t1,$acc05
789 xor $t2,$t2,$acc06
790 xor $t3,$t3,$acc07
791 xor $t0,$t0,$acc08
792 xor $t1,$t1,$acc09
793 xor $t2,$t2,$acc10
794 xor $t3,$t3,$acc11
795 xor $s0,$t0,$acc12
796 xor $s1,$t1,$acc13
797 xor $s2,$t2,$acc14
798 xor $s3,$t3,$acc15
799 addi $key,$key,16
800 bdnz- Ldec_loop
801
# Last round: byte lookups in the 256-byte table that follows the 2048-byte
# word table.  The eight word loads below touch that table at 32-byte
# intervals; their results are overwritten before being used.
802 addi $Tbl2,$Tbl0,2048
803 nop
804 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
805 lwz $acc09,`2048+32`($Tbl0)
806 lwz $acc10,`2048+64`($Tbl0)
807 lwz $acc11,`2048+96`($Tbl0)
808 lwz $acc08,`2048+128`($Tbl0)
809 lwz $acc09,`2048+160`($Tbl0)
810 lwz $acc10,`2048+192`($Tbl0)
811 lwz $acc11,`2048+224`($Tbl0)
812 rlwinm $acc00,$s0,`32-24`,24,31
813 rlwinm $acc01,$s1,`32-24`,24,31
814 lwz $t0,0($key)
815 lwz $t1,4($key)
816 rlwinm $acc02,$s2,`32-24`,24,31
817 rlwinm $acc03,$s3,`32-24`,24,31
818 lwz $t2,8($key)
819 lwz $t3,12($key)
820 rlwinm $acc04,$s3,`32-16`,24,31
821 rlwinm $acc05,$s0,`32-16`,24,31
822 lbzx $acc00,$Tbl2,$acc00
823 lbzx $acc01,$Tbl2,$acc01
824 rlwinm $acc06,$s1,`32-16`,24,31
825 rlwinm $acc07,$s2,`32-16`,24,31
826 lbzx $acc02,$Tbl2,$acc02
827 lbzx $acc03,$Tbl2,$acc03
828 rlwinm $acc08,$s2,`32-8`,24,31
829 rlwinm $acc09,$s3,`32-8`,24,31
830 lbzx $acc04,$Tbl2,$acc04
831 lbzx $acc05,$Tbl2,$acc05
832 rlwinm $acc10,$s0,`32-8`,24,31
833 rlwinm $acc11,$s1,`32-8`,24,31
834 lbzx $acc06,$Tbl2,$acc06
835 lbzx $acc07,$Tbl2,$acc07
836 rlwinm $acc12,$s1,`0`,24,31
837 rlwinm $acc13,$s2,`0`,24,31
838 lbzx $acc08,$Tbl2,$acc08
839 lbzx $acc09,$Tbl2,$acc09
840 rlwinm $acc14,$s3,`0`,24,31
841 rlwinm $acc15,$s0,`0`,24,31
842 lbzx $acc10,$Tbl2,$acc10
843 lbzx $acc11,$Tbl2,$acc11
# Reassemble each output word from four looked-up bytes, most significant
# byte first, then add the final round key.
844 rlwinm $s0,$acc00,24,0,7
845 rlwinm $s1,$acc01,24,0,7
846 lbzx $acc12,$Tbl2,$acc12
847 lbzx $acc13,$Tbl2,$acc13
848 rlwinm $s2,$acc02,24,0,7
849 rlwinm $s3,$acc03,24,0,7
850 lbzx $acc14,$Tbl2,$acc14
851 lbzx $acc15,$Tbl2,$acc15
852 rlwimi $s0,$acc04,16,8,15
853 rlwimi $s1,$acc05,16,8,15
854 rlwimi $s2,$acc06,16,8,15
855 rlwimi $s3,$acc07,16,8,15
856 rlwimi $s0,$acc08,8,16,23
857 rlwimi $s1,$acc09,8,16,23
858 rlwimi $s2,$acc10,8,16,23
859 rlwimi $s3,$acc11,8,16,23
860 or $s0,$s0,$acc12
861 or $s1,$s1,$acc13
862 or $s2,$s2,$acc14
863 or $s3,$s3,$acc15
864 xor $s0,$s0,$t0
865 xor $s1,$s1,$t1
866 xor $s2,$s2,$t2
867 xor $s3,$s3,$t3
868 blr
869
870.align 4
871Lppc_AES_decrypt_compact:
872 lwz $acc00,240($key)
873 lwz $t0,0($key)
874 lwz $t1,4($key)
875 lwz $t2,8($key)
876 lwz $t3,12($key)
877 addi $Tbl1,$Tbl0,2048
878 lis $mask80,0x8080
879 lis $mask1b,0x1b1b
880 addi $key,$key,16
881 ori $mask80,$mask80,0x8080
882 ori $mask1b,$mask1b,0x1b1b
883___
884$code.=<<___ if ($SIZE_T==8);
885 insrdi $mask80,$mask80,32,0
886 insrdi $mask1b,$mask1b,32,0
887___
888$code.=<<___;
889 mtctr $acc00
890.align 4
891Ldec_compact_loop:
892 xor $s0,$s0,$t0
893 xor $s1,$s1,$t1
894 xor $s2,$s2,$t2
895 xor $s3,$s3,$t3
896 rlwinm $acc00,$s0,`32-24`,24,31
897 rlwinm $acc01,$s1,`32-24`,24,31
898 rlwinm $acc02,$s2,`32-24`,24,31
899 rlwinm $acc03,$s3,`32-24`,24,31
900 lbzx $acc00,$Tbl1,$acc00
901 lbzx $acc01,$Tbl1,$acc01
902 rlwinm $acc04,$s3,`32-16`,24,31
903 rlwinm $acc05,$s0,`32-16`,24,31
904 lbzx $acc02,$Tbl1,$acc02
905 lbzx $acc03,$Tbl1,$acc03
906 rlwinm $acc06,$s1,`32-16`,24,31
907 rlwinm $acc07,$s2,`32-16`,24,31
908 lbzx $acc04,$Tbl1,$acc04
909 lbzx $acc05,$Tbl1,$acc05
910 rlwinm $acc08,$s2,`32-8`,24,31
911 rlwinm $acc09,$s3,`32-8`,24,31
912 lbzx $acc06,$Tbl1,$acc06
913 lbzx $acc07,$Tbl1,$acc07
914 rlwinm $acc10,$s0,`32-8`,24,31
915 rlwinm $acc11,$s1,`32-8`,24,31
916 lbzx $acc08,$Tbl1,$acc08
917 lbzx $acc09,$Tbl1,$acc09
918 rlwinm $acc12,$s1,`0`,24,31
919 rlwinm $acc13,$s2,`0`,24,31
920 lbzx $acc10,$Tbl1,$acc10
921 lbzx $acc11,$Tbl1,$acc11
922 rlwinm $acc14,$s3,`0`,24,31
923 rlwinm $acc15,$s0,`0`,24,31
924 lbzx $acc12,$Tbl1,$acc12
925 lbzx $acc13,$Tbl1,$acc13
926 rlwinm $s0,$acc00,24,0,7
927 rlwinm $s1,$acc01,24,0,7
928 lbzx $acc14,$Tbl1,$acc14
929 lbzx $acc15,$Tbl1,$acc15
930 rlwinm $s2,$acc02,24,0,7
931 rlwinm $s3,$acc03,24,0,7
932 rlwimi $s0,$acc04,16,8,15
933 rlwimi $s1,$acc05,16,8,15
934 rlwimi $s2,$acc06,16,8,15
935 rlwimi $s3,$acc07,16,8,15
936 rlwimi $s0,$acc08,8,16,23
937 rlwimi $s1,$acc09,8,16,23
938 rlwimi $s2,$acc10,8,16,23
939 rlwimi $s3,$acc11,8,16,23
940 lwz $t0,0($key)
941 lwz $t1,4($key)
942 or $s0,$s0,$acc12
943 or $s1,$s1,$acc13
944 lwz $t2,8($key)
945 lwz $t3,12($key)
946 or $s2,$s2,$acc14
947 or $s3,$s3,$acc15
948
949 addi $key,$key,16
950 bdz Ldec_compact_done
951___
952$code.=<<___ if ($SIZE_T==8);
953 # vectorized permutation improves decrypt performance by 10%
954 insrdi $s0,$s1,32,0
955 insrdi $s2,$s3,32,0
956
957 and $acc00,$s0,$mask80 # r1=r0&0x80808080
958 and $acc02,$s2,$mask80
959 srdi $acc04,$acc00,7 # r1>>7
960 srdi $acc06,$acc02,7
961 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
962 andc $acc10,$s2,$mask80
963 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
964 sub $acc02,$acc02,$acc06
965 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
966 add $acc10,$acc10,$acc10
967 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
968 and $acc02,$acc02,$mask1b
969 xor $acc00,$acc00,$acc08 # r2
970 xor $acc02,$acc02,$acc10
971
972 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
973 and $acc06,$acc02,$mask80
974 srdi $acc08,$acc04,7 # r1>>7
975 srdi $acc10,$acc06,7
976 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
977 andc $acc14,$acc02,$mask80
978 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
979 sub $acc06,$acc06,$acc10
980 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
981 add $acc14,$acc14,$acc14
982 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
983 and $acc06,$acc06,$mask1b
984 xor $acc04,$acc04,$acc12 # r4
985 xor $acc06,$acc06,$acc14
986
987 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
988 and $acc10,$acc06,$mask80
989 srdi $acc12,$acc08,7 # r1>>7
990 srdi $acc14,$acc10,7
991 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
992 sub $acc10,$acc10,$acc14
993 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
994 andc $acc14,$acc06,$mask80
995 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
996 add $acc14,$acc14,$acc14
997 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
998 and $acc10,$acc10,$mask1b
999 xor $acc08,$acc08,$acc12 # r8
1000 xor $acc10,$acc10,$acc14
1001
1002 xor $acc00,$acc00,$s0 # r2^r0
1003 xor $acc02,$acc02,$s2
1004 xor $acc04,$acc04,$s0 # r4^r0
1005 xor $acc06,$acc06,$s2
1006
1007 extrdi $acc01,$acc00,32,0
1008 extrdi $acc03,$acc02,32,0
1009 extrdi $acc05,$acc04,32,0
1010 extrdi $acc07,$acc06,32,0
1011 extrdi $acc09,$acc08,32,0
1012 extrdi $acc11,$acc10,32,0
1013___
1014$code.=<<___ if ($SIZE_T==4);
1015 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1016 and $acc01,$s1,$mask80
1017 and $acc02,$s2,$mask80
1018 and $acc03,$s3,$mask80
1019 srwi $acc04,$acc00,7 # r1>>7
1020 srwi $acc05,$acc01,7
1021 srwi $acc06,$acc02,7
1022 srwi $acc07,$acc03,7
1023 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1024 andc $acc09,$s1,$mask80
1025 andc $acc10,$s2,$mask80
1026 andc $acc11,$s3,$mask80
1027 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1028 sub $acc01,$acc01,$acc05
1029 sub $acc02,$acc02,$acc06
1030 sub $acc03,$acc03,$acc07
1031 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1032 add $acc09,$acc09,$acc09
1033 add $acc10,$acc10,$acc10
1034 add $acc11,$acc11,$acc11
1035 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1036 and $acc01,$acc01,$mask1b
1037 and $acc02,$acc02,$mask1b
1038 and $acc03,$acc03,$mask1b
1039 xor $acc00,$acc00,$acc08 # r2
1040 xor $acc01,$acc01,$acc09
1041 xor $acc02,$acc02,$acc10
1042 xor $acc03,$acc03,$acc11
1043
1044 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1045 and $acc05,$acc01,$mask80
1046 and $acc06,$acc02,$mask80
1047 and $acc07,$acc03,$mask80
1048 srwi $acc08,$acc04,7 # r1>>7
1049 srwi $acc09,$acc05,7
1050 srwi $acc10,$acc06,7
1051 srwi $acc11,$acc07,7
1052 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1053 andc $acc13,$acc01,$mask80
1054 andc $acc14,$acc02,$mask80
1055 andc $acc15,$acc03,$mask80
1056 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1057 sub $acc05,$acc05,$acc09
1058 sub $acc06,$acc06,$acc10
1059 sub $acc07,$acc07,$acc11
1060 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1061 add $acc13,$acc13,$acc13
1062 add $acc14,$acc14,$acc14
1063 add $acc15,$acc15,$acc15
1064 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1065 and $acc05,$acc05,$mask1b
1066 and $acc06,$acc06,$mask1b
1067 and $acc07,$acc07,$mask1b
1068 xor $acc04,$acc04,$acc12 # r4
1069 xor $acc05,$acc05,$acc13
1070 xor $acc06,$acc06,$acc14
1071 xor $acc07,$acc07,$acc15
1072
1073 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1074 and $acc09,$acc05,$mask80
1075 and $acc10,$acc06,$mask80
1076 and $acc11,$acc07,$mask80
1077 srwi $acc12,$acc08,7 # r1>>7
1078 srwi $acc13,$acc09,7
1079 srwi $acc14,$acc10,7
1080 srwi $acc15,$acc11,7
1081 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1082 sub $acc09,$acc09,$acc13
1083 sub $acc10,$acc10,$acc14
1084 sub $acc11,$acc11,$acc15
1085 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1086 andc $acc13,$acc05,$mask80
1087 andc $acc14,$acc06,$mask80
1088 andc $acc15,$acc07,$mask80
1089 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1090 add $acc13,$acc13,$acc13
1091 add $acc14,$acc14,$acc14
1092 add $acc15,$acc15,$acc15
1093 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1094 and $acc09,$acc09,$mask1b
1095 and $acc10,$acc10,$mask1b
1096 and $acc11,$acc11,$mask1b
1097 xor $acc08,$acc08,$acc12 # r8
1098 xor $acc09,$acc09,$acc13
1099 xor $acc10,$acc10,$acc14
1100 xor $acc11,$acc11,$acc15
1101
1102 xor $acc00,$acc00,$s0 # r2^r0
1103 xor $acc01,$acc01,$s1
1104 xor $acc02,$acc02,$s2
1105 xor $acc03,$acc03,$s3
1106 xor $acc04,$acc04,$s0 # r4^r0
1107 xor $acc05,$acc05,$s1
1108 xor $acc06,$acc06,$s2
1109 xor $acc07,$acc07,$s3
1110___
1111$code.=<<___;
1112 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1113 rotrwi $s1,$s1,8
1114 rotrwi $s2,$s2,8
1115 rotrwi $s3,$s3,8
1116 xor $s0,$s0,$acc00 # ^= r2^r0
1117 xor $s1,$s1,$acc01
1118 xor $s2,$s2,$acc02
1119 xor $s3,$s3,$acc03
1120 xor $acc00,$acc00,$acc08
1121 xor $acc01,$acc01,$acc09
1122 xor $acc02,$acc02,$acc10
1123 xor $acc03,$acc03,$acc11
1124 xor $s0,$s0,$acc04 # ^= r4^r0
1125 xor $s1,$s1,$acc05
1126 xor $s2,$s2,$acc06
1127 xor $s3,$s3,$acc07
1128 rotrwi $acc00,$acc00,24
1129 rotrwi $acc01,$acc01,24
1130 rotrwi $acc02,$acc02,24
1131 rotrwi $acc03,$acc03,24
1132 xor $acc04,$acc04,$acc08
1133 xor $acc05,$acc05,$acc09
1134 xor $acc06,$acc06,$acc10
1135 xor $acc07,$acc07,$acc11
1136 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1137 xor $s1,$s1,$acc09
1138 xor $s2,$s2,$acc10
1139 xor $s3,$s3,$acc11
1140 rotrwi $acc04,$acc04,16
1141 rotrwi $acc05,$acc05,16
1142 rotrwi $acc06,$acc06,16
1143 rotrwi $acc07,$acc07,16
1144 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1145 xor $s1,$s1,$acc01
1146 xor $s2,$s2,$acc02
1147 xor $s3,$s3,$acc03
1148 rotrwi $acc08,$acc08,8
1149 rotrwi $acc09,$acc09,8
1150 rotrwi $acc10,$acc10,8
1151 rotrwi $acc11,$acc11,8
1152 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1153 xor $s1,$s1,$acc05
1154 xor $s2,$s2,$acc06
1155 xor $s3,$s3,$acc07
1156 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1157 xor $s1,$s1,$acc09
1158 xor $s2,$s2,$acc10
1159 xor $s3,$s3,$acc11
1160
1161 b Ldec_compact_loop
1162.align 4
1163Ldec_compact_done:
1164 xor $s0,$s0,$t0
1165 xor $s1,$s1,$t1
1166 xor $s2,$s2,$t2
1167 xor $s3,$s3,$t3
1168 blr
1169.long 0
1170.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1171.align 7
1172___
1173
# Post-process and emit the generated text: every backtick-quoted span in
# $code is replaced by the result of evaluating its contents as a Perl
# expression, then the finished assembly is written to STDOUT.
174$code =~ s/\`([^\`]*)\`/eval $1/gem;
175print $code;
176close STDOUT;
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl
new file mode 100644
index 0000000000..4b27afd92f
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-s390x.pl
@@ -0,0 +1,1333 @@
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# AES for s390x.
11
12# April 2007.
13#
14# Software performance improvement over gcc-generated code is ~70% and
15# in absolute terms is ~73 cycles per byte processed with 128-bit key.
16# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
17# *strictly* in-order execution and issued instruction [in this case
18# load value from memory is critical] has to complete before execution
19# flow proceeds. S-boxes are compressed to 2KB[+256B].
20#
21# As for hardware acceleration support. It's basically a "teaser," as
22# it can and should be improved in several ways. Most notably support
23# for CBC is not utilized, nor multiple blocks are ever processed.
24# Then software key schedule can be postponed till hardware support
25# detection... Performance improvement over assembler is reportedly
26# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
27# support is implemented.
28
29# May 2007.
30#
31# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
32# for 128-bit keys, if hardware support is detected.
33
34# January 2009.
35#
36# Add support for hardware AES192/256 and reschedule instructions to
37# minimize/avoid Address Generation Interlock hazard and to favour
38# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
39# almost 50% on z9. The gain is smaller on z10, because being dual-
40# issue z10 makes it impossible to eliminate the interlock condition:
41# critical path is not long enough. Yet it spends ~24 cycles per byte
42# processed with 128-bit key.
43#
44# Unlike previous version hardware support detection takes place only
45# at the moment of key schedule setup, which is denoted in key->rounds.
46# This is done, because deferred key setup can't be made MT-safe, not
47# for key lengths longer than 128 bits.
48#
49# Add AES_cbc_encrypt, which gives incredible performance improvement,
50# it was measured to be ~6.6x. It's less than previously mentioned 8x,
51# because software implementation was optimized.
52
# When $softonly is non-zero the code templates guarded by "if (!$softonly)"
# are left out of the generated output.
53$softonly=0; # allow hardware support
54
# Symbolic register names used by the code templates below.  Several names
# share one physical register: %r0 is both $t0 and $mask, %r2 is both $t2 and
# $inp, and %r3 is $t3, $out and $bits, so writing one alias overwrites the
# others.
55$t0="%r0"; $mask="%r0";
56$t1="%r1";
57$t2="%r2"; $inp="%r2";
58$t3="%r3"; $out="%r3"; $bits="%r3";
59$key="%r4";
60$i1="%r5";
61$i2="%r6";
62$i3="%r7";
63$s0="%r8";
64$s1="%r9";
65$s2="%r10";
66$s3="%r11";
67$tbl="%r12";
68$rounds="%r13";
69$ra="%r14";
70$sp="%r15";
71
# Append table data to $code.  Arguments are consumed in order until the
# first undefined one; each is emitted as one ".long" directive holding the
# same 32-bit value twice, so every table entry occupies 8 bytes.  Nothing is
# returned explicitly; the effect is the text appended to $code.
# The empty prototype does not restrict callers in this file, which invoke
# the sub with the "&_data_word(...)" form.
72sub _data_word()
73{ my $i;
74 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
75}
76
# Start of the generated text and header of the AES_Te object (256-byte
# aligned).  The data appended after this header is: 256 table words passed
# to _data_word (each emitted twice, 8 bytes per entry, 2048 bytes in total),
# then the 256-byte Te4 table, then the rcon[] words.
77$code=<<___;
78.text
79
80.type AES_Te,\@object
81.align 256
82AES_Te:
83___
84&_data_word(
85 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
86 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
87 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
88 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
89 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
90 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
91 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
92 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
93 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
94 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
95 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
96 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
97 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
98 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
99 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
100 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
101 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
102 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
103 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
104 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
105 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
106 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
107 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
108 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
109 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
110 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
111 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
112 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
113 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
114 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
115 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
116 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
117 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
118 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
119 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
120 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
121 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
122 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
123 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
124 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
125 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
126 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
127 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
128 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
129 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
130 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
131 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
132 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
133 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
134 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
135 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
136 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
137 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
138 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
139 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
140 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
141 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
142 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
143 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
144 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
145 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
146 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
147 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
148 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
149$code.=<<___;
150# Te4[256]
151.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
152.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
153.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
154.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
155.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
156.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
157.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
158.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
159.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
160.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
161.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
162.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
163.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
164.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
165.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
166.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
167.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
168.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
169.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
170.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
171.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
172.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
173.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
174.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
175.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
176.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
177.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
178.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
179.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
180.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
181.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
182.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
183# rcon[]
184.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
185.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
186.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
187.align 256
188.size AES_Te,.-AES_Te
189
190# void AES_encrypt(const unsigned char *inp, unsigned char *out,
191# const AES_KEY *key) {
#
# Processes one 16-byte block.  The 32-bit word at offset 240 of the key
# structure selects the path: a value below 16 (unsigned compare) goes to
# the software code at .Lesoft, anything else is handed to the km
# instruction as its function code.
192.globl AES_encrypt
193.type AES_encrypt,\@function
194AES_encrypt:
195___
# Hardware path; left out of the output when $softonly is set.
196$code.=<<___ if (!$softonly);
# %r0 = word at offset 240 of the key structure, compared with 16
197 l %r0,240($key)
198 lhi %r1,16
199 clr %r0,%r1
200 jl .Lesoft
201
# Operands for km: %r1 -> key structure, %r4 -> out, %r2 -> inp (already
# there on entry, hence the commented-out la), %r3 = length.
202 la %r1,0($key)
203 #la %r2,0($inp)
204 la %r4,0($out)
205 lghi %r3,16 # single block length
206 .long 0xb92e0042 # km %r4,%r2
# branch mask 1 selects condition code 3; .-4 is the km instruction itself,
# so the operation is re-issued in that case
207 brc 1,.-4 # can this happen?
208 br %r14
209.align 64
210.Lesoft:
211___
212$code.=<<___;
# Software path.  %r3..%r14 are stored starting at 24(%r15), which puts the
# out pointer (%r3) at 24 and %r6 at 48.
213 stmg %r3,$ra,24($sp)
214
# load the input block as four zero-extended 32-bit words
215 llgf $s0,0($inp)
216 llgf $s1,4($inp)
217 llgf $s2,8($inp)
218 llgf $s3,12($inp)
219
220 larl $tbl,AES_Te
221 bras $ra,_s390x_AES_encrypt
222
# %r3 is reused as a temporary by the round code, so the out pointer is
# reloaded from its save slot before the result is stored
223 lg $out,24($sp)
224 st $s0,0($out)
225 st $s1,4($out)
226 st $s2,8($out)
227 st $s3,12($out)
228
# restore %r6..%r14 and return
229 lmg %r6,$ra,48($sp)
230 br $ra
231.size AES_encrypt,.-AES_encrypt
232
233.type _s390x_AES_encrypt,\@function
234.align 16
# Software encryption core shared by the callers in this file.
# In:   s0-s3 (%r8-%r11) = the four state words,
#       key (%r4) -> key schedule, with the round count at offset 240,
#       tbl (%r12) -> AES_Te.
# Out:  s0-s3 = resulting state words.
# Uses: %r0-%r3, %r5-%r7 and %r13 as temporaries; %r4 is advanced by 16 per
#       loop iteration; %r14 is also used as an index temporary, so the
#       return address is stored at 152(%r15) on entry and reloaded before
#       the final branch.
235_s390x_AES_encrypt:
236 stg $ra,152($sp)
# XOR the state with the first four key words
237 x $s0,0($key)
238 x $s1,4($key)
239 x $s2,8($key)
240 x $s3,12($key)
241 l $rounds,240($key)
# mask is 0xff shifted left by 3: each byte index is extracted already
# multiplied by 8, the size of one table entry
242 llill $mask,`0xff<<3`
# the loop below runs (round count - 1) times; the last round follows it
243 aghi $rounds,-1
244 j .Lenc_loop
245.align 16
# Main rounds.  Every lookup adds a displacement of 0..3 to the scaled index;
# the per-line comments name the table (Te0..Te3) each displacement stands for.
246.Lenc_loop:
247 sllg $t1,$s0,`0+3`
248 srlg $t2,$s0,`8-3`
249 srlg $t3,$s0,`16-3`
250 srl $s0,`24-3`
251 nr $s0,$mask
252 ngr $t1,$mask
253 nr $t2,$mask
254 nr $t3,$mask
255
256 srlg $i1,$s1,`16-3` # i0
257 sllg $i2,$s1,`0+3`
258 srlg $i3,$s1,`8-3`
259 srl $s1,`24-3`
260 nr $i1,$mask
261 nr $s1,$mask
262 ngr $i2,$mask
263 nr $i3,$mask
264
265 l $s0,0($s0,$tbl) # Te0[s0>>24]
266 l $t1,1($t1,$tbl) # Te3[s0>>0]
267 l $t2,2($t2,$tbl) # Te2[s0>>8]
268 l $t3,3($t3,$tbl) # Te1[s0>>16]
269
270 x $s0,3($i1,$tbl) # Te1[s1>>16]
271 l $s1,0($s1,$tbl) # Te0[s1>>24]
272 x $t2,1($i2,$tbl) # Te3[s1>>0]
273 x $t3,2($i3,$tbl) # Te2[s1>>8]
274
275 srlg $i1,$s2,`8-3` # i0
276 srlg $i2,$s2,`16-3` # i1
277 nr $i1,$mask
278 nr $i2,$mask
279 sllg $i3,$s2,`0+3`
280 srl $s2,`24-3`
281 nr $s2,$mask
282 ngr $i3,$mask
283
284 xr $s1,$t1
285 srlg $ra,$s3,`8-3` # i1
286 sllg $t1,$s3,`0+3` # i0
287 nr $ra,$mask
# step to the next group of four key words
288 la $key,16($key)
289 ngr $t1,$mask
290
291 x $s0,2($i1,$tbl) # Te2[s2>>8]
292 x $s1,3($i2,$tbl) # Te1[s2>>16]
293 l $s2,0($s2,$tbl) # Te0[s2>>24]
294 x $t3,1($i3,$tbl) # Te3[s2>>0]
295
296 srlg $i3,$s3,`16-3` # i2
297 xr $s2,$t2
298 srl $s3,`24-3`
299 nr $i3,$mask
300 nr $s3,$mask
301
302 x $s0,0($key)
303 x $s1,4($key)
304 x $s2,8($key)
305 x $t3,12($key)
306
307 x $s0,1($t1,$tbl) # Te3[s3>>0]
308 x $s1,2($ra,$tbl) # Te2[s3>>8]
309 x $s2,3($i3,$tbl) # Te1[s3>>16]
310 l $s3,0($s3,$tbl) # Te0[s3>>24]
311 xr $s3,$t3
312
313 brct $rounds,.Lenc_loop
314 .align 16
315
# Last round: single bytes are fetched with llgc at displacement 2 of the
# same 8-byte entries (labelled Te4 in the comments) and shifted into place.
316 sllg $t1,$s0,`0+3`
317 srlg $t2,$s0,`8-3`
318 ngr $t1,$mask
319 srlg $t3,$s0,`16-3`
320 srl $s0,`24-3`
321 nr $s0,$mask
322 nr $t2,$mask
323 nr $t3,$mask
324
325 srlg $i1,$s1,`16-3` # i0
326 sllg $i2,$s1,`0+3`
327 ngr $i2,$mask
328 srlg $i3,$s1,`8-3`
329 srl $s1,`24-3`
330 nr $i1,$mask
331 nr $s1,$mask
332 nr $i3,$mask
333
334 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
335 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
336 sll $s0,24
337 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
338 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
339 sll $t2,8
340 sll $t3,16
341
342 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
343 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
344 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
345 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
346 sll $i1,16
347 sll $s1,24
348 sll $i3,8
349 or $s0,$i1
350 or $s1,$t1
351 or $t2,$i2
352 or $t3,$i3
353
354 srlg $i1,$s2,`8-3` # i0
355 srlg $i2,$s2,`16-3` # i1
356 nr $i1,$mask
357 nr $i2,$mask
358 sllg $i3,$s2,`0+3`
359 srl $s2,`24-3`
360 ngr $i3,$mask
361 nr $s2,$mask
362
363 sllg $t1,$s3,`0+3` # i0
364 srlg $ra,$s3,`8-3` # i1
365 ngr $t1,$mask
366
367 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
368 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
369 sll $i1,8
370 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
371 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
372 sll $i2,16
373 nr $ra,$mask
374 sll $s2,24
375 or $s0,$i1
376 or $s1,$i2
377 or $s2,$t2
378 or $t3,$i3
379
380 srlg $i3,$s3,`16-3` # i2
381 srl $s3,`24-3`
382 nr $i3,$mask
383 nr $s3,$mask
384
# t0 is the same register as mask (%r0); the mask is not used again below,
# so it is overwritten here with a key word
385 l $t0,16($key)
386 l $t2,20($key)
387
388 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
389 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
390 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
391 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
392 sll $i2,8
393 sll $i3,16
394 sll $s3,24
395 or $s0,$i1
396 or $s1,$i2
397 or $s2,$i3
398 or $s3,$t3
399
# reload the return address saved on entry, then XOR in the final four key
# words found at offsets 16..28 from the advanced key pointer
400 lg $ra,152($sp)
401 xr $s0,$t0
402 xr $s1,$t2
403 x $s2,24($key)
404 x $s3,28($key)
405
406 br $ra
407.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
408___
409
# Header of the AES_Td object (256-byte aligned).  The data appended after
# this header is: 256 table words passed to _data_word (each emitted twice,
# 8 bytes per entry, 2048 bytes in total), then the 256-byte Td4 table.
410$code.=<<___;
411.type AES_Td,\@object
412.align 256
413AES_Td:
414___
415&_data_word(
416 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
417 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
418 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
419 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
420 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
421 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
422 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
423 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
424 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
425 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
426 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
427 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
428 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
429 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
430 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
431 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
432 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
433 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
434 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
435 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
436 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
437 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
438 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
439 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
440 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
441 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
442 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
443 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
444 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
445 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
446 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
447 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
448 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
449 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
450 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
451 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
452 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
453 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
454 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
455 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
456 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
457 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
458 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
459 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
460 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
461 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
462 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
463 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
464 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
465 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
466 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
467 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
468 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
469 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
470 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
471 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
472 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
473 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
474 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
475 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
476 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
477 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
478 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
479 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
480$code.=<<___;
481# Td4[256]
482.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
483.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
484.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
485.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
486.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
487.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
488.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
489.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
490.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
491.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
492.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
493.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
494.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
495.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
496.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
497.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
498.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
499.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
500.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
501.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
502.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
503.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
504.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
505.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
506.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
507.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
508.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
509.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
510.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
511.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
512.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
513.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
514.size AES_Td,.-AES_Td
515
516# void AES_decrypt(const unsigned char *inp, unsigned char *out,
517# const AES_KEY *key) {
#
# Processes one 16-byte block.  The 32-bit word at offset 240 of the key
# structure selects the path: a value below 16 (unsigned compare) goes to
# the software code at .Ldsoft, anything else is handed to the km
# instruction as its function code.
# NOTE(review): the hardware sequence is the same as in AES_encrypt, so the
# decrypt direction presumably comes from the function code stored by the
# decrypt key setup - confirm there.
518.globl AES_decrypt
519.type AES_decrypt,\@function
520AES_decrypt:
521___
# Hardware path; left out of the output when $softonly is set.
522$code.=<<___ if (!$softonly);
# %r0 = word at offset 240 of the key structure, compared with 16
523 l %r0,240($key)
524 lhi %r1,16
525 clr %r0,%r1
526 jl .Ldsoft
527
# Operands for km: %r1 -> key structure, %r4 -> out, %r2 -> inp (already
# there on entry, hence the commented-out la), %r3 = length.
528 la %r1,0($key)
529 #la %r2,0($inp)
530 la %r4,0($out)
531 lghi %r3,16 # single block length
532 .long 0xb92e0042 # km %r4,%r2
# branch mask 1 selects condition code 3; .-4 is the km instruction itself,
# so the operation is re-issued in that case
533 brc 1,.-4 # can this happen?
534 br %r14
535.align 64
536.Ldsoft:
537___
538$code.=<<___;
# Software path.  %r3..%r14 are stored starting at 24(%r15), which puts the
# out pointer (%r3) at 24 and %r6 at 48.
539 stmg %r3,$ra,24($sp)
540
# load the input block as four zero-extended 32-bit words
541 llgf $s0,0($inp)
542 llgf $s1,4($inp)
543 llgf $s2,8($inp)
544 llgf $s3,12($inp)
545
546 larl $tbl,AES_Td
547 bras $ra,_s390x_AES_decrypt
548
# %r3 is reused as a temporary by the round code, so the out pointer is
# reloaded from its save slot before the result is stored
549 lg $out,24($sp)
550 st $s0,0($out)
551 st $s1,4($out)
552 st $s2,8($out)
553 st $s3,12($out)
554
# restore %r6..%r14 and return
555 lmg %r6,$ra,48($sp)
556 br $ra
557.size AES_decrypt,.-AES_decrypt
558
559.type _s390x_AES_decrypt,\@function
560.align 16
# Software decryption core shared by the callers in this file.
# In:   s0-s3 (%r8-%r11) = the four state words,
#       key (%r4) -> key schedule, with the round count at offset 240,
#       tbl (%r12) -> AES_Td.
# Out:  s0-s3 = resulting state words.
# Uses: %r0-%r3, %r5-%r7 and %r13 as temporaries; %r4 is advanced by 16 per
#       loop iteration; %r14 is also used as an index temporary, so the
#       return address is stored at 152(%r15) on entry and reloaded before
#       the final branch.
561_s390x_AES_decrypt:
562 stg $ra,152($sp)
# XOR the state with the first four key words
563 x $s0,0($key)
564 x $s1,4($key)
565 x $s2,8($key)
566 x $s3,12($key)
567 l $rounds,240($key)
# mask is 0xff shifted left by 3: each byte index is extracted already
# multiplied by 8, the size of one table entry
568 llill $mask,`0xff<<3`
# the loop below runs (round count - 1) times; the last round follows it
569 aghi $rounds,-1
570 j .Ldec_loop
571.align 16
# Main rounds.  Every lookup adds a displacement of 0..3 to the scaled index;
# the per-line comments name the table (Td0..Td3) each displacement stands for.
572.Ldec_loop:
573 srlg $t1,$s0,`16-3`
574 srlg $t2,$s0,`8-3`
575 sllg $t3,$s0,`0+3`
576 srl $s0,`24-3`
577 nr $s0,$mask
578 nr $t1,$mask
579 nr $t2,$mask
580 ngr $t3,$mask
581
582 sllg $i1,$s1,`0+3` # i0
583 srlg $i2,$s1,`16-3`
584 srlg $i3,$s1,`8-3`
585 srl $s1,`24-3`
586 ngr $i1,$mask
587 nr $s1,$mask
588 nr $i2,$mask
589 nr $i3,$mask
590
591 l $s0,0($s0,$tbl) # Td0[s0>>24]
592 l $t1,3($t1,$tbl) # Td1[s0>>16]
593 l $t2,2($t2,$tbl) # Td2[s0>>8]
594 l $t3,1($t3,$tbl) # Td3[s0>>0]
595
596 x $s0,1($i1,$tbl) # Td3[s1>>0]
597 l $s1,0($s1,$tbl) # Td0[s1>>24]
598 x $t2,3($i2,$tbl) # Td1[s1>>16]
599 x $t3,2($i3,$tbl) # Td2[s1>>8]
600
601 srlg $i1,$s2,`8-3` # i0
602 sllg $i2,$s2,`0+3` # i1
603 srlg $i3,$s2,`16-3`
604 srl $s2,`24-3`
605 nr $i1,$mask
606 ngr $i2,$mask
607 nr $s2,$mask
608 nr $i3,$mask
609
610 xr $s1,$t1
611 srlg $ra,$s3,`8-3` # i1
612 srlg $t1,$s3,`16-3` # i0
613 nr $ra,$mask
# step to the next group of four key words
614 la $key,16($key)
615 nr $t1,$mask
616
617 x $s0,2($i1,$tbl) # Td2[s2>>8]
618 x $s1,1($i2,$tbl) # Td3[s2>>0]
619 l $s2,0($s2,$tbl) # Td0[s2>>24]
620 x $t3,3($i3,$tbl) # Td1[s2>>16]
621
622 sllg $i3,$s3,`0+3` # i2
623 srl $s3,`24-3`
624 ngr $i3,$mask
625 nr $s3,$mask
626
627 xr $s2,$t2
628 x $s0,0($key)
629 x $s1,4($key)
630 x $s2,8($key)
631 x $t3,12($key)
632
633 x $s0,3($t1,$tbl) # Td1[s3>>16]
634 x $s1,2($ra,$tbl) # Td2[s3>>8]
635 x $s2,1($i3,$tbl) # Td3[s3>>0]
636 l $s3,0($s3,$tbl) # Td0[s3>>24]
637 xr $s3,$t3
638
639 brct $rounds,.Ldec_loop
640 .align 16
641
# Last round: bytes come from the Td4 table at offset 2048 from tbl.  The
# four loads below touch that table at 64-byte steps; their results are
# overwritten before being used.
642 l $t1,`2048+0`($tbl) # prefetch Td4
643 l $t2,`2048+64`($tbl)
644 l $t3,`2048+128`($tbl)
645 l $i1,`2048+192`($tbl)
# Td4 holds one byte per entry, so the index mask is a plain 0xff from here on
646 llill $mask,0xff
647
648 srlg $i3,$s0,24 # i0
649 srlg $t1,$s0,16
650 srlg $t2,$s0,8
651 nr $s0,$mask # i3
652 nr $t1,$mask
653
654 srlg $i1,$s1,24
655 nr $t2,$mask
656 srlg $i2,$s1,16
657 srlg $ra,$s1,8
658 nr $s1,$mask # i0
659 nr $i2,$mask
660 nr $ra,$mask
661
662 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
663 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
664 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
665 sll $t1,16
666 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
667 sllg $s0,$i3,24
668 sll $t2,8
669
670 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
671 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
672 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
673 sll $i1,24
674 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
675 sll $i2,16
676 sll $i3,8
677 or $s0,$s1
678 or $t1,$i1
679 or $t2,$i2
680 or $t3,$i3
681
682 srlg $i1,$s2,8 # i0
683 srlg $i2,$s2,24
684 srlg $i3,$s2,16
685 nr $s2,$mask # i1
686 nr $i1,$mask
687 nr $i3,$mask
688 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
689 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
690 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
691 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
692 sll $i1,8
693 sll $i2,24
694 or $s0,$i1
695 sll $i3,16
696 or $t2,$i2
697 or $t3,$i3
698
699 srlg $i1,$s3,16 # i0
700 srlg $i2,$s3,8 # i1
701 srlg $i3,$s3,24
702 nr $s3,$mask # i2
703 nr $i1,$mask
704 nr $i2,$mask
705
# reload the return address saved on entry; t0 is the same register as mask
# (%r0), which is not used again below, so it is overwritten with a key word
706 lg $ra,152($sp)
707 or $s1,$t1
708 l $t0,16($key)
709 l $t1,20($key)
710
711 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
712 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
713 sll $i1,16
714 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
715 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
716 sll $i2,8
717 sll $s3,24
718 or $s0,$i1
719 or $s1,$i2
720 or $s2,$t2
721 or $s3,$t3
722
# XOR in the final four key words found at offsets 16..28 from the advanced
# key pointer
723 xr $s0,$t0
724 xr $s1,$t1
725 x $s2,24($key)
726 x $s3,28($key)
727
728 br $ra
729.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
730___
731
732$code.=<<___;
733# void AES_set_encrypt_key(const unsigned char *in, int bits,
734# AES_KEY *key) {
735.globl AES_set_encrypt_key
736.type AES_set_encrypt_key,\@function
737.align 16
738AES_set_encrypt_key:
739 lghi $t0,0
740 clgr $inp,$t0
741 je .Lminus1
742 clgr $key,$t0
743 je .Lminus1
744
745 lghi $t0,128
746 clr $bits,$t0
747 je .Lproceed
748 lghi $t0,192
749 clr $bits,$t0
750 je .Lproceed
751 lghi $t0,256
752 clr $bits,$t0
753 je .Lproceed
754 lghi %r2,-2
755 br %r14
756
757.align 16
758.Lproceed:
759___
760$code.=<<___ if (!$softonly);
761 # convert bits to km code, [128,192,256]->[18,19,20]
762 lhi %r5,-128
763 lhi %r0,18
764 ar %r5,$bits
765 srl %r5,6
766 ar %r5,%r0
767
768 lghi %r0,0 # query capability vector
769 la %r1,16($sp)
770 .long 0xb92f0042 # kmc %r4,%r2
771
772 llihh %r1,0x8000
773 srlg %r1,%r1,0(%r5)
774 ng %r1,16($sp)
775 jz .Lekey_internal
776
777 lmg %r0,%r1,0($inp) # just copy 128 bits...
778 stmg %r0,%r1,0($key)
779 lhi %r0,192
780 cr $bits,%r0
781 jl 1f
782 lg %r1,16($inp)
783 stg %r1,16($key)
784 je 1f
785 lg %r1,24($inp)
786 stg %r1,24($key)
7871: st $bits,236($key) # save bits
788 st %r5,240($key) # save km code
789 lghi %r2,0
790 br %r14
791___
792$code.=<<___;
793.align 16
794.Lekey_internal:
795 stmg %r6,%r13,48($sp) # all non-volatile regs
796
797 larl $tbl,AES_Te+2048
798
799 llgf $s0,0($inp)
800 llgf $s1,4($inp)
801 llgf $s2,8($inp)
802 llgf $s3,12($inp)
803 st $s0,0($key)
804 st $s1,4($key)
805 st $s2,8($key)
806 st $s3,12($key)
807 lghi $t0,128
808 cr $bits,$t0
809 jne .Lnot128
810
811 llill $mask,0xff
812 lghi $t3,0 # i=0
813 lghi $rounds,10
814 st $rounds,240($key)
815
816 llgfr $t2,$s3 # temp=rk[3]
817 srlg $i1,$s3,8
818 srlg $i2,$s3,16
819 srlg $i3,$s3,24
820 nr $t2,$mask
821 nr $i1,$mask
822 nr $i2,$mask
823
824.align 16
825.L128_loop:
826 la $t2,0($t2,$tbl)
827 la $i1,0($i1,$tbl)
828 la $i2,0($i2,$tbl)
829 la $i3,0($i3,$tbl)
830 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
831 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
832 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
833 icm $t2,1,0($i3) # Te4[rk[3]>>24]
834 x $t2,256($t3,$tbl) # rcon[i]
835 xr $s0,$t2 # rk[4]=rk[0]^...
836 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
837 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
838 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
839
840 llgfr $t2,$s3 # temp=rk[3]
841 srlg $i1,$s3,8
842 srlg $i2,$s3,16
843 nr $t2,$mask
844 nr $i1,$mask
845 srlg $i3,$s3,24
846 nr $i2,$mask
847
848 st $s0,16($key)
849 st $s1,20($key)
850 st $s2,24($key)
851 st $s3,28($key)
852 la $key,16($key) # key+=4
853 la $t3,4($t3) # i++
854 brct $rounds,.L128_loop
855 lghi %r2,0
856 lmg %r6,%r13,48($sp)
857 br $ra
858
859.align 16
860.Lnot128:
861 llgf $t0,16($inp)
862 llgf $t1,20($inp)
863 st $t0,16($key)
864 st $t1,20($key)
865 lghi $t0,192
866 cr $bits,$t0
867 jne .Lnot192
868
869 llill $mask,0xff
870 lghi $t3,0 # i=0
871 lghi $rounds,12
872 st $rounds,240($key)
873 lghi $rounds,8
874
875 srlg $i1,$t1,8
876 srlg $i2,$t1,16
877 srlg $i3,$t1,24
878 nr $t1,$mask
879 nr $i1,$mask
880 nr $i2,$mask
881
882.align 16
883.L192_loop:
884 la $t1,0($t1,$tbl)
885 la $i1,0($i1,$tbl)
886 la $i2,0($i2,$tbl)
887 la $i3,0($i3,$tbl)
888 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
889 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
890 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
891 icm $t1,1,0($i3) # Te4[rk[5]>>24]
892 x $t1,256($t3,$tbl) # rcon[i]
893 xr $s0,$t1 # rk[6]=rk[0]^...
894 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
895 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
896 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
897
898 st $s0,24($key)
899 st $s1,28($key)
900 st $s2,32($key)
901 st $s3,36($key)
902 brct $rounds,.L192_continue
903 lghi %r2,0
904 lmg %r6,%r13,48($sp)
905 br $ra
906
907.align 16
908.L192_continue:
909 lgr $t1,$s3
910 x $t1,16($key) # rk[10]=rk[4]^rk[9]
911 st $t1,40($key)
912 x $t1,20($key) # rk[11]=rk[5]^rk[10]
913 st $t1,44($key)
914
915 srlg $i1,$t1,8
916 srlg $i2,$t1,16
917 srlg $i3,$t1,24
918 nr $t1,$mask
919 nr $i1,$mask
920 nr $i2,$mask
921
922 la $key,24($key) # key+=6
923 la $t3,4($t3) # i++
924 j .L192_loop
925
926.align 16
927.Lnot192:
928 llgf $t0,24($inp)
929 llgf $t1,28($inp)
930 st $t0,24($key)
931 st $t1,28($key)
932 llill $mask,0xff
933 lghi $t3,0 # i=0
934 lghi $rounds,14
935 st $rounds,240($key)
936 lghi $rounds,7
937
938 srlg $i1,$t1,8
939 srlg $i2,$t1,16
940 srlg $i3,$t1,24
941 nr $t1,$mask
942 nr $i1,$mask
943 nr $i2,$mask
944
945.align 16
946.L256_loop:
947 la $t1,0($t1,$tbl)
948 la $i1,0($i1,$tbl)
949 la $i2,0($i2,$tbl)
950 la $i3,0($i3,$tbl)
951 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
952 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
953 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
954 icm $t1,1,0($i3) # Te4[rk[7]>>24]
955 x $t1,256($t3,$tbl) # rcon[i]
956 xr $s0,$t1 # rk[8]=rk[0]^...
957 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
958 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
959 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
960 st $s0,32($key)
961 st $s1,36($key)
962 st $s2,40($key)
963 st $s3,44($key)
964 brct $rounds,.L256_continue
965 lghi %r2,0
966 lmg %r6,%r13,48($sp)
967 br $ra
968
969.align 16
970.L256_continue:
971 lgr $t1,$s3 # temp=rk[11]
972 srlg $i1,$s3,8
973 srlg $i2,$s3,16
974 srlg $i3,$s3,24
975 nr $t1,$mask
976 nr $i1,$mask
977 nr $i2,$mask
978 la $t1,0($t1,$tbl)
979 la $i1,0($i1,$tbl)
980 la $i2,0($i2,$tbl)
981 la $i3,0($i3,$tbl)
982 llgc $t1,0($t1) # Te4[rk[11]>>0]
983 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
984 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
985 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
986 x $t1,16($key) # rk[12]=rk[4]^...
987 st $t1,48($key)
988 x $t1,20($key) # rk[13]=rk[5]^rk[12]
989 st $t1,52($key)
990 x $t1,24($key) # rk[14]=rk[6]^rk[13]
991 st $t1,56($key)
992 x $t1,28($key) # rk[15]=rk[7]^rk[14]
993 st $t1,60($key)
994
995 srlg $i1,$t1,8
996 srlg $i2,$t1,16
997 srlg $i3,$t1,24
998 nr $t1,$mask
999 nr $i1,$mask
1000 nr $i2,$mask
1001
1002 la $key,32($key) # key+=8
1003 la $t3,4($t3) # i++
1004 j .L256_loop
1005
1006.Lminus1:
1007 lghi %r2,-1
1008 br $ra
1009.size AES_set_encrypt_key,.-AES_set_encrypt_key
1010
1011# void AES_set_decrypt_key(const unsigned char *in, int bits,
1012# AES_KEY *key) {
# Builds the schedule by calling AES_set_encrypt_key. On the software
# path it then swaps the 16-byte round keys end for end and rewrites
# every word of the inner round keys with the tp1/tp2/tp4/tp8 mix below.
# Returns 0 in %r2, or the non-zero result of AES_set_encrypt_key.
1013.globl AES_set_decrypt_key
1014.type AES_set_decrypt_key,\@function
1015.align 16
1016AES_set_decrypt_key:
1017 stg $key,32($sp) # I rely on AES_set_encrypt_key to
1018 stg $ra,112($sp) # save non-volatile registers!
1019 bras $ra,AES_set_encrypt_key # build the encryption schedule first
1020 lg $key,32($sp) # reload the key pointer saved above
1021 lg $ra,112($sp) # reload the return address saved above
1022 ltgr %r2,%r2 # test the result of AES_set_encrypt_key
1023 bnzr $ra # non-zero: hand it back unchanged
1024___
1025$code.=<<___ if (!$softonly);
1026 l $t0,240($key) # word stored at key+240
1027 lhi $t1,16
1028 cr $t0,$t1
1029 jl .Lgo # below 16: continue with the software schedule
1030 oill $t0,0x80 # set "decrypt" bit
1031 st $t0,240($key)
1032 br $ra
1033
1034.align 16
1035.Ldkey_internal: # internal entry; falls through to .Lgo
1036 stg $key,32($sp)
1037 stg $ra,40($sp)
1038 bras $ra,.Lekey_internal
1039 lg $key,32($sp)
1040 lg $ra,40($sp)
1041___
1042$code.=<<___;
1043
1044.Lgo: llgf $rounds,240($key) # number of rounds
1045 la $i1,0($key) # i1 = first round key
1046 sllg $i2,$rounds,4
1047 la $i2,0($i2,$key) # i2 = key + 16*rounds, the last round key
1048 srl $rounds,1 # rounds/2 exchanges
1049 lghi $t1,-16 # step used to walk i2 backwards
1050
1051.align 16
1052.Linv: lmg $s0,$s1,0($i1) # exchange the round keys at i1 and i2
1053 lmg $s2,$s3,0($i2)
1054 stmg $s0,$s1,0($i2)
1055 stmg $s2,$s3,0($i1)
1056 la $i1,16($i1) # i1 moves up one round key
1057 la $i2,0($t1,$i2) # i2 moves down one round key
1058 brct $rounds,.Linv
1059___
1060$mask80=$i1; # the index registers are free now; reuse them as byte masks
1061$mask1b=$i2;
1062$maskfe=$i3;
1063$code.=<<___;
1064 llgf $rounds,240($key)
1065 aghi $rounds,-1
1066 sll $rounds,2 # (rounds-1)*4
1067 llilh $mask80,0x8080 # high halves; low halves are or-ed in below
1068 llilh $mask1b,0x1b1b
1069 llilh $maskfe,0xfefe
1070 oill $mask80,0x8080 # mask80 = 0x80808080
1071 oill $mask1b,0x1b1b # mask1b = 0x1b1b1b1b
1072 oill $maskfe,0xfefe # maskfe = 0xfefefefe
1073
1074.align 16
1075.Lmix: l $s0,16($key) # tp1
1076 lr $s1,$s0
1077 ngr $s1,$mask80 # keep the high bit of each byte
1078 srlg $t1,$s1,7
1079 slr $s1,$t1 # 0x80 becomes 0x7f in each flagged byte
1080 nr $s1,$mask1b # 0x1b wherever the high bit was set
1081 sllg $t1,$s0,1
1082 nr $t1,$maskfe # every byte shifted left by one
1083 xr $s1,$t1 # tp2
1084
1085 lr $s2,$s1
1086 ngr $s2,$mask80
1087 srlg $t1,$s2,7
1088 slr $s2,$t1
1089 nr $s2,$mask1b
1090 sllg $t1,$s1,1
1091 nr $t1,$maskfe
1092 xr $s2,$t1 # tp4
1093
1094 lr $s3,$s2
1095 ngr $s3,$mask80
1096 srlg $t1,$s3,7
1097 slr $s3,$t1
1098 nr $s3,$mask1b
1099 sllg $t1,$s2,1
1100 nr $t1,$maskfe
1101 xr $s3,$t1 # tp8
1102
1103 xr $s1,$s0 # tp2^tp1
1104 xr $s2,$s0 # tp4^tp1
1105 rll $s0,$s0,24 # = ROTATE(tp1,8)
1106 xr $s2,$s3 # ^=tp8
1107 xr $s0,$s1 # ^=tp2^tp1
1108 xr $s1,$s3 # tp2^tp1^tp8
1109 xr $s0,$s2 # ^=tp4^tp1^tp8
1110 rll $s1,$s1,8
1111 rll $s2,$s2,16
1112 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1113 rll $s3,$s3,24
1114 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1115 xr $s0,$s3 # ^= ROTATE(tp8,8)
1116
1117 st $s0,16($key) # write the mixed word back in place
1118 la $key,4($key) # advance to the next word
1119 brct $rounds,.Lmix
1120
1121 lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
1122 lghi %r2,0
1123 br $ra
1124.size AES_set_decrypt_key,.-AES_set_decrypt_key
1125___
1126
1127#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1128# size_t length, const AES_KEY *key,
1129# unsigned char *ivec, const int enc)
# Two paths are generated. Unless $softonly is set, a kmc-based path is
# emitted and taken when the word at key+240 is 16 or more; otherwise the
# software loops call _s390x_AES_encrypt / _s390x_AES_decrypt per block.
# A trailing partial block is staged through the scratch area at 128(sp).
1130{
1131my $inp="%r2";
1132my $out="%r4"; # length and out are swapped
1133my $len="%r3";
1134my $key="%r5";
1135my $ivp="%r6";
1136
1137$code.=<<___;
1138.globl AES_cbc_encrypt
1139.type AES_cbc_encrypt,\@function
1140.align 16
1141AES_cbc_encrypt:
1142 xgr %r3,%r4 # flip %r3 and %r4, out and len
1143 xgr %r4,%r3
1144 xgr %r3,%r4
1145___
1146$code.=<<___ if (!$softonly);
1147 lhi %r0,16
1148 cl %r0,240($key) # compare 16 with the word at key+240
1149 jh .Lcbc_software # 16 is higher: take the software path
1150
1151 lg %r0,0($ivp) # copy ivec
1152 lg %r1,8($ivp)
1153 stmg %r0,%r1,16($sp)
1154 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1155 stmg %r0,%r1,32($sp)
1156 lmg %r0,%r1,16($key)
1157 stmg %r0,%r1,48($sp)
1158 l %r0,240($key) # load kmc code
1159 lghi $key,15 # res=len%16, len-=res;
1160 ngr $key,$len
1161 slgr $len,$key
1162 la %r1,16($sp) # parameter block - ivec || key
1163 jz .Lkmc_truncated # NOTE(review): cc is still the one from slgr; zero after slgr is tested with brc 2 further down - confirm this mask
1164 .long 0xb92f0042 # kmc %r4,%r2
1165 brc 1,.-4 # pay attention to "partial completion"
1166 ltr $key,$key # any residue bytes left over?
1167 jnz .Lkmc_truncated
1168.Lkmc_done:
1169 lmg %r0,%r1,16($sp) # copy ivec to caller
1170 stg %r0,0($ivp)
1171 stg %r1,8($ivp)
1172 br $ra
1173.align 16
1174.Lkmc_truncated:
1175 ahi $key,-1 # it's the way it's encoded in mvc
1176 tmll %r0,0x80 # is the decrypt bit set in the kmc code?
1177 jnz .Lkmc_truncated_dec
1178 lghi %r1,0
1179 stg %r1,128($sp) # zero the 16-byte scratch block
1180 stg %r1,136($sp)
1181 bras %r1,1f # r1 = address of the mvc template below
1182 mvc 128(1,$sp),0($inp)
11831: ex $key,0(%r1) # run the mvc with its length taken from the residue
1184 la %r1,16($sp) # restore parameter block
1185 la $inp,128($sp) # encrypt the zero-padded scratch block
1186 lghi $len,16
1187 .long 0xb92f0042 # kmc %r4,%r2
1188 j .Lkmc_done
1189.align 16
1190.Lkmc_truncated_dec:
1191 stg $out,64($sp) # remember the real output pointer
1192 la $out,128($sp) # decrypt one block into scratch
1193 lghi $len,16
1194 .long 0xb92f0042 # kmc %r4,%r2
1195 lg $out,64($sp)
1196 bras %r1,2f # r1 = address of the mvc template below
1197 mvc 0(1,$out),128($sp)
11982: ex $key,0(%r1) # copy only the residue bytes to out
1199 j .Lkmc_done
1200.align 16
1201.Lcbc_software:
1202___
1203$code.=<<___;
1204 stmg $key,$ra,40($sp) # save registers key through ra
1205 lhi %r0,0
1206 cl %r0,164($sp) # presumably the enc argument passed on the stack - confirm
1207 je .Lcbc_decrypt # zero selects decryption
1208
1209 larl $tbl,AES_Te
1210
1211 llgf $s0,0($ivp) # state starts out as ivec
1212 llgf $s1,4($ivp)
1213 llgf $s2,8($ivp)
1214 llgf $s3,12($ivp)
1215
1216 lghi $t0,16
1217 slgr $len,$t0 # len -= 16
1218 brc 4,.Lcbc_enc_tail # if borrow
1219.Lcbc_enc_loop:
1220 stmg $inp,$out,16($sp) # keep inp, len and out across the call
1221 x $s0,0($inp) # state ^= input block
1222 x $s1,4($inp)
1223 x $s2,8($inp)
1224 x $s3,12($inp)
1225 lgr %r4,$key # r4 = key for the call
1226
1227 bras $ra,_s390x_AES_encrypt
1228
1229 lmg $inp,$key,16($sp) # reload inp, len, out and key
1230 st $s0,0($out) # store the ciphertext block
1231 st $s1,4($out)
1232 st $s2,8($out)
1233 st $s3,12($out)
1234
1235 la $inp,16($inp)
1236 la $out,16($out)
1237 lghi $t0,16
1238 ltgr $len,$len
1239 jz .Lcbc_enc_done # nothing left to process
1240 slgr $len,$t0
1241 brc 4,.Lcbc_enc_tail # if borrow
1242 j .Lcbc_enc_loop
1243.align 16
1244.Lcbc_enc_done:
1245 lg $ivp,48($sp) # ivp as saved at entry
1246 st $s0,0($ivp) # last ciphertext block becomes the new ivec
1247 st $s1,4($ivp)
1248 st $s2,8($ivp)
1249 st $s3,12($ivp)
1250
1251 lmg %r7,$ra,56($sp)
1252 br $ra
1253
1254.align 16
1255.Lcbc_enc_tail:
1256 aghi $len,15 # remaining bytes minus one, as ex/mvc wants it
1257 lghi $t0,0
1258 stg $t0,128($sp) # zero the 16-byte scratch block
1259 stg $t0,136($sp)
1260 bras $t1,3f # t1 = address of the mvc template below
1261 mvc 128(1,$sp),0($inp)
12623: ex $len,0($t1) # copy the partial block into scratch
1263 lghi $len,0 # the padded block is the final one
1264 la $inp,128($sp)
1265 j .Lcbc_enc_loop
1266
1267.align 16
1268.Lcbc_decrypt:
1269 larl $tbl,AES_Td
1270
1271 lg $t0,0($ivp)
1272 lg $t1,8($ivp)
1273 stmg $t0,$t1,128($sp) # chaining value lives at 128(sp)
1274
1275.Lcbc_dec_loop:
1276 stmg $inp,$out,16($sp) # keep inp, len and out across the call
1277 llgf $s0,0($inp) # state = ciphertext block
1278 llgf $s1,4($inp)
1279 llgf $s2,8($inp)
1280 llgf $s3,12($inp)
1281 lgr %r4,$key # r4 = key for the call
1282
1283 bras $ra,_s390x_AES_decrypt
1284
1285 lmg $inp,$key,16($sp) # reload inp, len, out and key
1286 sllg $s0,$s0,32 # pack s0:s1 into s0 ...
1287 sllg $s2,$s2,32 # ... and s2:s3 into s2
1288 lr $s0,$s1
1289 lr $s2,$s3
1290
1291 lg $t0,0($inp) # this ciphertext block is the next chaining value
1292 lg $t1,8($inp)
1293 xg $s0,128($sp) # xor with the current chaining value
1294 xg $s2,136($sp)
1295 lghi $s1,16
1296 slgr $len,$s1 # len -= 16
1297 brc 4,.Lcbc_dec_tail # if borrow
1298 brc 2,.Lcbc_dec_done # if zero
1299 stg $s0,0($out) # store the plaintext block
1300 stg $s2,8($out)
1301 stmg $t0,$t1,128($sp) # update the chaining value
1302
1303 la $inp,16($inp)
1304 la $out,16($out)
1305 j .Lcbc_dec_loop
1306
1307.Lcbc_dec_done:
1308 stg $s0,0($out)
1309 stg $s2,8($out)
1310.Lcbc_dec_exit:
1311 lmg $ivp,$ra,48($sp)
1312 stmg $t0,$t1,0($ivp) # last ciphertext block goes back as ivec
1313
1314 br $ra
1315
1316.align 16
1317.Lcbc_dec_tail:
1318 aghi $len,15 # remaining bytes minus one, as ex/mvc wants it
1319 stg $s0,128($sp) # park the plaintext block in scratch
1320 stg $s2,136($sp)
1321 bras $s1,4f # s1 = address of the mvc template below
1322 mvc 0(1,$out),128($sp)
13234: ex $len,0($s1) # copy only the remaining bytes to out
1324 j .Lcbc_dec_exit
1325.size AES_cbc_encrypt,.-AES_cbc_encrypt
1326___
1327}
# Append the identification string, expand embedded expressions, print.
1328$code.=<<___;
1329.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
1330___
1331
1332$code =~ s/\`([^\`]*)\`/eval $1/gem; # replace each backquoted span with the result of evaluating it as Perl
1333print $code; # write the generated text to standard output
diff --git a/src/lib/libcrypto/aes/asm/aes-sparcv9.pl b/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
new file mode 100755
index 0000000000..c57b3a2d6d
--- /dev/null
+++ b/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
@@ -0,0 +1,1181 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.1
10#
11# The major reason for undertaking this effort was to mitigate the hazard of
12# cache-timing attacks. This is [currently and initially!] addressed in
13# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14# 2. References to them are scheduled for L2 cache latency, meaning
15# that the tables don't have to reside in L1 cache. Once again, this
16# is an initial draft and one should expect more countermeasures to
17# be implemented...
18#
19# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
20# round.
21#
22# Even though performance was not the primary goal [on the contrary,
23# extra shifts "induced" by compressed S-box and longer loop epilogue
24# "induced" by scheduling for L2 have negative effect on performance],
25# the code turned out to run in ~23 cycles per processed byte en-/
26# decrypted with 128-bit key. This is pretty good result for code
27# with mentioned qualities and UltraSPARC core. Compared to Sun C
28# generated code my encrypt procedure runs just a few percent faster,
29# while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
30# optimal decrypt procedure]. Compared to GNU C generated code both
31# procedures are more than 60% faster:-)
32
# ABI selection and symbolic register names used by the code templates.
33$bits=32; # default when no 64-bit flag is given
34for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); } # -m64 or -xarch=v9 on the command line selects 64-bit
35if ($bits==64) { $bias=2047; $frame=192; } # $bias is added to %sp in stack references, $frame is the save size
36else { $bias=0; $frame=112; }
37$locals=16; # extra bytes reserved by the internal routines on top of $frame
38
# $acc0-$acc15: index / table-lookup temporaries
39$acc0="%l0";
40$acc1="%o0";
41$acc2="%o1";
42$acc3="%o2";
43
44$acc4="%l1";
45$acc5="%o3";
46$acc6="%o4";
47$acc7="%o5";
48
49$acc8="%l2";
50$acc9="%o7";
51$acc10="%g1";
52$acc11="%g2";
53
54$acc12="%l3";
55$acc13="%g3";
56$acc14="%g4";
57$acc15="%g5";
58
# $t0-$t3: loaded with round-key words, then used as the alternate state
59$t0="%l4";
60$t1="%l5";
61$t2="%l6";
62$t3="%l7";
63
# $s0-$s3: the four state words; $tbl: table base; $key: key schedule
64$s0="%i0";
65$s1="%i1";
66$s2="%i2";
67$s3="%i3";
68$tbl="%i4";
69$key="%i5";
70$rounds="%i7"; # aliases with return address, which is off-loaded to stack
71
# Appends one ".long" line to $code per argument, with the value written
# twice, so that every table entry occupies 8 bytes.
72sub _data_word()
73{ my $i;
74 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } # consume arguments until none are left
75}
76
77$code.=<<___ if ($bits==64);
78.register %g2,#scratch
79.register %g3,#scratch
80___
81$code.=<<___;
82.section ".text",#alloc,#execinstr
83
84.align 256
85AES_Te:
86___
87&_data_word(
88 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
89 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
90 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
91 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
92 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
93 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
94 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
95 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
96 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
97 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
98 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
99 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
100 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
101 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
102 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
103 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
104 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
105 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
106 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
107 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
108 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
109 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
110 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
111 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
112 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
113 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
114 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
115 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
116 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
117 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
118 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
119 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
120 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
121 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
122 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
123 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
124 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
125 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
126 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
127 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
128 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
129 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
130 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
131 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
132 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
133 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
134 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
135 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
136 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
137 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
138 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
139 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
140 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
141 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
142 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
143 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
144 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
145 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
146 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
147 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
148 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
149 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
150 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
151 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
152$code.=<<___;
153 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
154 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
155 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
156 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
157 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
158 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
159 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
160 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
161 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
162 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
163 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
164 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
165 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
166 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
167 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
168 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
169 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
170 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
171 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
172 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
173 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
174 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
175 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
176 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
177 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
178 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
179 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
180 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
181 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
182 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
183 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
184 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
185.type AES_Te,#object
186.size AES_Te,(.-AES_Te)
187
! _sparcv9_AES_encrypt: block-encrypt core called by both AES_encrypt paths.
! After the save, %i0-%i3 hold the four state words (input and result),
! %i4 the table base and %i5 the key schedule pointer.
! Each main-table entry is 8 bytes (the word stored twice), hence the
! index scaling by 8 (mask 2040) and the srlx shifts applied to entries.
188.align 64
189.skip 16
190_sparcv9_AES_encrypt:
191 save %sp,-$frame-$locals,%sp
192 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
193 ld [$key+240],$rounds ! rounds word at key+240
194 ld [$key+0],$t0
195 ld [$key+4],$t1 !
196 ld [$key+8],$t2
197 srl $rounds,1,$rounds ! each loop pass performs two rounds
198 xor $t0,$s0,$s0 ! state ^= first round key
199 ld [$key+12],$t3
200 srl $s0,21,$acc0 ! top byte of the word, times 8 once masked
201 xor $t1,$s1,$s1
202 ld [$key+16],$t0
203 srl $s1,13,$acc1 !
204 xor $t2,$s2,$s2
205 ld [$key+20],$t1
206 xor $t3,$s3,$s3
207 ld [$key+24],$t2
208 and $acc0,2040,$acc0
209 ld [$key+28],$t3
210 nop
211.Lenc_loop:
212 srl $s2,5,$acc2 !
213 and $acc1,2040,$acc1
214 ldx [$tbl+$acc0],$acc0
215 sll $s3,3,$acc3
216 and $acc2,2040,$acc2
217 ldx [$tbl+$acc1],$acc1
218 srl $s1,21,$acc4
219 and $acc3,2040,$acc3
220 ldx [$tbl+$acc2],$acc2 !
221 srl $s2,13,$acc5
222 and $acc4,2040,$acc4
223 ldx [$tbl+$acc3],$acc3
224 srl $s3,5,$acc6
225 and $acc5,2040,$acc5
226 ldx [$tbl+$acc4],$acc4
227 fmovs %f0,%f0
228 sll $s0,3,$acc7 !
229 and $acc6,2040,$acc6
230 ldx [$tbl+$acc5],$acc5
231 srl $s2,21,$acc8
232 and $acc7,2040,$acc7
233 ldx [$tbl+$acc6],$acc6
234 srl $s3,13,$acc9
235 and $acc8,2040,$acc8
236 ldx [$tbl+$acc7],$acc7 !
237 srl $s0,5,$acc10
238 and $acc9,2040,$acc9
239 ldx [$tbl+$acc8],$acc8
240 sll $s1,3,$acc11
241 and $acc10,2040,$acc10
242 ldx [$tbl+$acc9],$acc9
243 fmovs %f0,%f0
244 srl $s3,21,$acc12 !
245 and $acc11,2040,$acc11
246 ldx [$tbl+$acc10],$acc10
247 srl $s0,13,$acc13
248 and $acc12,2040,$acc12
249 ldx [$tbl+$acc11],$acc11
250 srl $s1,5,$acc14
251 and $acc13,2040,$acc13
252 ldx [$tbl+$acc12],$acc12 !
253 sll $s2,3,$acc15
254 and $acc14,2040,$acc14
255 ldx [$tbl+$acc13],$acc13
256 and $acc15,2040,$acc15
257 add $key,32,$key ! two round keys are consumed per pass
258 ldx [$tbl+$acc14],$acc14
259 fmovs %f0,%f0
260 subcc $rounds,1,$rounds !
261 ldx [$tbl+$acc15],$acc15
262 bz,a,pn %icc,.Lenc_last ! counter hit zero: finish with the byte table
263 add $tbl,2048,$rounds ! annulled unless taken: base of the byte table
264
265 srlx $acc1,8,$acc1 ! doubled entry: low word ends up rotated by 8
266 xor $acc0,$t0,$t0
267 ld [$key+0],$s0
268 fmovs %f0,%f0
269 srlx $acc2,16,$acc2 !
270 xor $acc1,$t0,$t0
271 ld [$key+4],$s1
272 srlx $acc3,24,$acc3
273 xor $acc2,$t0,$t0
274 ld [$key+8],$s2
275 srlx $acc5,8,$acc5
276 xor $acc3,$t0,$t0
277 ld [$key+12],$s3 !
278 srlx $acc6,16,$acc6
279 xor $acc4,$t1,$t1
280 fmovs %f0,%f0
281 srlx $acc7,24,$acc7
282 xor $acc5,$t1,$t1
283 srlx $acc9,8,$acc9
284 xor $acc6,$t1,$t1
285 srlx $acc10,16,$acc10 !
286 xor $acc7,$t1,$t1
287 srlx $acc11,24,$acc11
288 xor $acc8,$t2,$t2
289 srlx $acc13,8,$acc13
290 xor $acc9,$t2,$t2
291 srlx $acc14,16,$acc14
292 xor $acc10,$t2,$t2
293 srlx $acc15,24,$acc15 !
294 xor $acc11,$t2,$t2
295 xor $acc12,$acc14,$acc14
296 xor $acc13,$t3,$t3
297 srl $t0,21,$acc0
298 xor $acc14,$t3,$t3
299 srl $t1,13,$acc1
300 xor $acc15,$t3,$t3
301
302 and $acc0,2040,$acc0 !
303 srl $t2,5,$acc2
304 and $acc1,2040,$acc1
305 ldx [$tbl+$acc0],$acc0
306 sll $t3,3,$acc3
307 and $acc2,2040,$acc2
308 ldx [$tbl+$acc1],$acc1
309 fmovs %f0,%f0
310 srl $t1,21,$acc4 !
311 and $acc3,2040,$acc3
312 ldx [$tbl+$acc2],$acc2
313 srl $t2,13,$acc5
314 and $acc4,2040,$acc4
315 ldx [$tbl+$acc3],$acc3
316 srl $t3,5,$acc6
317 and $acc5,2040,$acc5
318 ldx [$tbl+$acc4],$acc4 !
319 sll $t0,3,$acc7
320 and $acc6,2040,$acc6
321 ldx [$tbl+$acc5],$acc5
322 srl $t2,21,$acc8
323 and $acc7,2040,$acc7
324 ldx [$tbl+$acc6],$acc6
325 fmovs %f0,%f0
326 srl $t3,13,$acc9 !
327 and $acc8,2040,$acc8
328 ldx [$tbl+$acc7],$acc7
329 srl $t0,5,$acc10
330 and $acc9,2040,$acc9
331 ldx [$tbl+$acc8],$acc8
332 sll $t1,3,$acc11
333 and $acc10,2040,$acc10
334 ldx [$tbl+$acc9],$acc9 !
335 srl $t3,21,$acc12
336 and $acc11,2040,$acc11
337 ldx [$tbl+$acc10],$acc10
338 srl $t0,13,$acc13
339 and $acc12,2040,$acc12
340 ldx [$tbl+$acc11],$acc11
341 fmovs %f0,%f0
342 srl $t1,5,$acc14 !
343 and $acc13,2040,$acc13
344 ldx [$tbl+$acc12],$acc12
345 sll $t2,3,$acc15
346 and $acc14,2040,$acc14
347 ldx [$tbl+$acc13],$acc13
348 srlx $acc1,8,$acc1
349 and $acc15,2040,$acc15
350 ldx [$tbl+$acc14],$acc14 !
351
352 srlx $acc2,16,$acc2
353 xor $acc0,$s0,$s0
354 ldx [$tbl+$acc15],$acc15
355 srlx $acc3,24,$acc3
356 xor $acc1,$s0,$s0
357 ld [$key+16],$t0
358 fmovs %f0,%f0
359 srlx $acc5,8,$acc5 !
360 xor $acc2,$s0,$s0
361 ld [$key+20],$t1
362 srlx $acc6,16,$acc6
363 xor $acc3,$s0,$s0
364 ld [$key+24],$t2
365 srlx $acc7,24,$acc7
366 xor $acc4,$s1,$s1
367 ld [$key+28],$t3 !
368 srlx $acc9,8,$acc9
369 xor $acc5,$s1,$s1
370 ldx [$tbl+2048+0],%g0 ! prefetch te4
371 srlx $acc10,16,$acc10
372 xor $acc6,$s1,$s1
373 ldx [$tbl+2048+32],%g0 ! prefetch te4
374 srlx $acc11,24,$acc11
375 xor $acc7,$s1,$s1
376 ldx [$tbl+2048+64],%g0 ! prefetch te4
377 srlx $acc13,8,$acc13
378 xor $acc8,$s2,$s2
379 ldx [$tbl+2048+96],%g0 ! prefetch te4
380 srlx $acc14,16,$acc14 !
381 xor $acc9,$s2,$s2
382 ldx [$tbl+2048+128],%g0 ! prefetch te4
383 srlx $acc15,24,$acc15
384 xor $acc10,$s2,$s2
385 ldx [$tbl+2048+160],%g0 ! prefetch te4
386 srl $s0,21,$acc0
387 xor $acc11,$s2,$s2
388 ldx [$tbl+2048+192],%g0 ! prefetch te4
389 xor $acc12,$acc14,$acc14
390 xor $acc13,$s3,$s3
391 ldx [$tbl+2048+224],%g0 ! prefetch te4
392 srl $s1,13,$acc1 !
393 xor $acc14,$s3,$s3
394 xor $acc15,$s3,$s3
395 ba .Lenc_loop
396 and $acc0,2040,$acc0 ! executed in the delay slot of the branch
397
398.align 32
399.Lenc_last: ! final round: single bytes fetched from the table at tbl+2048
400 srlx $acc1,8,$acc1 !
401 xor $acc0,$t0,$t0
402 ld [$key+0],$s0
403 srlx $acc2,16,$acc2
404 xor $acc1,$t0,$t0
405 ld [$key+4],$s1
406 srlx $acc3,24,$acc3
407 xor $acc2,$t0,$t0
408 ld [$key+8],$s2 !
409 srlx $acc5,8,$acc5
410 xor $acc3,$t0,$t0
411 ld [$key+12],$s3
412 srlx $acc6,16,$acc6
413 xor $acc4,$t1,$t1
414 srlx $acc7,24,$acc7
415 xor $acc5,$t1,$t1
416 srlx $acc9,8,$acc9 !
417 xor $acc6,$t1,$t1
418 srlx $acc10,16,$acc10
419 xor $acc7,$t1,$t1
420 srlx $acc11,24,$acc11
421 xor $acc8,$t2,$t2
422 srlx $acc13,8,$acc13
423 xor $acc9,$t2,$t2
424 srlx $acc14,16,$acc14 !
425 xor $acc10,$t2,$t2
426 srlx $acc15,24,$acc15
427 xor $acc11,$t2,$t2
428 xor $acc12,$acc14,$acc14
429 xor $acc13,$t3,$t3
430 srl $t0,24,$acc0
431 xor $acc14,$t3,$t3
432 srl $t1,16,$acc1 !
433 xor $acc15,$t3,$t3
434
435 srl $t2,8,$acc2
436 and $acc1,255,$acc1
437 ldub [$rounds+$acc0],$acc0 ! the rounds register now points at the byte table
438 srl $t1,24,$acc4
439 and $acc2,255,$acc2
440 ldub [$rounds+$acc1],$acc1
441 srl $t2,16,$acc5 !
442 and $t3,255,$acc3
443 ldub [$rounds+$acc2],$acc2
444 ldub [$rounds+$acc3],$acc3
445 srl $t3,8,$acc6
446 and $acc5,255,$acc5
447 ldub [$rounds+$acc4],$acc4
448 fmovs %f0,%f0
449 srl $t2,24,$acc8 !
450 and $acc6,255,$acc6
451 ldub [$rounds+$acc5],$acc5
452 srl $t3,16,$acc9
453 and $t0,255,$acc7
454 ldub [$rounds+$acc6],$acc6
455 ldub [$rounds+$acc7],$acc7
456 fmovs %f0,%f0
457 srl $t0,8,$acc10 !
458 and $acc9,255,$acc9
459 ldub [$rounds+$acc8],$acc8
460 srl $t3,24,$acc12
461 and $acc10,255,$acc10
462 ldub [$rounds+$acc9],$acc9
463 srl $t0,16,$acc13
464 and $t1,255,$acc11
465 ldub [$rounds+$acc10],$acc10 !
466 srl $t1,8,$acc14
467 and $acc13,255,$acc13
468 ldub [$rounds+$acc11],$acc11
469 ldub [$rounds+$acc12],$acc12
470 and $acc14,255,$acc14
471 ldub [$rounds+$acc13],$acc13
472 and $t2,255,$acc15
473 ldub [$rounds+$acc14],$acc14 !
474
475 sll $acc0,24,$acc0 ! move the looked-up byte into its lane
476 xor $acc3,$s0,$s0
477 ldub [$rounds+$acc15],$acc15
478 sll $acc1,16,$acc1
479 xor $acc0,$s0,$s0
480 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
481 fmovs %f0,%f0
482 sll $acc2,8,$acc2 !
483 xor $acc1,$s0,$s0
484 sll $acc4,24,$acc4
485 xor $acc2,$s0,$s0
486 sll $acc5,16,$acc5
487 xor $acc7,$s1,$s1
488 sll $acc6,8,$acc6
489 xor $acc4,$s1,$s1
490 sll $acc8,24,$acc8 !
491 xor $acc5,$s1,$s1
492 sll $acc9,16,$acc9
493 xor $acc11,$s2,$s2
494 sll $acc10,8,$acc10
495 xor $acc6,$s1,$s1
496 sll $acc12,24,$acc12
497 xor $acc8,$s2,$s2
498 sll $acc13,16,$acc13 !
499 xor $acc9,$s2,$s2
500 sll $acc14,8,$acc14
501 xor $acc10,$s2,$s2
502 xor $acc12,$acc14,$acc14
503 xor $acc13,$s3,$s3
504 xor $acc14,$s3,$s3
505 xor $acc15,$s3,$s3
506
507 ret
508 restore ! result words are left in %i0-%i3, the caller %o0-%o3
509.type _sparcv9_AES_encrypt,#function
510.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
511
! AES_encrypt: encrypts the 16 bytes at %o0 into the 16 bytes at %o1;
! %o2 is handed to _sparcv9_AES_encrypt as the key schedule pointer.
! Word loads and stores are used when both pointers are 4-byte aligned,
! otherwise the block is read and written one byte at a time.
512.align 32
513.globl AES_encrypt
514AES_encrypt:
515 or %o0,%o1,%g1
516 andcc %g1,3,%g0 ! low two bits of either pointer set?
517 bnz,pn %xcc,.Lunaligned_enc
518 save %sp,-$frame,%sp ! delay slot: runs on both paths
519
520 ld [%i0+0],%o0 ! load the block as four words
521 ld [%i0+4],%o1
522 ld [%i0+8],%o2
523 ld [%i0+12],%o3
524
5251: call .+8 ! leaves the address of this call in %o7
526 add %o7,AES_Te-1b,%o4 ! table address, computed relative to label 1
527 call _sparcv9_AES_encrypt
528 mov %i2,%o5 ! delay slot: key pointer for the core
529
530 st %o0,[%i1+0] ! store the four result words
531 st %o1,[%i1+4]
532 st %o2,[%i1+8]
533 st %o3,[%i1+12]
534
535 ret
536 restore
537
538.align 32
539.Lunaligned_enc:
540 ldub [%i0+0],%l0 ! build each word from four bytes, first byte highest
541 ldub [%i0+1],%l1
542 ldub [%i0+2],%l2
543
544 sll %l0,24,%l0
545 ldub [%i0+3],%l3
546 sll %l1,16,%l1
547 ldub [%i0+4],%l4
548 sll %l2,8,%l2
549 or %l1,%l0,%l0
550 ldub [%i0+5],%l5
551 sll %l4,24,%l4
552 or %l3,%l2,%l2
553 ldub [%i0+6],%l6
554 sll %l5,16,%l5
555 or %l0,%l2,%o0 ! first input word
556 ldub [%i0+7],%l7
557
558 sll %l6,8,%l6
559 or %l5,%l4,%l4
560 ldub [%i0+8],%l0
561 or %l7,%l6,%l6
562 ldub [%i0+9],%l1
563 or %l4,%l6,%o1 ! second input word
564 ldub [%i0+10],%l2
565
566 sll %l0,24,%l0
567 ldub [%i0+11],%l3
568 sll %l1,16,%l1
569 ldub [%i0+12],%l4
570 sll %l2,8,%l2
571 or %l1,%l0,%l0
572 ldub [%i0+13],%l5
573 sll %l4,24,%l4
574 or %l3,%l2,%l2
575 ldub [%i0+14],%l6
576 sll %l5,16,%l5
577 or %l0,%l2,%o2 ! third input word
578 ldub [%i0+15],%l7
579
580 sll %l6,8,%l6
581 or %l5,%l4,%l4
582 or %l7,%l6,%l6
583 or %l4,%l6,%o3 ! fourth input word
584
5851: call .+8 ! leaves the address of this call in %o7
586 add %o7,AES_Te-1b,%o4 ! table address, computed relative to label 1
587 call _sparcv9_AES_encrypt
588 mov %i2,%o5 ! delay slot: key pointer for the core
589
590 srl %o0,24,%l0 ! write the result back one byte at a time
591 srl %o0,16,%l1
592 stb %l0,[%i1+0]
593 srl %o0,8,%l2
594 stb %l1,[%i1+1]
595 stb %l2,[%i1+2]
596 srl %o1,24,%l4
597 stb %o0,[%i1+3]
598
599 srl %o1,16,%l5
600 stb %l4,[%i1+4]
601 srl %o1,8,%l6
602 stb %l5,[%i1+5]
603 stb %l6,[%i1+6]
604 srl %o2,24,%l0
605 stb %o1,[%i1+7]
606
607 srl %o2,16,%l1
608 stb %l0,[%i1+8]
609 srl %o2,8,%l2
610 stb %l1,[%i1+9]
611 stb %l2,[%i1+10]
612 srl %o3,24,%l4
613 stb %o2,[%i1+11]
614
615 srl %o3,16,%l5
616 stb %l4,[%i1+12]
617 srl %o3,8,%l6
618 stb %l5,[%i1+13]
619 stb %l6,[%i1+14]
620 stb %o3,[%i1+15]
621
622 ret
623 restore
624.type AES_encrypt,#function
625.size AES_encrypt,(.-AES_encrypt)
626
627___
628
629$code.=<<___;
630.align 256
631AES_Td:
632___
633&_data_word(
634 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
635 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
636 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
637 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
638 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
639 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
640 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
641 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
642 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
643 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
644 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
645 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
646 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
647 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
648 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
649 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
650 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
651 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
652 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
653 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
654 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
655 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
656 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
657 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
658 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
659 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
660 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
661 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
662 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
663 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
664 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
665 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
666 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
667 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
668 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
669 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
670 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
671 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
672 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
673 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
674 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
675 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
676 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
677 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
678 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
679 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
680 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
681 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
682 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
683 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
684 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
685 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
686 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
687 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
688 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
689 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
690 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
691 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
692 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
693 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
694 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
695 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
696 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
697 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
698$code.=<<___;
699 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
700 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
701 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
702 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
703 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
704 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
705 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
706 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
707 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
708 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
709 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
710 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
711 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
712 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
713 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
714 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
715 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
716 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
717 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
718 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
719 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
720 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
721 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
722 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
723 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
724 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
725 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
726 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
727 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
728 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
729 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
730 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
731.type AES_Td,#object
732.size AES_Td,(.-AES_Td)
733
! _sparcv9_AES_decrypt -- internal core that decrypts one AES block held
! in registers.
!
! Register roles are named by Perl variables interpolated into this text
! (s0-s3, t0-t3, acc0-acc15, key, tbl, rounds, frame, locals, bias); the
! registers they stand for are assigned earlier in the script.
!
! In:   s0..s3 = the four 32-bit words of the input block
!       key    = pointer to the key schedule; the round count is read
!                from offset 240 and round keys from offset 0 upwards
!       tbl    = pointer to the lookup table: 8-byte entries indexed by
!                (byte * 8) in the first 2048 bytes, followed by a byte
!                table at offset 2048
! Out:  s0..s3 = the four 32-bit words of the result block
!
! The routine opens its own register window with save, so the caller's
! %o registers are this routine's %i registers.
! NOTE(review): AES_decrypt passes the block in %o0-%o3, the table in %o4
! and the key in %o5, so s0-s3, tbl and key are presumably %i0-%i5 --
! confirm against the variable assignments at the top of the script.
! NOTE(review): the bare '!' at the end of some lines looks like an
! instruction-scheduling marker left by the author -- confirm.
734.align 64
735.skip 16
736_sparcv9_AES_decrypt:
737 save %sp,-$frame-$locals,%sp
! NOTE(review): %i7 is spilled here and reloaded just before ret,
! presumably because it doubles as one of the acc scratch registers --
! confirm against the register assignments.
738 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
! Set-up: the round count is halved because each pass of the loop below
! performs two rounds. The first four key words are XORed into s0-s3, the
! next four are pre-loaded into t0-t3, and the first two table offsets
! (acc0, acc1) are started ahead of the loop.
739 ld [$key+240],$rounds
740 ld [$key+0],$t0
741 ld [$key+4],$t1 !
742 ld [$key+8],$t2
743 ld [$key+12],$t3
744 srl $rounds,1,$rounds
745 xor $t0,$s0,$s0
746 ld [$key+16],$t0
747 xor $t1,$s1,$s1
748 ld [$key+20],$t1
749 srl $s0,21,$acc0 !
750 xor $t2,$s2,$s2
751 ld [$key+24],$t2
752 xor $t3,$s3,$s3
753 and $acc0,2040,$acc0
754 ld [$key+28],$t3
755 srl $s3,13,$acc1
756 nop
! Main loop, first half: sixteen table lookups driven by the bytes of
! s0-s3. A table offset is (byte * 8): srl 21, srl 13, srl 5 and sll 3,
! each followed by 'and 2040', select bytes 3, 2, 1 and 0 of a word.
! ldx fetches the whole 8-byte entry.
! NOTE(review): the later srlx by 8, 16 and 24 presumably turn the entry
! into byte-rotated copies of one 32-bit table word (entry holding the
! word twice) -- confirm against the table emission earlier in the script.
757.Ldec_loop:
758 srl $s2,5,$acc2 !
759 and $acc1,2040,$acc1
760 ldx [$tbl+$acc0],$acc0
761 sll $s1,3,$acc3
762 and $acc2,2040,$acc2
763 ldx [$tbl+$acc1],$acc1
764 srl $s1,21,$acc4
765 and $acc3,2040,$acc3
766 ldx [$tbl+$acc2],$acc2 !
767 srl $s0,13,$acc5
768 and $acc4,2040,$acc4
769 ldx [$tbl+$acc3],$acc3
770 srl $s3,5,$acc6
771 and $acc5,2040,$acc5
772 ldx [$tbl+$acc4],$acc4
773 fmovs %f0,%f0
774 sll $s2,3,$acc7 !
775 and $acc6,2040,$acc6
776 ldx [$tbl+$acc5],$acc5
777 srl $s2,21,$acc8
778 and $acc7,2040,$acc7
779 ldx [$tbl+$acc6],$acc6
780 srl $s1,13,$acc9
781 and $acc8,2040,$acc8
782 ldx [$tbl+$acc7],$acc7 !
783 srl $s0,5,$acc10
784 and $acc9,2040,$acc9
785 ldx [$tbl+$acc8],$acc8
786 sll $s3,3,$acc11
787 and $acc10,2040,$acc10
788 ldx [$tbl+$acc9],$acc9
789 fmovs %f0,%f0
790 srl $s3,21,$acc12 !
791 and $acc11,2040,$acc11
792 ldx [$tbl+$acc10],$acc10
793 srl $s2,13,$acc13
794 and $acc12,2040,$acc12
795 ldx [$tbl+$acc11],$acc11
796 srl $s1,5,$acc14
797 and $acc13,2040,$acc13
798 ldx [$tbl+$acc12],$acc12 !
799 sll $s0,3,$acc15
800 and $acc14,2040,$acc14
801 ldx [$tbl+$acc13],$acc13
802 and $acc15,2040,$acc15
! Advance the key pointer by the two round keys (2 * 16 bytes) that one
! loop pass consumes.
803 add $key,32,$key
804 ldx [$tbl+$acc14],$acc14
805 fmovs %f0,%f0
806 subcc $rounds,1,$rounds !
807 ldx [$tbl+$acc15],$acc15
! Leave the loop when the pass counter reaches zero. The ',a' (annul) bit
! means the delay-slot add below runs only when the branch is taken, so
! rounds is re-used as a pointer to the byte table at tbl+2048 only on the
! way to .Ldec_last.
808 bz,a,pn %icc,.Ldec_last
809 add $tbl,2048,$rounds
810
! Fold the sixteen fetched entries into t0-t3 (which already hold round-key
! words) while s0-s3 are reloaded with the next four round-key words.
! acc12 is merged into acc14 before being applied to t3.
811 srlx $acc1,8,$acc1
812 xor $acc0,$t0,$t0
813 ld [$key+0],$s0
814 fmovs %f0,%f0
815 srlx $acc2,16,$acc2 !
816 xor $acc1,$t0,$t0
817 ld [$key+4],$s1
818 srlx $acc3,24,$acc3
819 xor $acc2,$t0,$t0
820 ld [$key+8],$s2
821 srlx $acc5,8,$acc5
822 xor $acc3,$t0,$t0
823 ld [$key+12],$s3 !
824 srlx $acc6,16,$acc6
825 xor $acc4,$t1,$t1
826 fmovs %f0,%f0
827 srlx $acc7,24,$acc7
828 xor $acc5,$t1,$t1
829 srlx $acc9,8,$acc9
830 xor $acc6,$t1,$t1
831 srlx $acc10,16,$acc10 !
832 xor $acc7,$t1,$t1
833 srlx $acc11,24,$acc11
834 xor $acc8,$t2,$t2
835 srlx $acc13,8,$acc13
836 xor $acc9,$t2,$t2
837 srlx $acc14,16,$acc14
838 xor $acc10,$t2,$t2
839 srlx $acc15,24,$acc15 !
840 xor $acc11,$t2,$t2
841 xor $acc12,$acc14,$acc14
842 xor $acc13,$t3,$t3
843 srl $t0,21,$acc0
844 xor $acc14,$t3,$t3
845 xor $acc15,$t3,$t3
846 srl $t3,13,$acc1
847
! Main loop, second half: the same sixteen lookups, now driven by the
! bytes of t0-t3.
848 and $acc0,2040,$acc0 !
849 srl $t2,5,$acc2
850 and $acc1,2040,$acc1
851 ldx [$tbl+$acc0],$acc0
852 sll $t1,3,$acc3
853 and $acc2,2040,$acc2
854 ldx [$tbl+$acc1],$acc1
855 fmovs %f0,%f0
856 srl $t1,21,$acc4 !
857 and $acc3,2040,$acc3
858 ldx [$tbl+$acc2],$acc2
859 srl $t0,13,$acc5
860 and $acc4,2040,$acc4
861 ldx [$tbl+$acc3],$acc3
862 srl $t3,5,$acc6
863 and $acc5,2040,$acc5
864 ldx [$tbl+$acc4],$acc4 !
865 sll $t2,3,$acc7
866 and $acc6,2040,$acc6
867 ldx [$tbl+$acc5],$acc5
868 srl $t2,21,$acc8
869 and $acc7,2040,$acc7
870 ldx [$tbl+$acc6],$acc6
871 fmovs %f0,%f0
872 srl $t1,13,$acc9 !
873 and $acc8,2040,$acc8
874 ldx [$tbl+$acc7],$acc7
875 srl $t0,5,$acc10
876 and $acc9,2040,$acc9
877 ldx [$tbl+$acc8],$acc8
878 sll $t3,3,$acc11
879 and $acc10,2040,$acc10
880 ldx [$tbl+$acc9],$acc9 !
881 srl $t3,21,$acc12
882 and $acc11,2040,$acc11
883 ldx [$tbl+$acc10],$acc10
884 srl $t2,13,$acc13
885 and $acc12,2040,$acc12
886 ldx [$tbl+$acc11],$acc11
887 fmovs %f0,%f0
888 srl $t1,5,$acc14 !
889 and $acc13,2040,$acc13
890 ldx [$tbl+$acc12],$acc12
891 sll $t0,3,$acc15
892 and $acc14,2040,$acc14
893 ldx [$tbl+$acc13],$acc13
894 srlx $acc1,8,$acc1
895 and $acc15,2040,$acc15
896 ldx [$tbl+$acc14],$acc14 !
897
! Fold the fetched entries into s0-s3 and reload t0-t3 with the round-key
! words at key+16..28 for the next pass. The loads into %g0 discard their
! result; they only touch the byte table at tbl+2048, 32 bytes apart, so
! it is cached before the last round uses it.
898 srlx $acc2,16,$acc2
899 xor $acc0,$s0,$s0
900 ldx [$tbl+$acc15],$acc15
901 srlx $acc3,24,$acc3
902 xor $acc1,$s0,$s0
903 ld [$key+16],$t0
904 fmovs %f0,%f0
905 srlx $acc5,8,$acc5 !
906 xor $acc2,$s0,$s0
907 ld [$key+20],$t1
908 srlx $acc6,16,$acc6
909 xor $acc3,$s0,$s0
910 ld [$key+24],$t2
911 srlx $acc7,24,$acc7
912 xor $acc4,$s1,$s1
913 ld [$key+28],$t3 !
914 srlx $acc9,8,$acc9
915 xor $acc5,$s1,$s1
916 ldx [$tbl+2048+0],%g0 ! prefetch td4
917 srlx $acc10,16,$acc10
918 xor $acc6,$s1,$s1
919 ldx [$tbl+2048+32],%g0 ! prefetch td4
920 srlx $acc11,24,$acc11
921 xor $acc7,$s1,$s1
922 ldx [$tbl+2048+64],%g0 ! prefetch td4
923 srlx $acc13,8,$acc13
924 xor $acc8,$s2,$s2
925 ldx [$tbl+2048+96],%g0 ! prefetch td4
926 srlx $acc14,16,$acc14 !
927 xor $acc9,$s2,$s2
928 ldx [$tbl+2048+128],%g0 ! prefetch td4
929 srlx $acc15,24,$acc15
930 xor $acc10,$s2,$s2
931 ldx [$tbl+2048+160],%g0 ! prefetch td4
932 srl $s0,21,$acc0
933 xor $acc11,$s2,$s2
934 ldx [$tbl+2048+192],%g0 ! prefetch td4
935 xor $acc12,$acc14,$acc14
936 xor $acc13,$s3,$s3
937 ldx [$tbl+2048+224],%g0 ! prefetch td4
938 and $acc0,2040,$acc0 !
939 xor $acc14,$s3,$s3
940 xor $acc15,$s3,$s3
! Back to the top; the delay slot starts the acc1 offset for the next
! pass, mirroring the set-up code before the loop.
941 ba .Ldec_loop
942 srl $s3,13,$acc1
943
! Last round. First finish folding the pending table entries into t0-t3
! and load the final four round-key words into s0-s3.
944.align 32
945.Ldec_last:
946 srlx $acc1,8,$acc1 !
947 xor $acc0,$t0,$t0
948 ld [$key+0],$s0
949 srlx $acc2,16,$acc2
950 xor $acc1,$t0,$t0
951 ld [$key+4],$s1
952 srlx $acc3,24,$acc3
953 xor $acc2,$t0,$t0
954 ld [$key+8],$s2 !
955 srlx $acc5,8,$acc5
956 xor $acc3,$t0,$t0
957 ld [$key+12],$s3
958 srlx $acc6,16,$acc6
959 xor $acc4,$t1,$t1
960 srlx $acc7,24,$acc7
961 xor $acc5,$t1,$t1
962 srlx $acc9,8,$acc9 !
963 xor $acc6,$t1,$t1
964 srlx $acc10,16,$acc10
965 xor $acc7,$t1,$t1
966 srlx $acc11,24,$acc11
967 xor $acc8,$t2,$t2
968 srlx $acc13,8,$acc13
969 xor $acc9,$t2,$t2
970 srlx $acc14,16,$acc14 !
971 xor $acc10,$t2,$t2
972 srlx $acc15,24,$acc15
973 xor $acc11,$t2,$t2
974 xor $acc12,$acc14,$acc14
975 xor $acc13,$t3,$t3
976 srl $t0,24,$acc0
977 xor $acc14,$t3,$t3
978 xor $acc15,$t3,$t3 !
979 srl $t3,16,$acc1
980
! Byte substitution: each byte of t0-t3 (srl by 24, 16 or 8 plus
! 'and 255', or 'and 255' alone for the low byte) indexes the byte table
! that rounds now points at (tbl+2048).
981 srl $t2,8,$acc2
982 and $acc1,255,$acc1
983 ldub [$rounds+$acc0],$acc0
984 srl $t1,24,$acc4
985 and $acc2,255,$acc2
986 ldub [$rounds+$acc1],$acc1
987 srl $t0,16,$acc5 !
988 and $t1,255,$acc3
989 ldub [$rounds+$acc2],$acc2
990 ldub [$rounds+$acc3],$acc3
991 srl $t3,8,$acc6
992 and $acc5,255,$acc5
993 ldub [$rounds+$acc4],$acc4
994 fmovs %f0,%f0
995 srl $t2,24,$acc8 !
996 and $acc6,255,$acc6
997 ldub [$rounds+$acc5],$acc5
998 srl $t1,16,$acc9
999 and $t2,255,$acc7
1000 ldub [$rounds+$acc6],$acc6
1001 ldub [$rounds+$acc7],$acc7
1002 fmovs %f0,%f0
1003 srl $t0,8,$acc10 !
1004 and $acc9,255,$acc9
1005 ldub [$rounds+$acc8],$acc8
1006 srl $t3,24,$acc12
1007 and $acc10,255,$acc10
1008 ldub [$rounds+$acc9],$acc9
1009 srl $t2,16,$acc13
1010 and $t3,255,$acc11
1011 ldub [$rounds+$acc10],$acc10 !
1012 srl $t1,8,$acc14
1013 and $acc13,255,$acc13
1014 ldub [$rounds+$acc11],$acc11
1015 ldub [$rounds+$acc12],$acc12
1016 and $acc14,255,$acc14
1017 ldub [$rounds+$acc13],$acc13
1018 and $t0,255,$acc15
1019 ldub [$rounds+$acc14],$acc14 !
1020
! Shift each substituted byte back to its position (24, 16, 8 or 0) and
! XOR it into s0-s3, which hold the final round-key words. The saved
! return address is reloaded into %i7 along the way.
1021 sll $acc0,24,$acc0
1022 xor $acc3,$s0,$s0
1023 ldub [$rounds+$acc15],$acc15
1024 sll $acc1,16,$acc1
1025 xor $acc0,$s0,$s0
1026 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1027 fmovs %f0,%f0
1028 sll $acc2,8,$acc2 !
1029 xor $acc1,$s0,$s0
1030 sll $acc4,24,$acc4
1031 xor $acc2,$s0,$s0
1032 sll $acc5,16,$acc5
1033 xor $acc7,$s1,$s1
1034 sll $acc6,8,$acc6
1035 xor $acc4,$s1,$s1
1036 sll $acc8,24,$acc8 !
1037 xor $acc5,$s1,$s1
1038 sll $acc9,16,$acc9
1039 xor $acc11,$s2,$s2
1040 sll $acc10,8,$acc10
1041 xor $acc6,$s1,$s1
1042 sll $acc12,24,$acc12
1043 xor $acc8,$s2,$s2
1044 sll $acc13,16,$acc13 !
1045 xor $acc9,$s2,$s2
1046 sll $acc14,8,$acc14
1047 xor $acc10,$s2,$s2
1048 xor $acc12,$acc14,$acc14
1049 xor $acc13,$s3,$s3
1050 xor $acc14,$s3,$s3
1051 xor $acc15,$s3,$s3
1052
! Return; restore in the delay slot pops the register window opened by
! the save at entry.
1053 ret
1054 restore
1055.type _sparcv9_AES_decrypt,#function
1056.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1057
! AES_decrypt -- exported entry point: decrypts one 16-byte block.
!
! In (on entry, before the register window is opened):
!       %o0 = pointer to the 16-byte input block
!       %o1 = pointer to the 16-byte output block
!       %o2 = pointer to the key schedule
!     After the save in the first branch delay slot these are %i0, %i1
!     and %i2.
! NOTE(review): this matches OpenSSL's C prototype
!     void AES_decrypt(const unsigned char *in, unsigned char *out,
!                      const AES_KEY *key);
! which is not visible in this file -- confirm.
!
! The block is passed to _sparcv9_AES_decrypt as four 32-bit words in
! %o0-%o3, with the address of AES_Td in %o4 and the key pointer in %o5;
! the result comes back in %o0-%o3. Two paths exist: word loads/stores
! when both pointers are 4-byte aligned, byte loads/stores otherwise.
1058.align 32
1059.globl AES_decrypt
1060AES_decrypt:
! (in | out) & 3 is non-zero if either pointer is not 4-byte aligned.
! The branch is not annulled, so the save in its delay slot runs on both
! paths.
1061 or %o0,%o1,%g1
1062 andcc %g1,3,%g0
1063 bnz,pn %xcc,.Lunaligned_dec
1064 save %sp,-$frame,%sp
1065
! Aligned path: fetch the block as four words.
1066 ld [%i0+0],%o0
1067 ld [%i0+4],%o1
1068 ld [%i0+8],%o2
1069 ld [%i0+12],%o3
1070
! Position-independent address of AES_Td: 'call .+8' leaves the address of
! label 1 in %o7 and simply continues after its delay slot, where the add
! forms AES_Td in %o4. The second call's delay slot passes the key in %o5.
10711: call .+8
1072 add %o7,AES_Td-1b,%o4
1073 call _sparcv9_AES_decrypt
1074 mov %i2,%o5
1075
! Store the four result words to the output block.
1076 st %o0,[%i1+0]
1077 st %o1,[%i1+4]
1078 st %o2,[%i1+8]
1079 st %o3,[%i1+12]
1080
1081 ret
1082 restore
1083
! Unaligned path: read the block one byte at a time and assemble each
! 32-bit word most-significant byte first (byte 0 shifted left by 24,
! byte 3 unshifted), using %l0-%l7 as scratch. Loads are interleaved with
! the shifts and ORs of previously loaded bytes.
1084.align 32
1085.Lunaligned_dec:
1086 ldub [%i0+0],%l0
1087 ldub [%i0+1],%l1
1088 ldub [%i0+2],%l2
1089
1090 sll %l0,24,%l0
1091 ldub [%i0+3],%l3
1092 sll %l1,16,%l1
1093 ldub [%i0+4],%l4
1094 sll %l2,8,%l2
1095 or %l1,%l0,%l0
1096 ldub [%i0+5],%l5
1097 sll %l4,24,%l4
1098 or %l3,%l2,%l2
1099 ldub [%i0+6],%l6
1100 sll %l5,16,%l5
1101 or %l0,%l2,%o0
1102 ldub [%i0+7],%l7
1103
1104 sll %l6,8,%l6
1105 or %l5,%l4,%l4
1106 ldub [%i0+8],%l0
1107 or %l7,%l6,%l6
1108 ldub [%i0+9],%l1
1109 or %l4,%l6,%o1
1110 ldub [%i0+10],%l2
1111
1112 sll %l0,24,%l0
1113 ldub [%i0+11],%l3
1114 sll %l1,16,%l1
1115 ldub [%i0+12],%l4
1116 sll %l2,8,%l2
1117 or %l1,%l0,%l0
1118 ldub [%i0+13],%l5
1119 sll %l4,24,%l4
1120 or %l3,%l2,%l2
1121 ldub [%i0+14],%l6
1122 sll %l5,16,%l5
1123 or %l0,%l2,%o2
1124 ldub [%i0+15],%l7
1125
1126 sll %l6,8,%l6
1127 or %l5,%l4,%l4
1128 or %l7,%l6,%l6
1129 or %l4,%l6,%o3
1130
! Same table-address and call sequence as on the aligned path.
11311: call .+8
1132 add %o7,AES_Td-1b,%o4
1133 call _sparcv9_AES_decrypt
1134 mov %i2,%o5
1135
! Write the result back byte by byte, most-significant byte of each word
! first; stb stores the low 8 bits of its source register, so the last
! byte of each word is stored straight from %o0-%o3.
1136 srl %o0,24,%l0
1137 srl %o0,16,%l1
1138 stb %l0,[%i1+0]
1139 srl %o0,8,%l2
1140 stb %l1,[%i1+1]
1141 stb %l2,[%i1+2]
1142 srl %o1,24,%l4
1143 stb %o0,[%i1+3]
1144
1145 srl %o1,16,%l5
1146 stb %l4,[%i1+4]
1147 srl %o1,8,%l6
1148 stb %l5,[%i1+5]
1149 stb %l6,[%i1+6]
1150 srl %o2,24,%l0
1151 stb %o1,[%i1+7]
1152
1153 srl %o2,16,%l1
1154 stb %l0,[%i1+8]
1155 srl %o2,8,%l2
1156 stb %l1,[%i1+9]
1157 stb %l2,[%i1+10]
1158 srl %o3,24,%l4
1159 stb %o2,[%i1+11]
1160
1161 srl %o3,16,%l5
1162 stb %l4,[%i1+12]
1163 srl %o3,8,%l6
1164 stb %l5,[%i1+13]
1165 stb %l6,[%i1+14]
1166 stb %o3,[%i1+15]
1167
1168 ret
1169 restore
1170.type AES_decrypt,#function
1171.size AES_decrypt,(.-AES_decrypt)
1172___
1173
1174# The fmovs instructions, which substitute for FP nops, were originally
1175# added to meet specific instruction-alignment requirements and so
1176# maximize ILP. Because UltraSPARC T1, a.k.a. Niagara, has a shared FPU,
1177# FP nops can have an undesired effect there, so just omit them and
1178# sacrifice a fraction of a percent in performance...
#
# The substitution deletes everything from "fmovs" to the end of every line
# it appears on: /m lets $ match at each line end and /g repeats the
# replacement for all occurrences. Any text before "fmovs" on the line and
# the line break itself are left in place.
# NOTE(review): /e with an empty replacement looks redundant here --
# confirm before dropping it.
1179$code =~ s/fmovs.*$//gem;
1180
# Emit the generated assembly text.
1181print $code;