summary refs log tree commit diff
path: root/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/aes/asm/aes-sparcv9.pl')
-rwxr-xr-x src/lib/libcrypto/aes/asm/aes-sparcv9.pl 1217
1 files changed, 0 insertions, 1217 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-sparcv9.pl b/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
deleted file mode 100755
index 1348d09594..0000000000
--- a/src/lib/libcrypto/aes/asm/aes-sparcv9.pl
+++ /dev/null
@@ -1,1217 +0,0 @@
1#!/usr/bin/env perl
2#
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. Rights for redistribution and usage in source and binary
6# forms are granted according to the OpenSSL license.
7# ====================================================================
8#
9# Version 1.1
10#
11# The major reason for undertaking this effort was to mitigate the hazard
12# of cache-timing attacks. This is [currently and initially!] addressed in
13# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
14# 2. References to them are scheduled for L2 cache latency, meaning
15# that the tables don't have to reside in L1 cache. Once again, this
16# is an initial draft and one should expect more countermeasures to
17# be implemented...
18#
19# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
20# round.
21#
22# Even though performance was not the primary goal [on the contrary,
23# extra shifts "induced" by compressed S-box and longer loop epilogue
24# "induced" by scheduling for L2 have negative effect on performance],
25# the code turned out to run in ~23 cycles per processed byte en-/
26# decrypted with 128-bit key. This is pretty good result for code
27# with mentioned qualities and UltraSPARC core. Compared to Sun C
28# generated code, my encrypt procedure runs just a few percent faster,
29# while the decrypt one is a whole 50% faster [yes, Sun C failed to generate
30# an optimal decrypt procedure]. Compared to GNU C generated code, both
31# procedures are more than 60% faster:-)
32
# Target ABI selection: default to 32-bit, switch to 64-bit as soon as any
# command-line argument contains -m64 or -xarch=v9.
$bits = (grep { /\-m64/ || /\-xarch\=v9/ } @ARGV) ? 64 : 32;

# Stack-pointer bias and base frame size substituted into the generated
# save/stx/ldx instructions.
($bias, $frame) = ($bits == 64) ? (2047, 192) : (0, 112);

# Extra bytes reserved on top of the frame; the generated code parks the
# off-loaded return address there.
$locals = 16;
38
# SPARC register assignments substituted into the assembly templates.

# Accumulators: sixteen registers that receive the table look-ups of one
# round, four per output word.
($acc0,  $acc1,  $acc2,  $acc3)  = ('%l0', '%o0', '%o1', '%o2');
($acc4,  $acc5,  $acc6,  $acc7)  = ('%l1', '%o3', '%o4', '%o5');
($acc8,  $acc9,  $acc10, $acc11) = ('%l2', '%o7', '%g1', '%g2');
($acc12, $acc13, $acc14, $acc15) = ('%l3', '%g3', '%g4', '%g5');

# Round-key words / alternate copy of the state.
($t0, $t1, $t2, $t3) = ('%l4', '%l5', '%l6', '%l7');

# The four 32-bit state words.
($s0, $s1, $s2, $s3) = ('%i0', '%i1', '%i2', '%i3');

$tbl    = '%i4';    # base address of the look-up table
$key    = '%i5';    # pointer into the key schedule
$rounds = '%i7';    # aliases with return address, which is off-loaded to stack
71
# Append table words to the global $code buffer.
#
# Each argument is emitted as one ".long" directive that holds the 32-bit
# word twice, so every table entry occupies 8 bytes (256 entries = 2KB,
# matching the 2040 index mask and 64-bit ldx loads in the round code;
# presumably the duplicate lets a shifted 64-bit load yield a rotated word).
#
# Arguments: a list of integers.  Processing stops at the first undefined
# value.  Returns nothing; the only effect is the text appended to $code.
#
# The sub deliberately has no prototype: an empty "()" prototype would
# claim it takes no arguments and reject plain _data_word(...) calls, while
# the existing &_data_word(...) call sites keep working either way.
sub _data_word
{
    for my $word (@_) {
        last unless defined $word;
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}
76
# For 64-bit builds only: declare %g2 and %g3 as scratch registers; the
# generated code uses them as accumulators ($acc11 and $acc13).
77$code.=<<___ if ($bits==64);
78.register %g2,#scratch
79.register %g3,#scratch
80___
# Open the read-only data section and emit the 256-byte-aligned AES_Te
# label; the table contents are appended by the statements that follow.
81$code.=<<___;
82.section ".rodata",#alloc
83
84.align 256
85AES_Te:
86___
87&_data_word(
88 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
89 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
90 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
91 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
92 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
93 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
94 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
95 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
96 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
97 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
98 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
99 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
100 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
101 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
102 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
103 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
104 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
105 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
106 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
107 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
108 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
109 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
110 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
111 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
112 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
113 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
114 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
115 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
116 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
117 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
118 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
119 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
120 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
121 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
122 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
123 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
124 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
125 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
126 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
127 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
128 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
129 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
130 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
131 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
132 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
133 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
134 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
135 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
136 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
137 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
138 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
139 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
140 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
141 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
142 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
143 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
144 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
145 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
146 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
147 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
148 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
149 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
150 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
151 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
152$code.=<<___;
153 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
154 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
155 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
156 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
157 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
158 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
159 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
160 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
161 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
162 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
163 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
164 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
165 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
166 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
167 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
168 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
169 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
170 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
171 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
172 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
173 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
174 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
175 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
176 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
177 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
178 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
179 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
180 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
181 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
182 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
183 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
184 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
185.type AES_Te,#object
186.size AES_Te,(.-AES_Te)
187
188.section ".text",#alloc,#execinstr
189.align 64
190.skip 16
191_sparcv9_AES_encrypt:
192 save %sp,-$frame-$locals,%sp
193 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
194 ld [$key+240],$rounds
195 ld [$key+0],$t0
196 ld [$key+4],$t1 !
197 ld [$key+8],$t2
198 srl $rounds,1,$rounds
199 xor $t0,$s0,$s0
200 ld [$key+12],$t3
201 srl $s0,21,$acc0
202 xor $t1,$s1,$s1
203 ld [$key+16],$t0
204 srl $s1,13,$acc1 !
205 xor $t2,$s2,$s2
206 ld [$key+20],$t1
207 xor $t3,$s3,$s3
208 ld [$key+24],$t2
209 and $acc0,2040,$acc0
210 ld [$key+28],$t3
211 nop
212.Lenc_loop:
213 srl $s2,5,$acc2 !
214 and $acc1,2040,$acc1
215 ldx [$tbl+$acc0],$acc0
216 sll $s3,3,$acc3
217 and $acc2,2040,$acc2
218 ldx [$tbl+$acc1],$acc1
219 srl $s1,21,$acc4
220 and $acc3,2040,$acc3
221 ldx [$tbl+$acc2],$acc2 !
222 srl $s2,13,$acc5
223 and $acc4,2040,$acc4
224 ldx [$tbl+$acc3],$acc3
225 srl $s3,5,$acc6
226 and $acc5,2040,$acc5
227 ldx [$tbl+$acc4],$acc4
228 fmovs %f0,%f0
229 sll $s0,3,$acc7 !
230 and $acc6,2040,$acc6
231 ldx [$tbl+$acc5],$acc5
232 srl $s2,21,$acc8
233 and $acc7,2040,$acc7
234 ldx [$tbl+$acc6],$acc6
235 srl $s3,13,$acc9
236 and $acc8,2040,$acc8
237 ldx [$tbl+$acc7],$acc7 !
238 srl $s0,5,$acc10
239 and $acc9,2040,$acc9
240 ldx [$tbl+$acc8],$acc8
241 sll $s1,3,$acc11
242 and $acc10,2040,$acc10
243 ldx [$tbl+$acc9],$acc9
244 fmovs %f0,%f0
245 srl $s3,21,$acc12 !
246 and $acc11,2040,$acc11
247 ldx [$tbl+$acc10],$acc10
248 srl $s0,13,$acc13
249 and $acc12,2040,$acc12
250 ldx [$tbl+$acc11],$acc11
251 srl $s1,5,$acc14
252 and $acc13,2040,$acc13
253 ldx [$tbl+$acc12],$acc12 !
254 sll $s2,3,$acc15
255 and $acc14,2040,$acc14
256 ldx [$tbl+$acc13],$acc13
257 and $acc15,2040,$acc15
258 add $key,32,$key
259 ldx [$tbl+$acc14],$acc14
260 fmovs %f0,%f0
261 subcc $rounds,1,$rounds !
262 ldx [$tbl+$acc15],$acc15
263 bz,a,pn %icc,.Lenc_last
264 add $tbl,2048,$rounds
265
266 srlx $acc1,8,$acc1
267 xor $acc0,$t0,$t0
268 ld [$key+0],$s0
269 fmovs %f0,%f0
270 srlx $acc2,16,$acc2 !
271 xor $acc1,$t0,$t0
272 ld [$key+4],$s1
273 srlx $acc3,24,$acc3
274 xor $acc2,$t0,$t0
275 ld [$key+8],$s2
276 srlx $acc5,8,$acc5
277 xor $acc3,$t0,$t0
278 ld [$key+12],$s3 !
279 srlx $acc6,16,$acc6
280 xor $acc4,$t1,$t1
281 fmovs %f0,%f0
282 srlx $acc7,24,$acc7
283 xor $acc5,$t1,$t1
284 srlx $acc9,8,$acc9
285 xor $acc6,$t1,$t1
286 srlx $acc10,16,$acc10 !
287 xor $acc7,$t1,$t1
288 srlx $acc11,24,$acc11
289 xor $acc8,$t2,$t2
290 srlx $acc13,8,$acc13
291 xor $acc9,$t2,$t2
292 srlx $acc14,16,$acc14
293 xor $acc10,$t2,$t2
294 srlx $acc15,24,$acc15 !
295 xor $acc11,$t2,$t2
296 xor $acc12,$acc14,$acc14
297 xor $acc13,$t3,$t3
298 srl $t0,21,$acc0
299 xor $acc14,$t3,$t3
300 srl $t1,13,$acc1
301 xor $acc15,$t3,$t3
302
303 and $acc0,2040,$acc0 !
304 srl $t2,5,$acc2
305 and $acc1,2040,$acc1
306 ldx [$tbl+$acc0],$acc0
307 sll $t3,3,$acc3
308 and $acc2,2040,$acc2
309 ldx [$tbl+$acc1],$acc1
310 fmovs %f0,%f0
311 srl $t1,21,$acc4 !
312 and $acc3,2040,$acc3
313 ldx [$tbl+$acc2],$acc2
314 srl $t2,13,$acc5
315 and $acc4,2040,$acc4
316 ldx [$tbl+$acc3],$acc3
317 srl $t3,5,$acc6
318 and $acc5,2040,$acc5
319 ldx [$tbl+$acc4],$acc4 !
320 sll $t0,3,$acc7
321 and $acc6,2040,$acc6
322 ldx [$tbl+$acc5],$acc5
323 srl $t2,21,$acc8
324 and $acc7,2040,$acc7
325 ldx [$tbl+$acc6],$acc6
326 fmovs %f0,%f0
327 srl $t3,13,$acc9 !
328 and $acc8,2040,$acc8
329 ldx [$tbl+$acc7],$acc7
330 srl $t0,5,$acc10
331 and $acc9,2040,$acc9
332 ldx [$tbl+$acc8],$acc8
333 sll $t1,3,$acc11
334 and $acc10,2040,$acc10
335 ldx [$tbl+$acc9],$acc9 !
336 srl $t3,21,$acc12
337 and $acc11,2040,$acc11
338 ldx [$tbl+$acc10],$acc10
339 srl $t0,13,$acc13
340 and $acc12,2040,$acc12
341 ldx [$tbl+$acc11],$acc11
342 fmovs %f0,%f0
343 srl $t1,5,$acc14 !
344 and $acc13,2040,$acc13
345 ldx [$tbl+$acc12],$acc12
346 sll $t2,3,$acc15
347 and $acc14,2040,$acc14
348 ldx [$tbl+$acc13],$acc13
349 srlx $acc1,8,$acc1
350 and $acc15,2040,$acc15
351 ldx [$tbl+$acc14],$acc14 !
352
353 srlx $acc2,16,$acc2
354 xor $acc0,$s0,$s0
355 ldx [$tbl+$acc15],$acc15
356 srlx $acc3,24,$acc3
357 xor $acc1,$s0,$s0
358 ld [$key+16],$t0
359 fmovs %f0,%f0
360 srlx $acc5,8,$acc5 !
361 xor $acc2,$s0,$s0
362 ld [$key+20],$t1
363 srlx $acc6,16,$acc6
364 xor $acc3,$s0,$s0
365 ld [$key+24],$t2
366 srlx $acc7,24,$acc7
367 xor $acc4,$s1,$s1
368 ld [$key+28],$t3 !
369 srlx $acc9,8,$acc9
370 xor $acc5,$s1,$s1
371 ldx [$tbl+2048+0],%g0 ! prefetch te4
372 srlx $acc10,16,$acc10
373 xor $acc6,$s1,$s1
374 ldx [$tbl+2048+32],%g0 ! prefetch te4
375 srlx $acc11,24,$acc11
376 xor $acc7,$s1,$s1
377 ldx [$tbl+2048+64],%g0 ! prefetch te4
378 srlx $acc13,8,$acc13
379 xor $acc8,$s2,$s2
380 ldx [$tbl+2048+96],%g0 ! prefetch te4
381 srlx $acc14,16,$acc14 !
382 xor $acc9,$s2,$s2
383 ldx [$tbl+2048+128],%g0 ! prefetch te4
384 srlx $acc15,24,$acc15
385 xor $acc10,$s2,$s2
386 ldx [$tbl+2048+160],%g0 ! prefetch te4
387 srl $s0,21,$acc0
388 xor $acc11,$s2,$s2
389 ldx [$tbl+2048+192],%g0 ! prefetch te4
390 xor $acc12,$acc14,$acc14
391 xor $acc13,$s3,$s3
392 ldx [$tbl+2048+224],%g0 ! prefetch te4
393 srl $s1,13,$acc1 !
394 xor $acc14,$s3,$s3
395 xor $acc15,$s3,$s3
396 ba .Lenc_loop
397 and $acc0,2040,$acc0
398
399.align 32
400.Lenc_last:
401 srlx $acc1,8,$acc1 !
402 xor $acc0,$t0,$t0
403 ld [$key+0],$s0
404 srlx $acc2,16,$acc2
405 xor $acc1,$t0,$t0
406 ld [$key+4],$s1
407 srlx $acc3,24,$acc3
408 xor $acc2,$t0,$t0
409 ld [$key+8],$s2 !
410 srlx $acc5,8,$acc5
411 xor $acc3,$t0,$t0
412 ld [$key+12],$s3
413 srlx $acc6,16,$acc6
414 xor $acc4,$t1,$t1
415 srlx $acc7,24,$acc7
416 xor $acc5,$t1,$t1
417 srlx $acc9,8,$acc9 !
418 xor $acc6,$t1,$t1
419 srlx $acc10,16,$acc10
420 xor $acc7,$t1,$t1
421 srlx $acc11,24,$acc11
422 xor $acc8,$t2,$t2
423 srlx $acc13,8,$acc13
424 xor $acc9,$t2,$t2
425 srlx $acc14,16,$acc14 !
426 xor $acc10,$t2,$t2
427 srlx $acc15,24,$acc15
428 xor $acc11,$t2,$t2
429 xor $acc12,$acc14,$acc14
430 xor $acc13,$t3,$t3
431 srl $t0,24,$acc0
432 xor $acc14,$t3,$t3
433 srl $t1,16,$acc1 !
434 xor $acc15,$t3,$t3
435
436 srl $t2,8,$acc2
437 and $acc1,255,$acc1
438 ldub [$rounds+$acc0],$acc0
439 srl $t1,24,$acc4
440 and $acc2,255,$acc2
441 ldub [$rounds+$acc1],$acc1
442 srl $t2,16,$acc5 !
443 and $t3,255,$acc3
444 ldub [$rounds+$acc2],$acc2
445 ldub [$rounds+$acc3],$acc3
446 srl $t3,8,$acc6
447 and $acc5,255,$acc5
448 ldub [$rounds+$acc4],$acc4
449 fmovs %f0,%f0
450 srl $t2,24,$acc8 !
451 and $acc6,255,$acc6
452 ldub [$rounds+$acc5],$acc5
453 srl $t3,16,$acc9
454 and $t0,255,$acc7
455 ldub [$rounds+$acc6],$acc6
456 ldub [$rounds+$acc7],$acc7
457 fmovs %f0,%f0
458 srl $t0,8,$acc10 !
459 and $acc9,255,$acc9
460 ldub [$rounds+$acc8],$acc8
461 srl $t3,24,$acc12
462 and $acc10,255,$acc10
463 ldub [$rounds+$acc9],$acc9
464 srl $t0,16,$acc13
465 and $t1,255,$acc11
466 ldub [$rounds+$acc10],$acc10 !
467 srl $t1,8,$acc14
468 and $acc13,255,$acc13
469 ldub [$rounds+$acc11],$acc11
470 ldub [$rounds+$acc12],$acc12
471 and $acc14,255,$acc14
472 ldub [$rounds+$acc13],$acc13
473 and $t2,255,$acc15
474 ldub [$rounds+$acc14],$acc14 !
475
476 sll $acc0,24,$acc0
477 xor $acc3,$s0,$s0
478 ldub [$rounds+$acc15],$acc15
479 sll $acc1,16,$acc1
480 xor $acc0,$s0,$s0
481 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
482 fmovs %f0,%f0
483 sll $acc2,8,$acc2 !
484 xor $acc1,$s0,$s0
485 sll $acc4,24,$acc4
486 xor $acc2,$s0,$s0
487 sll $acc5,16,$acc5
488 xor $acc7,$s1,$s1
489 sll $acc6,8,$acc6
490 xor $acc4,$s1,$s1
491 sll $acc8,24,$acc8 !
492 xor $acc5,$s1,$s1
493 sll $acc9,16,$acc9
494 xor $acc11,$s2,$s2
495 sll $acc10,8,$acc10
496 xor $acc6,$s1,$s1
497 sll $acc12,24,$acc12
498 xor $acc8,$s2,$s2
499 sll $acc13,16,$acc13 !
500 xor $acc9,$s2,$s2
501 sll $acc14,8,$acc14
502 xor $acc10,$s2,$s2
503 xor $acc12,$acc14,$acc14
504 xor $acc13,$s3,$s3
505 xor $acc14,$s3,$s3
506 xor $acc15,$s3,$s3
507
508 ret
509 restore
510.type _sparcv9_AES_encrypt,#function
511.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
512
513.align 32
514.globl aes_encrypt_internal
515aes_encrypt_internal:
516 save %sp,-$frame,%sp
517#ifdef __PIC__
518 sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o5
519 rd %pc, %o4
520 or %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5
521 add %o5, %o4, %o5
522#endif
523
524 or %i0,%i1,%g1
525 andcc %g1,3,%g0
526 bnz,pn %xcc,.Lunaligned_enc
527 nop
528
529 ld [%i0+0],%o0
530 ld [%i0+4],%o1
531 ld [%i0+8],%o2
532 ld [%i0+12],%o3
533
534#ifdef __PIC__
535 set AES_Te, %o4
536 ldx [%o4+%o5], %o4
537#else
538 set AES_Te, %o4
539#endif
540 call _sparcv9_AES_encrypt
541 mov %i2,%o5
542
543 st %o0,[%i1+0]
544 st %o1,[%i1+4]
545 st %o2,[%i1+8]
546 st %o3,[%i1+12]
547
548 ret
549 restore
550
551.align 32
552.Lunaligned_enc:
553 ldub [%i0+0],%l0
554 ldub [%i0+1],%l1
555 ldub [%i0+2],%l2
556
557 sll %l0,24,%l0
558 ldub [%i0+3],%l3
559 sll %l1,16,%l1
560 ldub [%i0+4],%l4
561 sll %l2,8,%l2
562 or %l1,%l0,%l0
563 ldub [%i0+5],%l5
564 sll %l4,24,%l4
565 or %l3,%l2,%l2
566 ldub [%i0+6],%l6
567 sll %l5,16,%l5
568 or %l0,%l2,%o0
569 ldub [%i0+7],%l7
570
571 sll %l6,8,%l6
572 or %l5,%l4,%l4
573 ldub [%i0+8],%l0
574 or %l7,%l6,%l6
575 ldub [%i0+9],%l1
576 or %l4,%l6,%o1
577 ldub [%i0+10],%l2
578
579 sll %l0,24,%l0
580 ldub [%i0+11],%l3
581 sll %l1,16,%l1
582 ldub [%i0+12],%l4
583 sll %l2,8,%l2
584 or %l1,%l0,%l0
585 ldub [%i0+13],%l5
586 sll %l4,24,%l4
587 or %l3,%l2,%l2
588 ldub [%i0+14],%l6
589 sll %l5,16,%l5
590 or %l0,%l2,%o2
591 ldub [%i0+15],%l7
592
593 sll %l6,8,%l6
594 or %l5,%l4,%l4
595 or %l7,%l6,%l6
596 or %l4,%l6,%o3
597
598#ifdef __PIC__
599 set AES_Te, %o4
600 ldx [%o4+%o5], %o4
601#else
602 set AES_Te, %o4
603#endif
604 call _sparcv9_AES_encrypt
605 mov %i2,%o5
606
607 srl %o0,24,%l0
608 srl %o0,16,%l1
609 stb %l0,[%i1+0]
610 srl %o0,8,%l2
611 stb %l1,[%i1+1]
612 stb %l2,[%i1+2]
613 srl %o1,24,%l4
614 stb %o0,[%i1+3]
615
616 srl %o1,16,%l5
617 stb %l4,[%i1+4]
618 srl %o1,8,%l6
619 stb %l5,[%i1+5]
620 stb %l6,[%i1+6]
621 srl %o2,24,%l0
622 stb %o1,[%i1+7]
623
624 srl %o2,16,%l1
625 stb %l0,[%i1+8]
626 srl %o2,8,%l2
627 stb %l1,[%i1+9]
628 stb %l2,[%i1+10]
629 srl %o3,24,%l4
630 stb %o2,[%i1+11]
631
632 srl %o3,16,%l5
633 stb %l4,[%i1+12]
634 srl %o3,8,%l6
635 stb %l5,[%i1+13]
636 stb %l6,[%i1+14]
637 stb %o3,[%i1+15]
638
639 ret
640 restore
641.type aes_encrypt_internal,#function
642.size aes_encrypt_internal,(.-aes_encrypt_internal)
643
644___
645
# Switch back to the read-only data section and emit the 256-byte-aligned
# AES_Td label; the table contents are appended by the statements that follow.
646$code.=<<___;
647.section ".rodata",#alloc
648.align 256
649AES_Td:
650___
651&_data_word(
652 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
653 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
654 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
655 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
656 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
657 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
658 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
659 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
660 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
661 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
662 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
663 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
664 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
665 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
666 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
667 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
668 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
669 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
670 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
671 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
672 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
673 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
674 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
675 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
676 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
677 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
678 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
679 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
680 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
681 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
682 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
683 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
684 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
685 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
686 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
687 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
688 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
689 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
690 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
691 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
692 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
693 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
694 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
695 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
696 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
697 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
698 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
699 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
700 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
701 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
702 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
703 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
704 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
705 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
706 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
707 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
708 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
709 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
710 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
711 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
712 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
713 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
714 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
715 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
716$code.=<<___;
717 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
718 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
719 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
720 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
721 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
722 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
723 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
724 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
725 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
726 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
727 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
728 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
729 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
730 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
731 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
732 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
733 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
734 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
735 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
736 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
737 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
738 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
739 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
740 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
741 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
742 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
743 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
744 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
745 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
746 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
747 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
748 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
749.type AES_Td,#object
750.size AES_Td,(.-AES_Td)
751
752.section ".text",#alloc,#execinstr
753.align 64
754.skip 16
755_sparcv9_AES_decrypt:
756 save %sp,-$frame-$locals,%sp
757 stx %i7,[%sp+$bias+$frame+0] ! off-load return address
758 ld [$key+240],$rounds
759 ld [$key+0],$t0
760 ld [$key+4],$t1 !
761 ld [$key+8],$t2
762 ld [$key+12],$t3
763 srl $rounds,1,$rounds
764 xor $t0,$s0,$s0
765 ld [$key+16],$t0
766 xor $t1,$s1,$s1
767 ld [$key+20],$t1
768 srl $s0,21,$acc0 !
769 xor $t2,$s2,$s2
770 ld [$key+24],$t2
771 xor $t3,$s3,$s3
772 and $acc0,2040,$acc0
773 ld [$key+28],$t3
774 srl $s3,13,$acc1
775 nop
776.Ldec_loop:
777 srl $s2,5,$acc2 !
778 and $acc1,2040,$acc1
779 ldx [$tbl+$acc0],$acc0
780 sll $s1,3,$acc3
781 and $acc2,2040,$acc2
782 ldx [$tbl+$acc1],$acc1
783 srl $s1,21,$acc4
784 and $acc3,2040,$acc3
785 ldx [$tbl+$acc2],$acc2 !
786 srl $s0,13,$acc5
787 and $acc4,2040,$acc4
788 ldx [$tbl+$acc3],$acc3
789 srl $s3,5,$acc6
790 and $acc5,2040,$acc5
791 ldx [$tbl+$acc4],$acc4
792 fmovs %f0,%f0
793 sll $s2,3,$acc7 !
794 and $acc6,2040,$acc6
795 ldx [$tbl+$acc5],$acc5
796 srl $s2,21,$acc8
797 and $acc7,2040,$acc7
798 ldx [$tbl+$acc6],$acc6
799 srl $s1,13,$acc9
800 and $acc8,2040,$acc8
801 ldx [$tbl+$acc7],$acc7 !
802 srl $s0,5,$acc10
803 and $acc9,2040,$acc9
804 ldx [$tbl+$acc8],$acc8
805 sll $s3,3,$acc11
806 and $acc10,2040,$acc10
807 ldx [$tbl+$acc9],$acc9
808 fmovs %f0,%f0
809 srl $s3,21,$acc12 !
810 and $acc11,2040,$acc11
811 ldx [$tbl+$acc10],$acc10
812 srl $s2,13,$acc13
813 and $acc12,2040,$acc12
814 ldx [$tbl+$acc11],$acc11
815 srl $s1,5,$acc14
816 and $acc13,2040,$acc13
817 ldx [$tbl+$acc12],$acc12 !
818 sll $s0,3,$acc15
819 and $acc14,2040,$acc14
820 ldx [$tbl+$acc13],$acc13
821 and $acc15,2040,$acc15
822 add $key,32,$key
823 ldx [$tbl+$acc14],$acc14
824 fmovs %f0,%f0
825 subcc $rounds,1,$rounds !
826 ldx [$tbl+$acc15],$acc15
827 bz,a,pn %icc,.Ldec_last
828 add $tbl,2048,$rounds
829
830 srlx $acc1,8,$acc1
831 xor $acc0,$t0,$t0
832 ld [$key+0],$s0
833 fmovs %f0,%f0
834 srlx $acc2,16,$acc2 !
835 xor $acc1,$t0,$t0
836 ld [$key+4],$s1
837 srlx $acc3,24,$acc3
838 xor $acc2,$t0,$t0
839 ld [$key+8],$s2
840 srlx $acc5,8,$acc5
841 xor $acc3,$t0,$t0
842 ld [$key+12],$s3 !
843 srlx $acc6,16,$acc6
844 xor $acc4,$t1,$t1
845 fmovs %f0,%f0
846 srlx $acc7,24,$acc7
847 xor $acc5,$t1,$t1
848 srlx $acc9,8,$acc9
849 xor $acc6,$t1,$t1
850 srlx $acc10,16,$acc10 !
851 xor $acc7,$t1,$t1
852 srlx $acc11,24,$acc11
853 xor $acc8,$t2,$t2
854 srlx $acc13,8,$acc13
855 xor $acc9,$t2,$t2
856 srlx $acc14,16,$acc14
857 xor $acc10,$t2,$t2
858 srlx $acc15,24,$acc15 !
859 xor $acc11,$t2,$t2
860 xor $acc12,$acc14,$acc14
861 xor $acc13,$t3,$t3
862 srl $t0,21,$acc0
863 xor $acc14,$t3,$t3
864 xor $acc15,$t3,$t3
865 srl $t3,13,$acc1
866
867 and $acc0,2040,$acc0 !
868 srl $t2,5,$acc2
869 and $acc1,2040,$acc1
870 ldx [$tbl+$acc0],$acc0
871 sll $t1,3,$acc3
872 and $acc2,2040,$acc2
873 ldx [$tbl+$acc1],$acc1
874 fmovs %f0,%f0
875 srl $t1,21,$acc4 !
876 and $acc3,2040,$acc3
877 ldx [$tbl+$acc2],$acc2
878 srl $t0,13,$acc5
879 and $acc4,2040,$acc4
880 ldx [$tbl+$acc3],$acc3
881 srl $t3,5,$acc6
882 and $acc5,2040,$acc5
883 ldx [$tbl+$acc4],$acc4 !
884 sll $t2,3,$acc7
885 and $acc6,2040,$acc6
886 ldx [$tbl+$acc5],$acc5
887 srl $t2,21,$acc8
888 and $acc7,2040,$acc7
889 ldx [$tbl+$acc6],$acc6
890 fmovs %f0,%f0
891 srl $t1,13,$acc9 !
892 and $acc8,2040,$acc8
893 ldx [$tbl+$acc7],$acc7
894 srl $t0,5,$acc10
895 and $acc9,2040,$acc9
896 ldx [$tbl+$acc8],$acc8
897 sll $t3,3,$acc11
898 and $acc10,2040,$acc10
899 ldx [$tbl+$acc9],$acc9 !
900 srl $t3,21,$acc12
901 and $acc11,2040,$acc11
902 ldx [$tbl+$acc10],$acc10
903 srl $t2,13,$acc13
904 and $acc12,2040,$acc12
905 ldx [$tbl+$acc11],$acc11
906 fmovs %f0,%f0
907 srl $t1,5,$acc14 !
908 and $acc13,2040,$acc13
909 ldx [$tbl+$acc12],$acc12
910 sll $t0,3,$acc15
911 and $acc14,2040,$acc14
912 ldx [$tbl+$acc13],$acc13
913 srlx $acc1,8,$acc1
914 and $acc15,2040,$acc15
915 ldx [$tbl+$acc14],$acc14 !
916
917 srlx $acc2,16,$acc2
918 xor $acc0,$s0,$s0
919 ldx [$tbl+$acc15],$acc15
920 srlx $acc3,24,$acc3
921 xor $acc1,$s0,$s0
922 ld [$key+16],$t0
923 fmovs %f0,%f0
924 srlx $acc5,8,$acc5 !
925 xor $acc2,$s0,$s0
926 ld [$key+20],$t1
927 srlx $acc6,16,$acc6
928 xor $acc3,$s0,$s0
929 ld [$key+24],$t2
930 srlx $acc7,24,$acc7
931 xor $acc4,$s1,$s1
932 ld [$key+28],$t3 !
933 srlx $acc9,8,$acc9
934 xor $acc5,$s1,$s1
935 ldx [$tbl+2048+0],%g0 ! prefetch td4
936 srlx $acc10,16,$acc10
937 xor $acc6,$s1,$s1
938 ldx [$tbl+2048+32],%g0 ! prefetch td4
939 srlx $acc11,24,$acc11
940 xor $acc7,$s1,$s1
941 ldx [$tbl+2048+64],%g0 ! prefetch td4
942 srlx $acc13,8,$acc13
943 xor $acc8,$s2,$s2
944 ldx [$tbl+2048+96],%g0 ! prefetch td4
945 srlx $acc14,16,$acc14 !
946 xor $acc9,$s2,$s2
947 ldx [$tbl+2048+128],%g0 ! prefetch td4
948 srlx $acc15,24,$acc15
949 xor $acc10,$s2,$s2
950 ldx [$tbl+2048+160],%g0 ! prefetch td4
951 srl $s0,21,$acc0
952 xor $acc11,$s2,$s2
953 ldx [$tbl+2048+192],%g0 ! prefetch td4
954 xor $acc12,$acc14,$acc14
955 xor $acc13,$s3,$s3
956 ldx [$tbl+2048+224],%g0 ! prefetch td4
957 and $acc0,2040,$acc0 !
958 xor $acc14,$s3,$s3
959 xor $acc15,$s3,$s3
960 ba .Ldec_loop
961 srl $s3,13,$acc1
962
963.align 32
964.Ldec_last:
965 srlx $acc1,8,$acc1 !
966 xor $acc0,$t0,$t0
967 ld [$key+0],$s0
968 srlx $acc2,16,$acc2
969 xor $acc1,$t0,$t0
970 ld [$key+4],$s1
971 srlx $acc3,24,$acc3
972 xor $acc2,$t0,$t0
973 ld [$key+8],$s2 !
974 srlx $acc5,8,$acc5
975 xor $acc3,$t0,$t0
976 ld [$key+12],$s3
977 srlx $acc6,16,$acc6
978 xor $acc4,$t1,$t1
979 srlx $acc7,24,$acc7
980 xor $acc5,$t1,$t1
981 srlx $acc9,8,$acc9 !
982 xor $acc6,$t1,$t1
983 srlx $acc10,16,$acc10
984 xor $acc7,$t1,$t1
985 srlx $acc11,24,$acc11
986 xor $acc8,$t2,$t2
987 srlx $acc13,8,$acc13
988 xor $acc9,$t2,$t2
989 srlx $acc14,16,$acc14 !
990 xor $acc10,$t2,$t2
991 srlx $acc15,24,$acc15
992 xor $acc11,$t2,$t2
993 xor $acc12,$acc14,$acc14
994 xor $acc13,$t3,$t3
995 srl $t0,24,$acc0
996 xor $acc14,$t3,$t3
997 xor $acc15,$t3,$t3 !
998 srl $t3,16,$acc1
999
1000 srl $t2,8,$acc2
1001 and $acc1,255,$acc1
1002 ldub [$rounds+$acc0],$acc0
1003 srl $t1,24,$acc4
1004 and $acc2,255,$acc2
1005 ldub [$rounds+$acc1],$acc1
1006 srl $t0,16,$acc5 !
1007 and $t1,255,$acc3
1008 ldub [$rounds+$acc2],$acc2
1009 ldub [$rounds+$acc3],$acc3
1010 srl $t3,8,$acc6
1011 and $acc5,255,$acc5
1012 ldub [$rounds+$acc4],$acc4
1013 fmovs %f0,%f0
1014 srl $t2,24,$acc8 !
1015 and $acc6,255,$acc6
1016 ldub [$rounds+$acc5],$acc5
1017 srl $t1,16,$acc9
1018 and $t2,255,$acc7
1019 ldub [$rounds+$acc6],$acc6
1020 ldub [$rounds+$acc7],$acc7
1021 fmovs %f0,%f0
1022 srl $t0,8,$acc10 !
1023 and $acc9,255,$acc9
1024 ldub [$rounds+$acc8],$acc8
1025 srl $t3,24,$acc12
1026 and $acc10,255,$acc10
1027 ldub [$rounds+$acc9],$acc9
1028 srl $t2,16,$acc13
1029 and $t3,255,$acc11
1030 ldub [$rounds+$acc10],$acc10 !
1031 srl $t1,8,$acc14
1032 and $acc13,255,$acc13
1033 ldub [$rounds+$acc11],$acc11
1034 ldub [$rounds+$acc12],$acc12
1035 and $acc14,255,$acc14
1036 ldub [$rounds+$acc13],$acc13
1037 and $t0,255,$acc15
1038 ldub [$rounds+$acc14],$acc14 !
1039
1040 sll $acc0,24,$acc0
1041 xor $acc3,$s0,$s0
1042 ldub [$rounds+$acc15],$acc15
1043 sll $acc1,16,$acc1
1044 xor $acc0,$s0,$s0
1045 ldx [%sp+$bias+$frame+0],%i7 ! restore return address
1046 fmovs %f0,%f0
1047 sll $acc2,8,$acc2 !
1048 xor $acc1,$s0,$s0
1049 sll $acc4,24,$acc4
1050 xor $acc2,$s0,$s0
1051 sll $acc5,16,$acc5
1052 xor $acc7,$s1,$s1
1053 sll $acc6,8,$acc6
1054 xor $acc4,$s1,$s1
1055 sll $acc8,24,$acc8 !
1056 xor $acc5,$s1,$s1
1057 sll $acc9,16,$acc9
1058 xor $acc11,$s2,$s2
1059 sll $acc10,8,$acc10
1060 xor $acc6,$s1,$s1
1061 sll $acc12,24,$acc12
1062 xor $acc8,$s2,$s2
1063 sll $acc13,16,$acc13 !
1064 xor $acc9,$s2,$s2
1065 sll $acc14,8,$acc14
1066 xor $acc10,$s2,$s2
1067 xor $acc12,$acc14,$acc14
1068 xor $acc13,$s3,$s3
1069 xor $acc14,$s3,$s3
1070 xor $acc15,$s3,$s3
1071
1072 ret
1073 restore
1074.type _sparcv9_AES_decrypt,#function
1075.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
1076
1077.align 32
1078.globl aes_decrypt_internal
1079aes_decrypt_internal:
1080 save %sp,-$frame,%sp
1081#ifdef __PIC__
1082 sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o5
1083 rd %pc, %o4
1084 or %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5
1085 add %o5, %o4, %o5
1086#endif
1087
1088 or %i0,%i1,%g1
1089 andcc %g1,3,%g0
1090 bnz,pn %xcc,.Lunaligned_dec
1091 nop
1092
1093 ld [%i0+0],%o0
1094 ld [%i0+4],%o1
1095 ld [%i0+8],%o2
1096 ld [%i0+12],%o3
1097
1098#ifdef __PIC__
1099 set AES_Td, %o4
1100 ldx [%o4+%o5], %o4
1101#else
1102 set AES_Td, %o4
1103#endif
1104 call _sparcv9_AES_decrypt
1105 mov %i2,%o5
1106
1107 st %o0,[%i1+0]
1108 st %o1,[%i1+4]
1109 st %o2,[%i1+8]
1110 st %o3,[%i1+12]
1111
1112 ret
1113 restore
1114
1115.align 32
1116.Lunaligned_dec:
1117 ldub [%i0+0],%l0
1118 ldub [%i0+1],%l1
1119 ldub [%i0+2],%l2
1120
1121 sll %l0,24,%l0
1122 ldub [%i0+3],%l3
1123 sll %l1,16,%l1
1124 ldub [%i0+4],%l4
1125 sll %l2,8,%l2
1126 or %l1,%l0,%l0
1127 ldub [%i0+5],%l5
1128 sll %l4,24,%l4
1129 or %l3,%l2,%l2
1130 ldub [%i0+6],%l6
1131 sll %l5,16,%l5
1132 or %l0,%l2,%o0
1133 ldub [%i0+7],%l7
1134
1135 sll %l6,8,%l6
1136 or %l5,%l4,%l4
1137 ldub [%i0+8],%l0
1138 or %l7,%l6,%l6
1139 ldub [%i0+9],%l1
1140 or %l4,%l6,%o1
1141 ldub [%i0+10],%l2
1142
1143 sll %l0,24,%l0
1144 ldub [%i0+11],%l3
1145 sll %l1,16,%l1
1146 ldub [%i0+12],%l4
1147 sll %l2,8,%l2
1148 or %l1,%l0,%l0
1149 ldub [%i0+13],%l5
1150 sll %l4,24,%l4
1151 or %l3,%l2,%l2
1152 ldub [%i0+14],%l6
1153 sll %l5,16,%l5
1154 or %l0,%l2,%o2
1155 ldub [%i0+15],%l7
1156
1157 sll %l6,8,%l6
1158 or %l5,%l4,%l4
1159 or %l7,%l6,%l6
1160 or %l4,%l6,%o3
1161
1162#ifdef __PIC__
1163 set AES_Td, %o4
1164 ldx [%o4+%o5], %o4
1165#else
1166 set AES_Td, %o4
1167#endif
1168 call _sparcv9_AES_decrypt
1169 mov %i2,%o5
1170
1171 srl %o0,24,%l0
1172 srl %o0,16,%l1
1173 stb %l0,[%i1+0]
1174 srl %o0,8,%l2
1175 stb %l1,[%i1+1]
1176 stb %l2,[%i1+2]
1177 srl %o1,24,%l4
1178 stb %o0,[%i1+3]
1179
1180 srl %o1,16,%l5
1181 stb %l4,[%i1+4]
1182 srl %o1,8,%l6
1183 stb %l5,[%i1+5]
1184 stb %l6,[%i1+6]
1185 srl %o2,24,%l0
1186 stb %o1,[%i1+7]
1187
1188 srl %o2,16,%l1
1189 stb %l0,[%i1+8]
1190 srl %o2,8,%l2
1191 stb %l1,[%i1+9]
1192 stb %l2,[%i1+10]
1193 srl %o3,24,%l4
1194 stb %o2,[%i1+11]
1195
1196 srl %o3,16,%l5
1197 stb %l4,[%i1+12]
1198 srl %o3,8,%l6
1199 stb %l5,[%i1+13]
1200 stb %l6,[%i1+14]
1201 stb %o3,[%i1+15]
1202
1203 ret
1204 restore
1205.type aes_decrypt_internal,#function
1206.size aes_decrypt_internal,(.-aes_decrypt_internal)
1207___
1208
# The fmovs instructions serve as FP nops: they were originally added to
# meet specific instruction alignment requirements and so maximize ILP.
# UltraSPARC T1, a.k.a. Niagara, has a shared FPU, where FP nops can have
# an undesired effect, so strip every one of them and sacrifice a fraction
# of a percent in performance.
$code =~ s/fmovs.*$//gm;

print $code;

# Closing STDOUT flushes the buffered output.  A failure at this point
# (full disk, broken pipe, ...) means the emitted assembly is truncated, so
# abort with a diagnostic instead of letting the build continue silently.
close STDOUT or die "error closing STDOUT: $!";