diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib/libcrypto/aes/asm/aes-armv4.pl | 182 |
1 files changed, 143 insertions, 39 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-armv4.pl b/src/lib/libcrypto/aes/asm/aes-armv4.pl index c51ee1fbf6..86b86c4a0f 100644 --- a/src/lib/libcrypto/aes/asm/aes-armv4.pl +++ b/src/lib/libcrypto/aes/asm/aes-armv4.pl | |||
@@ -27,6 +27,11 @@ | |||
27 | # Rescheduling for dual-issue pipeline resulted in 12% improvement on | 27 | # Rescheduling for dual-issue pipeline resulted in 12% improvement on |
28 | # Cortex A8 core and ~25 cycles per byte processed with 128-bit key. | 28 | # Cortex A8 core and ~25 cycles per byte processed with 128-bit key. |
29 | 29 | ||
30 | # February 2011. | ||
31 | # | ||
32 | # Profiler-assisted and platform-specific optimization resulted in 16% | ||
33 | # improvement on Cortex A8 core and ~21.5 cycles per byte. | ||
34 | |||
30 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} | 35 | while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} |
31 | open STDOUT,">$output"; | 36 | open STDOUT,">$output"; |
32 | 37 | ||
@@ -46,6 +51,7 @@ $key="r11"; | |||
46 | $rounds="r12"; | 51 | $rounds="r12"; |
47 | 52 | ||
48 | $code=<<___; | 53 | $code=<<___; |
54 | #include "arm_arch.h" | ||
49 | .text | 55 | .text |
50 | .code 32 | 56 | .code 32 |
51 | 57 | ||
@@ -166,7 +172,7 @@ AES_encrypt: | |||
166 | mov $rounds,r0 @ inp | 172 | mov $rounds,r0 @ inp |
167 | mov $key,r2 | 173 | mov $key,r2 |
168 | sub $tbl,r3,#AES_encrypt-AES_Te @ Te | 174 | sub $tbl,r3,#AES_encrypt-AES_Te @ Te |
169 | 175 | #if __ARM_ARCH__<7 | |
170 | ldrb $s0,[$rounds,#3] @ load input data in endian-neutral | 176 | ldrb $s0,[$rounds,#3] @ load input data in endian-neutral |
171 | ldrb $t1,[$rounds,#2] @ manner... | 177 | ldrb $t1,[$rounds,#2] @ manner... |
172 | ldrb $t2,[$rounds,#1] | 178 | ldrb $t2,[$rounds,#1] |
@@ -195,10 +201,33 @@ AES_encrypt: | |||
195 | orr $s3,$s3,$t1,lsl#8 | 201 | orr $s3,$s3,$t1,lsl#8 |
196 | orr $s3,$s3,$t2,lsl#16 | 202 | orr $s3,$s3,$t2,lsl#16 |
197 | orr $s3,$s3,$t3,lsl#24 | 203 | orr $s3,$s3,$t3,lsl#24 |
198 | 204 | #else | |
205 | ldr $s0,[$rounds,#0] | ||
206 | ldr $s1,[$rounds,#4] | ||
207 | ldr $s2,[$rounds,#8] | ||
208 | ldr $s3,[$rounds,#12] | ||
209 | #ifdef __ARMEL__ | ||
210 | rev $s0,$s0 | ||
211 | rev $s1,$s1 | ||
212 | rev $s2,$s2 | ||
213 | rev $s3,$s3 | ||
214 | #endif | ||
215 | #endif | ||
199 | bl _armv4_AES_encrypt | 216 | bl _armv4_AES_encrypt |
200 | 217 | ||
201 | ldr $rounds,[sp],#4 @ pop out | 218 | ldr $rounds,[sp],#4 @ pop out |
219 | #if __ARM_ARCH__>=7 | ||
220 | #ifdef __ARMEL__ | ||
221 | rev $s0,$s0 | ||
222 | rev $s1,$s1 | ||
223 | rev $s2,$s2 | ||
224 | rev $s3,$s3 | ||
225 | #endif | ||
226 | str $s0,[$rounds,#0] | ||
227 | str $s1,[$rounds,#4] | ||
228 | str $s2,[$rounds,#8] | ||
229 | str $s3,[$rounds,#12] | ||
230 | #else | ||
202 | mov $t1,$s0,lsr#24 @ write output in endian-neutral | 231 | mov $t1,$s0,lsr#24 @ write output in endian-neutral |
203 | mov $t2,$s0,lsr#16 @ manner... | 232 | mov $t2,$s0,lsr#16 @ manner... |
204 | mov $t3,$s0,lsr#8 | 233 | mov $t3,$s0,lsr#8 |
@@ -227,11 +256,15 @@ AES_encrypt: | |||
227 | strb $t2,[$rounds,#13] | 256 | strb $t2,[$rounds,#13] |
228 | strb $t3,[$rounds,#14] | 257 | strb $t3,[$rounds,#14] |
229 | strb $s3,[$rounds,#15] | 258 | strb $s3,[$rounds,#15] |
230 | 259 | #endif | |
260 | #if __ARM_ARCH__>=5 | ||
261 | ldmia sp!,{r4-r12,pc} | ||
262 | #else | ||
231 | ldmia sp!,{r4-r12,lr} | 263 | ldmia sp!,{r4-r12,lr} |
232 | tst lr,#1 | 264 | tst lr,#1 |
233 | moveq pc,lr @ be binary compatible with V4, yet | 265 | moveq pc,lr @ be binary compatible with V4, yet |
234 | bx lr @ interoperable with Thumb ISA:-) | 266 | bx lr @ interoperable with Thumb ISA:-) |
267 | #endif | ||
235 | .size AES_encrypt,.-AES_encrypt | 268 | .size AES_encrypt,.-AES_encrypt |
236 | 269 | ||
237 | .type _armv4_AES_encrypt,%function | 270 | .type _armv4_AES_encrypt,%function |
@@ -271,11 +304,11 @@ _armv4_AES_encrypt: | |||
271 | and $i2,lr,$s2,lsr#16 @ i1 | 304 | and $i2,lr,$s2,lsr#16 @ i1 |
272 | eor $t3,$t3,$i3,ror#8 | 305 | eor $t3,$t3,$i3,ror#8 |
273 | and $i3,lr,$s2 | 306 | and $i3,lr,$s2 |
274 | eor $s1,$s1,$t1,ror#24 | ||
275 | ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] | 307 | ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8] |
308 | eor $s1,$s1,$t1,ror#24 | ||
309 | ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] | ||
276 | mov $s2,$s2,lsr#24 | 310 | mov $s2,$s2,lsr#24 |
277 | 311 | ||
278 | ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16] | ||
279 | ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] | 312 | ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0] |
280 | eor $s0,$s0,$i1,ror#16 | 313 | eor $s0,$s0,$i1,ror#16 |
281 | ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] | 314 | ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24] |
@@ -284,16 +317,16 @@ _armv4_AES_encrypt: | |||
284 | and $i2,lr,$s3,lsr#8 @ i1 | 317 | and $i2,lr,$s3,lsr#8 @ i1 |
285 | eor $t3,$t3,$i3,ror#16 | 318 | eor $t3,$t3,$i3,ror#16 |
286 | and $i3,lr,$s3,lsr#16 @ i2 | 319 | and $i3,lr,$s3,lsr#16 @ i2 |
287 | eor $s2,$s2,$t2,ror#16 | ||
288 | ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] | 320 | ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0] |
321 | eor $s2,$s2,$t2,ror#16 | ||
322 | ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] | ||
289 | mov $s3,$s3,lsr#24 | 323 | mov $s3,$s3,lsr#24 |
290 | 324 | ||
291 | ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8] | ||
292 | ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] | 325 | ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16] |
293 | eor $s0,$s0,$i1,ror#24 | 326 | eor $s0,$s0,$i1,ror#24 |
294 | ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] | ||
295 | eor $s1,$s1,$i2,ror#16 | ||
296 | ldr $i1,[$key],#16 | 327 | ldr $i1,[$key],#16 |
328 | eor $s1,$s1,$i2,ror#16 | ||
329 | ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24] | ||
297 | eor $s2,$s2,$i3,ror#8 | 330 | eor $s2,$s2,$i3,ror#8 |
298 | ldr $t1,[$key,#-12] | 331 | ldr $t1,[$key,#-12] |
299 | eor $s3,$s3,$t3,ror#8 | 332 | eor $s3,$s3,$t3,ror#8 |
@@ -333,11 +366,11 @@ _armv4_AES_encrypt: | |||
333 | and $i2,lr,$s2,lsr#16 @ i1 | 366 | and $i2,lr,$s2,lsr#16 @ i1 |
334 | eor $t3,$i3,$t3,lsl#8 | 367 | eor $t3,$i3,$t3,lsl#8 |
335 | and $i3,lr,$s2 | 368 | and $i3,lr,$s2 |
336 | eor $s1,$t1,$s1,lsl#24 | ||
337 | ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] | 369 | ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8] |
370 | eor $s1,$t1,$s1,lsl#24 | ||
371 | ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] | ||
338 | mov $s2,$s2,lsr#24 | 372 | mov $s2,$s2,lsr#24 |
339 | 373 | ||
340 | ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16] | ||
341 | ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] | 374 | ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0] |
342 | eor $s0,$i1,$s0,lsl#8 | 375 | eor $s0,$i1,$s0,lsl#8 |
343 | ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] | 376 | ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24] |
@@ -346,15 +379,15 @@ _armv4_AES_encrypt: | |||
346 | and $i2,lr,$s3,lsr#8 @ i1 | 379 | and $i2,lr,$s3,lsr#8 @ i1 |
347 | eor $t3,$i3,$t3,lsl#8 | 380 | eor $t3,$i3,$t3,lsl#8 |
348 | and $i3,lr,$s3,lsr#16 @ i2 | 381 | and $i3,lr,$s3,lsr#16 @ i2 |
349 | eor $s2,$t2,$s2,lsl#24 | ||
350 | ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] | 382 | ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0] |
383 | eor $s2,$t2,$s2,lsl#24 | ||
384 | ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] | ||
351 | mov $s3,$s3,lsr#24 | 385 | mov $s3,$s3,lsr#24 |
352 | 386 | ||
353 | ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8] | ||
354 | ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] | 387 | ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16] |
355 | eor $s0,$i1,$s0,lsl#8 | 388 | eor $s0,$i1,$s0,lsl#8 |
356 | ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] | ||
357 | ldr $i1,[$key,#0] | 389 | ldr $i1,[$key,#0] |
390 | ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24] | ||
358 | eor $s1,$s1,$i2,lsl#8 | 391 | eor $s1,$s1,$i2,lsl#8 |
359 | ldr $t1,[$key,#4] | 392 | ldr $t1,[$key,#4] |
360 | eor $s2,$s2,$i3,lsl#16 | 393 | eor $s2,$s2,$i3,lsl#16 |
@@ -371,10 +404,11 @@ _armv4_AES_encrypt: | |||
371 | ldr pc,[sp],#4 @ pop and return | 404 | ldr pc,[sp],#4 @ pop and return |
372 | .size _armv4_AES_encrypt,.-_armv4_AES_encrypt | 405 | .size _armv4_AES_encrypt,.-_armv4_AES_encrypt |
373 | 406 | ||
374 | .global AES_set_encrypt_key | 407 | .global private_AES_set_encrypt_key |
375 | .type AES_set_encrypt_key,%function | 408 | .type private_AES_set_encrypt_key,%function |
376 | .align 5 | 409 | .align 5 |
377 | AES_set_encrypt_key: | 410 | private_AES_set_encrypt_key: |
411 | _armv4_AES_set_encrypt_key: | ||
378 | sub r3,pc,#8 @ AES_set_encrypt_key | 412 | sub r3,pc,#8 @ AES_set_encrypt_key |
379 | teq r0,#0 | 413 | teq r0,#0 |
380 | moveq r0,#-1 | 414 | moveq r0,#-1 |
@@ -392,12 +426,13 @@ AES_set_encrypt_key: | |||
392 | bne .Labrt | 426 | bne .Labrt |
393 | 427 | ||
394 | .Lok: stmdb sp!,{r4-r12,lr} | 428 | .Lok: stmdb sp!,{r4-r12,lr} |
395 | sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4 | 429 | sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 |
396 | 430 | ||
397 | mov $rounds,r0 @ inp | 431 | mov $rounds,r0 @ inp |
398 | mov lr,r1 @ bits | 432 | mov lr,r1 @ bits |
399 | mov $key,r2 @ key | 433 | mov $key,r2 @ key |
400 | 434 | ||
435 | #if __ARM_ARCH__<7 | ||
401 | ldrb $s0,[$rounds,#3] @ load input data in endian-neutral | 436 | ldrb $s0,[$rounds,#3] @ load input data in endian-neutral |
402 | ldrb $t1,[$rounds,#2] @ manner... | 437 | ldrb $t1,[$rounds,#2] @ manner... |
403 | ldrb $t2,[$rounds,#1] | 438 | ldrb $t2,[$rounds,#1] |
@@ -430,6 +465,22 @@ AES_set_encrypt_key: | |||
430 | orr $s3,$s3,$t3,lsl#24 | 465 | orr $s3,$s3,$t3,lsl#24 |
431 | str $s2,[$key,#-8] | 466 | str $s2,[$key,#-8] |
432 | str $s3,[$key,#-4] | 467 | str $s3,[$key,#-4] |
468 | #else | ||
469 | ldr $s0,[$rounds,#0] | ||
470 | ldr $s1,[$rounds,#4] | ||
471 | ldr $s2,[$rounds,#8] | ||
472 | ldr $s3,[$rounds,#12] | ||
473 | #ifdef __ARMEL__ | ||
474 | rev $s0,$s0 | ||
475 | rev $s1,$s1 | ||
476 | rev $s2,$s2 | ||
477 | rev $s3,$s3 | ||
478 | #endif | ||
479 | str $s0,[$key],#16 | ||
480 | str $s1,[$key,#-12] | ||
481 | str $s2,[$key,#-8] | ||
482 | str $s3,[$key,#-4] | ||
483 | #endif | ||
433 | 484 | ||
434 | teq lr,#128 | 485 | teq lr,#128 |
435 | bne .Lnot128 | 486 | bne .Lnot128 |
@@ -466,6 +517,7 @@ AES_set_encrypt_key: | |||
466 | b .Ldone | 517 | b .Ldone |
467 | 518 | ||
468 | .Lnot128: | 519 | .Lnot128: |
520 | #if __ARM_ARCH__<7 | ||
469 | ldrb $i2,[$rounds,#19] | 521 | ldrb $i2,[$rounds,#19] |
470 | ldrb $t1,[$rounds,#18] | 522 | ldrb $t1,[$rounds,#18] |
471 | ldrb $t2,[$rounds,#17] | 523 | ldrb $t2,[$rounds,#17] |
@@ -482,6 +534,16 @@ AES_set_encrypt_key: | |||
482 | str $i2,[$key],#8 | 534 | str $i2,[$key],#8 |
483 | orr $i3,$i3,$t3,lsl#24 | 535 | orr $i3,$i3,$t3,lsl#24 |
484 | str $i3,[$key,#-4] | 536 | str $i3,[$key,#-4] |
537 | #else | ||
538 | ldr $i2,[$rounds,#16] | ||
539 | ldr $i3,[$rounds,#20] | ||
540 | #ifdef __ARMEL__ | ||
541 | rev $i2,$i2 | ||
542 | rev $i3,$i3 | ||
543 | #endif | ||
544 | str $i2,[$key],#8 | ||
545 | str $i3,[$key,#-4] | ||
546 | #endif | ||
485 | 547 | ||
486 | teq lr,#192 | 548 | teq lr,#192 |
487 | bne .Lnot192 | 549 | bne .Lnot192 |
@@ -526,6 +588,7 @@ AES_set_encrypt_key: | |||
526 | b .L192_loop | 588 | b .L192_loop |
527 | 589 | ||
528 | .Lnot192: | 590 | .Lnot192: |
591 | #if __ARM_ARCH__<7 | ||
529 | ldrb $i2,[$rounds,#27] | 592 | ldrb $i2,[$rounds,#27] |
530 | ldrb $t1,[$rounds,#26] | 593 | ldrb $t1,[$rounds,#26] |
531 | ldrb $t2,[$rounds,#25] | 594 | ldrb $t2,[$rounds,#25] |
@@ -542,6 +605,16 @@ AES_set_encrypt_key: | |||
542 | str $i2,[$key],#8 | 605 | str $i2,[$key],#8 |
543 | orr $i3,$i3,$t3,lsl#24 | 606 | orr $i3,$i3,$t3,lsl#24 |
544 | str $i3,[$key,#-4] | 607 | str $i3,[$key,#-4] |
608 | #else | ||
609 | ldr $i2,[$rounds,#24] | ||
610 | ldr $i3,[$rounds,#28] | ||
611 | #ifdef __ARMEL__ | ||
612 | rev $i2,$i2 | ||
613 | rev $i3,$i3 | ||
614 | #endif | ||
615 | str $i2,[$key],#8 | ||
616 | str $i3,[$key,#-4] | ||
617 | #endif | ||
545 | 618 | ||
546 | mov $rounds,#14 | 619 | mov $rounds,#14 |
547 | str $rounds,[$key,#240-32] | 620 | str $rounds,[$key,#240-32] |
@@ -606,14 +679,14 @@ AES_set_encrypt_key: | |||
606 | .Labrt: tst lr,#1 | 679 | .Labrt: tst lr,#1 |
607 | moveq pc,lr @ be binary compatible with V4, yet | 680 | moveq pc,lr @ be binary compatible with V4, yet |
608 | bx lr @ interoperable with Thumb ISA:-) | 681 | bx lr @ interoperable with Thumb ISA:-) |
609 | .size AES_set_encrypt_key,.-AES_set_encrypt_key | 682 | .size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key |
610 | 683 | ||
611 | .global AES_set_decrypt_key | 684 | .global private_AES_set_decrypt_key |
612 | .type AES_set_decrypt_key,%function | 685 | .type private_AES_set_decrypt_key,%function |
613 | .align 5 | 686 | .align 5 |
614 | AES_set_decrypt_key: | 687 | private_AES_set_decrypt_key: |
615 | str lr,[sp,#-4]! @ push lr | 688 | str lr,[sp,#-4]! @ push lr |
616 | bl AES_set_encrypt_key | 689 | bl _armv4_AES_set_encrypt_key |
617 | teq r0,#0 | 690 | teq r0,#0 |
618 | ldrne lr,[sp],#4 @ pop lr | 691 | ldrne lr,[sp],#4 @ pop lr |
619 | bne .Labrt | 692 | bne .Labrt |
@@ -692,11 +765,15 @@ $code.=<<___; | |||
692 | bne .Lmix | 765 | bne .Lmix |
693 | 766 | ||
694 | mov r0,#0 | 767 | mov r0,#0 |
768 | #if __ARM_ARCH__>=5 | ||
769 | ldmia sp!,{r4-r12,pc} | ||
770 | #else | ||
695 | ldmia sp!,{r4-r12,lr} | 771 | ldmia sp!,{r4-r12,lr} |
696 | tst lr,#1 | 772 | tst lr,#1 |
697 | moveq pc,lr @ be binary compatible with V4, yet | 773 | moveq pc,lr @ be binary compatible with V4, yet |
698 | bx lr @ interoperable with Thumb ISA:-) | 774 | bx lr @ interoperable with Thumb ISA:-) |
699 | .size AES_set_decrypt_key,.-AES_set_decrypt_key | 775 | #endif |
776 | .size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key | ||
700 | 777 | ||
701 | .type AES_Td,%object | 778 | .type AES_Td,%object |
702 | .align 5 | 779 | .align 5 |
@@ -811,7 +888,7 @@ AES_decrypt: | |||
811 | mov $rounds,r0 @ inp | 888 | mov $rounds,r0 @ inp |
812 | mov $key,r2 | 889 | mov $key,r2 |
813 | sub $tbl,r3,#AES_decrypt-AES_Td @ Td | 890 | sub $tbl,r3,#AES_decrypt-AES_Td @ Td |
814 | 891 | #if __ARM_ARCH__<7 | |
815 | ldrb $s0,[$rounds,#3] @ load input data in endian-neutral | 892 | ldrb $s0,[$rounds,#3] @ load input data in endian-neutral |
816 | ldrb $t1,[$rounds,#2] @ manner... | 893 | ldrb $t1,[$rounds,#2] @ manner... |
817 | ldrb $t2,[$rounds,#1] | 894 | ldrb $t2,[$rounds,#1] |
@@ -840,10 +917,33 @@ AES_decrypt: | |||
840 | orr $s3,$s3,$t1,lsl#8 | 917 | orr $s3,$s3,$t1,lsl#8 |
841 | orr $s3,$s3,$t2,lsl#16 | 918 | orr $s3,$s3,$t2,lsl#16 |
842 | orr $s3,$s3,$t3,lsl#24 | 919 | orr $s3,$s3,$t3,lsl#24 |
843 | 920 | #else | |
921 | ldr $s0,[$rounds,#0] | ||
922 | ldr $s1,[$rounds,#4] | ||
923 | ldr $s2,[$rounds,#8] | ||
924 | ldr $s3,[$rounds,#12] | ||
925 | #ifdef __ARMEL__ | ||
926 | rev $s0,$s0 | ||
927 | rev $s1,$s1 | ||
928 | rev $s2,$s2 | ||
929 | rev $s3,$s3 | ||
930 | #endif | ||
931 | #endif | ||
844 | bl _armv4_AES_decrypt | 932 | bl _armv4_AES_decrypt |
845 | 933 | ||
846 | ldr $rounds,[sp],#4 @ pop out | 934 | ldr $rounds,[sp],#4 @ pop out |
935 | #if __ARM_ARCH__>=7 | ||
936 | #ifdef __ARMEL__ | ||
937 | rev $s0,$s0 | ||
938 | rev $s1,$s1 | ||
939 | rev $s2,$s2 | ||
940 | rev $s3,$s3 | ||
941 | #endif | ||
942 | str $s0,[$rounds,#0] | ||
943 | str $s1,[$rounds,#4] | ||
944 | str $s2,[$rounds,#8] | ||
945 | str $s3,[$rounds,#12] | ||
946 | #else | ||
847 | mov $t1,$s0,lsr#24 @ write output in endian-neutral | 947 | mov $t1,$s0,lsr#24 @ write output in endian-neutral |
848 | mov $t2,$s0,lsr#16 @ manner... | 948 | mov $t2,$s0,lsr#16 @ manner... |
849 | mov $t3,$s0,lsr#8 | 949 | mov $t3,$s0,lsr#8 |
@@ -872,11 +972,15 @@ AES_decrypt: | |||
872 | strb $t2,[$rounds,#13] | 972 | strb $t2,[$rounds,#13] |
873 | strb $t3,[$rounds,#14] | 973 | strb $t3,[$rounds,#14] |
874 | strb $s3,[$rounds,#15] | 974 | strb $s3,[$rounds,#15] |
875 | 975 | #endif | |
976 | #if __ARM_ARCH__>=5 | ||
977 | ldmia sp!,{r4-r12,pc} | ||
978 | #else | ||
876 | ldmia sp!,{r4-r12,lr} | 979 | ldmia sp!,{r4-r12,lr} |
877 | tst lr,#1 | 980 | tst lr,#1 |
878 | moveq pc,lr @ be binary compatible with V4, yet | 981 | moveq pc,lr @ be binary compatible with V4, yet |
879 | bx lr @ interoperable with Thumb ISA:-) | 982 | bx lr @ interoperable with Thumb ISA:-) |
983 | #endif | ||
880 | .size AES_decrypt,.-AES_decrypt | 984 | .size AES_decrypt,.-AES_decrypt |
881 | 985 | ||
882 | .type _armv4_AES_decrypt,%function | 986 | .type _armv4_AES_decrypt,%function |
@@ -916,11 +1020,11 @@ _armv4_AES_decrypt: | |||
916 | and $i2,lr,$s2 @ i1 | 1020 | and $i2,lr,$s2 @ i1 |
917 | eor $t3,$i3,$t3,ror#8 | 1021 | eor $t3,$i3,$t3,ror#8 |
918 | and $i3,lr,$s2,lsr#16 | 1022 | and $i3,lr,$s2,lsr#16 |
919 | eor $s1,$s1,$t1,ror#8 | ||
920 | ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] | 1023 | ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8] |
1024 | eor $s1,$s1,$t1,ror#8 | ||
1025 | ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] | ||
921 | mov $s2,$s2,lsr#24 | 1026 | mov $s2,$s2,lsr#24 |
922 | 1027 | ||
923 | ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0] | ||
924 | ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] | 1028 | ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16] |
925 | eor $s0,$s0,$i1,ror#16 | 1029 | eor $s0,$s0,$i1,ror#16 |
926 | ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] | 1030 | ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24] |
@@ -929,22 +1033,22 @@ _armv4_AES_decrypt: | |||
929 | and $i2,lr,$s3,lsr#8 @ i1 | 1033 | and $i2,lr,$s3,lsr#8 @ i1 |
930 | eor $t3,$i3,$t3,ror#8 | 1034 | eor $t3,$i3,$t3,ror#8 |
931 | and $i3,lr,$s3 @ i2 | 1035 | and $i3,lr,$s3 @ i2 |
932 | eor $s2,$s2,$t2,ror#8 | ||
933 | ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] | 1036 | ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16] |
1037 | eor $s2,$s2,$t2,ror#8 | ||
1038 | ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] | ||
934 | mov $s3,$s3,lsr#24 | 1039 | mov $s3,$s3,lsr#24 |
935 | 1040 | ||
936 | ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8] | ||
937 | ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] | 1041 | ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0] |
938 | eor $s0,$s0,$i1,ror#8 | 1042 | eor $s0,$s0,$i1,ror#8 |
939 | ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] | 1043 | ldr $i1,[$key],#16 |
940 | eor $s1,$s1,$i2,ror#16 | 1044 | eor $s1,$s1,$i2,ror#16 |
1045 | ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24] | ||
941 | eor $s2,$s2,$i3,ror#24 | 1046 | eor $s2,$s2,$i3,ror#24 |
942 | ldr $i1,[$key],#16 | ||
943 | eor $s3,$s3,$t3,ror#8 | ||
944 | 1047 | ||
945 | ldr $t1,[$key,#-12] | 1048 | ldr $t1,[$key,#-12] |
946 | ldr $t2,[$key,#-8] | ||
947 | eor $s0,$s0,$i1 | 1049 | eor $s0,$s0,$i1 |
1050 | ldr $t2,[$key,#-8] | ||
1051 | eor $s3,$s3,$t3,ror#8 | ||
948 | ldr $t3,[$key,#-4] | 1052 | ldr $t3,[$key,#-4] |
949 | and $i1,lr,$s0,lsr#16 | 1053 | and $i1,lr,$s0,lsr#16 |
950 | eor $s1,$s1,$t1 | 1054 | eor $s1,$s1,$t1 |
@@ -985,11 +1089,11 @@ _armv4_AES_decrypt: | |||
985 | and $i1,lr,$s2,lsr#8 @ i0 | 1089 | and $i1,lr,$s2,lsr#8 @ i0 |
986 | eor $t2,$t2,$i2,lsl#8 | 1090 | eor $t2,$t2,$i2,lsl#8 |
987 | and $i2,lr,$s2 @ i1 | 1091 | and $i2,lr,$s2 @ i1 |
988 | eor $t3,$t3,$i3,lsl#8 | ||
989 | ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] | 1092 | ldrb $i1,[$tbl,$i1] @ Td4[s2>>8] |
1093 | eor $t3,$t3,$i3,lsl#8 | ||
1094 | ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] | ||
990 | and $i3,lr,$s2,lsr#16 | 1095 | and $i3,lr,$s2,lsr#16 |
991 | 1096 | ||
992 | ldrb $i2,[$tbl,$i2] @ Td4[s2>>0] | ||
993 | ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] | 1097 | ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24] |
994 | eor $s0,$s0,$i1,lsl#8 | 1098 | eor $s0,$s0,$i1,lsl#8 |
995 | ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] | 1099 | ldrb $i3,[$tbl,$i3] @ Td4[s2>>16] |
@@ -997,11 +1101,11 @@ _armv4_AES_decrypt: | |||
997 | and $i1,lr,$s3,lsr#16 @ i0 | 1101 | and $i1,lr,$s3,lsr#16 @ i0 |
998 | eor $s2,$t2,$s2,lsl#16 | 1102 | eor $s2,$t2,$s2,lsl#16 |
999 | and $i2,lr,$s3,lsr#8 @ i1 | 1103 | and $i2,lr,$s3,lsr#8 @ i1 |
1000 | eor $t3,$t3,$i3,lsl#16 | ||
1001 | ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] | 1104 | ldrb $i1,[$tbl,$i1] @ Td4[s3>>16] |
1105 | eor $t3,$t3,$i3,lsl#16 | ||
1106 | ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] | ||
1002 | and $i3,lr,$s3 @ i2 | 1107 | and $i3,lr,$s3 @ i2 |
1003 | 1108 | ||
1004 | ldrb $i2,[$tbl,$i2] @ Td4[s3>>8] | ||
1005 | ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] | 1109 | ldrb $i3,[$tbl,$i3] @ Td4[s3>>0] |
1006 | ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] | 1110 | ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24] |
1007 | eor $s0,$s0,$i1,lsl#16 | 1111 | eor $s0,$s0,$i1,lsl#16 |