diff options
Diffstat (limited to 'src/lib/libcrypto/aes')
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aes-mips.pl | 20 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aes-parisc.pl | 3 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aes-s390x.pl | 95 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | 3 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/bsaes-x86_64.pl | 76 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/vpaes-x86_64.pl | 5 |
6 files changed, 126 insertions, 76 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-mips.pl b/src/lib/libcrypto/aes/asm/aes-mips.pl index 2ce6deffc8..e52395421b 100644 --- a/src/lib/libcrypto/aes/asm/aes-mips.pl +++ b/src/lib/libcrypto/aes/asm/aes-mips.pl | |||
| @@ -1036,9 +1036,9 @@ _mips_AES_set_encrypt_key: | |||
| 1036 | nop | 1036 | nop |
| 1037 | .end _mips_AES_set_encrypt_key | 1037 | .end _mips_AES_set_encrypt_key |
| 1038 | 1038 | ||
| 1039 | .globl AES_set_encrypt_key | 1039 | .globl private_AES_set_encrypt_key |
| 1040 | .ent AES_set_encrypt_key | 1040 | .ent private_AES_set_encrypt_key |
| 1041 | AES_set_encrypt_key: | 1041 | private_AES_set_encrypt_key: |
| 1042 | .frame $sp,$FRAMESIZE,$ra | 1042 | .frame $sp,$FRAMESIZE,$ra |
| 1043 | .mask $SAVED_REGS_MASK,-$SZREG | 1043 | .mask $SAVED_REGS_MASK,-$SZREG |
| 1044 | .set noreorder | 1044 | .set noreorder |
| @@ -1060,7 +1060,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue | |||
| 1060 | ___ | 1060 | ___ |
| 1061 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification | 1061 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification |
| 1062 | .cplocal $Tbl | 1062 | .cplocal $Tbl |
| 1063 | .cpsetup $pf,$zero,AES_set_encrypt_key | 1063 | .cpsetup $pf,$zero,private_AES_set_encrypt_key |
| 1064 | ___ | 1064 | ___ |
| 1065 | $code.=<<___; | 1065 | $code.=<<___; |
| 1066 | .set reorder | 1066 | .set reorder |
| @@ -1083,7 +1083,7 @@ ___ | |||
| 1083 | $code.=<<___; | 1083 | $code.=<<___; |
| 1084 | jr $ra | 1084 | jr $ra |
| 1085 | $PTR_ADD $sp,$FRAMESIZE | 1085 | $PTR_ADD $sp,$FRAMESIZE |
| 1086 | .end AES_set_encrypt_key | 1086 | .end private_AES_set_encrypt_key |
| 1087 | ___ | 1087 | ___ |
| 1088 | 1088 | ||
| 1089 | my ($head,$tail)=($inp,$bits); | 1089 | my ($head,$tail)=($inp,$bits); |
| @@ -1091,9 +1091,9 @@ my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); | |||
| 1091 | my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); | 1091 | my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); |
| 1092 | $code.=<<___; | 1092 | $code.=<<___; |
| 1093 | .align 5 | 1093 | .align 5 |
| 1094 | .globl AES_set_decrypt_key | 1094 | .globl private_AES_set_decrypt_key |
| 1095 | .ent AES_set_decrypt_key | 1095 | .ent private_AES_set_decrypt_key |
| 1096 | AES_set_decrypt_key: | 1096 | private_AES_set_decrypt_key: |
| 1097 | .frame $sp,$FRAMESIZE,$ra | 1097 | .frame $sp,$FRAMESIZE,$ra |
| 1098 | .mask $SAVED_REGS_MASK,-$SZREG | 1098 | .mask $SAVED_REGS_MASK,-$SZREG |
| 1099 | .set noreorder | 1099 | .set noreorder |
| @@ -1115,7 +1115,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue | |||
| 1115 | ___ | 1115 | ___ |
| 1116 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification | 1116 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification |
| 1117 | .cplocal $Tbl | 1117 | .cplocal $Tbl |
| 1118 | .cpsetup $pf,$zero,AES_set_decrypt_key | 1118 | .cpsetup $pf,$zero,private_AES_set_decrypt_key |
| 1119 | ___ | 1119 | ___ |
| 1120 | $code.=<<___; | 1120 | $code.=<<___; |
| 1121 | .set reorder | 1121 | .set reorder |
| @@ -1226,7 +1226,7 @@ ___ | |||
| 1226 | $code.=<<___; | 1226 | $code.=<<___; |
| 1227 | jr $ra | 1227 | jr $ra |
| 1228 | $PTR_ADD $sp,$FRAMESIZE | 1228 | $PTR_ADD $sp,$FRAMESIZE |
| 1229 | .end AES_set_decrypt_key | 1229 | .end private_AES_set_decrypt_key |
| 1230 | ___ | 1230 | ___ |
| 1231 | }}} | 1231 | }}} |
| 1232 | 1232 | ||
diff --git a/src/lib/libcrypto/aes/asm/aes-parisc.pl b/src/lib/libcrypto/aes/asm/aes-parisc.pl index c36b6a2270..714dcfbbe3 100644 --- a/src/lib/libcrypto/aes/asm/aes-parisc.pl +++ b/src/lib/libcrypto/aes/asm/aes-parisc.pl | |||
| @@ -1015,7 +1015,8 @@ foreach (split("\n",$code)) { | |||
| 1015 | $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2) | 1015 | $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2) |
| 1016 | : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; | 1016 | : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; |
| 1017 | 1017 | ||
| 1018 | s/,\*/,/ if ($SIZE_T==4); | 1018 | s/,\*/,/ if ($SIZE_T==4); |
| 1019 | s/\bbv\b(.*\(%r2\))/bve$1/ if ($SIZE_T==8); | ||
| 1019 | print $_,"\n"; | 1020 | print $_,"\n"; |
| 1020 | } | 1021 | } |
| 1021 | close STDOUT; | 1022 | close STDOUT; |
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl index 445a1e6762..e75dcd0315 100644 --- a/src/lib/libcrypto/aes/asm/aes-s390x.pl +++ b/src/lib/libcrypto/aes/asm/aes-s390x.pl | |||
| @@ -1598,11 +1598,11 @@ $code.=<<___ if(1); | |||
| 1598 | lghi $s1,0x7f | 1598 | lghi $s1,0x7f |
| 1599 | nr $s1,%r0 | 1599 | nr $s1,%r0 |
| 1600 | lghi %r0,0 # query capability vector | 1600 | lghi %r0,0 # query capability vector |
| 1601 | la %r1,2*$SIZE_T($sp) | 1601 | la %r1,$tweak-16($sp) |
| 1602 | .long 0xb92e0042 # km %r4,%r2 | 1602 | .long 0xb92e0042 # km %r4,%r2 |
| 1603 | llihh %r1,0x8000 | 1603 | llihh %r1,0x8000 |
| 1604 | srlg %r1,%r1,32($s1) # check for 32+function code | 1604 | srlg %r1,%r1,32($s1) # check for 32+function code |
| 1605 | ng %r1,2*$SIZE_T($sp) | 1605 | ng %r1,$tweak-16($sp) |
| 1606 | lgr %r0,$s0 # restore the function code | 1606 | lgr %r0,$s0 # restore the function code |
| 1607 | la %r1,0($key1) # restore $key1 | 1607 | la %r1,0($key1) # restore $key1 |
| 1608 | jz .Lxts_km_vanilla | 1608 | jz .Lxts_km_vanilla |
| @@ -1628,7 +1628,7 @@ $code.=<<___ if(1); | |||
| 1628 | 1628 | ||
| 1629 | lrvg $s0,$tweak+0($sp) # load the last tweak | 1629 | lrvg $s0,$tweak+0($sp) # load the last tweak |
| 1630 | lrvg $s1,$tweak+8($sp) | 1630 | lrvg $s1,$tweak+8($sp) |
| 1631 | stmg %r0,%r3,$tweak-32(%r1) # wipe copy of the key | 1631 | stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key |
| 1632 | 1632 | ||
| 1633 | nill %r0,0xffdf # switch back to original function code | 1633 | nill %r0,0xffdf # switch back to original function code |
| 1634 | la %r1,0($key1) # restore pointer to $key1 | 1634 | la %r1,0($key1) # restore pointer to $key1 |
| @@ -1684,11 +1684,9 @@ $code.=<<___; | |||
| 1684 | lghi $i1,0x87 | 1684 | lghi $i1,0x87 |
| 1685 | srag $i2,$s1,63 # broadcast upper bit | 1685 | srag $i2,$s1,63 # broadcast upper bit |
| 1686 | ngr $i1,$i2 # rem | 1686 | ngr $i1,$i2 # rem |
| 1687 | srlg $i2,$s0,63 # carry bit from lower half | 1687 | algr $s0,$s0 |
| 1688 | sllg $s0,$s0,1 | 1688 | alcgr $s1,$s1 |
| 1689 | sllg $s1,$s1,1 | ||
| 1690 | xgr $s0,$i1 | 1689 | xgr $s0,$i1 |
| 1691 | ogr $s1,$i2 | ||
| 1692 | .Lxts_km_start: | 1690 | .Lxts_km_start: |
| 1693 | lrvgr $i1,$s0 # flip byte order | 1691 | lrvgr $i1,$s0 # flip byte order |
| 1694 | lrvgr $i2,$s1 | 1692 | lrvgr $i2,$s1 |
| @@ -1745,11 +1743,9 @@ $code.=<<___; | |||
| 1745 | lghi $i1,0x87 | 1743 | lghi $i1,0x87 |
| 1746 | srag $i2,$s1,63 # broadcast upper bit | 1744 | srag $i2,$s1,63 # broadcast upper bit |
| 1747 | ngr $i1,$i2 # rem | 1745 | ngr $i1,$i2 # rem |
| 1748 | srlg $i2,$s0,63 # carry bit from lower half | 1746 | algr $s0,$s0 |
| 1749 | sllg $s0,$s0,1 | 1747 | alcgr $s1,$s1 |
| 1750 | sllg $s1,$s1,1 | ||
| 1751 | xgr $s0,$i1 | 1748 | xgr $s0,$i1 |
| 1752 | ogr $s1,$i2 | ||
| 1753 | 1749 | ||
| 1754 | ltr $len,$len # clear zero flag | 1750 | ltr $len,$len # clear zero flag |
| 1755 | br $ra | 1751 | br $ra |
| @@ -1781,8 +1777,8 @@ $code.=<<___ if (!$softonly); | |||
| 1781 | clr %r0,%r1 | 1777 | clr %r0,%r1 |
| 1782 | jl .Lxts_enc_software | 1778 | jl .Lxts_enc_software |
| 1783 | 1779 | ||
| 1780 | st${g} $ra,5*$SIZE_T($sp) | ||
| 1784 | stm${g} %r6,$s3,6*$SIZE_T($sp) | 1781 | stm${g} %r6,$s3,6*$SIZE_T($sp) |
| 1785 | st${g} $ra,14*$SIZE_T($sp) | ||
| 1786 | 1782 | ||
| 1787 | sllg $len,$len,4 # $len&=~15 | 1783 | sllg $len,$len,4 # $len&=~15 |
| 1788 | slgr $out,$inp | 1784 | slgr $out,$inp |
| @@ -1830,9 +1826,9 @@ $code.=<<___ if (!$softonly); | |||
| 1830 | stg $i2,8($i3) | 1826 | stg $i2,8($i3) |
| 1831 | 1827 | ||
| 1832 | .Lxts_enc_km_done: | 1828 | .Lxts_enc_km_done: |
| 1833 | l${g} $ra,14*$SIZE_T($sp) | 1829 | stg $sp,$tweak+0($sp) # wipe tweak |
| 1834 | st${g} $sp,$tweak($sp) # wipe tweak | 1830 | stg $sp,$tweak+8($sp) |
| 1835 | st${g} $sp,$tweak($sp) | 1831 | l${g} $ra,5*$SIZE_T($sp) |
| 1836 | lm${g} %r6,$s3,6*$SIZE_T($sp) | 1832 | lm${g} %r6,$s3,6*$SIZE_T($sp) |
| 1837 | br $ra | 1833 | br $ra |
| 1838 | .align 16 | 1834 | .align 16 |
| @@ -1843,12 +1839,11 @@ $code.=<<___; | |||
| 1843 | 1839 | ||
| 1844 | slgr $out,$inp | 1840 | slgr $out,$inp |
| 1845 | 1841 | ||
| 1846 | xgr $s0,$s0 # clear upper half | 1842 | l${g} $s3,$stdframe($sp) # ivp |
| 1847 | xgr $s1,$s1 | 1843 | llgf $s0,0($s3) # load iv |
| 1848 | lrv $s0,$stdframe+4($sp) # load secno | 1844 | llgf $s1,4($s3) |
| 1849 | lrv $s1,$stdframe+0($sp) | 1845 | llgf $s2,8($s3) |
| 1850 | xgr $s2,$s2 | 1846 | llgf $s3,12($s3) |
| 1851 | xgr $s3,$s3 | ||
| 1852 | stm${g} %r2,%r5,2*$SIZE_T($sp) | 1847 | stm${g} %r2,%r5,2*$SIZE_T($sp) |
| 1853 | la $key,0($key2) | 1848 | la $key,0($key2) |
| 1854 | larl $tbl,AES_Te | 1849 | larl $tbl,AES_Te |
| @@ -1864,11 +1859,9 @@ $code.=<<___; | |||
| 1864 | lghi %r1,0x87 | 1859 | lghi %r1,0x87 |
| 1865 | srag %r0,$s3,63 # broadcast upper bit | 1860 | srag %r0,$s3,63 # broadcast upper bit |
| 1866 | ngr %r1,%r0 # rem | 1861 | ngr %r1,%r0 # rem |
| 1867 | srlg %r0,$s1,63 # carry bit from lower half | 1862 | algr $s1,$s1 |
| 1868 | sllg $s1,$s1,1 | 1863 | alcgr $s3,$s3 |
| 1869 | sllg $s3,$s3,1 | ||
| 1870 | xgr $s1,%r1 | 1864 | xgr $s1,%r1 |
| 1871 | ogr $s3,%r0 | ||
| 1872 | lrvgr $s1,$s1 # flip byte order | 1865 | lrvgr $s1,$s1 # flip byte order |
| 1873 | lrvgr $s3,$s3 | 1866 | lrvgr $s3,$s3 |
| 1874 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 1867 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
| @@ -1917,11 +1910,9 @@ $code.=<<___; | |||
| 1917 | lghi %r1,0x87 | 1910 | lghi %r1,0x87 |
| 1918 | srag %r0,$s3,63 # broadcast upper bit | 1911 | srag %r0,$s3,63 # broadcast upper bit |
| 1919 | ngr %r1,%r0 # rem | 1912 | ngr %r1,%r0 # rem |
| 1920 | srlg %r0,$s1,63 # carry bit from lower half | 1913 | algr $s1,$s1 |
| 1921 | sllg $s1,$s1,1 | 1914 | alcgr $s3,$s3 |
| 1922 | sllg $s3,$s3,1 | ||
| 1923 | xgr $s1,%r1 | 1915 | xgr $s1,%r1 |
| 1924 | ogr $s3,%r0 | ||
| 1925 | lrvgr $s1,$s1 # flip byte order | 1916 | lrvgr $s1,$s1 # flip byte order |
| 1926 | lrvgr $s3,$s3 | 1917 | lrvgr $s3,$s3 |
| 1927 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 1918 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
| @@ -1956,7 +1947,8 @@ $code.=<<___; | |||
| 1956 | .size AES_xts_encrypt,.-AES_xts_encrypt | 1947 | .size AES_xts_encrypt,.-AES_xts_encrypt |
| 1957 | ___ | 1948 | ___ |
| 1958 | # void AES_xts_decrypt(const char *inp,char *out,size_t len, | 1949 | # void AES_xts_decrypt(const char *inp,char *out,size_t len, |
| 1959 | # const AES_KEY *key1, const AES_KEY *key2,u64 secno); | 1950 | # const AES_KEY *key1, const AES_KEY *key2, |
| 1951 | # const unsigned char iv[16]); | ||
| 1960 | # | 1952 | # |
| 1961 | $code.=<<___; | 1953 | $code.=<<___; |
| 1962 | .globl AES_xts_decrypt | 1954 | .globl AES_xts_decrypt |
| @@ -1988,8 +1980,8 @@ $code.=<<___ if (!$softonly); | |||
| 1988 | clr %r0,%r1 | 1980 | clr %r0,%r1 |
| 1989 | jl .Lxts_dec_software | 1981 | jl .Lxts_dec_software |
| 1990 | 1982 | ||
| 1983 | st${g} $ra,5*$SIZE_T($sp) | ||
| 1991 | stm${g} %r6,$s3,6*$SIZE_T($sp) | 1984 | stm${g} %r6,$s3,6*$SIZE_T($sp) |
| 1992 | st${g} $ra,14*$SIZE_T($sp) | ||
| 1993 | 1985 | ||
| 1994 | nill $len,0xfff0 # $len&=~15 | 1986 | nill $len,0xfff0 # $len&=~15 |
| 1995 | slgr $out,$inp | 1987 | slgr $out,$inp |
| @@ -2028,11 +2020,9 @@ $code.=<<___ if (!$softonly); | |||
| 2028 | lghi $i1,0x87 | 2020 | lghi $i1,0x87 |
| 2029 | srag $i2,$s1,63 # broadcast upper bit | 2021 | srag $i2,$s1,63 # broadcast upper bit |
| 2030 | ngr $i1,$i2 # rem | 2022 | ngr $i1,$i2 # rem |
| 2031 | srlg $i2,$s0,63 # carry bit from lower half | 2023 | algr $s0,$s0 |
| 2032 | sllg $s0,$s0,1 | 2024 | alcgr $s1,$s1 |
| 2033 | sllg $s1,$s1,1 | ||
| 2034 | xgr $s0,$i1 | 2025 | xgr $s0,$i1 |
| 2035 | ogr $s1,$i2 | ||
| 2036 | lrvgr $i1,$s0 # flip byte order | 2026 | lrvgr $i1,$s0 # flip byte order |
| 2037 | lrvgr $i2,$s1 | 2027 | lrvgr $i2,$s1 |
| 2038 | 2028 | ||
| @@ -2075,9 +2065,9 @@ $code.=<<___ if (!$softonly); | |||
| 2075 | stg $s2,0($i3) | 2065 | stg $s2,0($i3) |
| 2076 | stg $s3,8($i3) | 2066 | stg $s3,8($i3) |
| 2077 | .Lxts_dec_km_done: | 2067 | .Lxts_dec_km_done: |
| 2078 | l${g} $ra,14*$SIZE_T($sp) | 2068 | stg $sp,$tweak+0($sp) # wipe tweak |
| 2079 | st${g} $sp,$tweak($sp) # wipe tweak | 2069 | stg $sp,$tweak+8($sp) |
| 2080 | st${g} $sp,$tweak($sp) | 2070 | l${g} $ra,5*$SIZE_T($sp) |
| 2081 | lm${g} %r6,$s3,6*$SIZE_T($sp) | 2071 | lm${g} %r6,$s3,6*$SIZE_T($sp) |
| 2082 | br $ra | 2072 | br $ra |
| 2083 | .align 16 | 2073 | .align 16 |
| @@ -2089,12 +2079,11 @@ $code.=<<___; | |||
| 2089 | srlg $len,$len,4 | 2079 | srlg $len,$len,4 |
| 2090 | slgr $out,$inp | 2080 | slgr $out,$inp |
| 2091 | 2081 | ||
| 2092 | xgr $s0,$s0 # clear upper half | 2082 | l${g} $s3,$stdframe($sp) # ivp |
| 2093 | xgr $s1,$s1 | 2083 | llgf $s0,0($s3) # load iv |
| 2094 | lrv $s0,$stdframe+4($sp) # load secno | 2084 | llgf $s1,4($s3) |
| 2095 | lrv $s1,$stdframe+0($sp) | 2085 | llgf $s2,8($s3) |
| 2096 | xgr $s2,$s2 | 2086 | llgf $s3,12($s3) |
| 2097 | xgr $s3,$s3 | ||
| 2098 | stm${g} %r2,%r5,2*$SIZE_T($sp) | 2087 | stm${g} %r2,%r5,2*$SIZE_T($sp) |
| 2099 | la $key,0($key2) | 2088 | la $key,0($key2) |
| 2100 | larl $tbl,AES_Te | 2089 | larl $tbl,AES_Te |
| @@ -2113,11 +2102,9 @@ $code.=<<___; | |||
| 2113 | lghi %r1,0x87 | 2102 | lghi %r1,0x87 |
| 2114 | srag %r0,$s3,63 # broadcast upper bit | 2103 | srag %r0,$s3,63 # broadcast upper bit |
| 2115 | ngr %r1,%r0 # rem | 2104 | ngr %r1,%r0 # rem |
| 2116 | srlg %r0,$s1,63 # carry bit from lower half | 2105 | algr $s1,$s1 |
| 2117 | sllg $s1,$s1,1 | 2106 | alcgr $s3,$s3 |
| 2118 | sllg $s3,$s3,1 | ||
| 2119 | xgr $s1,%r1 | 2107 | xgr $s1,%r1 |
| 2120 | ogr $s3,%r0 | ||
| 2121 | lrvgr $s1,$s1 # flip byte order | 2108 | lrvgr $s1,$s1 # flip byte order |
| 2122 | lrvgr $s3,$s3 | 2109 | lrvgr $s3,$s3 |
| 2123 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 2110 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
| @@ -2156,11 +2143,9 @@ $code.=<<___; | |||
| 2156 | lghi %r1,0x87 | 2143 | lghi %r1,0x87 |
| 2157 | srag %r0,$s3,63 # broadcast upper bit | 2144 | srag %r0,$s3,63 # broadcast upper bit |
| 2158 | ngr %r1,%r0 # rem | 2145 | ngr %r1,%r0 # rem |
| 2159 | srlg %r0,$s1,63 # carry bit from lower half | 2146 | algr $s1,$s1 |
| 2160 | sllg $s1,$s1,1 | 2147 | alcgr $s3,$s3 |
| 2161 | sllg $s3,$s3,1 | ||
| 2162 | xgr $s1,%r1 | 2148 | xgr $s1,%r1 |
| 2163 | ogr $s3,%r0 | ||
| 2164 | lrvgr $i2,$s1 # flip byte order | 2149 | lrvgr $i2,$s1 # flip byte order |
| 2165 | lrvgr $i3,$s3 | 2150 | lrvgr $i3,$s3 |
| 2166 | stmg $i2,$i3,$tweak($sp) # save the 1st tweak | 2151 | stmg $i2,$i3,$tweak($sp) # save the 1st tweak |
| @@ -2176,11 +2161,9 @@ $code.=<<___; | |||
| 2176 | lghi %r1,0x87 | 2161 | lghi %r1,0x87 |
| 2177 | srag %r0,$s3,63 # broadcast upper bit | 2162 | srag %r0,$s3,63 # broadcast upper bit |
| 2178 | ngr %r1,%r0 # rem | 2163 | ngr %r1,%r0 # rem |
| 2179 | srlg %r0,$s1,63 # carry bit from lower half | 2164 | algr $s1,$s1 |
| 2180 | sllg $s1,$s1,1 | 2165 | alcgr $s3,$s3 |
| 2181 | sllg $s3,$s3,1 | ||
| 2182 | xgr $s1,%r1 | 2166 | xgr $s1,%r1 |
| 2183 | ogr $s3,%r0 | ||
| 2184 | lrvgr $s1,$s1 # flip byte order | 2167 | lrvgr $s1,$s1 # flip byte order |
| 2185 | lrvgr $s3,$s3 | 2168 | lrvgr $s3,$s3 |
| 2186 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 2169 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl index c6f6b3334a..3c8f6c19e7 100644 --- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | |||
| @@ -69,7 +69,8 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && | |||
| 69 | `ml64 2>&1` =~ /Version ([0-9]+)\./ && | 69 | `ml64 2>&1` =~ /Version ([0-9]+)\./ && |
| 70 | $1>=10); | 70 | $1>=10); |
| 71 | 71 | ||
| 72 | open STDOUT,"| $^X $xlate $flavour $output"; | 72 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
| 73 | *STDOUT=*OUT; | ||
| 73 | 74 | ||
| 74 | # void aesni_cbc_sha1_enc(const void *inp, | 75 | # void aesni_cbc_sha1_enc(const void *inp, |
| 75 | # void *out, | 76 | # void *out, |
diff --git a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl index c9c6312fa7..41b90f0844 100644 --- a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl | |||
| @@ -83,9 +83,9 @@ | |||
| 83 | # Add decryption procedure. Performance in CPU cycles spent to decrypt | 83 | # Add decryption procedure. Performance in CPU cycles spent to decrypt |
| 84 | # one byte out of 4096-byte buffer with 128-bit key is: | 84 | # one byte out of 4096-byte buffer with 128-bit key is: |
| 85 | # | 85 | # |
| 86 | # Core 2 11.0 | 86 | # Core 2 9.83 |
| 87 | # Nehalem 9.16 | 87 | # Nehalem 7.74 |
| 88 | # Atom 20.9 | 88 | # Atom 19.0 |
| 89 | # | 89 | # |
| 90 | # November 2011. | 90 | # November 2011. |
| 91 | # | 91 | # |
| @@ -105,7 +105,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |||
| 105 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | 105 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
| 106 | die "can't locate x86_64-xlate.pl"; | 106 | die "can't locate x86_64-xlate.pl"; |
| 107 | 107 | ||
| 108 | open STDOUT,"| $^X $xlate $flavour $output"; | 108 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
| 109 | *STDOUT=*OUT; | ||
| 109 | 110 | ||
| 110 | my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx"); | 111 | my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx"); |
| 111 | my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15) | 112 | my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15) |
| @@ -455,6 +456,7 @@ sub MixColumns { | |||
| 455 | # modified to emit output in order suitable for feeding back to aesenc[last] | 456 | # modified to emit output in order suitable for feeding back to aesenc[last] |
| 456 | my @x=@_[0..7]; | 457 | my @x=@_[0..7]; |
| 457 | my @t=@_[8..15]; | 458 | my @t=@_[8..15]; |
| 459 | my $inv=@_[16]; # optional | ||
| 458 | $code.=<<___; | 460 | $code.=<<___; |
| 459 | pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 | 461 | pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 |
| 460 | pshufd \$0x93, @x[1], @t[1] | 462 | pshufd \$0x93, @x[1], @t[1] |
| @@ -496,7 +498,8 @@ $code.=<<___; | |||
| 496 | pxor @t[4], @t[0] | 498 | pxor @t[4], @t[0] |
| 497 | pshufd \$0x4E, @x[2], @x[6] | 499 | pshufd \$0x4E, @x[2], @x[6] |
| 498 | pxor @t[5], @t[1] | 500 | pxor @t[5], @t[1] |
| 499 | 501 | ___ | |
| 502 | $code.=<<___ if (!$inv); | ||
| 500 | pxor @t[3], @x[4] | 503 | pxor @t[3], @x[4] |
| 501 | pxor @t[7], @x[5] | 504 | pxor @t[7], @x[5] |
| 502 | pxor @t[6], @x[3] | 505 | pxor @t[6], @x[3] |
| @@ -504,9 +507,20 @@ $code.=<<___; | |||
| 504 | pxor @t[2], @x[6] | 507 | pxor @t[2], @x[6] |
| 505 | movdqa @t[1], @x[7] | 508 | movdqa @t[1], @x[7] |
| 506 | ___ | 509 | ___ |
| 510 | $code.=<<___ if ($inv); | ||
| 511 | pxor @x[4], @t[3] | ||
| 512 | pxor @t[7], @x[5] | ||
| 513 | pxor @x[3], @t[6] | ||
| 514 | movdqa @t[0], @x[3] | ||
| 515 | pxor @t[2], @x[6] | ||
| 516 | movdqa @t[6], @x[2] | ||
| 517 | movdqa @t[1], @x[7] | ||
| 518 | movdqa @x[6], @x[4] | ||
| 519 | movdqa @t[3], @x[6] | ||
| 520 | ___ | ||
| 507 | } | 521 | } |
| 508 | 522 | ||
| 509 | sub InvMixColumns { | 523 | sub InvMixColumns_orig { |
| 510 | my @x=@_[0..7]; | 524 | my @x=@_[0..7]; |
| 511 | my @t=@_[8..15]; | 525 | my @t=@_[8..15]; |
| 512 | 526 | ||
| @@ -660,6 +674,54 @@ $code.=<<___; | |||
| 660 | ___ | 674 | ___ |
| 661 | } | 675 | } |
| 662 | 676 | ||
| 677 | sub InvMixColumns { | ||
| 678 | my @x=@_[0..7]; | ||
| 679 | my @t=@_[8..15]; | ||
| 680 | |||
| 681 | # Thanks to Jussi Kivilinna for providing pointer to | ||
| 682 | # | ||
| 683 | # | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | | ||
| 684 | # | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | | ||
| 685 | # | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | | ||
| 686 | # | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | | ||
| 687 | |||
| 688 | $code.=<<___; | ||
| 689 | # multiplication by 0x05-0x00-0x04-0x00 | ||
| 690 | pshufd \$0x4E, @x[0], @t[0] | ||
| 691 | pshufd \$0x4E, @x[6], @t[6] | ||
| 692 | pxor @x[0], @t[0] | ||
| 693 | pshufd \$0x4E, @x[7], @t[7] | ||
| 694 | pxor @x[6], @t[6] | ||
| 695 | pshufd \$0x4E, @x[1], @t[1] | ||
| 696 | pxor @x[7], @t[7] | ||
| 697 | pshufd \$0x4E, @x[2], @t[2] | ||
| 698 | pxor @x[1], @t[1] | ||
| 699 | pshufd \$0x4E, @x[3], @t[3] | ||
| 700 | pxor @x[2], @t[2] | ||
| 701 | pxor @t[6], @x[0] | ||
| 702 | pxor @t[6], @x[1] | ||
| 703 | pshufd \$0x4E, @x[4], @t[4] | ||
| 704 | pxor @x[3], @t[3] | ||
| 705 | pxor @t[0], @x[2] | ||
| 706 | pxor @t[1], @x[3] | ||
| 707 | pshufd \$0x4E, @x[5], @t[5] | ||
| 708 | pxor @x[4], @t[4] | ||
| 709 | pxor @t[7], @x[1] | ||
| 710 | pxor @t[2], @x[4] | ||
| 711 | pxor @x[5], @t[5] | ||
| 712 | |||
| 713 | pxor @t[7], @x[2] | ||
| 714 | pxor @t[6], @x[3] | ||
| 715 | pxor @t[6], @x[4] | ||
| 716 | pxor @t[3], @x[5] | ||
| 717 | pxor @t[4], @x[6] | ||
| 718 | pxor @t[7], @x[4] | ||
| 719 | pxor @t[7], @x[5] | ||
| 720 | pxor @t[5], @x[7] | ||
| 721 | ___ | ||
| 722 | &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 | ||
| 723 | } | ||
| 724 | |||
| 663 | sub aesenc { # not used | 725 | sub aesenc { # not used |
| 664 | my @b=@_[0..7]; | 726 | my @b=@_[0..7]; |
| 665 | my @t=@_[8..15]; | 727 | my @t=@_[8..15]; |
| @@ -2027,6 +2089,8 @@ ___ | |||
| 2027 | # const unsigned char iv[16]); | 2089 | # const unsigned char iv[16]); |
| 2028 | # | 2090 | # |
| 2029 | my ($twmask,$twres,$twtmp)=@XMM[13..15]; | 2091 | my ($twmask,$twres,$twtmp)=@XMM[13..15]; |
| 2092 | $arg6=~s/d$//; | ||
| 2093 | |||
| 2030 | $code.=<<___; | 2094 | $code.=<<___; |
| 2031 | .globl bsaes_xts_encrypt | 2095 | .globl bsaes_xts_encrypt |
| 2032 | .type bsaes_xts_encrypt,\@abi-omnipotent | 2096 | .type bsaes_xts_encrypt,\@abi-omnipotent |
diff --git a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl b/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl index 37998db5e1..bd7f45b850 100644 --- a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl | |||
| @@ -56,7 +56,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |||
| 56 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | 56 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
| 57 | die "can't locate x86_64-xlate.pl"; | 57 | die "can't locate x86_64-xlate.pl"; |
| 58 | 58 | ||
| 59 | open STDOUT,"| $^X $xlate $flavour $output"; | 59 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
| 60 | *STDOUT=*OUT; | ||
| 60 | 61 | ||
| 61 | $PREFIX="vpaes"; | 62 | $PREFIX="vpaes"; |
| 62 | 63 | ||
| @@ -1059,7 +1060,7 @@ _vpaes_consts: | |||
| 1059 | .Lk_dsbo: # decryption sbox final output | 1060 | .Lk_dsbo: # decryption sbox final output |
| 1060 | .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D | 1061 | .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D |
| 1061 | .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C | 1062 | .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C |
| 1062 | .asciz "Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" | 1063 | .asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" |
| 1063 | .align 64 | 1064 | .align 64 |
| 1064 | .size _vpaes_consts,.-_vpaes_consts | 1065 | .size _vpaes_consts,.-_vpaes_consts |
| 1065 | ___ | 1066 | ___ |
