diff options
author | miod <> | 2014-04-13 15:16:40 +0000 |
---|---|---|
committer | miod <> | 2014-04-13 15:16:40 +0000 |
commit | 52628ee3f51f011b463aaedb1a28aa0524b43cb3 (patch) | |
tree | 4bd2adeac981051908ec5756401424bbb4e57d6a /src/lib/libcrypto/aes | |
parent | 40c22d3625a3818690c889ed6216fedf2be522c9 (diff) | |
download | openbsd-52628ee3f51f011b463aaedb1a28aa0524b43cb3.tar.gz openbsd-52628ee3f51f011b463aaedb1a28aa0524b43cb3.tar.bz2 openbsd-52628ee3f51f011b463aaedb1a28aa0524b43cb3.zip |
Import OpenSSL 1.0.1g
Diffstat (limited to 'src/lib/libcrypto/aes')
-rw-r--r-- | src/lib/libcrypto/aes/asm/aes-mips.pl | 20 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/aes-parisc.pl | 3 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/aes-s390x.pl | 95 | ||||
-rwxr-xr-x | src/lib/libcrypto/aes/asm/aes-x86_64.pl | 3 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | 3 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-x86_64.pl | 3 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/bsaes-x86_64.pl | 76 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/vpaes-x86_64.pl | 5 |
8 files changed, 130 insertions, 78 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-mips.pl b/src/lib/libcrypto/aes/asm/aes-mips.pl index 2ce6deffc8..e52395421b 100644 --- a/src/lib/libcrypto/aes/asm/aes-mips.pl +++ b/src/lib/libcrypto/aes/asm/aes-mips.pl | |||
@@ -1036,9 +1036,9 @@ _mips_AES_set_encrypt_key: | |||
1036 | nop | 1036 | nop |
1037 | .end _mips_AES_set_encrypt_key | 1037 | .end _mips_AES_set_encrypt_key |
1038 | 1038 | ||
1039 | .globl AES_set_encrypt_key | 1039 | .globl private_AES_set_encrypt_key |
1040 | .ent AES_set_encrypt_key | 1040 | .ent private_AES_set_encrypt_key |
1041 | AES_set_encrypt_key: | 1041 | private_AES_set_encrypt_key: |
1042 | .frame $sp,$FRAMESIZE,$ra | 1042 | .frame $sp,$FRAMESIZE,$ra |
1043 | .mask $SAVED_REGS_MASK,-$SZREG | 1043 | .mask $SAVED_REGS_MASK,-$SZREG |
1044 | .set noreorder | 1044 | .set noreorder |
@@ -1060,7 +1060,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue | |||
1060 | ___ | 1060 | ___ |
1061 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification | 1061 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification |
1062 | .cplocal $Tbl | 1062 | .cplocal $Tbl |
1063 | .cpsetup $pf,$zero,AES_set_encrypt_key | 1063 | .cpsetup $pf,$zero,private_AES_set_encrypt_key |
1064 | ___ | 1064 | ___ |
1065 | $code.=<<___; | 1065 | $code.=<<___; |
1066 | .set reorder | 1066 | .set reorder |
@@ -1083,7 +1083,7 @@ ___ | |||
1083 | $code.=<<___; | 1083 | $code.=<<___; |
1084 | jr $ra | 1084 | jr $ra |
1085 | $PTR_ADD $sp,$FRAMESIZE | 1085 | $PTR_ADD $sp,$FRAMESIZE |
1086 | .end AES_set_encrypt_key | 1086 | .end private_AES_set_encrypt_key |
1087 | ___ | 1087 | ___ |
1088 | 1088 | ||
1089 | my ($head,$tail)=($inp,$bits); | 1089 | my ($head,$tail)=($inp,$bits); |
@@ -1091,9 +1091,9 @@ my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3); | |||
1091 | my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); | 1091 | my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); |
1092 | $code.=<<___; | 1092 | $code.=<<___; |
1093 | .align 5 | 1093 | .align 5 |
1094 | .globl AES_set_decrypt_key | 1094 | .globl private_AES_set_decrypt_key |
1095 | .ent AES_set_decrypt_key | 1095 | .ent private_AES_set_decrypt_key |
1096 | AES_set_decrypt_key: | 1096 | private_AES_set_decrypt_key: |
1097 | .frame $sp,$FRAMESIZE,$ra | 1097 | .frame $sp,$FRAMESIZE,$ra |
1098 | .mask $SAVED_REGS_MASK,-$SZREG | 1098 | .mask $SAVED_REGS_MASK,-$SZREG |
1099 | .set noreorder | 1099 | .set noreorder |
@@ -1115,7 +1115,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue | |||
1115 | ___ | 1115 | ___ |
1116 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification | 1116 | $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification |
1117 | .cplocal $Tbl | 1117 | .cplocal $Tbl |
1118 | .cpsetup $pf,$zero,AES_set_decrypt_key | 1118 | .cpsetup $pf,$zero,private_AES_set_decrypt_key |
1119 | ___ | 1119 | ___ |
1120 | $code.=<<___; | 1120 | $code.=<<___; |
1121 | .set reorder | 1121 | .set reorder |
@@ -1226,7 +1226,7 @@ ___ | |||
1226 | $code.=<<___; | 1226 | $code.=<<___; |
1227 | jr $ra | 1227 | jr $ra |
1228 | $PTR_ADD $sp,$FRAMESIZE | 1228 | $PTR_ADD $sp,$FRAMESIZE |
1229 | .end AES_set_decrypt_key | 1229 | .end private_AES_set_decrypt_key |
1230 | ___ | 1230 | ___ |
1231 | }}} | 1231 | }}} |
1232 | 1232 | ||
diff --git a/src/lib/libcrypto/aes/asm/aes-parisc.pl b/src/lib/libcrypto/aes/asm/aes-parisc.pl index c36b6a2270..714dcfbbe3 100644 --- a/src/lib/libcrypto/aes/asm/aes-parisc.pl +++ b/src/lib/libcrypto/aes/asm/aes-parisc.pl | |||
@@ -1015,7 +1015,8 @@ foreach (split("\n",$code)) { | |||
1015 | $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2) | 1015 | $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2) |
1016 | : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; | 1016 | : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; |
1017 | 1017 | ||
1018 | s/,\*/,/ if ($SIZE_T==4); | 1018 | s/,\*/,/ if ($SIZE_T==4); |
1019 | s/\bbv\b(.*\(%r2\))/bve$1/ if ($SIZE_T==8); | ||
1019 | print $_,"\n"; | 1020 | print $_,"\n"; |
1020 | } | 1021 | } |
1021 | close STDOUT; | 1022 | close STDOUT; |
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl index 445a1e6762..e75dcd0315 100644 --- a/src/lib/libcrypto/aes/asm/aes-s390x.pl +++ b/src/lib/libcrypto/aes/asm/aes-s390x.pl | |||
@@ -1598,11 +1598,11 @@ $code.=<<___ if(1); | |||
1598 | lghi $s1,0x7f | 1598 | lghi $s1,0x7f |
1599 | nr $s1,%r0 | 1599 | nr $s1,%r0 |
1600 | lghi %r0,0 # query capability vector | 1600 | lghi %r0,0 # query capability vector |
1601 | la %r1,2*$SIZE_T($sp) | 1601 | la %r1,$tweak-16($sp) |
1602 | .long 0xb92e0042 # km %r4,%r2 | 1602 | .long 0xb92e0042 # km %r4,%r2 |
1603 | llihh %r1,0x8000 | 1603 | llihh %r1,0x8000 |
1604 | srlg %r1,%r1,32($s1) # check for 32+function code | 1604 | srlg %r1,%r1,32($s1) # check for 32+function code |
1605 | ng %r1,2*$SIZE_T($sp) | 1605 | ng %r1,$tweak-16($sp) |
1606 | lgr %r0,$s0 # restore the function code | 1606 | lgr %r0,$s0 # restore the function code |
1607 | la %r1,0($key1) # restore $key1 | 1607 | la %r1,0($key1) # restore $key1 |
1608 | jz .Lxts_km_vanilla | 1608 | jz .Lxts_km_vanilla |
@@ -1628,7 +1628,7 @@ $code.=<<___ if(1); | |||
1628 | 1628 | ||
1629 | lrvg $s0,$tweak+0($sp) # load the last tweak | 1629 | lrvg $s0,$tweak+0($sp) # load the last tweak |
1630 | lrvg $s1,$tweak+8($sp) | 1630 | lrvg $s1,$tweak+8($sp) |
1631 | stmg %r0,%r3,$tweak-32(%r1) # wipe copy of the key | 1631 | stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key |
1632 | 1632 | ||
1633 | nill %r0,0xffdf # switch back to original function code | 1633 | nill %r0,0xffdf # switch back to original function code |
1634 | la %r1,0($key1) # restore pointer to $key1 | 1634 | la %r1,0($key1) # restore pointer to $key1 |
@@ -1684,11 +1684,9 @@ $code.=<<___; | |||
1684 | lghi $i1,0x87 | 1684 | lghi $i1,0x87 |
1685 | srag $i2,$s1,63 # broadcast upper bit | 1685 | srag $i2,$s1,63 # broadcast upper bit |
1686 | ngr $i1,$i2 # rem | 1686 | ngr $i1,$i2 # rem |
1687 | srlg $i2,$s0,63 # carry bit from lower half | 1687 | algr $s0,$s0 |
1688 | sllg $s0,$s0,1 | 1688 | alcgr $s1,$s1 |
1689 | sllg $s1,$s1,1 | ||
1690 | xgr $s0,$i1 | 1689 | xgr $s0,$i1 |
1691 | ogr $s1,$i2 | ||
1692 | .Lxts_km_start: | 1690 | .Lxts_km_start: |
1693 | lrvgr $i1,$s0 # flip byte order | 1691 | lrvgr $i1,$s0 # flip byte order |
1694 | lrvgr $i2,$s1 | 1692 | lrvgr $i2,$s1 |
@@ -1745,11 +1743,9 @@ $code.=<<___; | |||
1745 | lghi $i1,0x87 | 1743 | lghi $i1,0x87 |
1746 | srag $i2,$s1,63 # broadcast upper bit | 1744 | srag $i2,$s1,63 # broadcast upper bit |
1747 | ngr $i1,$i2 # rem | 1745 | ngr $i1,$i2 # rem |
1748 | srlg $i2,$s0,63 # carry bit from lower half | 1746 | algr $s0,$s0 |
1749 | sllg $s0,$s0,1 | 1747 | alcgr $s1,$s1 |
1750 | sllg $s1,$s1,1 | ||
1751 | xgr $s0,$i1 | 1748 | xgr $s0,$i1 |
1752 | ogr $s1,$i2 | ||
1753 | 1749 | ||
1754 | ltr $len,$len # clear zero flag | 1750 | ltr $len,$len # clear zero flag |
1755 | br $ra | 1751 | br $ra |
@@ -1781,8 +1777,8 @@ $code.=<<___ if (!$softonly); | |||
1781 | clr %r0,%r1 | 1777 | clr %r0,%r1 |
1782 | jl .Lxts_enc_software | 1778 | jl .Lxts_enc_software |
1783 | 1779 | ||
1780 | st${g} $ra,5*$SIZE_T($sp) | ||
1784 | stm${g} %r6,$s3,6*$SIZE_T($sp) | 1781 | stm${g} %r6,$s3,6*$SIZE_T($sp) |
1785 | st${g} $ra,14*$SIZE_T($sp) | ||
1786 | 1782 | ||
1787 | sllg $len,$len,4 # $len&=~15 | 1783 | sllg $len,$len,4 # $len&=~15 |
1788 | slgr $out,$inp | 1784 | slgr $out,$inp |
@@ -1830,9 +1826,9 @@ $code.=<<___ if (!$softonly); | |||
1830 | stg $i2,8($i3) | 1826 | stg $i2,8($i3) |
1831 | 1827 | ||
1832 | .Lxts_enc_km_done: | 1828 | .Lxts_enc_km_done: |
1833 | l${g} $ra,14*$SIZE_T($sp) | 1829 | stg $sp,$tweak+0($sp) # wipe tweak |
1834 | st${g} $sp,$tweak($sp) # wipe tweak | 1830 | stg $sp,$tweak+8($sp) |
1835 | st${g} $sp,$tweak($sp) | 1831 | l${g} $ra,5*$SIZE_T($sp) |
1836 | lm${g} %r6,$s3,6*$SIZE_T($sp) | 1832 | lm${g} %r6,$s3,6*$SIZE_T($sp) |
1837 | br $ra | 1833 | br $ra |
1838 | .align 16 | 1834 | .align 16 |
@@ -1843,12 +1839,11 @@ $code.=<<___; | |||
1843 | 1839 | ||
1844 | slgr $out,$inp | 1840 | slgr $out,$inp |
1845 | 1841 | ||
1846 | xgr $s0,$s0 # clear upper half | 1842 | l${g} $s3,$stdframe($sp) # ivp |
1847 | xgr $s1,$s1 | 1843 | llgf $s0,0($s3) # load iv |
1848 | lrv $s0,$stdframe+4($sp) # load secno | 1844 | llgf $s1,4($s3) |
1849 | lrv $s1,$stdframe+0($sp) | 1845 | llgf $s2,8($s3) |
1850 | xgr $s2,$s2 | 1846 | llgf $s3,12($s3) |
1851 | xgr $s3,$s3 | ||
1852 | stm${g} %r2,%r5,2*$SIZE_T($sp) | 1847 | stm${g} %r2,%r5,2*$SIZE_T($sp) |
1853 | la $key,0($key2) | 1848 | la $key,0($key2) |
1854 | larl $tbl,AES_Te | 1849 | larl $tbl,AES_Te |
@@ -1864,11 +1859,9 @@ $code.=<<___; | |||
1864 | lghi %r1,0x87 | 1859 | lghi %r1,0x87 |
1865 | srag %r0,$s3,63 # broadcast upper bit | 1860 | srag %r0,$s3,63 # broadcast upper bit |
1866 | ngr %r1,%r0 # rem | 1861 | ngr %r1,%r0 # rem |
1867 | srlg %r0,$s1,63 # carry bit from lower half | 1862 | algr $s1,$s1 |
1868 | sllg $s1,$s1,1 | 1863 | alcgr $s3,$s3 |
1869 | sllg $s3,$s3,1 | ||
1870 | xgr $s1,%r1 | 1864 | xgr $s1,%r1 |
1871 | ogr $s3,%r0 | ||
1872 | lrvgr $s1,$s1 # flip byte order | 1865 | lrvgr $s1,$s1 # flip byte order |
1873 | lrvgr $s3,$s3 | 1866 | lrvgr $s3,$s3 |
1874 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 1867 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
@@ -1917,11 +1910,9 @@ $code.=<<___; | |||
1917 | lghi %r1,0x87 | 1910 | lghi %r1,0x87 |
1918 | srag %r0,$s3,63 # broadcast upper bit | 1911 | srag %r0,$s3,63 # broadcast upper bit |
1919 | ngr %r1,%r0 # rem | 1912 | ngr %r1,%r0 # rem |
1920 | srlg %r0,$s1,63 # carry bit from lower half | 1913 | algr $s1,$s1 |
1921 | sllg $s1,$s1,1 | 1914 | alcgr $s3,$s3 |
1922 | sllg $s3,$s3,1 | ||
1923 | xgr $s1,%r1 | 1915 | xgr $s1,%r1 |
1924 | ogr $s3,%r0 | ||
1925 | lrvgr $s1,$s1 # flip byte order | 1916 | lrvgr $s1,$s1 # flip byte order |
1926 | lrvgr $s3,$s3 | 1917 | lrvgr $s3,$s3 |
1927 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 1918 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
@@ -1956,7 +1947,8 @@ $code.=<<___; | |||
1956 | .size AES_xts_encrypt,.-AES_xts_encrypt | 1947 | .size AES_xts_encrypt,.-AES_xts_encrypt |
1957 | ___ | 1948 | ___ |
1958 | # void AES_xts_decrypt(const char *inp,char *out,size_t len, | 1949 | # void AES_xts_decrypt(const char *inp,char *out,size_t len, |
1959 | # const AES_KEY *key1, const AES_KEY *key2,u64 secno); | 1950 | # const AES_KEY *key1, const AES_KEY *key2, |
1951 | # const unsigned char iv[16]); | ||
1960 | # | 1952 | # |
1961 | $code.=<<___; | 1953 | $code.=<<___; |
1962 | .globl AES_xts_decrypt | 1954 | .globl AES_xts_decrypt |
@@ -1988,8 +1980,8 @@ $code.=<<___ if (!$softonly); | |||
1988 | clr %r0,%r1 | 1980 | clr %r0,%r1 |
1989 | jl .Lxts_dec_software | 1981 | jl .Lxts_dec_software |
1990 | 1982 | ||
1983 | st${g} $ra,5*$SIZE_T($sp) | ||
1991 | stm${g} %r6,$s3,6*$SIZE_T($sp) | 1984 | stm${g} %r6,$s3,6*$SIZE_T($sp) |
1992 | st${g} $ra,14*$SIZE_T($sp) | ||
1993 | 1985 | ||
1994 | nill $len,0xfff0 # $len&=~15 | 1986 | nill $len,0xfff0 # $len&=~15 |
1995 | slgr $out,$inp | 1987 | slgr $out,$inp |
@@ -2028,11 +2020,9 @@ $code.=<<___ if (!$softonly); | |||
2028 | lghi $i1,0x87 | 2020 | lghi $i1,0x87 |
2029 | srag $i2,$s1,63 # broadcast upper bit | 2021 | srag $i2,$s1,63 # broadcast upper bit |
2030 | ngr $i1,$i2 # rem | 2022 | ngr $i1,$i2 # rem |
2031 | srlg $i2,$s0,63 # carry bit from lower half | 2023 | algr $s0,$s0 |
2032 | sllg $s0,$s0,1 | 2024 | alcgr $s1,$s1 |
2033 | sllg $s1,$s1,1 | ||
2034 | xgr $s0,$i1 | 2025 | xgr $s0,$i1 |
2035 | ogr $s1,$i2 | ||
2036 | lrvgr $i1,$s0 # flip byte order | 2026 | lrvgr $i1,$s0 # flip byte order |
2037 | lrvgr $i2,$s1 | 2027 | lrvgr $i2,$s1 |
2038 | 2028 | ||
@@ -2075,9 +2065,9 @@ $code.=<<___ if (!$softonly); | |||
2075 | stg $s2,0($i3) | 2065 | stg $s2,0($i3) |
2076 | stg $s3,8($i3) | 2066 | stg $s3,8($i3) |
2077 | .Lxts_dec_km_done: | 2067 | .Lxts_dec_km_done: |
2078 | l${g} $ra,14*$SIZE_T($sp) | 2068 | stg $sp,$tweak+0($sp) # wipe tweak |
2079 | st${g} $sp,$tweak($sp) # wipe tweak | 2069 | stg $sp,$tweak+8($sp) |
2080 | st${g} $sp,$tweak($sp) | 2070 | l${g} $ra,5*$SIZE_T($sp) |
2081 | lm${g} %r6,$s3,6*$SIZE_T($sp) | 2071 | lm${g} %r6,$s3,6*$SIZE_T($sp) |
2082 | br $ra | 2072 | br $ra |
2083 | .align 16 | 2073 | .align 16 |
@@ -2089,12 +2079,11 @@ $code.=<<___; | |||
2089 | srlg $len,$len,4 | 2079 | srlg $len,$len,4 |
2090 | slgr $out,$inp | 2080 | slgr $out,$inp |
2091 | 2081 | ||
2092 | xgr $s0,$s0 # clear upper half | 2082 | l${g} $s3,$stdframe($sp) # ivp |
2093 | xgr $s1,$s1 | 2083 | llgf $s0,0($s3) # load iv |
2094 | lrv $s0,$stdframe+4($sp) # load secno | 2084 | llgf $s1,4($s3) |
2095 | lrv $s1,$stdframe+0($sp) | 2085 | llgf $s2,8($s3) |
2096 | xgr $s2,$s2 | 2086 | llgf $s3,12($s3) |
2097 | xgr $s3,$s3 | ||
2098 | stm${g} %r2,%r5,2*$SIZE_T($sp) | 2087 | stm${g} %r2,%r5,2*$SIZE_T($sp) |
2099 | la $key,0($key2) | 2088 | la $key,0($key2) |
2100 | larl $tbl,AES_Te | 2089 | larl $tbl,AES_Te |
@@ -2113,11 +2102,9 @@ $code.=<<___; | |||
2113 | lghi %r1,0x87 | 2102 | lghi %r1,0x87 |
2114 | srag %r0,$s3,63 # broadcast upper bit | 2103 | srag %r0,$s3,63 # broadcast upper bit |
2115 | ngr %r1,%r0 # rem | 2104 | ngr %r1,%r0 # rem |
2116 | srlg %r0,$s1,63 # carry bit from lower half | 2105 | algr $s1,$s1 |
2117 | sllg $s1,$s1,1 | 2106 | alcgr $s3,$s3 |
2118 | sllg $s3,$s3,1 | ||
2119 | xgr $s1,%r1 | 2107 | xgr $s1,%r1 |
2120 | ogr $s3,%r0 | ||
2121 | lrvgr $s1,$s1 # flip byte order | 2108 | lrvgr $s1,$s1 # flip byte order |
2122 | lrvgr $s3,$s3 | 2109 | lrvgr $s3,$s3 |
2123 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 2110 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
@@ -2156,11 +2143,9 @@ $code.=<<___; | |||
2156 | lghi %r1,0x87 | 2143 | lghi %r1,0x87 |
2157 | srag %r0,$s3,63 # broadcast upper bit | 2144 | srag %r0,$s3,63 # broadcast upper bit |
2158 | ngr %r1,%r0 # rem | 2145 | ngr %r1,%r0 # rem |
2159 | srlg %r0,$s1,63 # carry bit from lower half | 2146 | algr $s1,$s1 |
2160 | sllg $s1,$s1,1 | 2147 | alcgr $s3,$s3 |
2161 | sllg $s3,$s3,1 | ||
2162 | xgr $s1,%r1 | 2148 | xgr $s1,%r1 |
2163 | ogr $s3,%r0 | ||
2164 | lrvgr $i2,$s1 # flip byte order | 2149 | lrvgr $i2,$s1 # flip byte order |
2165 | lrvgr $i3,$s3 | 2150 | lrvgr $i3,$s3 |
2166 | stmg $i2,$i3,$tweak($sp) # save the 1st tweak | 2151 | stmg $i2,$i3,$tweak($sp) # save the 1st tweak |
@@ -2176,11 +2161,9 @@ $code.=<<___; | |||
2176 | lghi %r1,0x87 | 2161 | lghi %r1,0x87 |
2177 | srag %r0,$s3,63 # broadcast upper bit | 2162 | srag %r0,$s3,63 # broadcast upper bit |
2178 | ngr %r1,%r0 # rem | 2163 | ngr %r1,%r0 # rem |
2179 | srlg %r0,$s1,63 # carry bit from lower half | 2164 | algr $s1,$s1 |
2180 | sllg $s1,$s1,1 | 2165 | alcgr $s3,$s3 |
2181 | sllg $s3,$s3,1 | ||
2182 | xgr $s1,%r1 | 2166 | xgr $s1,%r1 |
2183 | ogr $s3,%r0 | ||
2184 | lrvgr $s1,$s1 # flip byte order | 2167 | lrvgr $s1,$s1 # flip byte order |
2185 | lrvgr $s3,$s3 | 2168 | lrvgr $s3,$s3 |
2186 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits | 2169 | srlg $s0,$s1,32 # smash the tweak to 4x32-bits |
diff --git a/src/lib/libcrypto/aes/asm/aes-x86_64.pl b/src/lib/libcrypto/aes/asm/aes-x86_64.pl index 48fa857d5b..34cbb5d844 100755 --- a/src/lib/libcrypto/aes/asm/aes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aes-x86_64.pl | |||
@@ -36,7 +36,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |||
36 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | 36 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
37 | die "can't locate x86_64-xlate.pl"; | 37 | die "can't locate x86_64-xlate.pl"; |
38 | 38 | ||
39 | open STDOUT,"| $^X $xlate $flavour $output"; | 39 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
40 | *STDOUT=*OUT; | ||
40 | 41 | ||
41 | $verticalspin=1; # unlike 32-bit version $verticalspin performs | 42 | $verticalspin=1; # unlike 32-bit version $verticalspin performs |
42 | # ~15% better on both AMD and Intel cores | 43 | # ~15% better on both AMD and Intel cores |
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl index c6f6b3334a..3c8f6c19e7 100644 --- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl | |||
@@ -69,7 +69,8 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && | |||
69 | `ml64 2>&1` =~ /Version ([0-9]+)\./ && | 69 | `ml64 2>&1` =~ /Version ([0-9]+)\./ && |
70 | $1>=10); | 70 | $1>=10); |
71 | 71 | ||
72 | open STDOUT,"| $^X $xlate $flavour $output"; | 72 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
73 | *STDOUT=*OUT; | ||
73 | 74 | ||
74 | # void aesni_cbc_sha1_enc(const void *inp, | 75 | # void aesni_cbc_sha1_enc(const void *inp, |
75 | # void *out, | 76 | # void *out, |
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl index 499f3b3f42..0dbb194b8d 100644 --- a/src/lib/libcrypto/aes/asm/aesni-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/aesni-x86_64.pl | |||
@@ -172,7 +172,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |||
172 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | 172 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
173 | die "can't locate x86_64-xlate.pl"; | 173 | die "can't locate x86_64-xlate.pl"; |
174 | 174 | ||
175 | open STDOUT,"| $^X $xlate $flavour $output"; | 175 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
176 | *STDOUT=*OUT; | ||
176 | 177 | ||
177 | $movkey = $PREFIX eq "aesni" ? "movups" : "movups"; | 178 | $movkey = $PREFIX eq "aesni" ? "movups" : "movups"; |
178 | @_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order | 179 | @_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order |
diff --git a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl index c9c6312fa7..41b90f0844 100644 --- a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl | |||
@@ -83,9 +83,9 @@ | |||
83 | # Add decryption procedure. Performance in CPU cycles spent to decrypt | 83 | # Add decryption procedure. Performance in CPU cycles spent to decrypt |
84 | # one byte out of 4096-byte buffer with 128-bit key is: | 84 | # one byte out of 4096-byte buffer with 128-bit key is: |
85 | # | 85 | # |
86 | # Core 2 11.0 | 86 | # Core 2 9.83 |
87 | # Nehalem 9.16 | 87 | # Nehalem 7.74 |
88 | # Atom 20.9 | 88 | # Atom 19.0 |
89 | # | 89 | # |
90 | # November 2011. | 90 | # November 2011. |
91 | # | 91 | # |
@@ -105,7 +105,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |||
105 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | 105 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
106 | die "can't locate x86_64-xlate.pl"; | 106 | die "can't locate x86_64-xlate.pl"; |
107 | 107 | ||
108 | open STDOUT,"| $^X $xlate $flavour $output"; | 108 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
109 | *STDOUT=*OUT; | ||
109 | 110 | ||
110 | my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx"); | 111 | my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx"); |
111 | my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15) | 112 | my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15) |
@@ -455,6 +456,7 @@ sub MixColumns { | |||
455 | # modified to emit output in order suitable for feeding back to aesenc[last] | 456 | # modified to emit output in order suitable for feeding back to aesenc[last] |
456 | my @x=@_[0..7]; | 457 | my @x=@_[0..7]; |
457 | my @t=@_[8..15]; | 458 | my @t=@_[8..15]; |
459 | my $inv=@_[16]; # optional | ||
458 | $code.=<<___; | 460 | $code.=<<___; |
459 | pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 | 461 | pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 |
460 | pshufd \$0x93, @x[1], @t[1] | 462 | pshufd \$0x93, @x[1], @t[1] |
@@ -496,7 +498,8 @@ $code.=<<___; | |||
496 | pxor @t[4], @t[0] | 498 | pxor @t[4], @t[0] |
497 | pshufd \$0x4E, @x[2], @x[6] | 499 | pshufd \$0x4E, @x[2], @x[6] |
498 | pxor @t[5], @t[1] | 500 | pxor @t[5], @t[1] |
499 | 501 | ___ | |
502 | $code.=<<___ if (!$inv); | ||
500 | pxor @t[3], @x[4] | 503 | pxor @t[3], @x[4] |
501 | pxor @t[7], @x[5] | 504 | pxor @t[7], @x[5] |
502 | pxor @t[6], @x[3] | 505 | pxor @t[6], @x[3] |
@@ -504,9 +507,20 @@ $code.=<<___; | |||
504 | pxor @t[2], @x[6] | 507 | pxor @t[2], @x[6] |
505 | movdqa @t[1], @x[7] | 508 | movdqa @t[1], @x[7] |
506 | ___ | 509 | ___ |
510 | $code.=<<___ if ($inv); | ||
511 | pxor @x[4], @t[3] | ||
512 | pxor @t[7], @x[5] | ||
513 | pxor @x[3], @t[6] | ||
514 | movdqa @t[0], @x[3] | ||
515 | pxor @t[2], @x[6] | ||
516 | movdqa @t[6], @x[2] | ||
517 | movdqa @t[1], @x[7] | ||
518 | movdqa @x[6], @x[4] | ||
519 | movdqa @t[3], @x[6] | ||
520 | ___ | ||
507 | } | 521 | } |
508 | 522 | ||
509 | sub InvMixColumns { | 523 | sub InvMixColumns_orig { |
510 | my @x=@_[0..7]; | 524 | my @x=@_[0..7]; |
511 | my @t=@_[8..15]; | 525 | my @t=@_[8..15]; |
512 | 526 | ||
@@ -660,6 +674,54 @@ $code.=<<___; | |||
660 | ___ | 674 | ___ |
661 | } | 675 | } |
662 | 676 | ||
677 | sub InvMixColumns { | ||
678 | my @x=@_[0..7]; | ||
679 | my @t=@_[8..15]; | ||
680 | |||
681 | # Thanks to Jussi Kivilinna for providing pointer to | ||
682 | # | ||
683 | # | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 | | ||
684 | # | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 | | ||
685 | # | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 | | ||
686 | # | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 | | ||
687 | |||
688 | $code.=<<___; | ||
689 | # multiplication by 0x05-0x00-0x04-0x00 | ||
690 | pshufd \$0x4E, @x[0], @t[0] | ||
691 | pshufd \$0x4E, @x[6], @t[6] | ||
692 | pxor @x[0], @t[0] | ||
693 | pshufd \$0x4E, @x[7], @t[7] | ||
694 | pxor @x[6], @t[6] | ||
695 | pshufd \$0x4E, @x[1], @t[1] | ||
696 | pxor @x[7], @t[7] | ||
697 | pshufd \$0x4E, @x[2], @t[2] | ||
698 | pxor @x[1], @t[1] | ||
699 | pshufd \$0x4E, @x[3], @t[3] | ||
700 | pxor @x[2], @t[2] | ||
701 | pxor @t[6], @x[0] | ||
702 | pxor @t[6], @x[1] | ||
703 | pshufd \$0x4E, @x[4], @t[4] | ||
704 | pxor @x[3], @t[3] | ||
705 | pxor @t[0], @x[2] | ||
706 | pxor @t[1], @x[3] | ||
707 | pshufd \$0x4E, @x[5], @t[5] | ||
708 | pxor @x[4], @t[4] | ||
709 | pxor @t[7], @x[1] | ||
710 | pxor @t[2], @x[4] | ||
711 | pxor @x[5], @t[5] | ||
712 | |||
713 | pxor @t[7], @x[2] | ||
714 | pxor @t[6], @x[3] | ||
715 | pxor @t[6], @x[4] | ||
716 | pxor @t[3], @x[5] | ||
717 | pxor @t[4], @x[6] | ||
718 | pxor @t[7], @x[4] | ||
719 | pxor @t[7], @x[5] | ||
720 | pxor @t[5], @x[7] | ||
721 | ___ | ||
722 | &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6 | ||
723 | } | ||
724 | |||
663 | sub aesenc { # not used | 725 | sub aesenc { # not used |
664 | my @b=@_[0..7]; | 726 | my @b=@_[0..7]; |
665 | my @t=@_[8..15]; | 727 | my @t=@_[8..15]; |
@@ -2027,6 +2089,8 @@ ___ | |||
2027 | # const unsigned char iv[16]); | 2089 | # const unsigned char iv[16]); |
2028 | # | 2090 | # |
2029 | my ($twmask,$twres,$twtmp)=@XMM[13..15]; | 2091 | my ($twmask,$twres,$twtmp)=@XMM[13..15]; |
2092 | $arg6=~s/d$//; | ||
2093 | |||
2030 | $code.=<<___; | 2094 | $code.=<<___; |
2031 | .globl bsaes_xts_encrypt | 2095 | .globl bsaes_xts_encrypt |
2032 | .type bsaes_xts_encrypt,\@abi-omnipotent | 2096 | .type bsaes_xts_encrypt,\@abi-omnipotent |
diff --git a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl b/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl index 37998db5e1..bd7f45b850 100644 --- a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl +++ b/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl | |||
@@ -56,7 +56,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | |||
56 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or | 56 | ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or |
57 | die "can't locate x86_64-xlate.pl"; | 57 | die "can't locate x86_64-xlate.pl"; |
58 | 58 | ||
59 | open STDOUT,"| $^X $xlate $flavour $output"; | 59 | open OUT,"| \"$^X\" $xlate $flavour $output"; |
60 | *STDOUT=*OUT; | ||
60 | 61 | ||
61 | $PREFIX="vpaes"; | 62 | $PREFIX="vpaes"; |
62 | 63 | ||
@@ -1059,7 +1060,7 @@ _vpaes_consts: | |||
1059 | .Lk_dsbo: # decryption sbox final output | 1060 | .Lk_dsbo: # decryption sbox final output |
1060 | .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D | 1061 | .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D |
1061 | .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C | 1062 | .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C |
1062 | .asciz "Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" | 1063 | .asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" |
1063 | .align 64 | 1064 | .align 64 |
1064 | .size _vpaes_consts,.-_vpaes_consts | 1065 | .size _vpaes_consts,.-_vpaes_consts |
1065 | ___ | 1066 | ___ |