summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/aes/asm
diff options
context:
space:
mode:
authormiod <>2014-04-13 15:16:40 +0000
committermiod <>2014-04-13 15:16:40 +0000
commit92349eb53934e1b3e9b807e603d45417a6320d21 (patch)
tree0de23bccc2c4fc24d466d6a4291987efc8e44770 /src/lib/libcrypto/aes/asm
parentbdcc75be513421611e357921c457c3c5f631a14c (diff)
parent52628ee3f51f011b463aaedb1a28aa0524b43cb3 (diff)
downloadopenbsd-92349eb53934e1b3e9b807e603d45417a6320d21.tar.gz
openbsd-92349eb53934e1b3e9b807e603d45417a6320d21.tar.bz2
openbsd-92349eb53934e1b3e9b807e603d45417a6320d21.zip
This commit was generated by cvs2git to track changes on a CVS vendor
branch.
Diffstat (limited to 'src/lib/libcrypto/aes/asm')
-rw-r--r--src/lib/libcrypto/aes/asm/aes-mips.pl20
-rw-r--r--src/lib/libcrypto/aes/asm/aes-parisc.pl3
-rw-r--r--src/lib/libcrypto/aes/asm/aes-s390x.pl95
-rw-r--r--src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl3
-rw-r--r--src/lib/libcrypto/aes/asm/bsaes-x86_64.pl76
-rw-r--r--src/lib/libcrypto/aes/asm/vpaes-x86_64.pl5
6 files changed, 126 insertions, 76 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-mips.pl b/src/lib/libcrypto/aes/asm/aes-mips.pl
index 2ce6deffc8..e52395421b 100644
--- a/src/lib/libcrypto/aes/asm/aes-mips.pl
+++ b/src/lib/libcrypto/aes/asm/aes-mips.pl
@@ -1036,9 +1036,9 @@ _mips_AES_set_encrypt_key:
1036 nop 1036 nop
1037.end _mips_AES_set_encrypt_key 1037.end _mips_AES_set_encrypt_key
1038 1038
1039.globl AES_set_encrypt_key 1039.globl private_AES_set_encrypt_key
1040.ent AES_set_encrypt_key 1040.ent private_AES_set_encrypt_key
1041AES_set_encrypt_key: 1041private_AES_set_encrypt_key:
1042 .frame $sp,$FRAMESIZE,$ra 1042 .frame $sp,$FRAMESIZE,$ra
1043 .mask $SAVED_REGS_MASK,-$SZREG 1043 .mask $SAVED_REGS_MASK,-$SZREG
1044 .set noreorder 1044 .set noreorder
@@ -1060,7 +1060,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
1060___ 1060___
1061$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification 1061$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
1062 .cplocal $Tbl 1062 .cplocal $Tbl
1063 .cpsetup $pf,$zero,AES_set_encrypt_key 1063 .cpsetup $pf,$zero,private_AES_set_encrypt_key
1064___ 1064___
1065$code.=<<___; 1065$code.=<<___;
1066 .set reorder 1066 .set reorder
@@ -1083,7 +1083,7 @@ ___
1083$code.=<<___; 1083$code.=<<___;
1084 jr $ra 1084 jr $ra
1085 $PTR_ADD $sp,$FRAMESIZE 1085 $PTR_ADD $sp,$FRAMESIZE
1086.end AES_set_encrypt_key 1086.end private_AES_set_encrypt_key
1087___ 1087___
1088 1088
1089my ($head,$tail)=($inp,$bits); 1089my ($head,$tail)=($inp,$bits);
@@ -1091,9 +1091,9 @@ my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
1091my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2); 1091my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
1092$code.=<<___; 1092$code.=<<___;
1093.align 5 1093.align 5
1094.globl AES_set_decrypt_key 1094.globl private_AES_set_decrypt_key
1095.ent AES_set_decrypt_key 1095.ent private_AES_set_decrypt_key
1096AES_set_decrypt_key: 1096private_AES_set_decrypt_key:
1097 .frame $sp,$FRAMESIZE,$ra 1097 .frame $sp,$FRAMESIZE,$ra
1098 .mask $SAVED_REGS_MASK,-$SZREG 1098 .mask $SAVED_REGS_MASK,-$SZREG
1099 .set noreorder 1099 .set noreorder
@@ -1115,7 +1115,7 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
1115___ 1115___
1116$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification 1116$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
1117 .cplocal $Tbl 1117 .cplocal $Tbl
1118 .cpsetup $pf,$zero,AES_set_decrypt_key 1118 .cpsetup $pf,$zero,private_AES_set_decrypt_key
1119___ 1119___
1120$code.=<<___; 1120$code.=<<___;
1121 .set reorder 1121 .set reorder
@@ -1226,7 +1226,7 @@ ___
1226$code.=<<___; 1226$code.=<<___;
1227 jr $ra 1227 jr $ra
1228 $PTR_ADD $sp,$FRAMESIZE 1228 $PTR_ADD $sp,$FRAMESIZE
1229.end AES_set_decrypt_key 1229.end private_AES_set_decrypt_key
1230___ 1230___
1231}}} 1231}}}
1232 1232
diff --git a/src/lib/libcrypto/aes/asm/aes-parisc.pl b/src/lib/libcrypto/aes/asm/aes-parisc.pl
index c36b6a2270..714dcfbbe3 100644
--- a/src/lib/libcrypto/aes/asm/aes-parisc.pl
+++ b/src/lib/libcrypto/aes/asm/aes-parisc.pl
@@ -1015,7 +1015,8 @@ foreach (split("\n",$code)) {
1015 $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2) 1015 $SIZE_T==4 ? sprintf("extru%s,%d,8,",$1,31-$2)
1016 : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e; 1016 : sprintf("extrd,u%s,%d,8,",$1,63-$2)/e;
1017 1017
1018 s/,\*/,/ if ($SIZE_T==4); 1018 s/,\*/,/ if ($SIZE_T==4);
1019 s/\bbv\b(.*\(%r2\))/bve$1/ if ($SIZE_T==8);
1019 print $_,"\n"; 1020 print $_,"\n";
1020} 1021}
1021close STDOUT; 1022close STDOUT;
diff --git a/src/lib/libcrypto/aes/asm/aes-s390x.pl b/src/lib/libcrypto/aes/asm/aes-s390x.pl
index 445a1e6762..e75dcd0315 100644
--- a/src/lib/libcrypto/aes/asm/aes-s390x.pl
+++ b/src/lib/libcrypto/aes/asm/aes-s390x.pl
@@ -1598,11 +1598,11 @@ $code.=<<___ if(1);
1598 lghi $s1,0x7f 1598 lghi $s1,0x7f
1599 nr $s1,%r0 1599 nr $s1,%r0
1600 lghi %r0,0 # query capability vector 1600 lghi %r0,0 # query capability vector
1601 la %r1,2*$SIZE_T($sp) 1601 la %r1,$tweak-16($sp)
1602 .long 0xb92e0042 # km %r4,%r2 1602 .long 0xb92e0042 # km %r4,%r2
1603 llihh %r1,0x8000 1603 llihh %r1,0x8000
1604 srlg %r1,%r1,32($s1) # check for 32+function code 1604 srlg %r1,%r1,32($s1) # check for 32+function code
1605 ng %r1,2*$SIZE_T($sp) 1605 ng %r1,$tweak-16($sp)
1606 lgr %r0,$s0 # restore the function code 1606 lgr %r0,$s0 # restore the function code
1607 la %r1,0($key1) # restore $key1 1607 la %r1,0($key1) # restore $key1
1608 jz .Lxts_km_vanilla 1608 jz .Lxts_km_vanilla
@@ -1628,7 +1628,7 @@ $code.=<<___ if(1);
1628 1628
1629 lrvg $s0,$tweak+0($sp) # load the last tweak 1629 lrvg $s0,$tweak+0($sp) # load the last tweak
1630 lrvg $s1,$tweak+8($sp) 1630 lrvg $s1,$tweak+8($sp)
1631 stmg %r0,%r3,$tweak-32(%r1) # wipe copy of the key 1631 stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key
1632 1632
1633 nill %r0,0xffdf # switch back to original function code 1633 nill %r0,0xffdf # switch back to original function code
1634 la %r1,0($key1) # restore pointer to $key1 1634 la %r1,0($key1) # restore pointer to $key1
@@ -1684,11 +1684,9 @@ $code.=<<___;
1684 lghi $i1,0x87 1684 lghi $i1,0x87
1685 srag $i2,$s1,63 # broadcast upper bit 1685 srag $i2,$s1,63 # broadcast upper bit
1686 ngr $i1,$i2 # rem 1686 ngr $i1,$i2 # rem
1687 srlg $i2,$s0,63 # carry bit from lower half 1687 algr $s0,$s0
1688 sllg $s0,$s0,1 1688 alcgr $s1,$s1
1689 sllg $s1,$s1,1
1690 xgr $s0,$i1 1689 xgr $s0,$i1
1691 ogr $s1,$i2
1692.Lxts_km_start: 1690.Lxts_km_start:
1693 lrvgr $i1,$s0 # flip byte order 1691 lrvgr $i1,$s0 # flip byte order
1694 lrvgr $i2,$s1 1692 lrvgr $i2,$s1
@@ -1745,11 +1743,9 @@ $code.=<<___;
1745 lghi $i1,0x87 1743 lghi $i1,0x87
1746 srag $i2,$s1,63 # broadcast upper bit 1744 srag $i2,$s1,63 # broadcast upper bit
1747 ngr $i1,$i2 # rem 1745 ngr $i1,$i2 # rem
1748 srlg $i2,$s0,63 # carry bit from lower half 1746 algr $s0,$s0
1749 sllg $s0,$s0,1 1747 alcgr $s1,$s1
1750 sllg $s1,$s1,1
1751 xgr $s0,$i1 1748 xgr $s0,$i1
1752 ogr $s1,$i2
1753 1749
1754 ltr $len,$len # clear zero flag 1750 ltr $len,$len # clear zero flag
1755 br $ra 1751 br $ra
@@ -1781,8 +1777,8 @@ $code.=<<___ if (!$softonly);
1781 clr %r0,%r1 1777 clr %r0,%r1
1782 jl .Lxts_enc_software 1778 jl .Lxts_enc_software
1783 1779
1780 st${g} $ra,5*$SIZE_T($sp)
1784 stm${g} %r6,$s3,6*$SIZE_T($sp) 1781 stm${g} %r6,$s3,6*$SIZE_T($sp)
1785 st${g} $ra,14*$SIZE_T($sp)
1786 1782
1787 sllg $len,$len,4 # $len&=~15 1783 sllg $len,$len,4 # $len&=~15
1788 slgr $out,$inp 1784 slgr $out,$inp
@@ -1830,9 +1826,9 @@ $code.=<<___ if (!$softonly);
1830 stg $i2,8($i3) 1826 stg $i2,8($i3)
1831 1827
1832.Lxts_enc_km_done: 1828.Lxts_enc_km_done:
1833 l${g} $ra,14*$SIZE_T($sp) 1829 stg $sp,$tweak+0($sp) # wipe tweak
1834 st${g} $sp,$tweak($sp) # wipe tweak 1830 stg $sp,$tweak+8($sp)
1835 st${g} $sp,$tweak($sp) 1831 l${g} $ra,5*$SIZE_T($sp)
1836 lm${g} %r6,$s3,6*$SIZE_T($sp) 1832 lm${g} %r6,$s3,6*$SIZE_T($sp)
1837 br $ra 1833 br $ra
1838.align 16 1834.align 16
@@ -1843,12 +1839,11 @@ $code.=<<___;
1843 1839
1844 slgr $out,$inp 1840 slgr $out,$inp
1845 1841
1846 xgr $s0,$s0 # clear upper half 1842 l${g} $s3,$stdframe($sp) # ivp
1847 xgr $s1,$s1 1843 llgf $s0,0($s3) # load iv
1848 lrv $s0,$stdframe+4($sp) # load secno 1844 llgf $s1,4($s3)
1849 lrv $s1,$stdframe+0($sp) 1845 llgf $s2,8($s3)
1850 xgr $s2,$s2 1846 llgf $s3,12($s3)
1851 xgr $s3,$s3
1852 stm${g} %r2,%r5,2*$SIZE_T($sp) 1847 stm${g} %r2,%r5,2*$SIZE_T($sp)
1853 la $key,0($key2) 1848 la $key,0($key2)
1854 larl $tbl,AES_Te 1849 larl $tbl,AES_Te
@@ -1864,11 +1859,9 @@ $code.=<<___;
1864 lghi %r1,0x87 1859 lghi %r1,0x87
1865 srag %r0,$s3,63 # broadcast upper bit 1860 srag %r0,$s3,63 # broadcast upper bit
1866 ngr %r1,%r0 # rem 1861 ngr %r1,%r0 # rem
1867 srlg %r0,$s1,63 # carry bit from lower half 1862 algr $s1,$s1
1868 sllg $s1,$s1,1 1863 alcgr $s3,$s3
1869 sllg $s3,$s3,1
1870 xgr $s1,%r1 1864 xgr $s1,%r1
1871 ogr $s3,%r0
1872 lrvgr $s1,$s1 # flip byte order 1865 lrvgr $s1,$s1 # flip byte order
1873 lrvgr $s3,$s3 1866 lrvgr $s3,$s3
1874 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1867 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
@@ -1917,11 +1910,9 @@ $code.=<<___;
1917 lghi %r1,0x87 1910 lghi %r1,0x87
1918 srag %r0,$s3,63 # broadcast upper bit 1911 srag %r0,$s3,63 # broadcast upper bit
1919 ngr %r1,%r0 # rem 1912 ngr %r1,%r0 # rem
1920 srlg %r0,$s1,63 # carry bit from lower half 1913 algr $s1,$s1
1921 sllg $s1,$s1,1 1914 alcgr $s3,$s3
1922 sllg $s3,$s3,1
1923 xgr $s1,%r1 1915 xgr $s1,%r1
1924 ogr $s3,%r0
1925 lrvgr $s1,$s1 # flip byte order 1916 lrvgr $s1,$s1 # flip byte order
1926 lrvgr $s3,$s3 1917 lrvgr $s3,$s3
1927 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1918 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
@@ -1956,7 +1947,8 @@ $code.=<<___;
1956.size AES_xts_encrypt,.-AES_xts_encrypt 1947.size AES_xts_encrypt,.-AES_xts_encrypt
1957___ 1948___
1958# void AES_xts_decrypt(const char *inp,char *out,size_t len, 1949# void AES_xts_decrypt(const char *inp,char *out,size_t len,
1959# const AES_KEY *key1, const AES_KEY *key2,u64 secno); 1950# const AES_KEY *key1, const AES_KEY *key2,
1951# const unsigned char iv[16]);
1960# 1952#
1961$code.=<<___; 1953$code.=<<___;
1962.globl AES_xts_decrypt 1954.globl AES_xts_decrypt
@@ -1988,8 +1980,8 @@ $code.=<<___ if (!$softonly);
1988 clr %r0,%r1 1980 clr %r0,%r1
1989 jl .Lxts_dec_software 1981 jl .Lxts_dec_software
1990 1982
1983 st${g} $ra,5*$SIZE_T($sp)
1991 stm${g} %r6,$s3,6*$SIZE_T($sp) 1984 stm${g} %r6,$s3,6*$SIZE_T($sp)
1992 st${g} $ra,14*$SIZE_T($sp)
1993 1985
1994 nill $len,0xfff0 # $len&=~15 1986 nill $len,0xfff0 # $len&=~15
1995 slgr $out,$inp 1987 slgr $out,$inp
@@ -2028,11 +2020,9 @@ $code.=<<___ if (!$softonly);
2028 lghi $i1,0x87 2020 lghi $i1,0x87
2029 srag $i2,$s1,63 # broadcast upper bit 2021 srag $i2,$s1,63 # broadcast upper bit
2030 ngr $i1,$i2 # rem 2022 ngr $i1,$i2 # rem
2031 srlg $i2,$s0,63 # carry bit from lower half 2023 algr $s0,$s0
2032 sllg $s0,$s0,1 2024 alcgr $s1,$s1
2033 sllg $s1,$s1,1
2034 xgr $s0,$i1 2025 xgr $s0,$i1
2035 ogr $s1,$i2
2036 lrvgr $i1,$s0 # flip byte order 2026 lrvgr $i1,$s0 # flip byte order
2037 lrvgr $i2,$s1 2027 lrvgr $i2,$s1
2038 2028
@@ -2075,9 +2065,9 @@ $code.=<<___ if (!$softonly);
2075 stg $s2,0($i3) 2065 stg $s2,0($i3)
2076 stg $s3,8($i3) 2066 stg $s3,8($i3)
2077.Lxts_dec_km_done: 2067.Lxts_dec_km_done:
2078 l${g} $ra,14*$SIZE_T($sp) 2068 stg $sp,$tweak+0($sp) # wipe tweak
2079 st${g} $sp,$tweak($sp) # wipe tweak 2069 stg $sp,$tweak+8($sp)
2080 st${g} $sp,$tweak($sp) 2070 l${g} $ra,5*$SIZE_T($sp)
2081 lm${g} %r6,$s3,6*$SIZE_T($sp) 2071 lm${g} %r6,$s3,6*$SIZE_T($sp)
2082 br $ra 2072 br $ra
2083.align 16 2073.align 16
@@ -2089,12 +2079,11 @@ $code.=<<___;
2089 srlg $len,$len,4 2079 srlg $len,$len,4
2090 slgr $out,$inp 2080 slgr $out,$inp
2091 2081
2092 xgr $s0,$s0 # clear upper half 2082 l${g} $s3,$stdframe($sp) # ivp
2093 xgr $s1,$s1 2083 llgf $s0,0($s3) # load iv
2094 lrv $s0,$stdframe+4($sp) # load secno 2084 llgf $s1,4($s3)
2095 lrv $s1,$stdframe+0($sp) 2085 llgf $s2,8($s3)
2096 xgr $s2,$s2 2086 llgf $s3,12($s3)
2097 xgr $s3,$s3
2098 stm${g} %r2,%r5,2*$SIZE_T($sp) 2087 stm${g} %r2,%r5,2*$SIZE_T($sp)
2099 la $key,0($key2) 2088 la $key,0($key2)
2100 larl $tbl,AES_Te 2089 larl $tbl,AES_Te
@@ -2113,11 +2102,9 @@ $code.=<<___;
2113 lghi %r1,0x87 2102 lghi %r1,0x87
2114 srag %r0,$s3,63 # broadcast upper bit 2103 srag %r0,$s3,63 # broadcast upper bit
2115 ngr %r1,%r0 # rem 2104 ngr %r1,%r0 # rem
2116 srlg %r0,$s1,63 # carry bit from lower half 2105 algr $s1,$s1
2117 sllg $s1,$s1,1 2106 alcgr $s3,$s3
2118 sllg $s3,$s3,1
2119 xgr $s1,%r1 2107 xgr $s1,%r1
2120 ogr $s3,%r0
2121 lrvgr $s1,$s1 # flip byte order 2108 lrvgr $s1,$s1 # flip byte order
2122 lrvgr $s3,$s3 2109 lrvgr $s3,$s3
2123 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2110 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
@@ -2156,11 +2143,9 @@ $code.=<<___;
2156 lghi %r1,0x87 2143 lghi %r1,0x87
2157 srag %r0,$s3,63 # broadcast upper bit 2144 srag %r0,$s3,63 # broadcast upper bit
2158 ngr %r1,%r0 # rem 2145 ngr %r1,%r0 # rem
2159 srlg %r0,$s1,63 # carry bit from lower half 2146 algr $s1,$s1
2160 sllg $s1,$s1,1 2147 alcgr $s3,$s3
2161 sllg $s3,$s3,1
2162 xgr $s1,%r1 2148 xgr $s1,%r1
2163 ogr $s3,%r0
2164 lrvgr $i2,$s1 # flip byte order 2149 lrvgr $i2,$s1 # flip byte order
2165 lrvgr $i3,$s3 2150 lrvgr $i3,$s3
2166 stmg $i2,$i3,$tweak($sp) # save the 1st tweak 2151 stmg $i2,$i3,$tweak($sp) # save the 1st tweak
@@ -2176,11 +2161,9 @@ $code.=<<___;
2176 lghi %r1,0x87 2161 lghi %r1,0x87
2177 srag %r0,$s3,63 # broadcast upper bit 2162 srag %r0,$s3,63 # broadcast upper bit
2178 ngr %r1,%r0 # rem 2163 ngr %r1,%r0 # rem
2179 srlg %r0,$s1,63 # carry bit from lower half 2164 algr $s1,$s1
2180 sllg $s1,$s1,1 2165 alcgr $s3,$s3
2181 sllg $s3,$s3,1
2182 xgr $s1,%r1 2166 xgr $s1,%r1
2183 ogr $s3,%r0
2184 lrvgr $s1,$s1 # flip byte order 2167 lrvgr $s1,$s1 # flip byte order
2185 lrvgr $s3,$s3 2168 lrvgr $s3,$s3
2186 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2169 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
diff --git a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
index c6f6b3334a..3c8f6c19e7 100644
--- a/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/aesni-sha1-x86_64.pl
@@ -69,7 +69,8 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
69 `ml64 2>&1` =~ /Version ([0-9]+)\./ && 69 `ml64 2>&1` =~ /Version ([0-9]+)\./ &&
70 $1>=10); 70 $1>=10);
71 71
72open STDOUT,"| $^X $xlate $flavour $output"; 72open OUT,"| \"$^X\" $xlate $flavour $output";
73*STDOUT=*OUT;
73 74
74# void aesni_cbc_sha1_enc(const void *inp, 75# void aesni_cbc_sha1_enc(const void *inp,
75# void *out, 76# void *out,
diff --git a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
index c9c6312fa7..41b90f0844 100644
--- a/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/bsaes-x86_64.pl
@@ -83,9 +83,9 @@
83# Add decryption procedure. Performance in CPU cycles spent to decrypt 83# Add decryption procedure. Performance in CPU cycles spent to decrypt
84# one byte out of 4096-byte buffer with 128-bit key is: 84# one byte out of 4096-byte buffer with 128-bit key is:
85# 85#
86# Core 2 11.0 86# Core 2 9.83
87# Nehalem 9.16 87# Nehalem 7.74
88# Atom 20.9 88# Atom 19.0
89# 89#
90# November 2011. 90# November 2011.
91# 91#
@@ -105,7 +105,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
105( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or 105( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
106die "can't locate x86_64-xlate.pl"; 106die "can't locate x86_64-xlate.pl";
107 107
108open STDOUT,"| $^X $xlate $flavour $output"; 108open OUT,"| \"$^X\" $xlate $flavour $output";
109*STDOUT=*OUT;
109 110
110my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx"); 111my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");
111my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15) 112my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15)
@@ -455,6 +456,7 @@ sub MixColumns {
455# modified to emit output in order suitable for feeding back to aesenc[last] 456# modified to emit output in order suitable for feeding back to aesenc[last]
456my @x=@_[0..7]; 457my @x=@_[0..7];
457my @t=@_[8..15]; 458my @t=@_[8..15];
459my $inv=@_[16]; # optional
458$code.=<<___; 460$code.=<<___;
459 pshufd \$0x93, @x[0], @t[0] # x0 <<< 32 461 pshufd \$0x93, @x[0], @t[0] # x0 <<< 32
460 pshufd \$0x93, @x[1], @t[1] 462 pshufd \$0x93, @x[1], @t[1]
@@ -496,7 +498,8 @@ $code.=<<___;
496 pxor @t[4], @t[0] 498 pxor @t[4], @t[0]
497 pshufd \$0x4E, @x[2], @x[6] 499 pshufd \$0x4E, @x[2], @x[6]
498 pxor @t[5], @t[1] 500 pxor @t[5], @t[1]
499 501___
502$code.=<<___ if (!$inv);
500 pxor @t[3], @x[4] 503 pxor @t[3], @x[4]
501 pxor @t[7], @x[5] 504 pxor @t[7], @x[5]
502 pxor @t[6], @x[3] 505 pxor @t[6], @x[3]
@@ -504,9 +507,20 @@ $code.=<<___;
504 pxor @t[2], @x[6] 507 pxor @t[2], @x[6]
505 movdqa @t[1], @x[7] 508 movdqa @t[1], @x[7]
506___ 509___
510$code.=<<___ if ($inv);
511 pxor @x[4], @t[3]
512 pxor @t[7], @x[5]
513 pxor @x[3], @t[6]
514 movdqa @t[0], @x[3]
515 pxor @t[2], @x[6]
516 movdqa @t[6], @x[2]
517 movdqa @t[1], @x[7]
518 movdqa @x[6], @x[4]
519 movdqa @t[3], @x[6]
520___
507} 521}
508 522
509sub InvMixColumns { 523sub InvMixColumns_orig {
510my @x=@_[0..7]; 524my @x=@_[0..7];
511my @t=@_[8..15]; 525my @t=@_[8..15];
512 526
@@ -660,6 +674,54 @@ $code.=<<___;
660___ 674___
661} 675}
662 676
677sub InvMixColumns {
678my @x=@_[0..7];
679my @t=@_[8..15];
680
681# Thanks to Jussi Kivilinna for providing pointer to
682#
683# | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
684# | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
685# | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
686# | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
687
688$code.=<<___;
689 # multiplication by 0x05-0x00-0x04-0x00
690 pshufd \$0x4E, @x[0], @t[0]
691 pshufd \$0x4E, @x[6], @t[6]
692 pxor @x[0], @t[0]
693 pshufd \$0x4E, @x[7], @t[7]
694 pxor @x[6], @t[6]
695 pshufd \$0x4E, @x[1], @t[1]
696 pxor @x[7], @t[7]
697 pshufd \$0x4E, @x[2], @t[2]
698 pxor @x[1], @t[1]
699 pshufd \$0x4E, @x[3], @t[3]
700 pxor @x[2], @t[2]
701 pxor @t[6], @x[0]
702 pxor @t[6], @x[1]
703 pshufd \$0x4E, @x[4], @t[4]
704 pxor @x[3], @t[3]
705 pxor @t[0], @x[2]
706 pxor @t[1], @x[3]
707 pshufd \$0x4E, @x[5], @t[5]
708 pxor @x[4], @t[4]
709 pxor @t[7], @x[1]
710 pxor @t[2], @x[4]
711 pxor @x[5], @t[5]
712
713 pxor @t[7], @x[2]
714 pxor @t[6], @x[3]
715 pxor @t[6], @x[4]
716 pxor @t[3], @x[5]
717 pxor @t[4], @x[6]
718 pxor @t[7], @x[4]
719 pxor @t[7], @x[5]
720 pxor @t[5], @x[7]
721___
722 &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
723}
724
663sub aesenc { # not used 725sub aesenc { # not used
664my @b=@_[0..7]; 726my @b=@_[0..7];
665my @t=@_[8..15]; 727my @t=@_[8..15];
@@ -2027,6 +2089,8 @@ ___
2027# const unsigned char iv[16]); 2089# const unsigned char iv[16]);
2028# 2090#
2029my ($twmask,$twres,$twtmp)=@XMM[13..15]; 2091my ($twmask,$twres,$twtmp)=@XMM[13..15];
2092$arg6=~s/d$//;
2093
2030$code.=<<___; 2094$code.=<<___;
2031.globl bsaes_xts_encrypt 2095.globl bsaes_xts_encrypt
2032.type bsaes_xts_encrypt,\@abi-omnipotent 2096.type bsaes_xts_encrypt,\@abi-omnipotent
diff --git a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl b/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl
index 37998db5e1..bd7f45b850 100644
--- a/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl
+++ b/src/lib/libcrypto/aes/asm/vpaes-x86_64.pl
@@ -56,7 +56,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
56( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or 56( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
57die "can't locate x86_64-xlate.pl"; 57die "can't locate x86_64-xlate.pl";
58 58
59open STDOUT,"| $^X $xlate $flavour $output"; 59open OUT,"| \"$^X\" $xlate $flavour $output";
60*STDOUT=*OUT;
60 61
61$PREFIX="vpaes"; 62$PREFIX="vpaes";
62 63
@@ -1059,7 +1060,7 @@ _vpaes_consts:
1059.Lk_dsbo: # decryption sbox final output 1060.Lk_dsbo: # decryption sbox final output
1060 .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D 1061 .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
1061 .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C 1062 .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C
1062.asciz "Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" 1063.asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
1063.align 64 1064.align 64
1064.size _vpaes_consts,.-_vpaes_consts 1065.size _vpaes_consts,.-_vpaes_consts
1065___ 1066___