diff options
author | bcook <> | 2015-02-25 15:39:49 +0000 |
---|---|---|
committer | bcook <> | 2015-02-25 15:39:49 +0000 |
commit | 432e1d553bd75841b5b29f1a8008b519d538f765 (patch) | |
tree | fca56e3d23c024e7f0d0132456914f4f3181e5df /src/lib | |
parent | c95a8d3fbea64773cc8d6de4314c26a413e58a60 (diff) | |
download | openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.tar.gz openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.tar.bz2 openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.zip |
Fix CVE-2014-3570: properly calculate the square of a BIGNUM value.
See https://www.openssl.org/news/secadv_20150108.txt for a more detailed
discussion.
Original OpenSSL patch here:
https://github.com/openssl/openssl/commit/a7a44ba55cb4f884c6bc9ceac90072dea38e66d0
The regression test is modified a little for KNF.
ok miod@
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/libcrypto/bn/asm/mips.pl | 611 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/x86_64-gcc.c | 103 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 243 | ||||
-rw-r--r-- | src/lib/libssl/src/crypto/bn/asm/mips.pl | 611 | ||||
-rw-r--r-- | src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c | 103 | ||||
-rw-r--r-- | src/lib/libssl/src/crypto/bn/bn_asm.c | 243 |
6 files changed, 606 insertions, 1308 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl index d2f3ef7bbf..215c9a7483 100644 --- a/src/lib/libcrypto/bn/asm/mips.pl +++ b/src/lib/libcrypto/bn/asm/mips.pl | |||
@@ -1872,6 +1872,41 @@ ___ | |||
1872 | 1872 | ||
1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); |
1874 | 1874 | ||
1875 | sub add_c2 () { | ||
1876 | my ($hi,$lo,$c0,$c1,$c2, | ||
1877 | $warm, # !$warm denotes first call with specific sequence of | ||
1878 | # $c_[XYZ] when there is no Z-carry to accumulate yet; | ||
1879 | $an,$bn # these two are arguments for multiplication which | ||
1880 | # result is used in *next* step [which is why it's | ||
1881 | # commented as "forward multiplication" below]; | ||
1882 | )=@_; | ||
1883 | $code.=<<___; | ||
1884 | mflo $lo | ||
1885 | mfhi $hi | ||
1886 | $ADDU $c0,$lo | ||
1887 | sltu $at,$c0,$lo | ||
1888 | $MULTU $an,$bn # forward multiplication | ||
1889 | $ADDU $c0,$lo | ||
1890 | $ADDU $at,$hi | ||
1891 | sltu $lo,$c0,$lo | ||
1892 | $ADDU $c1,$at | ||
1893 | $ADDU $hi,$lo | ||
1894 | ___ | ||
1895 | $code.=<<___ if (!$warm); | ||
1896 | sltu $c2,$c1,$at | ||
1897 | $ADDU $c1,$hi | ||
1898 | sltu $hi,$c1,$hi | ||
1899 | $ADDU $c2,$hi | ||
1900 | ___ | ||
1901 | $code.=<<___ if ($warm); | ||
1902 | sltu $at,$c1,$at | ||
1903 | $ADDU $c1,$hi | ||
1904 | $ADDU $c2,$at | ||
1905 | sltu $hi,$c1,$hi | ||
1906 | $ADDU $c2,$hi | ||
1907 | ___ | ||
1908 | } | ||
1909 | |||
1875 | $code.=<<___; | 1910 | $code.=<<___; |
1876 | 1911 | ||
1877 | .align 5 | 1912 | .align 5 |
@@ -1920,21 +1955,10 @@ $code.=<<___; | |||
1920 | sltu $at,$c_2,$t_1 | 1955 | sltu $at,$c_2,$t_1 |
1921 | $ADDU $c_3,$t_2,$at | 1956 | $ADDU $c_3,$t_2,$at |
1922 | $ST $c_2,$BNSZ($a0) | 1957 | $ST $c_2,$BNSZ($a0) |
1923 | 1958 | ___ | |
1924 | mflo $t_1 | 1959 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
1925 | mfhi $t_2 | 1960 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
1926 | slt $c_2,$t_2,$zero | 1961 | $code.=<<___; |
1927 | $SLL $t_2,1 | ||
1928 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
1929 | slt $a2,$t_1,$zero | ||
1930 | $ADDU $t_2,$a2 | ||
1931 | $SLL $t_1,1 | ||
1932 | $ADDU $c_3,$t_1 | ||
1933 | sltu $at,$c_3,$t_1 | ||
1934 | $ADDU $t_2,$at | ||
1935 | $ADDU $c_1,$t_2 | ||
1936 | sltu $at,$c_1,$t_2 | ||
1937 | $ADDU $c_2,$at | ||
1938 | mflo $t_1 | 1962 | mflo $t_1 |
1939 | mfhi $t_2 | 1963 | mfhi $t_2 |
1940 | $ADDU $c_3,$t_1 | 1964 | $ADDU $c_3,$t_1 |
@@ -1945,67 +1969,19 @@ $code.=<<___; | |||
1945 | sltu $at,$c_1,$t_2 | 1969 | sltu $at,$c_1,$t_2 |
1946 | $ADDU $c_2,$at | 1970 | $ADDU $c_2,$at |
1947 | $ST $c_3,2*$BNSZ($a0) | 1971 | $ST $c_3,2*$BNSZ($a0) |
1948 | 1972 | ___ | |
1949 | mflo $t_1 | 1973 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
1950 | mfhi $t_2 | 1974 | $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); |
1951 | slt $c_3,$t_2,$zero | 1975 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
1952 | $SLL $t_2,1 | 1976 | $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); |
1953 | $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | 1977 | $code.=<<___; |
1954 | slt $a2,$t_1,$zero | ||
1955 | $ADDU $t_2,$a2 | ||
1956 | $SLL $t_1,1 | ||
1957 | $ADDU $c_1,$t_1 | ||
1958 | sltu $at,$c_1,$t_1 | ||
1959 | $ADDU $t_2,$at | ||
1960 | $ADDU $c_2,$t_2 | ||
1961 | sltu $at,$c_2,$t_2 | ||
1962 | $ADDU $c_3,$at | ||
1963 | mflo $t_1 | ||
1964 | mfhi $t_2 | ||
1965 | slt $at,$t_2,$zero | ||
1966 | $ADDU $c_3,$at | ||
1967 | $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
1968 | $SLL $t_2,1 | ||
1969 | slt $a2,$t_1,$zero | ||
1970 | $ADDU $t_2,$a2 | ||
1971 | $SLL $t_1,1 | ||
1972 | $ADDU $c_1,$t_1 | ||
1973 | sltu $at,$c_1,$t_1 | ||
1974 | $ADDU $t_2,$at | ||
1975 | $ADDU $c_2,$t_2 | ||
1976 | sltu $at,$c_2,$t_2 | ||
1977 | $ADDU $c_3,$at | ||
1978 | $ST $c_1,3*$BNSZ($a0) | 1978 | $ST $c_1,3*$BNSZ($a0) |
1979 | 1979 | ___ | |
1980 | mflo $t_1 | 1980 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
1981 | mfhi $t_2 | 1981 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
1982 | slt $c_1,$t_2,$zero | 1982 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
1983 | $SLL $t_2,1 | 1983 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
1984 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | 1984 | $code.=<<___; |
1985 | slt $a2,$t_1,$zero | ||
1986 | $ADDU $t_2,$a2 | ||
1987 | $SLL $t_1,1 | ||
1988 | $ADDU $c_2,$t_1 | ||
1989 | sltu $at,$c_2,$t_1 | ||
1990 | $ADDU $t_2,$at | ||
1991 | $ADDU $c_3,$t_2 | ||
1992 | sltu $at,$c_3,$t_2 | ||
1993 | $ADDU $c_1,$at | ||
1994 | mflo $t_1 | ||
1995 | mfhi $t_2 | ||
1996 | slt $at,$t_2,$zero | ||
1997 | $ADDU $c_1,$at | ||
1998 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
1999 | $SLL $t_2,1 | ||
2000 | slt $a2,$t_1,$zero | ||
2001 | $ADDU $t_2,$a2 | ||
2002 | $SLL $t_1,1 | ||
2003 | $ADDU $c_2,$t_1 | ||
2004 | sltu $at,$c_2,$t_1 | ||
2005 | $ADDU $t_2,$at | ||
2006 | $ADDU $c_3,$t_2 | ||
2007 | sltu $at,$c_3,$t_2 | ||
2008 | $ADDU $c_1,$at | ||
2009 | mflo $t_1 | 1985 | mflo $t_1 |
2010 | mfhi $t_2 | 1986 | mfhi $t_2 |
2011 | $ADDU $c_2,$t_1 | 1987 | $ADDU $c_2,$t_1 |
@@ -2016,97 +1992,23 @@ $code.=<<___; | |||
2016 | sltu $at,$c_3,$t_2 | 1992 | sltu $at,$c_3,$t_2 |
2017 | $ADDU $c_1,$at | 1993 | $ADDU $c_1,$at |
2018 | $ST $c_2,4*$BNSZ($a0) | 1994 | $ST $c_2,4*$BNSZ($a0) |
2019 | 1995 | ___ | |
2020 | mflo $t_1 | 1996 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2021 | mfhi $t_2 | 1997 | $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); |
2022 | slt $c_2,$t_2,$zero | 1998 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2023 | $SLL $t_2,1 | 1999 | $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); |
2024 | $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | 2000 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2025 | slt $a2,$t_1,$zero | 2001 | $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); |
2026 | $ADDU $t_2,$a2 | 2002 | $code.=<<___; |
2027 | $SLL $t_1,1 | ||
2028 | $ADDU $c_3,$t_1 | ||
2029 | sltu $at,$c_3,$t_1 | ||
2030 | $ADDU $t_2,$at | ||
2031 | $ADDU $c_1,$t_2 | ||
2032 | sltu $at,$c_1,$t_2 | ||
2033 | $ADDU $c_2,$at | ||
2034 | mflo $t_1 | ||
2035 | mfhi $t_2 | ||
2036 | slt $at,$t_2,$zero | ||
2037 | $ADDU $c_2,$at | ||
2038 | $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
2039 | $SLL $t_2,1 | ||
2040 | slt $a2,$t_1,$zero | ||
2041 | $ADDU $t_2,$a2 | ||
2042 | $SLL $t_1,1 | ||
2043 | $ADDU $c_3,$t_1 | ||
2044 | sltu $at,$c_3,$t_1 | ||
2045 | $ADDU $t_2,$at | ||
2046 | $ADDU $c_1,$t_2 | ||
2047 | sltu $at,$c_1,$t_2 | ||
2048 | $ADDU $c_2,$at | ||
2049 | mflo $t_1 | ||
2050 | mfhi $t_2 | ||
2051 | slt $at,$t_2,$zero | ||
2052 | $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
2053 | $ADDU $c_2,$at | ||
2054 | $SLL $t_2,1 | ||
2055 | slt $a2,$t_1,$zero | ||
2056 | $ADDU $t_2,$a2 | ||
2057 | $SLL $t_1,1 | ||
2058 | $ADDU $c_3,$t_1 | ||
2059 | sltu $at,$c_3,$t_1 | ||
2060 | $ADDU $t_2,$at | ||
2061 | $ADDU $c_1,$t_2 | ||
2062 | sltu $at,$c_1,$t_2 | ||
2063 | $ADDU $c_2,$at | ||
2064 | $ST $c_3,5*$BNSZ($a0) | 2003 | $ST $c_3,5*$BNSZ($a0) |
2065 | 2004 | ___ | |
2066 | mflo $t_1 | 2005 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2067 | mfhi $t_2 | 2006 | $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); |
2068 | slt $c_3,$t_2,$zero | 2007 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2069 | $SLL $t_2,1 | 2008 | $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); |
2070 | $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | 2009 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2071 | slt $a2,$t_1,$zero | 2010 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
2072 | $ADDU $t_2,$a2 | 2011 | $code.=<<___; |
2073 | $SLL $t_1,1 | ||
2074 | $ADDU $c_1,$t_1 | ||
2075 | sltu $at,$c_1,$t_1 | ||
2076 | $ADDU $t_2,$at | ||
2077 | $ADDU $c_2,$t_2 | ||
2078 | sltu $at,$c_2,$t_2 | ||
2079 | $ADDU $c_3,$at | ||
2080 | mflo $t_1 | ||
2081 | mfhi $t_2 | ||
2082 | slt $at,$t_2,$zero | ||
2083 | $ADDU $c_3,$at | ||
2084 | $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
2085 | $SLL $t_2,1 | ||
2086 | slt $a2,$t_1,$zero | ||
2087 | $ADDU $t_2,$a2 | ||
2088 | $SLL $t_1,1 | ||
2089 | $ADDU $c_1,$t_1 | ||
2090 | sltu $at,$c_1,$t_1 | ||
2091 | $ADDU $t_2,$at | ||
2092 | $ADDU $c_2,$t_2 | ||
2093 | sltu $at,$c_2,$t_2 | ||
2094 | $ADDU $c_3,$at | ||
2095 | mflo $t_1 | ||
2096 | mfhi $t_2 | ||
2097 | slt $at,$t_2,$zero | ||
2098 | $ADDU $c_3,$at | ||
2099 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
2100 | $SLL $t_2,1 | ||
2101 | slt $a2,$t_1,$zero | ||
2102 | $ADDU $t_2,$a2 | ||
2103 | $SLL $t_1,1 | ||
2104 | $ADDU $c_1,$t_1 | ||
2105 | sltu $at,$c_1,$t_1 | ||
2106 | $ADDU $t_2,$at | ||
2107 | $ADDU $c_2,$t_2 | ||
2108 | sltu $at,$c_2,$t_2 | ||
2109 | $ADDU $c_3,$at | ||
2110 | mflo $t_1 | 2012 | mflo $t_1 |
2111 | mfhi $t_2 | 2013 | mfhi $t_2 |
2112 | $ADDU $c_1,$t_1 | 2014 | $ADDU $c_1,$t_1 |
@@ -2117,112 +2019,25 @@ $code.=<<___; | |||
2117 | sltu $at,$c_2,$t_2 | 2019 | sltu $at,$c_2,$t_2 |
2118 | $ADDU $c_3,$at | 2020 | $ADDU $c_3,$at |
2119 | $ST $c_1,6*$BNSZ($a0) | 2021 | $ST $c_1,6*$BNSZ($a0) |
2120 | 2022 | ___ | |
2121 | mflo $t_1 | 2023 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2122 | mfhi $t_2 | 2024 | $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); |
2123 | slt $c_1,$t_2,$zero | 2025 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2124 | $SLL $t_2,1 | 2026 | $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); |
2125 | $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | 2027 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2126 | slt $a2,$t_1,$zero | 2028 | $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); |
2127 | $ADDU $t_2,$a2 | 2029 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2128 | $SLL $t_1,1 | 2030 | $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); |
2129 | $ADDU $c_2,$t_1 | 2031 | $code.=<<___; |
2130 | sltu $at,$c_2,$t_1 | ||
2131 | $ADDU $t_2,$at | ||
2132 | $ADDU $c_3,$t_2 | ||
2133 | sltu $at,$c_3,$t_2 | ||
2134 | $ADDU $c_1,$at | ||
2135 | mflo $t_1 | ||
2136 | mfhi $t_2 | ||
2137 | slt $at,$t_2,$zero | ||
2138 | $ADDU $c_1,$at | ||
2139 | $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
2140 | $SLL $t_2,1 | ||
2141 | slt $a2,$t_1,$zero | ||
2142 | $ADDU $t_2,$a2 | ||
2143 | $SLL $t_1,1 | ||
2144 | $ADDU $c_2,$t_1 | ||
2145 | sltu $at,$c_2,$t_1 | ||
2146 | $ADDU $t_2,$at | ||
2147 | $ADDU $c_3,$t_2 | ||
2148 | sltu $at,$c_3,$t_2 | ||
2149 | $ADDU $c_1,$at | ||
2150 | mflo $t_1 | ||
2151 | mfhi $t_2 | ||
2152 | slt $at,$t_2,$zero | ||
2153 | $ADDU $c_1,$at | ||
2154 | $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
2155 | $SLL $t_2,1 | ||
2156 | slt $a2,$t_1,$zero | ||
2157 | $ADDU $t_2,$a2 | ||
2158 | $SLL $t_1,1 | ||
2159 | $ADDU $c_2,$t_1 | ||
2160 | sltu $at,$c_2,$t_1 | ||
2161 | $ADDU $t_2,$at | ||
2162 | $ADDU $c_3,$t_2 | ||
2163 | sltu $at,$c_3,$t_2 | ||
2164 | $ADDU $c_1,$at | ||
2165 | mflo $t_1 | ||
2166 | mfhi $t_2 | ||
2167 | slt $at,$t_2,$zero | ||
2168 | $ADDU $c_1,$at | ||
2169 | $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
2170 | $SLL $t_2,1 | ||
2171 | slt $a2,$t_1,$zero | ||
2172 | $ADDU $t_2,$a2 | ||
2173 | $SLL $t_1,1 | ||
2174 | $ADDU $c_2,$t_1 | ||
2175 | sltu $at,$c_2,$t_1 | ||
2176 | $ADDU $t_2,$at | ||
2177 | $ADDU $c_3,$t_2 | ||
2178 | sltu $at,$c_3,$t_2 | ||
2179 | $ADDU $c_1,$at | ||
2180 | $ST $c_2,7*$BNSZ($a0) | 2032 | $ST $c_2,7*$BNSZ($a0) |
2181 | 2033 | ___ | |
2182 | mflo $t_1 | 2034 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2183 | mfhi $t_2 | 2035 | $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); |
2184 | slt $c_2,$t_2,$zero | 2036 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2185 | $SLL $t_2,1 | 2037 | $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); |
2186 | $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | 2038 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2187 | slt $a2,$t_1,$zero | 2039 | $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); |
2188 | $ADDU $t_2,$a2 | 2040 | $code.=<<___; |
2189 | $SLL $t_1,1 | ||
2190 | $ADDU $c_3,$t_1 | ||
2191 | sltu $at,$c_3,$t_1 | ||
2192 | $ADDU $t_2,$at | ||
2193 | $ADDU $c_1,$t_2 | ||
2194 | sltu $at,$c_1,$t_2 | ||
2195 | $ADDU $c_2,$at | ||
2196 | mflo $t_1 | ||
2197 | mfhi $t_2 | ||
2198 | slt $at,$t_2,$zero | ||
2199 | $ADDU $c_2,$at | ||
2200 | $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
2201 | $SLL $t_2,1 | ||
2202 | slt $a2,$t_1,$zero | ||
2203 | $ADDU $t_2,$a2 | ||
2204 | $SLL $t_1,1 | ||
2205 | $ADDU $c_3,$t_1 | ||
2206 | sltu $at,$c_3,$t_1 | ||
2207 | $ADDU $t_2,$at | ||
2208 | $ADDU $c_1,$t_2 | ||
2209 | sltu $at,$c_1,$t_2 | ||
2210 | $ADDU $c_2,$at | ||
2211 | mflo $t_1 | ||
2212 | mfhi $t_2 | ||
2213 | slt $at,$t_2,$zero | ||
2214 | $ADDU $c_2,$at | ||
2215 | $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
2216 | $SLL $t_2,1 | ||
2217 | slt $a2,$t_1,$zero | ||
2218 | $ADDU $t_2,$a2 | ||
2219 | $SLL $t_1,1 | ||
2220 | $ADDU $c_3,$t_1 | ||
2221 | sltu $at,$c_3,$t_1 | ||
2222 | $ADDU $t_2,$at | ||
2223 | $ADDU $c_1,$t_2 | ||
2224 | sltu $at,$c_1,$t_2 | ||
2225 | $ADDU $c_2,$at | ||
2226 | mflo $t_1 | 2041 | mflo $t_1 |
2227 | mfhi $t_2 | 2042 | mfhi $t_2 |
2228 | $ADDU $c_3,$t_1 | 2043 | $ADDU $c_3,$t_1 |
@@ -2233,82 +2048,21 @@ $code.=<<___; | |||
2233 | sltu $at,$c_1,$t_2 | 2048 | sltu $at,$c_1,$t_2 |
2234 | $ADDU $c_2,$at | 2049 | $ADDU $c_2,$at |
2235 | $ST $c_3,8*$BNSZ($a0) | 2050 | $ST $c_3,8*$BNSZ($a0) |
2236 | 2051 | ___ | |
2237 | mflo $t_1 | 2052 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2238 | mfhi $t_2 | 2053 | $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); |
2239 | slt $c_3,$t_2,$zero | 2054 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2240 | $SLL $t_2,1 | 2055 | $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); |
2241 | $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | 2056 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2242 | slt $a2,$t_1,$zero | 2057 | $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); |
2243 | $ADDU $t_2,$a2 | 2058 | $code.=<<___; |
2244 | $SLL $t_1,1 | ||
2245 | $ADDU $c_1,$t_1 | ||
2246 | sltu $at,$c_1,$t_1 | ||
2247 | $ADDU $t_2,$at | ||
2248 | $ADDU $c_2,$t_2 | ||
2249 | sltu $at,$c_2,$t_2 | ||
2250 | $ADDU $c_3,$at | ||
2251 | mflo $t_1 | ||
2252 | mfhi $t_2 | ||
2253 | slt $at,$t_2,$zero | ||
2254 | $ADDU $c_3,$at | ||
2255 | $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
2256 | $SLL $t_2,1 | ||
2257 | slt $a2,$t_1,$zero | ||
2258 | $ADDU $t_2,$a2 | ||
2259 | $SLL $t_1,1 | ||
2260 | $ADDU $c_1,$t_1 | ||
2261 | sltu $at,$c_1,$t_1 | ||
2262 | $ADDU $t_2,$at | ||
2263 | $ADDU $c_2,$t_2 | ||
2264 | sltu $at,$c_2,$t_2 | ||
2265 | $ADDU $c_3,$at | ||
2266 | mflo $t_1 | ||
2267 | mfhi $t_2 | ||
2268 | slt $at,$t_2,$zero | ||
2269 | $ADDU $c_3,$at | ||
2270 | $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
2271 | $SLL $t_2,1 | ||
2272 | slt $a2,$t_1,$zero | ||
2273 | $ADDU $t_2,$a2 | ||
2274 | $SLL $t_1,1 | ||
2275 | $ADDU $c_1,$t_1 | ||
2276 | sltu $at,$c_1,$t_1 | ||
2277 | $ADDU $t_2,$at | ||
2278 | $ADDU $c_2,$t_2 | ||
2279 | sltu $at,$c_2,$t_2 | ||
2280 | $ADDU $c_3,$at | ||
2281 | $ST $c_1,9*$BNSZ($a0) | 2059 | $ST $c_1,9*$BNSZ($a0) |
2282 | 2060 | ___ | |
2283 | mflo $t_1 | 2061 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2284 | mfhi $t_2 | 2062 | $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); |
2285 | slt $c_1,$t_2,$zero | 2063 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2286 | $SLL $t_2,1 | 2064 | $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); |
2287 | $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | 2065 | $code.=<<___; |
2288 | slt $a2,$t_1,$zero | ||
2289 | $ADDU $t_2,$a2 | ||
2290 | $SLL $t_1,1 | ||
2291 | $ADDU $c_2,$t_1 | ||
2292 | sltu $at,$c_2,$t_1 | ||
2293 | $ADDU $t_2,$at | ||
2294 | $ADDU $c_3,$t_2 | ||
2295 | sltu $at,$c_3,$t_2 | ||
2296 | $ADDU $c_1,$at | ||
2297 | mflo $t_1 | ||
2298 | mfhi $t_2 | ||
2299 | slt $at,$t_2,$zero | ||
2300 | $ADDU $c_1,$at | ||
2301 | $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
2302 | $SLL $t_2,1 | ||
2303 | slt $a2,$t_1,$zero | ||
2304 | $ADDU $t_2,$a2 | ||
2305 | $SLL $t_1,1 | ||
2306 | $ADDU $c_2,$t_1 | ||
2307 | sltu $at,$c_2,$t_1 | ||
2308 | $ADDU $t_2,$at | ||
2309 | $ADDU $c_3,$t_2 | ||
2310 | sltu $at,$c_3,$t_2 | ||
2311 | $ADDU $c_1,$at | ||
2312 | mflo $t_1 | 2066 | mflo $t_1 |
2313 | mfhi $t_2 | 2067 | mfhi $t_2 |
2314 | $ADDU $c_2,$t_1 | 2068 | $ADDU $c_2,$t_1 |
@@ -2319,52 +2073,17 @@ $code.=<<___; | |||
2319 | sltu $at,$c_3,$t_2 | 2073 | sltu $at,$c_3,$t_2 |
2320 | $ADDU $c_1,$at | 2074 | $ADDU $c_1,$at |
2321 | $ST $c_2,10*$BNSZ($a0) | 2075 | $ST $c_2,10*$BNSZ($a0) |
2322 | 2076 | ___ | |
2323 | mflo $t_1 | 2077 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2324 | mfhi $t_2 | 2078 | $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); |
2325 | slt $c_2,$t_2,$zero | 2079 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2326 | $SLL $t_2,1 | 2080 | $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); |
2327 | $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | 2081 | $code.=<<___; |
2328 | slt $a2,$t_1,$zero | ||
2329 | $ADDU $t_2,$a2 | ||
2330 | $SLL $t_1,1 | ||
2331 | $ADDU $c_3,$t_1 | ||
2332 | sltu $at,$c_3,$t_1 | ||
2333 | $ADDU $t_2,$at | ||
2334 | $ADDU $c_1,$t_2 | ||
2335 | sltu $at,$c_1,$t_2 | ||
2336 | $ADDU $c_2,$at | ||
2337 | mflo $t_1 | ||
2338 | mfhi $t_2 | ||
2339 | slt $at,$t_2,$zero | ||
2340 | $ADDU $c_2,$at | ||
2341 | $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
2342 | $SLL $t_2,1 | ||
2343 | slt $a2,$t_1,$zero | ||
2344 | $ADDU $t_2,$a2 | ||
2345 | $SLL $t_1,1 | ||
2346 | $ADDU $c_3,$t_1 | ||
2347 | sltu $at,$c_3,$t_1 | ||
2348 | $ADDU $t_2,$at | ||
2349 | $ADDU $c_1,$t_2 | ||
2350 | sltu $at,$c_1,$t_2 | ||
2351 | $ADDU $c_2,$at | ||
2352 | $ST $c_3,11*$BNSZ($a0) | 2082 | $ST $c_3,11*$BNSZ($a0) |
2353 | 2083 | ___ | |
2354 | mflo $t_1 | 2084 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2355 | mfhi $t_2 | 2085 | $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); |
2356 | slt $c_3,$t_2,$zero | 2086 | $code.=<<___; |
2357 | $SLL $t_2,1 | ||
2358 | $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
2359 | slt $a2,$t_1,$zero | ||
2360 | $ADDU $t_2,$a2 | ||
2361 | $SLL $t_1,1 | ||
2362 | $ADDU $c_1,$t_1 | ||
2363 | sltu $at,$c_1,$t_1 | ||
2364 | $ADDU $t_2,$at | ||
2365 | $ADDU $c_2,$t_2 | ||
2366 | sltu $at,$c_2,$t_2 | ||
2367 | $ADDU $c_3,$at | ||
2368 | mflo $t_1 | 2087 | mflo $t_1 |
2369 | mfhi $t_2 | 2088 | mfhi $t_2 |
2370 | $ADDU $c_1,$t_1 | 2089 | $ADDU $c_1,$t_1 |
@@ -2375,21 +2094,10 @@ $code.=<<___; | |||
2375 | sltu $at,$c_2,$t_2 | 2094 | sltu $at,$c_2,$t_2 |
2376 | $ADDU $c_3,$at | 2095 | $ADDU $c_3,$at |
2377 | $ST $c_1,12*$BNSZ($a0) | 2096 | $ST $c_1,12*$BNSZ($a0) |
2378 | 2097 | ___ | |
2379 | mflo $t_1 | 2098 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2380 | mfhi $t_2 | 2099 | $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); |
2381 | slt $c_1,$t_2,$zero | 2100 | $code.=<<___; |
2382 | $SLL $t_2,1 | ||
2383 | $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
2384 | slt $a2,$t_1,$zero | ||
2385 | $ADDU $t_2,$a2 | ||
2386 | $SLL $t_1,1 | ||
2387 | $ADDU $c_2,$t_1 | ||
2388 | sltu $at,$c_2,$t_1 | ||
2389 | $ADDU $t_2,$at | ||
2390 | $ADDU $c_3,$t_2 | ||
2391 | sltu $at,$c_3,$t_2 | ||
2392 | $ADDU $c_1,$at | ||
2393 | $ST $c_2,13*$BNSZ($a0) | 2101 | $ST $c_2,13*$BNSZ($a0) |
2394 | 2102 | ||
2395 | mflo $t_1 | 2103 | mflo $t_1 |
@@ -2457,21 +2165,10 @@ $code.=<<___; | |||
2457 | sltu $at,$c_2,$t_1 | 2165 | sltu $at,$c_2,$t_1 |
2458 | $ADDU $c_3,$t_2,$at | 2166 | $ADDU $c_3,$t_2,$at |
2459 | $ST $c_2,$BNSZ($a0) | 2167 | $ST $c_2,$BNSZ($a0) |
2460 | 2168 | ___ | |
2461 | mflo $t_1 | 2169 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2462 | mfhi $t_2 | 2170 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
2463 | slt $c_2,$t_2,$zero | 2171 | $code.=<<___; |
2464 | $SLL $t_2,1 | ||
2465 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
2466 | slt $a2,$t_1,$zero | ||
2467 | $ADDU $t_2,$a2 | ||
2468 | $SLL $t_1,1 | ||
2469 | $ADDU $c_3,$t_1 | ||
2470 | sltu $at,$c_3,$t_1 | ||
2471 | $ADDU $t_2,$at | ||
2472 | $ADDU $c_1,$t_2 | ||
2473 | sltu $at,$c_1,$t_2 | ||
2474 | $ADDU $c_2,$at | ||
2475 | mflo $t_1 | 2172 | mflo $t_1 |
2476 | mfhi $t_2 | 2173 | mfhi $t_2 |
2477 | $ADDU $c_3,$t_1 | 2174 | $ADDU $c_3,$t_1 |
@@ -2482,52 +2179,17 @@ $code.=<<___; | |||
2482 | sltu $at,$c_1,$t_2 | 2179 | sltu $at,$c_1,$t_2 |
2483 | $ADDU $c_2,$at | 2180 | $ADDU $c_2,$at |
2484 | $ST $c_3,2*$BNSZ($a0) | 2181 | $ST $c_3,2*$BNSZ($a0) |
2485 | 2182 | ___ | |
2486 | mflo $t_1 | 2183 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2487 | mfhi $t_2 | 2184 | $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); |
2488 | slt $c_3,$t_2,$zero | 2185 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2489 | $SLL $t_2,1 | 2186 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
2490 | $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); | 2187 | $code.=<<___; |
2491 | slt $a2,$t_1,$zero | ||
2492 | $ADDU $t_2,$a2 | ||
2493 | $SLL $t_1,1 | ||
2494 | $ADDU $c_1,$t_1 | ||
2495 | sltu $at,$c_1,$t_1 | ||
2496 | $ADDU $t_2,$at | ||
2497 | $ADDU $c_2,$t_2 | ||
2498 | sltu $at,$c_2,$t_2 | ||
2499 | $ADDU $c_3,$at | ||
2500 | mflo $t_1 | ||
2501 | mfhi $t_2 | ||
2502 | slt $at,$t_2,$zero | ||
2503 | $ADDU $c_3,$at | ||
2504 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
2505 | $SLL $t_2,1 | ||
2506 | slt $a2,$t_1,$zero | ||
2507 | $ADDU $t_2,$a2 | ||
2508 | $SLL $t_1,1 | ||
2509 | $ADDU $c_1,$t_1 | ||
2510 | sltu $at,$c_1,$t_1 | ||
2511 | $ADDU $t_2,$at | ||
2512 | $ADDU $c_2,$t_2 | ||
2513 | sltu $at,$c_2,$t_2 | ||
2514 | $ADDU $c_3,$at | ||
2515 | $ST $c_1,3*$BNSZ($a0) | 2188 | $ST $c_1,3*$BNSZ($a0) |
2516 | 2189 | ___ | |
2517 | mflo $t_1 | 2190 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2518 | mfhi $t_2 | 2191 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
2519 | slt $c_1,$t_2,$zero | 2192 | $code.=<<___; |
2520 | $SLL $t_2,1 | ||
2521 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
2522 | slt $a2,$t_1,$zero | ||
2523 | $ADDU $t_2,$a2 | ||
2524 | $SLL $t_1,1 | ||
2525 | $ADDU $c_2,$t_1 | ||
2526 | sltu $at,$c_2,$t_1 | ||
2527 | $ADDU $t_2,$at | ||
2528 | $ADDU $c_3,$t_2 | ||
2529 | sltu $at,$c_3,$t_2 | ||
2530 | $ADDU $c_1,$at | ||
2531 | mflo $t_1 | 2193 | mflo $t_1 |
2532 | mfhi $t_2 | 2194 | mfhi $t_2 |
2533 | $ADDU $c_2,$t_1 | 2195 | $ADDU $c_2,$t_1 |
@@ -2538,21 +2200,10 @@ $code.=<<___; | |||
2538 | sltu $at,$c_3,$t_2 | 2200 | sltu $at,$c_3,$t_2 |
2539 | $ADDU $c_1,$at | 2201 | $ADDU $c_1,$at |
2540 | $ST $c_2,4*$BNSZ($a0) | 2202 | $ST $c_2,4*$BNSZ($a0) |
2541 | 2203 | ___ | |
2542 | mflo $t_1 | 2204 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2543 | mfhi $t_2 | 2205 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
2544 | slt $c_2,$t_2,$zero | 2206 | $code.=<<___; |
2545 | $SLL $t_2,1 | ||
2546 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
2547 | slt $a2,$t_1,$zero | ||
2548 | $ADDU $t_2,$a2 | ||
2549 | $SLL $t_1,1 | ||
2550 | $ADDU $c_3,$t_1 | ||
2551 | sltu $at,$c_3,$t_1 | ||
2552 | $ADDU $t_2,$at | ||
2553 | $ADDU $c_1,$t_2 | ||
2554 | sltu $at,$c_1,$t_2 | ||
2555 | $ADDU $c_2,$at | ||
2556 | $ST $c_3,5*$BNSZ($a0) | 2207 | $ST $c_3,5*$BNSZ($a0) |
2557 | 2208 | ||
2558 | mflo $t_1 | 2209 | mflo $t_1 |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c index c9a2b6be73..9deffa71f1 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c +++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: x86_64-gcc.c,v 1.4 2014/10/28 07:35:58 jsg Exp $ */ | 1 | /* $OpenBSD: x86_64-gcc.c,v 1.5 2015/02/25 15:39:49 bcook Exp $ */ |
2 | #include "../bn_lcl.h" | 2 | #include "../bn_lcl.h" |
3 | #if !(defined(__GNUC__) && __GNUC__>=2) | 3 | #if !(defined(__GNUC__) && __GNUC__>=2) |
4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
@@ -270,77 +270,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ |
271 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 271 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
272 | 272 | ||
273 | /* | ||
274 | * Keep in mind that carrying into high part of multiplication result | ||
275 | * can not overflow, because it cannot be all-ones. | ||
276 | */ | ||
273 | #if 0 | 277 | #if 0 |
274 | /* original macros are kept for reference purposes */ | 278 | /* original macros are kept for reference purposes */ |
275 | #define mul_add_c(a,b,c0,c1,c2) { \ | 279 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
276 | BN_ULONG ta=(a),tb=(b); \ | 280 | BN_ULONG ta = (a), tb = (b); \ |
277 | t1 = ta * tb; \ | 281 | BN_ULONG lo, hi; \ |
278 | t2 = BN_UMULT_HIGH(ta,tb); \ | 282 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
279 | c0 += t1; t2 += (c0<t1)?1:0; \ | 283 | c0 += lo; hi += (c0<lo)?1:0; \ |
280 | c1 += t2; c2 += (c1<t2)?1:0; \ | 284 | c1 += hi; c2 += (c1<hi)?1:0; \ |
281 | } | 285 | } while(0) |
282 | 286 | ||
283 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 287 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
284 | BN_ULONG ta=(a),tb=(b),t0; \ | 288 | BN_ULONG ta = (a), tb = (b); \ |
285 | t1 = BN_UMULT_HIGH(ta,tb); \ | 289 | BN_ULONG lo, hi, tt; \ |
286 | t0 = ta * tb; \ | 290 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
287 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 291 | c0 += lo; tt = hi+((c0<lo)?1:0); \ |
288 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 292 | c1 += tt; c2 += (c1<tt)?1:0; \ |
289 | c0 += t1; t2 += (c0<t1)?1:0; \ | 293 | c0 += lo; hi += (c0<lo)?1:0; \ |
290 | c1 += t2; c2 += (c1<t2)?1:0; \ | 294 | c1 += hi; c2 += (c1<hi)?1:0; \ |
291 | } | 295 | } while(0) |
296 | |||
297 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
298 | BN_ULONG ta = (a)[i]; \ | ||
299 | BN_ULONG lo, hi; \ | ||
300 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
301 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
302 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
303 | } while(0) | ||
292 | #else | 304 | #else |
293 | #define mul_add_c(a,b,c0,c1,c2) do { \ | 305 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
306 | BN_ULONG t1,t2; \ | ||
294 | asm ("mulq %3" \ | 307 | asm ("mulq %3" \ |
295 | : "=a"(t1),"=d"(t2) \ | 308 | : "=a"(t1),"=d"(t2) \ |
296 | : "a"(a),"m"(b) \ | 309 | : "a"(a),"m"(b) \ |
297 | : "cc"); \ | 310 | : "cc"); \ |
298 | asm ("addq %2,%0; adcq %3,%1" \ | 311 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
299 | : "+r"(c0),"+d"(t2) \ | 312 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
300 | : "a"(t1),"g"(0) \ | 313 | : "r"(t1),"r"(t2),"g"(0) \ |
301 | : "cc"); \ | 314 | : "cc"); \ |
302 | asm ("addq %2,%0; adcq %3,%1" \ | ||
303 | : "+r"(c1),"+r"(c2) \ | ||
304 | : "d"(t2),"g"(0) \ | ||
305 | : "cc"); \ | ||
306 | } while (0) | 315 | } while (0) |
307 | 316 | ||
308 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | 317 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
318 | BN_ULONG t1,t2; \ | ||
309 | asm ("mulq %2" \ | 319 | asm ("mulq %2" \ |
310 | : "=a"(t1),"=d"(t2) \ | 320 | : "=a"(t1),"=d"(t2) \ |
311 | : "a"(a[i]) \ | 321 | : "a"(a[i]) \ |
312 | : "cc"); \ | 322 | : "cc"); \ |
313 | asm ("addq %2,%0; adcq %3,%1" \ | 323 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
314 | : "+r"(c0),"+d"(t2) \ | 324 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
315 | : "a"(t1),"g"(0) \ | 325 | : "r"(t1),"r"(t2),"g"(0) \ |
316 | : "cc"); \ | 326 | : "cc"); \ |
317 | asm ("addq %2,%0; adcq %3,%1" \ | ||
318 | : "+r"(c1),"+r"(c2) \ | ||
319 | : "d"(t2),"g"(0) \ | ||
320 | : "cc"); \ | ||
321 | } while (0) | 327 | } while (0) |
322 | 328 | ||
323 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | 329 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
330 | BN_ULONG t1,t2; \ | ||
324 | asm ("mulq %3" \ | 331 | asm ("mulq %3" \ |
325 | : "=a"(t1),"=d"(t2) \ | 332 | : "=a"(t1),"=d"(t2) \ |
326 | : "a"(a),"m"(b) \ | 333 | : "a"(a),"m"(b) \ |
327 | : "cc"); \ | 334 | : "cc"); \ |
328 | asm ("addq %0,%0; adcq %2,%1" \ | 335 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
329 | : "+d"(t2),"+r"(c2) \ | 336 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
330 | : "g"(0) \ | 337 | : "r"(t1),"r"(t2),"g"(0) \ |
331 | : "cc"); \ | 338 | : "cc"); \ |
332 | asm ("addq %0,%0; adcq %2,%1" \ | 339 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
333 | : "+a"(t1),"+d"(t2) \ | 340 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
334 | : "g"(0) \ | 341 | : "r"(t1),"r"(t2),"g"(0) \ |
335 | : "cc"); \ | 342 | : "cc"); \ |
336 | asm ("addq %2,%0; adcq %3,%1" \ | ||
337 | : "+r"(c0),"+d"(t2) \ | ||
338 | : "a"(t1),"g"(0) \ | ||
339 | : "cc"); \ | ||
340 | asm ("addq %2,%0; adcq %3,%1" \ | ||
341 | : "+r"(c1),"+r"(c2) \ | ||
342 | : "d"(t2),"g"(0) \ | ||
343 | : "cc"); \ | ||
344 | } while (0) | 343 | } while (0) |
345 | #endif | 344 | #endif |
346 | 345 | ||
@@ -349,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
349 | 348 | ||
350 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 349 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
351 | { | 350 | { |
352 | BN_ULONG t1,t2; | ||
353 | BN_ULONG c1,c2,c3; | 351 | BN_ULONG c1,c2,c3; |
354 | 352 | ||
355 | c1=0; | 353 | c1=0; |
@@ -453,7 +451,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
453 | 451 | ||
454 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 452 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
455 | { | 453 | { |
456 | BN_ULONG t1,t2; | ||
457 | BN_ULONG c1,c2,c3; | 454 | BN_ULONG c1,c2,c3; |
458 | 455 | ||
459 | c1=0; | 456 | c1=0; |
@@ -493,7 +490,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
493 | 490 | ||
494 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 491 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
495 | { | 492 | { |
496 | BN_ULONG t1,t2; | ||
497 | BN_ULONG c1,c2,c3; | 493 | BN_ULONG c1,c2,c3; |
498 | 494 | ||
499 | c1=0; | 495 | c1=0; |
@@ -569,7 +565,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
569 | 565 | ||
570 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 566 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
571 | { | 567 | { |
572 | BN_ULONG t1,t2; | ||
573 | BN_ULONG c1,c2,c3; | 568 | BN_ULONG c1,c2,c3; |
574 | 569 | ||
575 | c1=0; | 570 | c1=0; |
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index c6efd2513a..49f0ba5d7b 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_asm.c,v 1.13 2014/07/11 08:44:47 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.14 2015/02/25 15:39:49 bcook Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -495,116 +495,143 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
495 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 495 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
496 | 496 | ||
497 | #ifdef BN_LLONG | 497 | #ifdef BN_LLONG |
498 | #define mul_add_c(a,b,c0,c1,c2) \ | 498 | /* |
499 | t=(BN_ULLONG)a*b; \ | 499 | * Keep in mind that additions to multiplication result can not |
500 | t1=(BN_ULONG)Lw(t); \ | 500 | * overflow, because its high half cannot be all-ones. |
501 | t2=(BN_ULONG)Hw(t); \ | 501 | */ |
502 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 502 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
503 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 503 | BN_ULONG hi; \ |
504 | 504 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ | |
505 | #define mul_add_c2(a,b,c0,c1,c2) \ | 505 | t += c0; /* no carry */ \ |
506 | t=(BN_ULLONG)a*b; \ | 506 | c0 = (BN_ULONG)Lw(t); \ |
507 | tt=(t+t)&BN_MASK; \ | 507 | hi = (BN_ULONG)Hw(t); \ |
508 | if (tt < t) c2++; \ | 508 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
509 | t1=(BN_ULONG)Lw(tt); \ | 509 | } while(0) |
510 | t2=(BN_ULONG)Hw(tt); \ | 510 | |
511 | c0=(c0+t1)&BN_MASK2; \ | 511 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
512 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 512 | BN_ULONG hi; \ |
513 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 513 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ |
514 | 514 | BN_ULLONG tt = t+c0; /* no carry */ \ | |
515 | #define sqr_add_c(a,i,c0,c1,c2) \ | 515 | c0 = (BN_ULONG)Lw(tt); \ |
516 | t=(BN_ULLONG)a[i]*a[i]; \ | 516 | hi = (BN_ULONG)Hw(tt); \ |
517 | t1=(BN_ULONG)Lw(t); \ | 517 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
518 | t2=(BN_ULONG)Hw(t); \ | 518 | t += c0; /* no carry */ \ |
519 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 519 | c0 = (BN_ULONG)Lw(t); \ |
520 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 520 | hi = (BN_ULONG)Hw(t); \ |
521 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
522 | } while(0) | ||
523 | |||
524 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
525 | BN_ULONG hi; \ | ||
526 | BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \ | ||
527 | t += c0; /* no carry */ \ | ||
528 | c0 = (BN_ULONG)Lw(t); \ | ||
529 | hi = (BN_ULONG)Hw(t); \ | ||
530 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
531 | } while(0) | ||
521 | 532 | ||
522 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 533 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
523 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 534 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
524 | 535 | ||
525 | #elif defined(BN_UMULT_LOHI) | 536 | #elif defined(BN_UMULT_LOHI) |
526 | 537 | /* | |
527 | #define mul_add_c(a,b,c0,c1,c2) { \ | 538 | * Keep in mind that additions to hi can not overflow, because |
528 | BN_ULONG ta=(a),tb=(b); \ | 539 | * the high word of a multiplication result cannot be all-ones. |
529 | BN_UMULT_LOHI(t1,t2,ta,tb); \ | 540 | */ |
530 | c0 += t1; t2 += (c0<t1)?1:0; \ | 541 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
531 | c1 += t2; c2 += (c1<t2)?1:0; \ | 542 | BN_ULONG ta = (a), tb = (b); \ |
532 | } | 543 | BN_ULONG lo, hi; \ |
533 | 544 | BN_UMULT_LOHI(lo,hi,ta,tb); \ | |
534 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 545 | c0 += lo; hi += (c0<lo)?1:0; \ |
535 | BN_ULONG ta=(a),tb=(b),t0; \ | 546 | c1 += hi; c2 += (c1<hi)?1:0; \ |
536 | BN_UMULT_LOHI(t0,t1,ta,tb); \ | 547 | } while(0) |
537 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 548 | |
538 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 549 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
539 | c0 += t1; t2 += (c0<t1)?1:0; \ | 550 | BN_ULONG ta = (a), tb = (b); \ |
540 | c1 += t2; c2 += (c1<t2)?1:0; \ | 551 | BN_ULONG lo, hi, tt; \ |
541 | } | 552 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
542 | 553 | c0 += lo; tt = hi+((c0<lo)?1:0); \ | |
543 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 554 | c1 += tt; c2 += (c1<tt)?1:0; \ |
544 | BN_ULONG ta=(a)[i]; \ | 555 | c0 += lo; hi += (c0<lo)?1:0; \ |
545 | BN_UMULT_LOHI(t1,t2,ta,ta); \ | 556 | c1 += hi; c2 += (c1<hi)?1:0; \ |
546 | c0 += t1; t2 += (c0<t1)?1:0; \ | 557 | } while(0) |
547 | c1 += t2; c2 += (c1<t2)?1:0; \ | 558 | |
548 | } | 559 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
560 | BN_ULONG ta = (a)[i]; \ | ||
561 | BN_ULONG lo, hi; \ | ||
562 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
563 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
564 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
565 | } while(0) | ||
549 | 566 | ||
550 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 567 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
551 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 568 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
552 | 569 | ||
553 | #elif defined(BN_UMULT_HIGH) | 570 | #elif defined(BN_UMULT_HIGH) |
554 | 571 | /* | |
555 | #define mul_add_c(a,b,c0,c1,c2) { \ | 572 | * Keep in mind that additions to hi can not overflow, because |
556 | BN_ULONG ta=(a),tb=(b); \ | 573 | * the high word of a multiplication result cannot be all-ones. |
557 | t1 = ta * tb; \ | 574 | */ |
558 | t2 = BN_UMULT_HIGH(ta,tb); \ | 575 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
559 | c0 += t1; t2 += (c0<t1)?1:0; \ | 576 | BN_ULONG ta = (a), tb = (b); \ |
560 | c1 += t2; c2 += (c1<t2)?1:0; \ | 577 | BN_ULONG lo = ta * tb; \ |
561 | } | 578 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
562 | 579 | c0 += lo; hi += (c0<lo)?1:0; \ | |
563 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 580 | c1 += hi; c2 += (c1<hi)?1:0; \ |
564 | BN_ULONG ta=(a),tb=(b),t0; \ | 581 | } while(0) |
565 | t1 = BN_UMULT_HIGH(ta,tb); \ | 582 | |
566 | t0 = ta * tb; \ | 583 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
567 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 584 | BN_ULONG ta = (a), tb = (b), tt; \ |
568 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 585 | BN_ULONG lo = ta * tb; \ |
569 | c0 += t1; t2 += (c0<t1)?1:0; \ | 586 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
570 | c1 += t2; c2 += (c1<t2)?1:0; \ | 587 | c0 += lo; tt = hi + ((c0<lo)?1:0); \ |
571 | } | 588 | c1 += tt; c2 += (c1<tt)?1:0; \ |
572 | 589 | c0 += lo; hi += (c0<lo)?1:0; \ | |
573 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 590 | c1 += hi; c2 += (c1<hi)?1:0; \ |
574 | BN_ULONG ta=(a)[i]; \ | 591 | } while(0) |
575 | t1 = ta * ta; \ | 592 | |
576 | t2 = BN_UMULT_HIGH(ta,ta); \ | 593 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
577 | c0 += t1; t2 += (c0<t1)?1:0; \ | 594 | BN_ULONG ta = (a)[i]; \ |
578 | c1 += t2; c2 += (c1<t2)?1:0; \ | 595 | BN_ULONG lo = ta * ta; \ |
579 | } | 596 | BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \ |
597 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
598 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
599 | } while(0) | ||
580 | 600 | ||
581 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 601 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
582 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 602 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
583 | 603 | ||
584 | #else /* !BN_LLONG */ | 604 | #else /* !BN_LLONG */ |
585 | #define mul_add_c(a,b,c0,c1,c2) \ | 605 | /* |
586 | t1=LBITS(a); t2=HBITS(a); \ | 606 | * Keep in mind that additions to hi can not overflow, because |
587 | bl=LBITS(b); bh=HBITS(b); \ | 607 | * the high word of a multiplication result cannot be all-ones. |
588 | mul64(t1,t2,bl,bh); \ | 608 | */ |
589 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 609 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
590 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 610 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
591 | 611 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ | |
592 | #define mul_add_c2(a,b,c0,c1,c2) \ | 612 | mul64(lo,hi,bl,bh); \ |
593 | t1=LBITS(a); t2=HBITS(a); \ | 613 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
594 | bl=LBITS(b); bh=HBITS(b); \ | 614 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
595 | mul64(t1,t2,bl,bh); \ | 615 | } while(0) |
596 | if (t2 & BN_TBIT) c2++; \ | 616 | |
597 | t2=(t2+t2)&BN_MASK2; \ | 617 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
598 | if (t1 & BN_TBIT) t2++; \ | 618 | BN_ULONG tt; \ |
599 | t1=(t1+t1)&BN_MASK2; \ | 619 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
600 | c0=(c0+t1)&BN_MASK2; \ | 620 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ |
601 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 621 | mul64(lo,hi,bl,bh); \ |
602 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 622 | tt = hi; \ |
603 | 623 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \ | |
604 | #define sqr_add_c(a,i,c0,c1,c2) \ | 624 | c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \ |
605 | sqr64(t1,t2,(a)[i]); \ | 625 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
606 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 626 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
607 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 627 | } while(0) |
628 | |||
629 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
630 | BN_ULONG lo, hi; \ | ||
631 | sqr64(lo,hi,(a)[i]); \ | ||
632 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
633 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
634 | } while(0) | ||
608 | 635 | ||
609 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 636 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
610 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 637 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
@@ -613,12 +640,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
613 | void | 640 | void |
614 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 641 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
615 | { | 642 | { |
616 | #ifdef BN_LLONG | ||
617 | BN_ULLONG t; | ||
618 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
619 | BN_ULONG bl, bh; | ||
620 | #endif | ||
621 | BN_ULONG t1, t2; | ||
622 | BN_ULONG c1, c2, c3; | 643 | BN_ULONG c1, c2, c3; |
623 | 644 | ||
624 | c1 = 0; | 645 | c1 = 0; |
@@ -723,12 +744,6 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
723 | void | 744 | void |
724 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 745 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
725 | { | 746 | { |
726 | #ifdef BN_LLONG | ||
727 | BN_ULLONG t; | ||
728 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
729 | BN_ULONG bl, bh; | ||
730 | #endif | ||
731 | BN_ULONG t1, t2; | ||
732 | BN_ULONG c1, c2, c3; | 747 | BN_ULONG c1, c2, c3; |
733 | 748 | ||
734 | c1 = 0; | 749 | c1 = 0; |
@@ -769,12 +784,6 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
769 | void | 784 | void |
770 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 785 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
771 | { | 786 | { |
772 | #ifdef BN_LLONG | ||
773 | BN_ULLONG t, tt; | ||
774 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
775 | BN_ULONG bl, bh; | ||
776 | #endif | ||
777 | BN_ULONG t1, t2; | ||
778 | BN_ULONG c1, c2, c3; | 787 | BN_ULONG c1, c2, c3; |
779 | 788 | ||
780 | c1 = 0; | 789 | c1 = 0; |
@@ -851,12 +860,6 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
851 | void | 860 | void |
852 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 861 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
853 | { | 862 | { |
854 | #ifdef BN_LLONG | ||
855 | BN_ULLONG t, tt; | ||
856 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
857 | BN_ULONG bl, bh; | ||
858 | #endif | ||
859 | BN_ULONG t1, t2; | ||
860 | BN_ULONG c1, c2, c3; | 863 | BN_ULONG c1, c2, c3; |
861 | 864 | ||
862 | c1 = 0; | 865 | c1 = 0; |
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips.pl b/src/lib/libssl/src/crypto/bn/asm/mips.pl index d2f3ef7bbf..215c9a7483 100644 --- a/src/lib/libssl/src/crypto/bn/asm/mips.pl +++ b/src/lib/libssl/src/crypto/bn/asm/mips.pl | |||
@@ -1872,6 +1872,41 @@ ___ | |||
1872 | 1872 | ||
1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); |
1874 | 1874 | ||
1875 | sub add_c2 () { | ||
1876 | my ($hi,$lo,$c0,$c1,$c2, | ||
1877 | $warm, # !$warm denotes first call with specific sequence of | ||
1878 | # $c_[XYZ] when there is no Z-carry to accumulate yet; | ||
1879 | $an,$bn # these two are arguments for multiplication which | ||
1880 | # result is used in *next* step [which is why it's | ||
1881 | # commented as "forward multiplication" below]; | ||
1882 | )=@_; | ||
1883 | $code.=<<___; | ||
1884 | mflo $lo | ||
1885 | mfhi $hi | ||
1886 | $ADDU $c0,$lo | ||
1887 | sltu $at,$c0,$lo | ||
1888 | $MULTU $an,$bn # forward multiplication | ||
1889 | $ADDU $c0,$lo | ||
1890 | $ADDU $at,$hi | ||
1891 | sltu $lo,$c0,$lo | ||
1892 | $ADDU $c1,$at | ||
1893 | $ADDU $hi,$lo | ||
1894 | ___ | ||
1895 | $code.=<<___ if (!$warm); | ||
1896 | sltu $c2,$c1,$at | ||
1897 | $ADDU $c1,$hi | ||
1898 | sltu $hi,$c1,$hi | ||
1899 | $ADDU $c2,$hi | ||
1900 | ___ | ||
1901 | $code.=<<___ if ($warm); | ||
1902 | sltu $at,$c1,$at | ||
1903 | $ADDU $c1,$hi | ||
1904 | $ADDU $c2,$at | ||
1905 | sltu $hi,$c1,$hi | ||
1906 | $ADDU $c2,$hi | ||
1907 | ___ | ||
1908 | } | ||
1909 | |||
1875 | $code.=<<___; | 1910 | $code.=<<___; |
1876 | 1911 | ||
1877 | .align 5 | 1912 | .align 5 |
@@ -1920,21 +1955,10 @@ $code.=<<___; | |||
1920 | sltu $at,$c_2,$t_1 | 1955 | sltu $at,$c_2,$t_1 |
1921 | $ADDU $c_3,$t_2,$at | 1956 | $ADDU $c_3,$t_2,$at |
1922 | $ST $c_2,$BNSZ($a0) | 1957 | $ST $c_2,$BNSZ($a0) |
1923 | 1958 | ___ | |
1924 | mflo $t_1 | 1959 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
1925 | mfhi $t_2 | 1960 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
1926 | slt $c_2,$t_2,$zero | 1961 | $code.=<<___; |
1927 | $SLL $t_2,1 | ||
1928 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
1929 | slt $a2,$t_1,$zero | ||
1930 | $ADDU $t_2,$a2 | ||
1931 | $SLL $t_1,1 | ||
1932 | $ADDU $c_3,$t_1 | ||
1933 | sltu $at,$c_3,$t_1 | ||
1934 | $ADDU $t_2,$at | ||
1935 | $ADDU $c_1,$t_2 | ||
1936 | sltu $at,$c_1,$t_2 | ||
1937 | $ADDU $c_2,$at | ||
1938 | mflo $t_1 | 1962 | mflo $t_1 |
1939 | mfhi $t_2 | 1963 | mfhi $t_2 |
1940 | $ADDU $c_3,$t_1 | 1964 | $ADDU $c_3,$t_1 |
@@ -1945,67 +1969,19 @@ $code.=<<___; | |||
1945 | sltu $at,$c_1,$t_2 | 1969 | sltu $at,$c_1,$t_2 |
1946 | $ADDU $c_2,$at | 1970 | $ADDU $c_2,$at |
1947 | $ST $c_3,2*$BNSZ($a0) | 1971 | $ST $c_3,2*$BNSZ($a0) |
1948 | 1972 | ___ | |
1949 | mflo $t_1 | 1973 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
1950 | mfhi $t_2 | 1974 | $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); |
1951 | slt $c_3,$t_2,$zero | 1975 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
1952 | $SLL $t_2,1 | 1976 | $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); |
1953 | $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | 1977 | $code.=<<___; |
1954 | slt $a2,$t_1,$zero | ||
1955 | $ADDU $t_2,$a2 | ||
1956 | $SLL $t_1,1 | ||
1957 | $ADDU $c_1,$t_1 | ||
1958 | sltu $at,$c_1,$t_1 | ||
1959 | $ADDU $t_2,$at | ||
1960 | $ADDU $c_2,$t_2 | ||
1961 | sltu $at,$c_2,$t_2 | ||
1962 | $ADDU $c_3,$at | ||
1963 | mflo $t_1 | ||
1964 | mfhi $t_2 | ||
1965 | slt $at,$t_2,$zero | ||
1966 | $ADDU $c_3,$at | ||
1967 | $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
1968 | $SLL $t_2,1 | ||
1969 | slt $a2,$t_1,$zero | ||
1970 | $ADDU $t_2,$a2 | ||
1971 | $SLL $t_1,1 | ||
1972 | $ADDU $c_1,$t_1 | ||
1973 | sltu $at,$c_1,$t_1 | ||
1974 | $ADDU $t_2,$at | ||
1975 | $ADDU $c_2,$t_2 | ||
1976 | sltu $at,$c_2,$t_2 | ||
1977 | $ADDU $c_3,$at | ||
1978 | $ST $c_1,3*$BNSZ($a0) | 1978 | $ST $c_1,3*$BNSZ($a0) |
1979 | 1979 | ___ | |
1980 | mflo $t_1 | 1980 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
1981 | mfhi $t_2 | 1981 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
1982 | slt $c_1,$t_2,$zero | 1982 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
1983 | $SLL $t_2,1 | 1983 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
1984 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | 1984 | $code.=<<___; |
1985 | slt $a2,$t_1,$zero | ||
1986 | $ADDU $t_2,$a2 | ||
1987 | $SLL $t_1,1 | ||
1988 | $ADDU $c_2,$t_1 | ||
1989 | sltu $at,$c_2,$t_1 | ||
1990 | $ADDU $t_2,$at | ||
1991 | $ADDU $c_3,$t_2 | ||
1992 | sltu $at,$c_3,$t_2 | ||
1993 | $ADDU $c_1,$at | ||
1994 | mflo $t_1 | ||
1995 | mfhi $t_2 | ||
1996 | slt $at,$t_2,$zero | ||
1997 | $ADDU $c_1,$at | ||
1998 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
1999 | $SLL $t_2,1 | ||
2000 | slt $a2,$t_1,$zero | ||
2001 | $ADDU $t_2,$a2 | ||
2002 | $SLL $t_1,1 | ||
2003 | $ADDU $c_2,$t_1 | ||
2004 | sltu $at,$c_2,$t_1 | ||
2005 | $ADDU $t_2,$at | ||
2006 | $ADDU $c_3,$t_2 | ||
2007 | sltu $at,$c_3,$t_2 | ||
2008 | $ADDU $c_1,$at | ||
2009 | mflo $t_1 | 1985 | mflo $t_1 |
2010 | mfhi $t_2 | 1986 | mfhi $t_2 |
2011 | $ADDU $c_2,$t_1 | 1987 | $ADDU $c_2,$t_1 |
@@ -2016,97 +1992,23 @@ $code.=<<___; | |||
2016 | sltu $at,$c_3,$t_2 | 1992 | sltu $at,$c_3,$t_2 |
2017 | $ADDU $c_1,$at | 1993 | $ADDU $c_1,$at |
2018 | $ST $c_2,4*$BNSZ($a0) | 1994 | $ST $c_2,4*$BNSZ($a0) |
2019 | 1995 | ___ | |
2020 | mflo $t_1 | 1996 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2021 | mfhi $t_2 | 1997 | $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); |
2022 | slt $c_2,$t_2,$zero | 1998 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2023 | $SLL $t_2,1 | 1999 | $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); |
2024 | $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | 2000 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2025 | slt $a2,$t_1,$zero | 2001 | $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); |
2026 | $ADDU $t_2,$a2 | 2002 | $code.=<<___; |
2027 | $SLL $t_1,1 | ||
2028 | $ADDU $c_3,$t_1 | ||
2029 | sltu $at,$c_3,$t_1 | ||
2030 | $ADDU $t_2,$at | ||
2031 | $ADDU $c_1,$t_2 | ||
2032 | sltu $at,$c_1,$t_2 | ||
2033 | $ADDU $c_2,$at | ||
2034 | mflo $t_1 | ||
2035 | mfhi $t_2 | ||
2036 | slt $at,$t_2,$zero | ||
2037 | $ADDU $c_2,$at | ||
2038 | $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
2039 | $SLL $t_2,1 | ||
2040 | slt $a2,$t_1,$zero | ||
2041 | $ADDU $t_2,$a2 | ||
2042 | $SLL $t_1,1 | ||
2043 | $ADDU $c_3,$t_1 | ||
2044 | sltu $at,$c_3,$t_1 | ||
2045 | $ADDU $t_2,$at | ||
2046 | $ADDU $c_1,$t_2 | ||
2047 | sltu $at,$c_1,$t_2 | ||
2048 | $ADDU $c_2,$at | ||
2049 | mflo $t_1 | ||
2050 | mfhi $t_2 | ||
2051 | slt $at,$t_2,$zero | ||
2052 | $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
2053 | $ADDU $c_2,$at | ||
2054 | $SLL $t_2,1 | ||
2055 | slt $a2,$t_1,$zero | ||
2056 | $ADDU $t_2,$a2 | ||
2057 | $SLL $t_1,1 | ||
2058 | $ADDU $c_3,$t_1 | ||
2059 | sltu $at,$c_3,$t_1 | ||
2060 | $ADDU $t_2,$at | ||
2061 | $ADDU $c_1,$t_2 | ||
2062 | sltu $at,$c_1,$t_2 | ||
2063 | $ADDU $c_2,$at | ||
2064 | $ST $c_3,5*$BNSZ($a0) | 2003 | $ST $c_3,5*$BNSZ($a0) |
2065 | 2004 | ___ | |
2066 | mflo $t_1 | 2005 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2067 | mfhi $t_2 | 2006 | $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); |
2068 | slt $c_3,$t_2,$zero | 2007 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2069 | $SLL $t_2,1 | 2008 | $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); |
2070 | $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | 2009 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2071 | slt $a2,$t_1,$zero | 2010 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
2072 | $ADDU $t_2,$a2 | 2011 | $code.=<<___; |
2073 | $SLL $t_1,1 | ||
2074 | $ADDU $c_1,$t_1 | ||
2075 | sltu $at,$c_1,$t_1 | ||
2076 | $ADDU $t_2,$at | ||
2077 | $ADDU $c_2,$t_2 | ||
2078 | sltu $at,$c_2,$t_2 | ||
2079 | $ADDU $c_3,$at | ||
2080 | mflo $t_1 | ||
2081 | mfhi $t_2 | ||
2082 | slt $at,$t_2,$zero | ||
2083 | $ADDU $c_3,$at | ||
2084 | $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
2085 | $SLL $t_2,1 | ||
2086 | slt $a2,$t_1,$zero | ||
2087 | $ADDU $t_2,$a2 | ||
2088 | $SLL $t_1,1 | ||
2089 | $ADDU $c_1,$t_1 | ||
2090 | sltu $at,$c_1,$t_1 | ||
2091 | $ADDU $t_2,$at | ||
2092 | $ADDU $c_2,$t_2 | ||
2093 | sltu $at,$c_2,$t_2 | ||
2094 | $ADDU $c_3,$at | ||
2095 | mflo $t_1 | ||
2096 | mfhi $t_2 | ||
2097 | slt $at,$t_2,$zero | ||
2098 | $ADDU $c_3,$at | ||
2099 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
2100 | $SLL $t_2,1 | ||
2101 | slt $a2,$t_1,$zero | ||
2102 | $ADDU $t_2,$a2 | ||
2103 | $SLL $t_1,1 | ||
2104 | $ADDU $c_1,$t_1 | ||
2105 | sltu $at,$c_1,$t_1 | ||
2106 | $ADDU $t_2,$at | ||
2107 | $ADDU $c_2,$t_2 | ||
2108 | sltu $at,$c_2,$t_2 | ||
2109 | $ADDU $c_3,$at | ||
2110 | mflo $t_1 | 2012 | mflo $t_1 |
2111 | mfhi $t_2 | 2013 | mfhi $t_2 |
2112 | $ADDU $c_1,$t_1 | 2014 | $ADDU $c_1,$t_1 |
@@ -2117,112 +2019,25 @@ $code.=<<___; | |||
2117 | sltu $at,$c_2,$t_2 | 2019 | sltu $at,$c_2,$t_2 |
2118 | $ADDU $c_3,$at | 2020 | $ADDU $c_3,$at |
2119 | $ST $c_1,6*$BNSZ($a0) | 2021 | $ST $c_1,6*$BNSZ($a0) |
2120 | 2022 | ___ | |
2121 | mflo $t_1 | 2023 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2122 | mfhi $t_2 | 2024 | $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); |
2123 | slt $c_1,$t_2,$zero | 2025 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2124 | $SLL $t_2,1 | 2026 | $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); |
2125 | $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | 2027 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2126 | slt $a2,$t_1,$zero | 2028 | $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); |
2127 | $ADDU $t_2,$a2 | 2029 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2128 | $SLL $t_1,1 | 2030 | $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); |
2129 | $ADDU $c_2,$t_1 | 2031 | $code.=<<___; |
2130 | sltu $at,$c_2,$t_1 | ||
2131 | $ADDU $t_2,$at | ||
2132 | $ADDU $c_3,$t_2 | ||
2133 | sltu $at,$c_3,$t_2 | ||
2134 | $ADDU $c_1,$at | ||
2135 | mflo $t_1 | ||
2136 | mfhi $t_2 | ||
2137 | slt $at,$t_2,$zero | ||
2138 | $ADDU $c_1,$at | ||
2139 | $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
2140 | $SLL $t_2,1 | ||
2141 | slt $a2,$t_1,$zero | ||
2142 | $ADDU $t_2,$a2 | ||
2143 | $SLL $t_1,1 | ||
2144 | $ADDU $c_2,$t_1 | ||
2145 | sltu $at,$c_2,$t_1 | ||
2146 | $ADDU $t_2,$at | ||
2147 | $ADDU $c_3,$t_2 | ||
2148 | sltu $at,$c_3,$t_2 | ||
2149 | $ADDU $c_1,$at | ||
2150 | mflo $t_1 | ||
2151 | mfhi $t_2 | ||
2152 | slt $at,$t_2,$zero | ||
2153 | $ADDU $c_1,$at | ||
2154 | $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
2155 | $SLL $t_2,1 | ||
2156 | slt $a2,$t_1,$zero | ||
2157 | $ADDU $t_2,$a2 | ||
2158 | $SLL $t_1,1 | ||
2159 | $ADDU $c_2,$t_1 | ||
2160 | sltu $at,$c_2,$t_1 | ||
2161 | $ADDU $t_2,$at | ||
2162 | $ADDU $c_3,$t_2 | ||
2163 | sltu $at,$c_3,$t_2 | ||
2164 | $ADDU $c_1,$at | ||
2165 | mflo $t_1 | ||
2166 | mfhi $t_2 | ||
2167 | slt $at,$t_2,$zero | ||
2168 | $ADDU $c_1,$at | ||
2169 | $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
2170 | $SLL $t_2,1 | ||
2171 | slt $a2,$t_1,$zero | ||
2172 | $ADDU $t_2,$a2 | ||
2173 | $SLL $t_1,1 | ||
2174 | $ADDU $c_2,$t_1 | ||
2175 | sltu $at,$c_2,$t_1 | ||
2176 | $ADDU $t_2,$at | ||
2177 | $ADDU $c_3,$t_2 | ||
2178 | sltu $at,$c_3,$t_2 | ||
2179 | $ADDU $c_1,$at | ||
2180 | $ST $c_2,7*$BNSZ($a0) | 2032 | $ST $c_2,7*$BNSZ($a0) |
2181 | 2033 | ___ | |
2182 | mflo $t_1 | 2034 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2183 | mfhi $t_2 | 2035 | $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); |
2184 | slt $c_2,$t_2,$zero | 2036 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2185 | $SLL $t_2,1 | 2037 | $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); |
2186 | $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | 2038 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2187 | slt $a2,$t_1,$zero | 2039 | $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); |
2188 | $ADDU $t_2,$a2 | 2040 | $code.=<<___; |
2189 | $SLL $t_1,1 | ||
2190 | $ADDU $c_3,$t_1 | ||
2191 | sltu $at,$c_3,$t_1 | ||
2192 | $ADDU $t_2,$at | ||
2193 | $ADDU $c_1,$t_2 | ||
2194 | sltu $at,$c_1,$t_2 | ||
2195 | $ADDU $c_2,$at | ||
2196 | mflo $t_1 | ||
2197 | mfhi $t_2 | ||
2198 | slt $at,$t_2,$zero | ||
2199 | $ADDU $c_2,$at | ||
2200 | $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
2201 | $SLL $t_2,1 | ||
2202 | slt $a2,$t_1,$zero | ||
2203 | $ADDU $t_2,$a2 | ||
2204 | $SLL $t_1,1 | ||
2205 | $ADDU $c_3,$t_1 | ||
2206 | sltu $at,$c_3,$t_1 | ||
2207 | $ADDU $t_2,$at | ||
2208 | $ADDU $c_1,$t_2 | ||
2209 | sltu $at,$c_1,$t_2 | ||
2210 | $ADDU $c_2,$at | ||
2211 | mflo $t_1 | ||
2212 | mfhi $t_2 | ||
2213 | slt $at,$t_2,$zero | ||
2214 | $ADDU $c_2,$at | ||
2215 | $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
2216 | $SLL $t_2,1 | ||
2217 | slt $a2,$t_1,$zero | ||
2218 | $ADDU $t_2,$a2 | ||
2219 | $SLL $t_1,1 | ||
2220 | $ADDU $c_3,$t_1 | ||
2221 | sltu $at,$c_3,$t_1 | ||
2222 | $ADDU $t_2,$at | ||
2223 | $ADDU $c_1,$t_2 | ||
2224 | sltu $at,$c_1,$t_2 | ||
2225 | $ADDU $c_2,$at | ||
2226 | mflo $t_1 | 2041 | mflo $t_1 |
2227 | mfhi $t_2 | 2042 | mfhi $t_2 |
2228 | $ADDU $c_3,$t_1 | 2043 | $ADDU $c_3,$t_1 |
@@ -2233,82 +2048,21 @@ $code.=<<___; | |||
2233 | sltu $at,$c_1,$t_2 | 2048 | sltu $at,$c_1,$t_2 |
2234 | $ADDU $c_2,$at | 2049 | $ADDU $c_2,$at |
2235 | $ST $c_3,8*$BNSZ($a0) | 2050 | $ST $c_3,8*$BNSZ($a0) |
2236 | 2051 | ___ | |
2237 | mflo $t_1 | 2052 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2238 | mfhi $t_2 | 2053 | $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); |
2239 | slt $c_3,$t_2,$zero | 2054 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2240 | $SLL $t_2,1 | 2055 | $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); |
2241 | $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | 2056 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2242 | slt $a2,$t_1,$zero | 2057 | $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); |
2243 | $ADDU $t_2,$a2 | 2058 | $code.=<<___; |
2244 | $SLL $t_1,1 | ||
2245 | $ADDU $c_1,$t_1 | ||
2246 | sltu $at,$c_1,$t_1 | ||
2247 | $ADDU $t_2,$at | ||
2248 | $ADDU $c_2,$t_2 | ||
2249 | sltu $at,$c_2,$t_2 | ||
2250 | $ADDU $c_3,$at | ||
2251 | mflo $t_1 | ||
2252 | mfhi $t_2 | ||
2253 | slt $at,$t_2,$zero | ||
2254 | $ADDU $c_3,$at | ||
2255 | $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
2256 | $SLL $t_2,1 | ||
2257 | slt $a2,$t_1,$zero | ||
2258 | $ADDU $t_2,$a2 | ||
2259 | $SLL $t_1,1 | ||
2260 | $ADDU $c_1,$t_1 | ||
2261 | sltu $at,$c_1,$t_1 | ||
2262 | $ADDU $t_2,$at | ||
2263 | $ADDU $c_2,$t_2 | ||
2264 | sltu $at,$c_2,$t_2 | ||
2265 | $ADDU $c_3,$at | ||
2266 | mflo $t_1 | ||
2267 | mfhi $t_2 | ||
2268 | slt $at,$t_2,$zero | ||
2269 | $ADDU $c_3,$at | ||
2270 | $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
2271 | $SLL $t_2,1 | ||
2272 | slt $a2,$t_1,$zero | ||
2273 | $ADDU $t_2,$a2 | ||
2274 | $SLL $t_1,1 | ||
2275 | $ADDU $c_1,$t_1 | ||
2276 | sltu $at,$c_1,$t_1 | ||
2277 | $ADDU $t_2,$at | ||
2278 | $ADDU $c_2,$t_2 | ||
2279 | sltu $at,$c_2,$t_2 | ||
2280 | $ADDU $c_3,$at | ||
2281 | $ST $c_1,9*$BNSZ($a0) | 2059 | $ST $c_1,9*$BNSZ($a0) |
2282 | 2060 | ___ | |
2283 | mflo $t_1 | 2061 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2284 | mfhi $t_2 | 2062 | $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); |
2285 | slt $c_1,$t_2,$zero | 2063 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
2286 | $SLL $t_2,1 | 2064 | $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); |
2287 | $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | 2065 | $code.=<<___; |
2288 | slt $a2,$t_1,$zero | ||
2289 | $ADDU $t_2,$a2 | ||
2290 | $SLL $t_1,1 | ||
2291 | $ADDU $c_2,$t_1 | ||
2292 | sltu $at,$c_2,$t_1 | ||
2293 | $ADDU $t_2,$at | ||
2294 | $ADDU $c_3,$t_2 | ||
2295 | sltu $at,$c_3,$t_2 | ||
2296 | $ADDU $c_1,$at | ||
2297 | mflo $t_1 | ||
2298 | mfhi $t_2 | ||
2299 | slt $at,$t_2,$zero | ||
2300 | $ADDU $c_1,$at | ||
2301 | $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
2302 | $SLL $t_2,1 | ||
2303 | slt $a2,$t_1,$zero | ||
2304 | $ADDU $t_2,$a2 | ||
2305 | $SLL $t_1,1 | ||
2306 | $ADDU $c_2,$t_1 | ||
2307 | sltu $at,$c_2,$t_1 | ||
2308 | $ADDU $t_2,$at | ||
2309 | $ADDU $c_3,$t_2 | ||
2310 | sltu $at,$c_3,$t_2 | ||
2311 | $ADDU $c_1,$at | ||
2312 | mflo $t_1 | 2066 | mflo $t_1 |
2313 | mfhi $t_2 | 2067 | mfhi $t_2 |
2314 | $ADDU $c_2,$t_1 | 2068 | $ADDU $c_2,$t_1 |
@@ -2319,52 +2073,17 @@ $code.=<<___; | |||
2319 | sltu $at,$c_3,$t_2 | 2073 | sltu $at,$c_3,$t_2 |
2320 | $ADDU $c_1,$at | 2074 | $ADDU $c_1,$at |
2321 | $ST $c_2,10*$BNSZ($a0) | 2075 | $ST $c_2,10*$BNSZ($a0) |
2322 | 2076 | ___ | |
2323 | mflo $t_1 | 2077 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2324 | mfhi $t_2 | 2078 | $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); |
2325 | slt $c_2,$t_2,$zero | 2079 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
2326 | $SLL $t_2,1 | 2080 | $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); |
2327 | $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | 2081 | $code.=<<___; |
2328 | slt $a2,$t_1,$zero | ||
2329 | $ADDU $t_2,$a2 | ||
2330 | $SLL $t_1,1 | ||
2331 | $ADDU $c_3,$t_1 | ||
2332 | sltu $at,$c_3,$t_1 | ||
2333 | $ADDU $t_2,$at | ||
2334 | $ADDU $c_1,$t_2 | ||
2335 | sltu $at,$c_1,$t_2 | ||
2336 | $ADDU $c_2,$at | ||
2337 | mflo $t_1 | ||
2338 | mfhi $t_2 | ||
2339 | slt $at,$t_2,$zero | ||
2340 | $ADDU $c_2,$at | ||
2341 | $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
2342 | $SLL $t_2,1 | ||
2343 | slt $a2,$t_1,$zero | ||
2344 | $ADDU $t_2,$a2 | ||
2345 | $SLL $t_1,1 | ||
2346 | $ADDU $c_3,$t_1 | ||
2347 | sltu $at,$c_3,$t_1 | ||
2348 | $ADDU $t_2,$at | ||
2349 | $ADDU $c_1,$t_2 | ||
2350 | sltu $at,$c_1,$t_2 | ||
2351 | $ADDU $c_2,$at | ||
2352 | $ST $c_3,11*$BNSZ($a0) | 2082 | $ST $c_3,11*$BNSZ($a0) |
2353 | 2083 | ___ | |
2354 | mflo $t_1 | 2084 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2355 | mfhi $t_2 | 2085 | $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); |
2356 | slt $c_3,$t_2,$zero | 2086 | $code.=<<___; |
2357 | $SLL $t_2,1 | ||
2358 | $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
2359 | slt $a2,$t_1,$zero | ||
2360 | $ADDU $t_2,$a2 | ||
2361 | $SLL $t_1,1 | ||
2362 | $ADDU $c_1,$t_1 | ||
2363 | sltu $at,$c_1,$t_1 | ||
2364 | $ADDU $t_2,$at | ||
2365 | $ADDU $c_2,$t_2 | ||
2366 | sltu $at,$c_2,$t_2 | ||
2367 | $ADDU $c_3,$at | ||
2368 | mflo $t_1 | 2087 | mflo $t_1 |
2369 | mfhi $t_2 | 2088 | mfhi $t_2 |
2370 | $ADDU $c_1,$t_1 | 2089 | $ADDU $c_1,$t_1 |
@@ -2375,21 +2094,10 @@ $code.=<<___; | |||
2375 | sltu $at,$c_2,$t_2 | 2094 | sltu $at,$c_2,$t_2 |
2376 | $ADDU $c_3,$at | 2095 | $ADDU $c_3,$at |
2377 | $ST $c_1,12*$BNSZ($a0) | 2096 | $ST $c_1,12*$BNSZ($a0) |
2378 | 2097 | ___ | |
2379 | mflo $t_1 | 2098 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2380 | mfhi $t_2 | 2099 | $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); |
2381 | slt $c_1,$t_2,$zero | 2100 | $code.=<<___; |
2382 | $SLL $t_2,1 | ||
2383 | $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
2384 | slt $a2,$t_1,$zero | ||
2385 | $ADDU $t_2,$a2 | ||
2386 | $SLL $t_1,1 | ||
2387 | $ADDU $c_2,$t_1 | ||
2388 | sltu $at,$c_2,$t_1 | ||
2389 | $ADDU $t_2,$at | ||
2390 | $ADDU $c_3,$t_2 | ||
2391 | sltu $at,$c_3,$t_2 | ||
2392 | $ADDU $c_1,$at | ||
2393 | $ST $c_2,13*$BNSZ($a0) | 2101 | $ST $c_2,13*$BNSZ($a0) |
2394 | 2102 | ||
2395 | mflo $t_1 | 2103 | mflo $t_1 |
@@ -2457,21 +2165,10 @@ $code.=<<___; | |||
2457 | sltu $at,$c_2,$t_1 | 2165 | sltu $at,$c_2,$t_1 |
2458 | $ADDU $c_3,$t_2,$at | 2166 | $ADDU $c_3,$t_2,$at |
2459 | $ST $c_2,$BNSZ($a0) | 2167 | $ST $c_2,$BNSZ($a0) |
2460 | 2168 | ___ | |
2461 | mflo $t_1 | 2169 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2462 | mfhi $t_2 | 2170 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
2463 | slt $c_2,$t_2,$zero | 2171 | $code.=<<___; |
2464 | $SLL $t_2,1 | ||
2465 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
2466 | slt $a2,$t_1,$zero | ||
2467 | $ADDU $t_2,$a2 | ||
2468 | $SLL $t_1,1 | ||
2469 | $ADDU $c_3,$t_1 | ||
2470 | sltu $at,$c_3,$t_1 | ||
2471 | $ADDU $t_2,$at | ||
2472 | $ADDU $c_1,$t_2 | ||
2473 | sltu $at,$c_1,$t_2 | ||
2474 | $ADDU $c_2,$at | ||
2475 | mflo $t_1 | 2172 | mflo $t_1 |
2476 | mfhi $t_2 | 2173 | mfhi $t_2 |
2477 | $ADDU $c_3,$t_1 | 2174 | $ADDU $c_3,$t_1 |
@@ -2482,52 +2179,17 @@ $code.=<<___; | |||
2482 | sltu $at,$c_1,$t_2 | 2179 | sltu $at,$c_1,$t_2 |
2483 | $ADDU $c_2,$at | 2180 | $ADDU $c_2,$at |
2484 | $ST $c_3,2*$BNSZ($a0) | 2181 | $ST $c_3,2*$BNSZ($a0) |
2485 | 2182 | ___ | |
2486 | mflo $t_1 | 2183 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
2487 | mfhi $t_2 | 2184 | $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); |
2488 | slt $c_3,$t_2,$zero | 2185 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
2489 | $SLL $t_2,1 | 2186 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
2490 | $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); | 2187 | $code.=<<___; |
2491 | slt $a2,$t_1,$zero | ||
2492 | $ADDU $t_2,$a2 | ||
2493 | $SLL $t_1,1 | ||
2494 | $ADDU $c_1,$t_1 | ||
2495 | sltu $at,$c_1,$t_1 | ||
2496 | $ADDU $t_2,$at | ||
2497 | $ADDU $c_2,$t_2 | ||
2498 | sltu $at,$c_2,$t_2 | ||
2499 | $ADDU $c_3,$at | ||
2500 | mflo $t_1 | ||
2501 | mfhi $t_2 | ||
2502 | slt $at,$t_2,$zero | ||
2503 | $ADDU $c_3,$at | ||
2504 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
2505 | $SLL $t_2,1 | ||
2506 | slt $a2,$t_1,$zero | ||
2507 | $ADDU $t_2,$a2 | ||
2508 | $SLL $t_1,1 | ||
2509 | $ADDU $c_1,$t_1 | ||
2510 | sltu $at,$c_1,$t_1 | ||
2511 | $ADDU $t_2,$at | ||
2512 | $ADDU $c_2,$t_2 | ||
2513 | sltu $at,$c_2,$t_2 | ||
2514 | $ADDU $c_3,$at | ||
2515 | $ST $c_1,3*$BNSZ($a0) | 2188 | $ST $c_1,3*$BNSZ($a0) |
2516 | 2189 | ___ | |
2517 | mflo $t_1 | 2190 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
2518 | mfhi $t_2 | 2191 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
2519 | slt $c_1,$t_2,$zero | 2192 | $code.=<<___; |
2520 | $SLL $t_2,1 | ||
2521 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
2522 | slt $a2,$t_1,$zero | ||
2523 | $ADDU $t_2,$a2 | ||
2524 | $SLL $t_1,1 | ||
2525 | $ADDU $c_2,$t_1 | ||
2526 | sltu $at,$c_2,$t_1 | ||
2527 | $ADDU $t_2,$at | ||
2528 | $ADDU $c_3,$t_2 | ||
2529 | sltu $at,$c_3,$t_2 | ||
2530 | $ADDU $c_1,$at | ||
2531 | mflo $t_1 | 2193 | mflo $t_1 |
2532 | mfhi $t_2 | 2194 | mfhi $t_2 |
2533 | $ADDU $c_2,$t_1 | 2195 | $ADDU $c_2,$t_1 |
@@ -2538,21 +2200,10 @@ $code.=<<___; | |||
2538 | sltu $at,$c_3,$t_2 | 2200 | sltu $at,$c_3,$t_2 |
2539 | $ADDU $c_1,$at | 2201 | $ADDU $c_1,$at |
2540 | $ST $c_2,4*$BNSZ($a0) | 2202 | $ST $c_2,4*$BNSZ($a0) |
2541 | 2203 | ___ | |
2542 | mflo $t_1 | 2204 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
2543 | mfhi $t_2 | 2205 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
2544 | slt $c_2,$t_2,$zero | 2206 | $code.=<<___; |
2545 | $SLL $t_2,1 | ||
2546 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
2547 | slt $a2,$t_1,$zero | ||
2548 | $ADDU $t_2,$a2 | ||
2549 | $SLL $t_1,1 | ||
2550 | $ADDU $c_3,$t_1 | ||
2551 | sltu $at,$c_3,$t_1 | ||
2552 | $ADDU $t_2,$at | ||
2553 | $ADDU $c_1,$t_2 | ||
2554 | sltu $at,$c_1,$t_2 | ||
2555 | $ADDU $c_2,$at | ||
2556 | $ST $c_3,5*$BNSZ($a0) | 2207 | $ST $c_3,5*$BNSZ($a0) |
2557 | 2208 | ||
2558 | mflo $t_1 | 2209 | mflo $t_1 |
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c b/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c index c9a2b6be73..9deffa71f1 100644 --- a/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c +++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: x86_64-gcc.c,v 1.4 2014/10/28 07:35:58 jsg Exp $ */ | 1 | /* $OpenBSD: x86_64-gcc.c,v 1.5 2015/02/25 15:39:49 bcook Exp $ */ |
2 | #include "../bn_lcl.h" | 2 | #include "../bn_lcl.h" |
3 | #if !(defined(__GNUC__) && __GNUC__>=2) | 3 | #if !(defined(__GNUC__) && __GNUC__>=2) |
4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
@@ -270,77 +270,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ |
271 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 271 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
272 | 272 | ||
273 | /* | ||
274 | * Keep in mind that carrying into the high part of a multiplication | ||
275 | * result cannot overflow, because that high part cannot be all-ones. | ||
276 | */ | ||
273 | #if 0 | 277 | #if 0 |
274 | /* original macros are kept for reference purposes */ | 278 | /* original macros are kept for reference purposes */ |
275 | #define mul_add_c(a,b,c0,c1,c2) { \ | 279 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
276 | BN_ULONG ta=(a),tb=(b); \ | 280 | BN_ULONG ta = (a), tb = (b); \ |
277 | t1 = ta * tb; \ | 281 | BN_ULONG lo, hi; \ |
278 | t2 = BN_UMULT_HIGH(ta,tb); \ | 282 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
279 | c0 += t1; t2 += (c0<t1)?1:0; \ | 283 | c0 += lo; hi += (c0<lo)?1:0; \ |
280 | c1 += t2; c2 += (c1<t2)?1:0; \ | 284 | c1 += hi; c2 += (c1<hi)?1:0; \ |
281 | } | 285 | } while(0) |
282 | 286 | ||
283 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 287 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
284 | BN_ULONG ta=(a),tb=(b),t0; \ | 288 | BN_ULONG ta = (a), tb = (b); \ |
285 | t1 = BN_UMULT_HIGH(ta,tb); \ | 289 | BN_ULONG lo, hi, tt; \ |
286 | t0 = ta * tb; \ | 290 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
287 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 291 | c0 += lo; tt = hi+((c0<lo)?1:0); \ |
288 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 292 | c1 += tt; c2 += (c1<tt)?1:0; \ |
289 | c0 += t1; t2 += (c0<t1)?1:0; \ | 293 | c0 += lo; hi += (c0<lo)?1:0; \ |
290 | c1 += t2; c2 += (c1<t2)?1:0; \ | 294 | c1 += hi; c2 += (c1<hi)?1:0; \ |
291 | } | 295 | } while(0) |
296 | |||
297 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
298 | BN_ULONG ta = (a)[i]; \ | ||
299 | BN_ULONG lo, hi; \ | ||
300 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
301 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
302 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
303 | } while(0) | ||
292 | #else | 304 | #else |
293 | #define mul_add_c(a,b,c0,c1,c2) do { \ | 305 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
306 | BN_ULONG t1,t2; \ | ||
294 | asm ("mulq %3" \ | 307 | asm ("mulq %3" \ |
295 | : "=a"(t1),"=d"(t2) \ | 308 | : "=a"(t1),"=d"(t2) \ |
296 | : "a"(a),"m"(b) \ | 309 | : "a"(a),"m"(b) \ |
297 | : "cc"); \ | 310 | : "cc"); \ |
298 | asm ("addq %2,%0; adcq %3,%1" \ | 311 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
299 | : "+r"(c0),"+d"(t2) \ | 312 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
300 | : "a"(t1),"g"(0) \ | 313 | : "r"(t1),"r"(t2),"g"(0) \ |
301 | : "cc"); \ | 314 | : "cc"); \ |
302 | asm ("addq %2,%0; adcq %3,%1" \ | ||
303 | : "+r"(c1),"+r"(c2) \ | ||
304 | : "d"(t2),"g"(0) \ | ||
305 | : "cc"); \ | ||
306 | } while (0) | 315 | } while (0) |
307 | 316 | ||
308 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | 317 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
318 | BN_ULONG t1,t2; \ | ||
309 | asm ("mulq %2" \ | 319 | asm ("mulq %2" \ |
310 | : "=a"(t1),"=d"(t2) \ | 320 | : "=a"(t1),"=d"(t2) \ |
311 | : "a"(a[i]) \ | 321 | : "a"(a[i]) \ |
312 | : "cc"); \ | 322 | : "cc"); \ |
313 | asm ("addq %2,%0; adcq %3,%1" \ | 323 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
314 | : "+r"(c0),"+d"(t2) \ | 324 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
315 | : "a"(t1),"g"(0) \ | 325 | : "r"(t1),"r"(t2),"g"(0) \ |
316 | : "cc"); \ | 326 | : "cc"); \ |
317 | asm ("addq %2,%0; adcq %3,%1" \ | ||
318 | : "+r"(c1),"+r"(c2) \ | ||
319 | : "d"(t2),"g"(0) \ | ||
320 | : "cc"); \ | ||
321 | } while (0) | 327 | } while (0) |
322 | 328 | ||
323 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | 329 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
330 | BN_ULONG t1,t2; \ | ||
324 | asm ("mulq %3" \ | 331 | asm ("mulq %3" \ |
325 | : "=a"(t1),"=d"(t2) \ | 332 | : "=a"(t1),"=d"(t2) \ |
326 | : "a"(a),"m"(b) \ | 333 | : "a"(a),"m"(b) \ |
327 | : "cc"); \ | 334 | : "cc"); \ |
328 | asm ("addq %0,%0; adcq %2,%1" \ | 335 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
329 | : "+d"(t2),"+r"(c2) \ | 336 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
330 | : "g"(0) \ | 337 | : "r"(t1),"r"(t2),"g"(0) \ |
331 | : "cc"); \ | 338 | : "cc"); \ |
332 | asm ("addq %0,%0; adcq %2,%1" \ | 339 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
333 | : "+a"(t1),"+d"(t2) \ | 340 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
334 | : "g"(0) \ | 341 | : "r"(t1),"r"(t2),"g"(0) \ |
335 | : "cc"); \ | 342 | : "cc"); \ |
336 | asm ("addq %2,%0; adcq %3,%1" \ | ||
337 | : "+r"(c0),"+d"(t2) \ | ||
338 | : "a"(t1),"g"(0) \ | ||
339 | : "cc"); \ | ||
340 | asm ("addq %2,%0; adcq %3,%1" \ | ||
341 | : "+r"(c1),"+r"(c2) \ | ||
342 | : "d"(t2),"g"(0) \ | ||
343 | : "cc"); \ | ||
344 | } while (0) | 343 | } while (0) |
345 | #endif | 344 | #endif |
346 | 345 | ||
@@ -349,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
349 | 348 | ||
350 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 349 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
351 | { | 350 | { |
352 | BN_ULONG t1,t2; | ||
353 | BN_ULONG c1,c2,c3; | 351 | BN_ULONG c1,c2,c3; |
354 | 352 | ||
355 | c1=0; | 353 | c1=0; |
@@ -453,7 +451,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
453 | 451 | ||
454 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 452 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
455 | { | 453 | { |
456 | BN_ULONG t1,t2; | ||
457 | BN_ULONG c1,c2,c3; | 454 | BN_ULONG c1,c2,c3; |
458 | 455 | ||
459 | c1=0; | 456 | c1=0; |
@@ -493,7 +490,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
493 | 490 | ||
494 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 491 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
495 | { | 492 | { |
496 | BN_ULONG t1,t2; | ||
497 | BN_ULONG c1,c2,c3; | 493 | BN_ULONG c1,c2,c3; |
498 | 494 | ||
499 | c1=0; | 495 | c1=0; |
@@ -569,7 +565,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
569 | 565 | ||
570 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 566 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
571 | { | 567 | { |
572 | BN_ULONG t1,t2; | ||
573 | BN_ULONG c1,c2,c3; | 568 | BN_ULONG c1,c2,c3; |
574 | 569 | ||
575 | c1=0; | 570 | c1=0; |
diff --git a/src/lib/libssl/src/crypto/bn/bn_asm.c b/src/lib/libssl/src/crypto/bn/bn_asm.c index c6efd2513a..49f0ba5d7b 100644 --- a/src/lib/libssl/src/crypto/bn/bn_asm.c +++ b/src/lib/libssl/src/crypto/bn/bn_asm.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_asm.c,v 1.13 2014/07/11 08:44:47 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.14 2015/02/25 15:39:49 bcook Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -495,116 +495,143 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
495 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 495 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
496 | 496 | ||
497 | #ifdef BN_LLONG | 497 | #ifdef BN_LLONG |
498 | #define mul_add_c(a,b,c0,c1,c2) \ | 498 | /* |
499 | t=(BN_ULLONG)a*b; \ | 499 | * Keep in mind that additions to multiplication result can not |
500 | t1=(BN_ULONG)Lw(t); \ | 500 | * overflow, because its high half cannot be all-ones. |
501 | t2=(BN_ULONG)Hw(t); \ | 501 | */ |
502 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 502 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
503 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 503 | BN_ULONG hi; \ |
504 | 504 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ | |
505 | #define mul_add_c2(a,b,c0,c1,c2) \ | 505 | t += c0; /* no carry */ \ |
506 | t=(BN_ULLONG)a*b; \ | 506 | c0 = (BN_ULONG)Lw(t); \ |
507 | tt=(t+t)&BN_MASK; \ | 507 | hi = (BN_ULONG)Hw(t); \ |
508 | if (tt < t) c2++; \ | 508 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
509 | t1=(BN_ULONG)Lw(tt); \ | 509 | } while(0) |
510 | t2=(BN_ULONG)Hw(tt); \ | 510 | |
511 | c0=(c0+t1)&BN_MASK2; \ | 511 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
512 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 512 | BN_ULONG hi; \ |
513 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 513 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ |
514 | 514 | BN_ULLONG tt = t+c0; /* no carry */ \ | |
515 | #define sqr_add_c(a,i,c0,c1,c2) \ | 515 | c0 = (BN_ULONG)Lw(tt); \ |
516 | t=(BN_ULLONG)a[i]*a[i]; \ | 516 | hi = (BN_ULONG)Hw(tt); \ |
517 | t1=(BN_ULONG)Lw(t); \ | 517 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
518 | t2=(BN_ULONG)Hw(t); \ | 518 | t += c0; /* no carry */ \ |
519 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 519 | c0 = (BN_ULONG)Lw(t); \ |
520 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 520 | hi = (BN_ULONG)Hw(t); \ |
521 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
522 | } while(0) | ||
523 | |||
524 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
525 | BN_ULONG hi; \ | ||
526 | BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \ | ||
527 | t += c0; /* no carry */ \ | ||
528 | c0 = (BN_ULONG)Lw(t); \ | ||
529 | hi = (BN_ULONG)Hw(t); \ | ||
530 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
531 | } while(0) | ||
521 | 532 | ||
522 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 533 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
523 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 534 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
524 | 535 | ||
525 | #elif defined(BN_UMULT_LOHI) | 536 | #elif defined(BN_UMULT_LOHI) |
526 | 537 | /* | |
527 | #define mul_add_c(a,b,c0,c1,c2) { \ | 538 | * Keep in mind that additions to hi can not overflow, because |
528 | BN_ULONG ta=(a),tb=(b); \ | 539 | * the high word of a multiplication result cannot be all-ones. |
529 | BN_UMULT_LOHI(t1,t2,ta,tb); \ | 540 | */ |
530 | c0 += t1; t2 += (c0<t1)?1:0; \ | 541 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
531 | c1 += t2; c2 += (c1<t2)?1:0; \ | 542 | BN_ULONG ta = (a), tb = (b); \ |
532 | } | 543 | BN_ULONG lo, hi; \ |
533 | 544 | BN_UMULT_LOHI(lo,hi,ta,tb); \ | |
534 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 545 | c0 += lo; hi += (c0<lo)?1:0; \ |
535 | BN_ULONG ta=(a),tb=(b),t0; \ | 546 | c1 += hi; c2 += (c1<hi)?1:0; \ |
536 | BN_UMULT_LOHI(t0,t1,ta,tb); \ | 547 | } while(0) |
537 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 548 | |
538 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 549 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
539 | c0 += t1; t2 += (c0<t1)?1:0; \ | 550 | BN_ULONG ta = (a), tb = (b); \ |
540 | c1 += t2; c2 += (c1<t2)?1:0; \ | 551 | BN_ULONG lo, hi, tt; \ |
541 | } | 552 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
542 | 553 | c0 += lo; tt = hi+((c0<lo)?1:0); \ | |
543 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 554 | c1 += tt; c2 += (c1<tt)?1:0; \ |
544 | BN_ULONG ta=(a)[i]; \ | 555 | c0 += lo; hi += (c0<lo)?1:0; \ |
545 | BN_UMULT_LOHI(t1,t2,ta,ta); \ | 556 | c1 += hi; c2 += (c1<hi)?1:0; \ |
546 | c0 += t1; t2 += (c0<t1)?1:0; \ | 557 | } while(0) |
547 | c1 += t2; c2 += (c1<t2)?1:0; \ | 558 | |
548 | } | 559 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
560 | BN_ULONG ta = (a)[i]; \ | ||
561 | BN_ULONG lo, hi; \ | ||
562 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
563 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
564 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
565 | } while(0) | ||
549 | 566 | ||
550 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 567 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
551 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 568 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
552 | 569 | ||
553 | #elif defined(BN_UMULT_HIGH) | 570 | #elif defined(BN_UMULT_HIGH) |
554 | 571 | /* | |
555 | #define mul_add_c(a,b,c0,c1,c2) { \ | 572 | * Keep in mind that additions to hi can not overflow, because |
556 | BN_ULONG ta=(a),tb=(b); \ | 573 | * the high word of a multiplication result cannot be all-ones. |
557 | t1 = ta * tb; \ | 574 | */ |
558 | t2 = BN_UMULT_HIGH(ta,tb); \ | 575 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
559 | c0 += t1; t2 += (c0<t1)?1:0; \ | 576 | BN_ULONG ta = (a), tb = (b); \ |
560 | c1 += t2; c2 += (c1<t2)?1:0; \ | 577 | BN_ULONG lo = ta * tb; \ |
561 | } | 578 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
562 | 579 | c0 += lo; hi += (c0<lo)?1:0; \ | |
563 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 580 | c1 += hi; c2 += (c1<hi)?1:0; \ |
564 | BN_ULONG ta=(a),tb=(b),t0; \ | 581 | } while(0) |
565 | t1 = BN_UMULT_HIGH(ta,tb); \ | 582 | |
566 | t0 = ta * tb; \ | 583 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
567 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 584 | BN_ULONG ta = (a), tb = (b), tt; \ |
568 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 585 | BN_ULONG lo = ta * tb; \ |
569 | c0 += t1; t2 += (c0<t1)?1:0; \ | 586 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
570 | c1 += t2; c2 += (c1<t2)?1:0; \ | 587 | c0 += lo; tt = hi + ((c0<lo)?1:0); \ |
571 | } | 588 | c1 += tt; c2 += (c1<tt)?1:0; \ |
572 | 589 | c0 += lo; hi += (c0<lo)?1:0; \ | |
573 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 590 | c1 += hi; c2 += (c1<hi)?1:0; \ |
574 | BN_ULONG ta=(a)[i]; \ | 591 | } while(0) |
575 | t1 = ta * ta; \ | 592 | |
576 | t2 = BN_UMULT_HIGH(ta,ta); \ | 593 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
577 | c0 += t1; t2 += (c0<t1)?1:0; \ | 594 | BN_ULONG ta = (a)[i]; \ |
578 | c1 += t2; c2 += (c1<t2)?1:0; \ | 595 | BN_ULONG lo = ta * ta; \ |
579 | } | 596 | BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \ |
597 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
598 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
599 | } while(0) | ||
580 | 600 | ||
581 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 601 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
582 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 602 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
583 | 603 | ||
584 | #else /* !BN_LLONG */ | 604 | #else /* !BN_LLONG */ |
585 | #define mul_add_c(a,b,c0,c1,c2) \ | 605 | /* |
586 | t1=LBITS(a); t2=HBITS(a); \ | 606 | * Keep in mind that additions to hi can not overflow, because |
587 | bl=LBITS(b); bh=HBITS(b); \ | 607 | * the high word of a multiplication result cannot be all-ones. |
588 | mul64(t1,t2,bl,bh); \ | 608 | */ |
589 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 609 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
590 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 610 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
591 | 611 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ | |
592 | #define mul_add_c2(a,b,c0,c1,c2) \ | 612 | mul64(lo,hi,bl,bh); \ |
593 | t1=LBITS(a); t2=HBITS(a); \ | 613 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
594 | bl=LBITS(b); bh=HBITS(b); \ | 614 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
595 | mul64(t1,t2,bl,bh); \ | 615 | } while(0) |
596 | if (t2 & BN_TBIT) c2++; \ | 616 | |
597 | t2=(t2+t2)&BN_MASK2; \ | 617 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
598 | if (t1 & BN_TBIT) t2++; \ | 618 | BN_ULONG tt; \ |
599 | t1=(t1+t1)&BN_MASK2; \ | 619 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
600 | c0=(c0+t1)&BN_MASK2; \ | 620 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ |
601 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 621 | mul64(lo,hi,bl,bh); \ |
602 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 622 | tt = hi; \ |
603 | 623 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \ | |
604 | #define sqr_add_c(a,i,c0,c1,c2) \ | 624 | c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \ |
605 | sqr64(t1,t2,(a)[i]); \ | 625 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
606 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 626 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
607 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 627 | } while(0) |
628 | |||
629 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
630 | BN_ULONG lo, hi; \ | ||
631 | sqr64(lo,hi,(a)[i]); \ | ||
632 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
633 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
634 | } while(0) | ||
608 | 635 | ||
609 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 636 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
610 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 637 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
@@ -613,12 +640,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
613 | void | 640 | void |
614 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 641 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
615 | { | 642 | { |
616 | #ifdef BN_LLONG | ||
617 | BN_ULLONG t; | ||
618 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
619 | BN_ULONG bl, bh; | ||
620 | #endif | ||
621 | BN_ULONG t1, t2; | ||
622 | BN_ULONG c1, c2, c3; | 643 | BN_ULONG c1, c2, c3; |
623 | 644 | ||
624 | c1 = 0; | 645 | c1 = 0; |
@@ -723,12 +744,6 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
723 | void | 744 | void |
724 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 745 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
725 | { | 746 | { |
726 | #ifdef BN_LLONG | ||
727 | BN_ULLONG t; | ||
728 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
729 | BN_ULONG bl, bh; | ||
730 | #endif | ||
731 | BN_ULONG t1, t2; | ||
732 | BN_ULONG c1, c2, c3; | 747 | BN_ULONG c1, c2, c3; |
733 | 748 | ||
734 | c1 = 0; | 749 | c1 = 0; |
@@ -769,12 +784,6 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
769 | void | 784 | void |
770 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 785 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
771 | { | 786 | { |
772 | #ifdef BN_LLONG | ||
773 | BN_ULLONG t, tt; | ||
774 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
775 | BN_ULONG bl, bh; | ||
776 | #endif | ||
777 | BN_ULONG t1, t2; | ||
778 | BN_ULONG c1, c2, c3; | 787 | BN_ULONG c1, c2, c3; |
779 | 788 | ||
780 | c1 = 0; | 789 | c1 = 0; |
@@ -851,12 +860,6 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
851 | void | 860 | void |
852 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 861 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
853 | { | 862 | { |
854 | #ifdef BN_LLONG | ||
855 | BN_ULLONG t, tt; | ||
856 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
857 | BN_ULONG bl, bh; | ||
858 | #endif | ||
859 | BN_ULONG t1, t2; | ||
860 | BN_ULONG c1, c2, c3; | 863 | BN_ULONG c1, c2, c3; |
861 | 864 | ||
862 | c1 = 0; | 865 | c1 = 0; |