summary | refs | log | tree | commit | diff
path: root/src/lib
diff options
context:
space:
mode:
author: bcook <> 2015-02-25 15:39:49 +0000
committer: bcook <> 2015-02-25 15:39:49 +0000
commit: 432e1d553bd75841b5b29f1a8008b519d538f765 (patch)
tree: fca56e3d23c024e7f0d0132456914f4f3181e5df /src/lib
parent: c95a8d3fbea64773cc8d6de4314c26a413e58a60 (diff)
download: openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.tar.gz
openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.tar.bz2
openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.zip
Fix CVE-2014-3570: properly calculate the square of a BIGNUM value.
See https://www.openssl.org/news/secadv_20150108.txt for a more detailed discussion. The original OpenSSL patch is here: https://github.com/openssl/openssl/commit/a7a44ba55cb4f884c6bc9ceac90072dea38e66d0 The regression test is modified slightly to conform to KNF. ok miod@
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/libcrypto/bn/asm/mips.pl 611
-rw-r--r-- src/lib/libcrypto/bn/asm/x86_64-gcc.c 103
-rw-r--r-- src/lib/libcrypto/bn/bn_asm.c 243
-rw-r--r-- src/lib/libssl/src/crypto/bn/asm/mips.pl 611
-rw-r--r-- src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c 103
-rw-r--r-- src/lib/libssl/src/crypto/bn/bn_asm.c 243
6 files changed, 606 insertions, 1308 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl
index d2f3ef7bbf..215c9a7483 100644
--- a/src/lib/libcrypto/bn/asm/mips.pl
+++ b/src/lib/libcrypto/bn/asm/mips.pl
@@ -1872,6 +1872,41 @@ ___
1872 1872
1873($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); 1873($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
1874 1874
1875sub add_c2 () {
1876my ($hi,$lo,$c0,$c1,$c2,
1877 $warm, # !$warm denotes first call with specific sequence of
1878 # $c_[XYZ] when there is no Z-carry to accumulate yet;
1879 $an,$bn # these two are arguments for multiplication which
1880 # result is used in *next* step [which is why it's
1881 # commented as "forward multiplication" below];
1882 )=@_;
1883$code.=<<___;
1884 mflo $lo
1885 mfhi $hi
1886 $ADDU $c0,$lo
1887 sltu $at,$c0,$lo
1888 $MULTU $an,$bn # forward multiplication
1889 $ADDU $c0,$lo
1890 $ADDU $at,$hi
1891 sltu $lo,$c0,$lo
1892 $ADDU $c1,$at
1893 $ADDU $hi,$lo
1894___
1895$code.=<<___ if (!$warm);
1896 sltu $c2,$c1,$at
1897 $ADDU $c1,$hi
1898 sltu $hi,$c1,$hi
1899 $ADDU $c2,$hi
1900___
1901$code.=<<___ if ($warm);
1902 sltu $at,$c1,$at
1903 $ADDU $c1,$hi
1904 $ADDU $c2,$at
1905 sltu $hi,$c1,$hi
1906 $ADDU $c2,$hi
1907___
1908}
1909
1875$code.=<<___; 1910$code.=<<___;
1876 1911
1877.align 5 1912.align 5
@@ -1920,21 +1955,10 @@ $code.=<<___;
1920 sltu $at,$c_2,$t_1 1955 sltu $at,$c_2,$t_1
1921 $ADDU $c_3,$t_2,$at 1956 $ADDU $c_3,$t_2,$at
1922 $ST $c_2,$BNSZ($a0) 1957 $ST $c_2,$BNSZ($a0)
1923 1958___
1924 mflo $t_1 1959 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
1925 mfhi $t_2 1960 $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
1926 slt $c_2,$t_2,$zero 1961$code.=<<___;
1927 $SLL $t_2,1
1928 $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
1929 slt $a2,$t_1,$zero
1930 $ADDU $t_2,$a2
1931 $SLL $t_1,1
1932 $ADDU $c_3,$t_1
1933 sltu $at,$c_3,$t_1
1934 $ADDU $t_2,$at
1935 $ADDU $c_1,$t_2
1936 sltu $at,$c_1,$t_2
1937 $ADDU $c_2,$at
1938 mflo $t_1 1962 mflo $t_1
1939 mfhi $t_2 1963 mfhi $t_2
1940 $ADDU $c_3,$t_1 1964 $ADDU $c_3,$t_1
@@ -1945,67 +1969,19 @@ $code.=<<___;
1945 sltu $at,$c_1,$t_2 1969 sltu $at,$c_1,$t_2
1946 $ADDU $c_2,$at 1970 $ADDU $c_2,$at
1947 $ST $c_3,2*$BNSZ($a0) 1971 $ST $c_3,2*$BNSZ($a0)
1948 1972___
1949 mflo $t_1 1973 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
1950 mfhi $t_2 1974 $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
1951 slt $c_3,$t_2,$zero 1975 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
1952 $SLL $t_2,1 1976 $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1);
1953 $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); 1977$code.=<<___;
1954 slt $a2,$t_1,$zero
1955 $ADDU $t_2,$a2
1956 $SLL $t_1,1
1957 $ADDU $c_1,$t_1
1958 sltu $at,$c_1,$t_1
1959 $ADDU $t_2,$at
1960 $ADDU $c_2,$t_2
1961 sltu $at,$c_2,$t_2
1962 $ADDU $c_3,$at
1963 mflo $t_1
1964 mfhi $t_2
1965 slt $at,$t_2,$zero
1966 $ADDU $c_3,$at
1967 $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1);
1968 $SLL $t_2,1
1969 slt $a2,$t_1,$zero
1970 $ADDU $t_2,$a2
1971 $SLL $t_1,1
1972 $ADDU $c_1,$t_1
1973 sltu $at,$c_1,$t_1
1974 $ADDU $t_2,$at
1975 $ADDU $c_2,$t_2
1976 sltu $at,$c_2,$t_2
1977 $ADDU $c_3,$at
1978 $ST $c_1,3*$BNSZ($a0) 1978 $ST $c_1,3*$BNSZ($a0)
1979 1979___
1980 mflo $t_1 1980 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
1981 mfhi $t_2 1981 $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
1982 slt $c_1,$t_2,$zero 1982 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
1983 $SLL $t_2,1 1983 $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
1984 $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); 1984$code.=<<___;
1985 slt $a2,$t_1,$zero
1986 $ADDU $t_2,$a2
1987 $SLL $t_1,1
1988 $ADDU $c_2,$t_1
1989 sltu $at,$c_2,$t_1
1990 $ADDU $t_2,$at
1991 $ADDU $c_3,$t_2
1992 sltu $at,$c_3,$t_2
1993 $ADDU $c_1,$at
1994 mflo $t_1
1995 mfhi $t_2
1996 slt $at,$t_2,$zero
1997 $ADDU $c_1,$at
1998 $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
1999 $SLL $t_2,1
2000 slt $a2,$t_1,$zero
2001 $ADDU $t_2,$a2
2002 $SLL $t_1,1
2003 $ADDU $c_2,$t_1
2004 sltu $at,$c_2,$t_1
2005 $ADDU $t_2,$at
2006 $ADDU $c_3,$t_2
2007 sltu $at,$c_3,$t_2
2008 $ADDU $c_1,$at
2009 mflo $t_1 1985 mflo $t_1
2010 mfhi $t_2 1986 mfhi $t_2
2011 $ADDU $c_2,$t_1 1987 $ADDU $c_2,$t_1
@@ -2016,97 +1992,23 @@ $code.=<<___;
2016 sltu $at,$c_3,$t_2 1992 sltu $at,$c_3,$t_2
2017 $ADDU $c_1,$at 1993 $ADDU $c_1,$at
2018 $ST $c_2,4*$BNSZ($a0) 1994 $ST $c_2,4*$BNSZ($a0)
2019 1995___
2020 mflo $t_1 1996 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2021 mfhi $t_2 1997 $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2);
2022 slt $c_2,$t_2,$zero 1998 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2023 $SLL $t_2,1 1999 $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2);
2024 $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); 2000 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2025 slt $a2,$t_1,$zero 2001 $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3);
2026 $ADDU $t_2,$a2 2002$code.=<<___;
2027 $SLL $t_1,1
2028 $ADDU $c_3,$t_1
2029 sltu $at,$c_3,$t_1
2030 $ADDU $t_2,$at
2031 $ADDU $c_1,$t_2
2032 sltu $at,$c_1,$t_2
2033 $ADDU $c_2,$at
2034 mflo $t_1
2035 mfhi $t_2
2036 slt $at,$t_2,$zero
2037 $ADDU $c_2,$at
2038 $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
2039 $SLL $t_2,1
2040 slt $a2,$t_1,$zero
2041 $ADDU $t_2,$a2
2042 $SLL $t_1,1
2043 $ADDU $c_3,$t_1
2044 sltu $at,$c_3,$t_1
2045 $ADDU $t_2,$at
2046 $ADDU $c_1,$t_2
2047 sltu $at,$c_1,$t_2
2048 $ADDU $c_2,$at
2049 mflo $t_1
2050 mfhi $t_2
2051 slt $at,$t_2,$zero
2052 $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3);
2053 $ADDU $c_2,$at
2054 $SLL $t_2,1
2055 slt $a2,$t_1,$zero
2056 $ADDU $t_2,$a2
2057 $SLL $t_1,1
2058 $ADDU $c_3,$t_1
2059 sltu $at,$c_3,$t_1
2060 $ADDU $t_2,$at
2061 $ADDU $c_1,$t_2
2062 sltu $at,$c_1,$t_2
2063 $ADDU $c_2,$at
2064 $ST $c_3,5*$BNSZ($a0) 2003 $ST $c_3,5*$BNSZ($a0)
2065 2004___
2066 mflo $t_1 2005 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2067 mfhi $t_2 2006 $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3);
2068 slt $c_3,$t_2,$zero 2007 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2069 $SLL $t_2,1 2008 $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3);
2070 $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); 2009 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2071 slt $a2,$t_1,$zero 2010 $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
2072 $ADDU $t_2,$a2 2011$code.=<<___;
2073 $SLL $t_1,1
2074 $ADDU $c_1,$t_1
2075 sltu $at,$c_1,$t_1
2076 $ADDU $t_2,$at
2077 $ADDU $c_2,$t_2
2078 sltu $at,$c_2,$t_2
2079 $ADDU $c_3,$at
2080 mflo $t_1
2081 mfhi $t_2
2082 slt $at,$t_2,$zero
2083 $ADDU $c_3,$at
2084 $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3);
2085 $SLL $t_2,1
2086 slt $a2,$t_1,$zero
2087 $ADDU $t_2,$a2
2088 $SLL $t_1,1
2089 $ADDU $c_1,$t_1
2090 sltu $at,$c_1,$t_1
2091 $ADDU $t_2,$at
2092 $ADDU $c_2,$t_2
2093 sltu $at,$c_2,$t_2
2094 $ADDU $c_3,$at
2095 mflo $t_1
2096 mfhi $t_2
2097 slt $at,$t_2,$zero
2098 $ADDU $c_3,$at
2099 $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
2100 $SLL $t_2,1
2101 slt $a2,$t_1,$zero
2102 $ADDU $t_2,$a2
2103 $SLL $t_1,1
2104 $ADDU $c_1,$t_1
2105 sltu $at,$c_1,$t_1
2106 $ADDU $t_2,$at
2107 $ADDU $c_2,$t_2
2108 sltu $at,$c_2,$t_2
2109 $ADDU $c_3,$at
2110 mflo $t_1 2012 mflo $t_1
2111 mfhi $t_2 2013 mfhi $t_2
2112 $ADDU $c_1,$t_1 2014 $ADDU $c_1,$t_1
@@ -2117,112 +2019,25 @@ $code.=<<___;
2117 sltu $at,$c_2,$t_2 2019 sltu $at,$c_2,$t_2
2118 $ADDU $c_3,$at 2020 $ADDU $c_3,$at
2119 $ST $c_1,6*$BNSZ($a0) 2021 $ST $c_1,6*$BNSZ($a0)
2120 2022___
2121 mflo $t_1 2023 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2122 mfhi $t_2 2024 $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1);
2123 slt $c_1,$t_2,$zero 2025 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2124 $SLL $t_2,1 2026 $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1);
2125 $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); 2027 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2126 slt $a2,$t_1,$zero 2028 $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1);
2127 $ADDU $t_2,$a2 2029 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2128 $SLL $t_1,1 2030 $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2);
2129 $ADDU $c_2,$t_1 2031$code.=<<___;
2130 sltu $at,$c_2,$t_1
2131 $ADDU $t_2,$at
2132 $ADDU $c_3,$t_2
2133 sltu $at,$c_3,$t_2
2134 $ADDU $c_1,$at
2135 mflo $t_1
2136 mfhi $t_2
2137 slt $at,$t_2,$zero
2138 $ADDU $c_1,$at
2139 $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1);
2140 $SLL $t_2,1
2141 slt $a2,$t_1,$zero
2142 $ADDU $t_2,$a2
2143 $SLL $t_1,1
2144 $ADDU $c_2,$t_1
2145 sltu $at,$c_2,$t_1
2146 $ADDU $t_2,$at
2147 $ADDU $c_3,$t_2
2148 sltu $at,$c_3,$t_2
2149 $ADDU $c_1,$at
2150 mflo $t_1
2151 mfhi $t_2
2152 slt $at,$t_2,$zero
2153 $ADDU $c_1,$at
2154 $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1);
2155 $SLL $t_2,1
2156 slt $a2,$t_1,$zero
2157 $ADDU $t_2,$a2
2158 $SLL $t_1,1
2159 $ADDU $c_2,$t_1
2160 sltu $at,$c_2,$t_1
2161 $ADDU $t_2,$at
2162 $ADDU $c_3,$t_2
2163 sltu $at,$c_3,$t_2
2164 $ADDU $c_1,$at
2165 mflo $t_1
2166 mfhi $t_2
2167 slt $at,$t_2,$zero
2168 $ADDU $c_1,$at
2169 $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2);
2170 $SLL $t_2,1
2171 slt $a2,$t_1,$zero
2172 $ADDU $t_2,$a2
2173 $SLL $t_1,1
2174 $ADDU $c_2,$t_1
2175 sltu $at,$c_2,$t_1
2176 $ADDU $t_2,$at
2177 $ADDU $c_3,$t_2
2178 sltu $at,$c_3,$t_2
2179 $ADDU $c_1,$at
2180 $ST $c_2,7*$BNSZ($a0) 2032 $ST $c_2,7*$BNSZ($a0)
2181 2033___
2182 mflo $t_1 2034 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2183 mfhi $t_2 2035 $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2);
2184 slt $c_2,$t_2,$zero 2036 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2185 $SLL $t_2,1 2037 $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2);
2186 $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); 2038 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2187 slt $a2,$t_1,$zero 2039 $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2);
2188 $ADDU $t_2,$a2 2040$code.=<<___;
2189 $SLL $t_1,1
2190 $ADDU $c_3,$t_1
2191 sltu $at,$c_3,$t_1
2192 $ADDU $t_2,$at
2193 $ADDU $c_1,$t_2
2194 sltu $at,$c_1,$t_2
2195 $ADDU $c_2,$at
2196 mflo $t_1
2197 mfhi $t_2
2198 slt $at,$t_2,$zero
2199 $ADDU $c_2,$at
2200 $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2);
2201 $SLL $t_2,1
2202 slt $a2,$t_1,$zero
2203 $ADDU $t_2,$a2
2204 $SLL $t_1,1
2205 $ADDU $c_3,$t_1
2206 sltu $at,$c_3,$t_1
2207 $ADDU $t_2,$at
2208 $ADDU $c_1,$t_2
2209 sltu $at,$c_1,$t_2
2210 $ADDU $c_2,$at
2211 mflo $t_1
2212 mfhi $t_2
2213 slt $at,$t_2,$zero
2214 $ADDU $c_2,$at
2215 $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2);
2216 $SLL $t_2,1
2217 slt $a2,$t_1,$zero
2218 $ADDU $t_2,$a2
2219 $SLL $t_1,1
2220 $ADDU $c_3,$t_1
2221 sltu $at,$c_3,$t_1
2222 $ADDU $t_2,$at
2223 $ADDU $c_1,$t_2
2224 sltu $at,$c_1,$t_2
2225 $ADDU $c_2,$at
2226 mflo $t_1 2041 mflo $t_1
2227 mfhi $t_2 2042 mfhi $t_2
2228 $ADDU $c_3,$t_1 2043 $ADDU $c_3,$t_1
@@ -2233,82 +2048,21 @@ $code.=<<___;
2233 sltu $at,$c_1,$t_2 2048 sltu $at,$c_1,$t_2
2234 $ADDU $c_2,$at 2049 $ADDU $c_2,$at
2235 $ST $c_3,8*$BNSZ($a0) 2050 $ST $c_3,8*$BNSZ($a0)
2236 2051___
2237 mflo $t_1 2052 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2238 mfhi $t_2 2053 $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3);
2239 slt $c_3,$t_2,$zero 2054 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2240 $SLL $t_2,1 2055 $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3);
2241 $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); 2056 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2242 slt $a2,$t_1,$zero 2057 $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1);
2243 $ADDU $t_2,$a2 2058$code.=<<___;
2244 $SLL $t_1,1
2245 $ADDU $c_1,$t_1
2246 sltu $at,$c_1,$t_1
2247 $ADDU $t_2,$at
2248 $ADDU $c_2,$t_2
2249 sltu $at,$c_2,$t_2
2250 $ADDU $c_3,$at
2251 mflo $t_1
2252 mfhi $t_2
2253 slt $at,$t_2,$zero
2254 $ADDU $c_3,$at
2255 $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3);
2256 $SLL $t_2,1
2257 slt $a2,$t_1,$zero
2258 $ADDU $t_2,$a2
2259 $SLL $t_1,1
2260 $ADDU $c_1,$t_1
2261 sltu $at,$c_1,$t_1
2262 $ADDU $t_2,$at
2263 $ADDU $c_2,$t_2
2264 sltu $at,$c_2,$t_2
2265 $ADDU $c_3,$at
2266 mflo $t_1
2267 mfhi $t_2
2268 slt $at,$t_2,$zero
2269 $ADDU $c_3,$at
2270 $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1);
2271 $SLL $t_2,1
2272 slt $a2,$t_1,$zero
2273 $ADDU $t_2,$a2
2274 $SLL $t_1,1
2275 $ADDU $c_1,$t_1
2276 sltu $at,$c_1,$t_1
2277 $ADDU $t_2,$at
2278 $ADDU $c_2,$t_2
2279 sltu $at,$c_2,$t_2
2280 $ADDU $c_3,$at
2281 $ST $c_1,9*$BNSZ($a0) 2059 $ST $c_1,9*$BNSZ($a0)
2282 2060___
2283 mflo $t_1 2061 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2284 mfhi $t_2 2062 $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1);
2285 slt $c_1,$t_2,$zero 2063 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2286 $SLL $t_2,1 2064 $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1);
2287 $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); 2065$code.=<<___;
2288 slt $a2,$t_1,$zero
2289 $ADDU $t_2,$a2
2290 $SLL $t_1,1
2291 $ADDU $c_2,$t_1
2292 sltu $at,$c_2,$t_1
2293 $ADDU $t_2,$at
2294 $ADDU $c_3,$t_2
2295 sltu $at,$c_3,$t_2
2296 $ADDU $c_1,$at
2297 mflo $t_1
2298 mfhi $t_2
2299 slt $at,$t_2,$zero
2300 $ADDU $c_1,$at
2301 $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1);
2302 $SLL $t_2,1
2303 slt $a2,$t_1,$zero
2304 $ADDU $t_2,$a2
2305 $SLL $t_1,1
2306 $ADDU $c_2,$t_1
2307 sltu $at,$c_2,$t_1
2308 $ADDU $t_2,$at
2309 $ADDU $c_3,$t_2
2310 sltu $at,$c_3,$t_2
2311 $ADDU $c_1,$at
2312 mflo $t_1 2066 mflo $t_1
2313 mfhi $t_2 2067 mfhi $t_2
2314 $ADDU $c_2,$t_1 2068 $ADDU $c_2,$t_1
@@ -2319,52 +2073,17 @@ $code.=<<___;
2319 sltu $at,$c_3,$t_2 2073 sltu $at,$c_3,$t_2
2320 $ADDU $c_1,$at 2074 $ADDU $c_1,$at
2321 $ST $c_2,10*$BNSZ($a0) 2075 $ST $c_2,10*$BNSZ($a0)
2322 2076___
2323 mflo $t_1 2077 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2324 mfhi $t_2 2078 $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2);
2325 slt $c_2,$t_2,$zero 2079 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2326 $SLL $t_2,1 2080 $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3);
2327 $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); 2081$code.=<<___;
2328 slt $a2,$t_1,$zero
2329 $ADDU $t_2,$a2
2330 $SLL $t_1,1
2331 $ADDU $c_3,$t_1
2332 sltu $at,$c_3,$t_1
2333 $ADDU $t_2,$at
2334 $ADDU $c_1,$t_2
2335 sltu $at,$c_1,$t_2
2336 $ADDU $c_2,$at
2337 mflo $t_1
2338 mfhi $t_2
2339 slt $at,$t_2,$zero
2340 $ADDU $c_2,$at
2341 $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3);
2342 $SLL $t_2,1
2343 slt $a2,$t_1,$zero
2344 $ADDU $t_2,$a2
2345 $SLL $t_1,1
2346 $ADDU $c_3,$t_1
2347 sltu $at,$c_3,$t_1
2348 $ADDU $t_2,$at
2349 $ADDU $c_1,$t_2
2350 sltu $at,$c_1,$t_2
2351 $ADDU $c_2,$at
2352 $ST $c_3,11*$BNSZ($a0) 2082 $ST $c_3,11*$BNSZ($a0)
2353 2083___
2354 mflo $t_1 2084 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2355 mfhi $t_2 2085 $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3);
2356 slt $c_3,$t_2,$zero 2086$code.=<<___;
2357 $SLL $t_2,1
2358 $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3);
2359 slt $a2,$t_1,$zero
2360 $ADDU $t_2,$a2
2361 $SLL $t_1,1
2362 $ADDU $c_1,$t_1
2363 sltu $at,$c_1,$t_1
2364 $ADDU $t_2,$at
2365 $ADDU $c_2,$t_2
2366 sltu $at,$c_2,$t_2
2367 $ADDU $c_3,$at
2368 mflo $t_1 2087 mflo $t_1
2369 mfhi $t_2 2088 mfhi $t_2
2370 $ADDU $c_1,$t_1 2089 $ADDU $c_1,$t_1
@@ -2375,21 +2094,10 @@ $code.=<<___;
2375 sltu $at,$c_2,$t_2 2094 sltu $at,$c_2,$t_2
2376 $ADDU $c_3,$at 2095 $ADDU $c_3,$at
2377 $ST $c_1,12*$BNSZ($a0) 2096 $ST $c_1,12*$BNSZ($a0)
2378 2097___
2379 mflo $t_1 2098 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2380 mfhi $t_2 2099 $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2);
2381 slt $c_1,$t_2,$zero 2100$code.=<<___;
2382 $SLL $t_2,1
2383 $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2);
2384 slt $a2,$t_1,$zero
2385 $ADDU $t_2,$a2
2386 $SLL $t_1,1
2387 $ADDU $c_2,$t_1
2388 sltu $at,$c_2,$t_1
2389 $ADDU $t_2,$at
2390 $ADDU $c_3,$t_2
2391 sltu $at,$c_3,$t_2
2392 $ADDU $c_1,$at
2393 $ST $c_2,13*$BNSZ($a0) 2101 $ST $c_2,13*$BNSZ($a0)
2394 2102
2395 mflo $t_1 2103 mflo $t_1
@@ -2457,21 +2165,10 @@ $code.=<<___;
2457 sltu $at,$c_2,$t_1 2165 sltu $at,$c_2,$t_1
2458 $ADDU $c_3,$t_2,$at 2166 $ADDU $c_3,$t_2,$at
2459 $ST $c_2,$BNSZ($a0) 2167 $ST $c_2,$BNSZ($a0)
2460 2168___
2461 mflo $t_1 2169 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2462 mfhi $t_2 2170 $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
2463 slt $c_2,$t_2,$zero 2171$code.=<<___;
2464 $SLL $t_2,1
2465 $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
2466 slt $a2,$t_1,$zero
2467 $ADDU $t_2,$a2
2468 $SLL $t_1,1
2469 $ADDU $c_3,$t_1
2470 sltu $at,$c_3,$t_1
2471 $ADDU $t_2,$at
2472 $ADDU $c_1,$t_2
2473 sltu $at,$c_1,$t_2
2474 $ADDU $c_2,$at
2475 mflo $t_1 2172 mflo $t_1
2476 mfhi $t_2 2173 mfhi $t_2
2477 $ADDU $c_3,$t_1 2174 $ADDU $c_3,$t_1
@@ -2482,52 +2179,17 @@ $code.=<<___;
2482 sltu $at,$c_1,$t_2 2179 sltu $at,$c_1,$t_2
2483 $ADDU $c_2,$at 2180 $ADDU $c_2,$at
2484 $ST $c_3,2*$BNSZ($a0) 2181 $ST $c_3,2*$BNSZ($a0)
2485 2182___
2486 mflo $t_1 2183 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2487 mfhi $t_2 2184 $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3);
2488 slt $c_3,$t_2,$zero 2185 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2489 $SLL $t_2,1 2186 $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
2490 $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); 2187$code.=<<___;
2491 slt $a2,$t_1,$zero
2492 $ADDU $t_2,$a2
2493 $SLL $t_1,1
2494 $ADDU $c_1,$t_1
2495 sltu $at,$c_1,$t_1
2496 $ADDU $t_2,$at
2497 $ADDU $c_2,$t_2
2498 sltu $at,$c_2,$t_2
2499 $ADDU $c_3,$at
2500 mflo $t_1
2501 mfhi $t_2
2502 slt $at,$t_2,$zero
2503 $ADDU $c_3,$at
2504 $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
2505 $SLL $t_2,1
2506 slt $a2,$t_1,$zero
2507 $ADDU $t_2,$a2
2508 $SLL $t_1,1
2509 $ADDU $c_1,$t_1
2510 sltu $at,$c_1,$t_1
2511 $ADDU $t_2,$at
2512 $ADDU $c_2,$t_2
2513 sltu $at,$c_2,$t_2
2514 $ADDU $c_3,$at
2515 $ST $c_1,3*$BNSZ($a0) 2188 $ST $c_1,3*$BNSZ($a0)
2516 2189___
2517 mflo $t_1 2190 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2518 mfhi $t_2 2191 $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
2519 slt $c_1,$t_2,$zero 2192$code.=<<___;
2520 $SLL $t_2,1
2521 $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
2522 slt $a2,$t_1,$zero
2523 $ADDU $t_2,$a2
2524 $SLL $t_1,1
2525 $ADDU $c_2,$t_1
2526 sltu $at,$c_2,$t_1
2527 $ADDU $t_2,$at
2528 $ADDU $c_3,$t_2
2529 sltu $at,$c_3,$t_2
2530 $ADDU $c_1,$at
2531 mflo $t_1 2193 mflo $t_1
2532 mfhi $t_2 2194 mfhi $t_2
2533 $ADDU $c_2,$t_1 2195 $ADDU $c_2,$t_1
@@ -2538,21 +2200,10 @@ $code.=<<___;
2538 sltu $at,$c_3,$t_2 2200 sltu $at,$c_3,$t_2
2539 $ADDU $c_1,$at 2201 $ADDU $c_1,$at
2540 $ST $c_2,4*$BNSZ($a0) 2202 $ST $c_2,4*$BNSZ($a0)
2541 2203___
2542 mflo $t_1 2204 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2543 mfhi $t_2 2205 $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
2544 slt $c_2,$t_2,$zero 2206$code.=<<___;
2545 $SLL $t_2,1
2546 $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
2547 slt $a2,$t_1,$zero
2548 $ADDU $t_2,$a2
2549 $SLL $t_1,1
2550 $ADDU $c_3,$t_1
2551 sltu $at,$c_3,$t_1
2552 $ADDU $t_2,$at
2553 $ADDU $c_1,$t_2
2554 sltu $at,$c_1,$t_2
2555 $ADDU $c_2,$at
2556 $ST $c_3,5*$BNSZ($a0) 2207 $ST $c_3,5*$BNSZ($a0)
2557 2208
2558 mflo $t_1 2209 mflo $t_1
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
index c9a2b6be73..9deffa71f1 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c
+++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: x86_64-gcc.c,v 1.4 2014/10/28 07:35:58 jsg Exp $ */ 1/* $OpenBSD: x86_64-gcc.c,v 1.5 2015/02/25 15:39:49 bcook Exp $ */
2#include "../bn_lcl.h" 2#include "../bn_lcl.h"
3#if !(defined(__GNUC__) && __GNUC__>=2) 3#if !(defined(__GNUC__) && __GNUC__>=2)
4# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ 4# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
@@ -270,77 +270,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
270/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ 270/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
271/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 271/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
272 272
273/*
274 * Keep in mind that carrying into high part of multiplication result
275 * can not overflow, because it cannot be all-ones.
276 */
273#if 0 277#if 0
274/* original macros are kept for reference purposes */ 278/* original macros are kept for reference purposes */
275#define mul_add_c(a,b,c0,c1,c2) { \ 279#define mul_add_c(a,b,c0,c1,c2) do { \
276 BN_ULONG ta=(a),tb=(b); \ 280 BN_ULONG ta = (a), tb = (b); \
277 t1 = ta * tb; \ 281 BN_ULONG lo, hi; \
278 t2 = BN_UMULT_HIGH(ta,tb); \ 282 BN_UMULT_LOHI(lo,hi,ta,tb); \
279 c0 += t1; t2 += (c0<t1)?1:0; \ 283 c0 += lo; hi += (c0<lo)?1:0; \
280 c1 += t2; c2 += (c1<t2)?1:0; \ 284 c1 += hi; c2 += (c1<hi)?1:0; \
281 } 285 } while(0)
282 286
283#define mul_add_c2(a,b,c0,c1,c2) { \ 287#define mul_add_c2(a,b,c0,c1,c2) do { \
284 BN_ULONG ta=(a),tb=(b),t0; \ 288 BN_ULONG ta = (a), tb = (b); \
285 t1 = BN_UMULT_HIGH(ta,tb); \ 289 BN_ULONG lo, hi, tt; \
286 t0 = ta * tb; \ 290 BN_UMULT_LOHI(lo,hi,ta,tb); \
287 t2 = t1+t1; c2 += (t2<t1)?1:0; \ 291 c0 += lo; tt = hi+((c0<lo)?1:0); \
288 t1 = t0+t0; t2 += (t1<t0)?1:0; \ 292 c1 += tt; c2 += (c1<tt)?1:0; \
289 c0 += t1; t2 += (c0<t1)?1:0; \ 293 c0 += lo; hi += (c0<lo)?1:0; \
290 c1 += t2; c2 += (c1<t2)?1:0; \ 294 c1 += hi; c2 += (c1<hi)?1:0; \
291 } 295 } while(0)
296
297#define sqr_add_c(a,i,c0,c1,c2) do { \
298 BN_ULONG ta = (a)[i]; \
299 BN_ULONG lo, hi; \
300 BN_UMULT_LOHI(lo,hi,ta,ta); \
301 c0 += lo; hi += (c0<lo)?1:0; \
302 c1 += hi; c2 += (c1<hi)?1:0; \
303 } while(0)
292#else 304#else
293#define mul_add_c(a,b,c0,c1,c2) do { \ 305#define mul_add_c(a,b,c0,c1,c2) do { \
306 BN_ULONG t1,t2; \
294 asm ("mulq %3" \ 307 asm ("mulq %3" \
295 : "=a"(t1),"=d"(t2) \ 308 : "=a"(t1),"=d"(t2) \
296 : "a"(a),"m"(b) \ 309 : "a"(a),"m"(b) \
297 : "cc"); \ 310 : "cc"); \
298 asm ("addq %2,%0; adcq %3,%1" \ 311 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
299 : "+r"(c0),"+d"(t2) \ 312 : "+r"(c0),"+r"(c1),"+r"(c2) \
300 : "a"(t1),"g"(0) \ 313 : "r"(t1),"r"(t2),"g"(0) \
301 : "cc"); \ 314 : "cc"); \
302 asm ("addq %2,%0; adcq %3,%1" \
303 : "+r"(c1),"+r"(c2) \
304 : "d"(t2),"g"(0) \
305 : "cc"); \
306 } while (0) 315 } while (0)
307 316
308#define sqr_add_c(a,i,c0,c1,c2) do { \ 317#define sqr_add_c(a,i,c0,c1,c2) do { \
318 BN_ULONG t1,t2; \
309 asm ("mulq %2" \ 319 asm ("mulq %2" \
310 : "=a"(t1),"=d"(t2) \ 320 : "=a"(t1),"=d"(t2) \
311 : "a"(a[i]) \ 321 : "a"(a[i]) \
312 : "cc"); \ 322 : "cc"); \
313 asm ("addq %2,%0; adcq %3,%1" \ 323 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
314 : "+r"(c0),"+d"(t2) \ 324 : "+r"(c0),"+r"(c1),"+r"(c2) \
315 : "a"(t1),"g"(0) \ 325 : "r"(t1),"r"(t2),"g"(0) \
316 : "cc"); \ 326 : "cc"); \
317 asm ("addq %2,%0; adcq %3,%1" \
318 : "+r"(c1),"+r"(c2) \
319 : "d"(t2),"g"(0) \
320 : "cc"); \
321 } while (0) 327 } while (0)
322 328
323#define mul_add_c2(a,b,c0,c1,c2) do { \ 329#define mul_add_c2(a,b,c0,c1,c2) do { \
330 BN_ULONG t1,t2; \
324 asm ("mulq %3" \ 331 asm ("mulq %3" \
325 : "=a"(t1),"=d"(t2) \ 332 : "=a"(t1),"=d"(t2) \
326 : "a"(a),"m"(b) \ 333 : "a"(a),"m"(b) \
327 : "cc"); \ 334 : "cc"); \
328 asm ("addq %0,%0; adcq %2,%1" \ 335 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
329 : "+d"(t2),"+r"(c2) \ 336 : "+r"(c0),"+r"(c1),"+r"(c2) \
330 : "g"(0) \ 337 : "r"(t1),"r"(t2),"g"(0) \
331 : "cc"); \ 338 : "cc"); \
332 asm ("addq %0,%0; adcq %2,%1" \ 339 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
333 : "+a"(t1),"+d"(t2) \ 340 : "+r"(c0),"+r"(c1),"+r"(c2) \
334 : "g"(0) \ 341 : "r"(t1),"r"(t2),"g"(0) \
335 : "cc"); \ 342 : "cc"); \
336 asm ("addq %2,%0; adcq %3,%1" \
337 : "+r"(c0),"+d"(t2) \
338 : "a"(t1),"g"(0) \
339 : "cc"); \
340 asm ("addq %2,%0; adcq %3,%1" \
341 : "+r"(c1),"+r"(c2) \
342 : "d"(t2),"g"(0) \
343 : "cc"); \
344 } while (0) 343 } while (0)
345#endif 344#endif
346 345
@@ -349,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
349 348
350void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 349void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
351 { 350 {
352 BN_ULONG t1,t2;
353 BN_ULONG c1,c2,c3; 351 BN_ULONG c1,c2,c3;
354 352
355 c1=0; 353 c1=0;
@@ -453,7 +451,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
453 451
454void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 452void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
455 { 453 {
456 BN_ULONG t1,t2;
457 BN_ULONG c1,c2,c3; 454 BN_ULONG c1,c2,c3;
458 455
459 c1=0; 456 c1=0;
@@ -493,7 +490,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
493 490
494void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 491void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
495 { 492 {
496 BN_ULONG t1,t2;
497 BN_ULONG c1,c2,c3; 493 BN_ULONG c1,c2,c3;
498 494
499 c1=0; 495 c1=0;
@@ -569,7 +565,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
569 565
570void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 566void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
571 { 567 {
572 BN_ULONG t1,t2;
573 BN_ULONG c1,c2,c3; 568 BN_ULONG c1,c2,c3;
574 569
575 c1=0; 570 c1=0;
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index c6efd2513a..49f0ba5d7b 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_asm.c,v 1.13 2014/07/11 08:44:47 jsing Exp $ */ 1/* $OpenBSD: bn_asm.c,v 1.14 2015/02/25 15:39:49 bcook Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -495,116 +495,143 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
495/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 495/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
496 496
497#ifdef BN_LLONG 497#ifdef BN_LLONG
498#define mul_add_c(a,b,c0,c1,c2) \ 498/*
499 t=(BN_ULLONG)a*b; \ 499 * Keep in mind that additions to multiplication result can not
500 t1=(BN_ULONG)Lw(t); \ 500 * overflow, because its high half cannot be all-ones.
501 t2=(BN_ULONG)Hw(t); \ 501 */
502 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 502#define mul_add_c(a,b,c0,c1,c2) do { \
503 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 503 BN_ULONG hi; \
504 504 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
505#define mul_add_c2(a,b,c0,c1,c2) \ 505 t += c0; /* no carry */ \
506 t=(BN_ULLONG)a*b; \ 506 c0 = (BN_ULONG)Lw(t); \
507 tt=(t+t)&BN_MASK; \ 507 hi = (BN_ULONG)Hw(t); \
508 if (tt < t) c2++; \ 508 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
509 t1=(BN_ULONG)Lw(tt); \ 509 } while(0)
510 t2=(BN_ULONG)Hw(tt); \ 510
511 c0=(c0+t1)&BN_MASK2; \ 511#define mul_add_c2(a,b,c0,c1,c2) do { \
512 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 512 BN_ULONG hi; \
513 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 513 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
514 514 BN_ULLONG tt = t+c0; /* no carry */ \
515#define sqr_add_c(a,i,c0,c1,c2) \ 515 c0 = (BN_ULONG)Lw(tt); \
516 t=(BN_ULLONG)a[i]*a[i]; \ 516 hi = (BN_ULONG)Hw(tt); \
517 t1=(BN_ULONG)Lw(t); \ 517 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
518 t2=(BN_ULONG)Hw(t); \ 518 t += c0; /* no carry */ \
519 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 519 c0 = (BN_ULONG)Lw(t); \
520 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 520 hi = (BN_ULONG)Hw(t); \
521 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
522 } while(0)
523
524#define sqr_add_c(a,i,c0,c1,c2) do { \
525 BN_ULONG hi; \
526 BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \
527 t += c0; /* no carry */ \
528 c0 = (BN_ULONG)Lw(t); \
529 hi = (BN_ULONG)Hw(t); \
530 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
531 } while(0)
521 532
522#define sqr_add_c2(a,i,j,c0,c1,c2) \ 533#define sqr_add_c2(a,i,j,c0,c1,c2) \
523 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 534 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
524 535
525#elif defined(BN_UMULT_LOHI) 536#elif defined(BN_UMULT_LOHI)
526 537/*
527#define mul_add_c(a,b,c0,c1,c2) { \ 538 * Keep in mind that additions to hi can not overflow, because
528 BN_ULONG ta=(a),tb=(b); \ 539 * the high word of a multiplication result cannot be all-ones.
529 BN_UMULT_LOHI(t1,t2,ta,tb); \ 540 */
530 c0 += t1; t2 += (c0<t1)?1:0; \ 541#define mul_add_c(a,b,c0,c1,c2) do { \
531 c1 += t2; c2 += (c1<t2)?1:0; \ 542 BN_ULONG ta = (a), tb = (b); \
532 } 543 BN_ULONG lo, hi; \
533 544 BN_UMULT_LOHI(lo,hi,ta,tb); \
534#define mul_add_c2(a,b,c0,c1,c2) { \ 545 c0 += lo; hi += (c0<lo)?1:0; \
535 BN_ULONG ta=(a),tb=(b),t0; \ 546 c1 += hi; c2 += (c1<hi)?1:0; \
536 BN_UMULT_LOHI(t0,t1,ta,tb); \ 547 } while(0)
537 t2 = t1+t1; c2 += (t2<t1)?1:0; \ 548
538 t1 = t0+t0; t2 += (t1<t0)?1:0; \ 549#define mul_add_c2(a,b,c0,c1,c2) do { \
539 c0 += t1; t2 += (c0<t1)?1:0; \ 550 BN_ULONG ta = (a), tb = (b); \
540 c1 += t2; c2 += (c1<t2)?1:0; \ 551 BN_ULONG lo, hi, tt; \
541 } 552 BN_UMULT_LOHI(lo,hi,ta,tb); \
542 553 c0 += lo; tt = hi+((c0<lo)?1:0); \
543#define sqr_add_c(a,i,c0,c1,c2) { \ 554 c1 += tt; c2 += (c1<tt)?1:0; \
544 BN_ULONG ta=(a)[i]; \ 555 c0 += lo; hi += (c0<lo)?1:0; \
545 BN_UMULT_LOHI(t1,t2,ta,ta); \ 556 c1 += hi; c2 += (c1<hi)?1:0; \
546 c0 += t1; t2 += (c0<t1)?1:0; \ 557 } while(0)
547 c1 += t2; c2 += (c1<t2)?1:0; \ 558
548 } 559#define sqr_add_c(a,i,c0,c1,c2) do { \
560 BN_ULONG ta = (a)[i]; \
561 BN_ULONG lo, hi; \
562 BN_UMULT_LOHI(lo,hi,ta,ta); \
563 c0 += lo; hi += (c0<lo)?1:0; \
564 c1 += hi; c2 += (c1<hi)?1:0; \
565 } while(0)
549 566
550#define sqr_add_c2(a,i,j,c0,c1,c2) \ 567#define sqr_add_c2(a,i,j,c0,c1,c2) \
551 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 568 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
552 569
553#elif defined(BN_UMULT_HIGH) 570#elif defined(BN_UMULT_HIGH)
554 571/*
555#define mul_add_c(a,b,c0,c1,c2) { \ 572 * Keep in mind that additions to hi can not overflow, because
556 BN_ULONG ta=(a),tb=(b); \ 573 * the high word of a multiplication result cannot be all-ones.
557 t1 = ta * tb; \ 574 */
558 t2 = BN_UMULT_HIGH(ta,tb); \ 575#define mul_add_c(a,b,c0,c1,c2) do { \
559 c0 += t1; t2 += (c0<t1)?1:0; \ 576 BN_ULONG ta = (a), tb = (b); \
560 c1 += t2; c2 += (c1<t2)?1:0; \ 577 BN_ULONG lo = ta * tb; \
561 } 578 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
562 579 c0 += lo; hi += (c0<lo)?1:0; \
563#define mul_add_c2(a,b,c0,c1,c2) { \ 580 c1 += hi; c2 += (c1<hi)?1:0; \
564 BN_ULONG ta=(a),tb=(b),t0; \ 581 } while(0)
565 t1 = BN_UMULT_HIGH(ta,tb); \ 582
566 t0 = ta * tb; \ 583#define mul_add_c2(a,b,c0,c1,c2) do { \
567 t2 = t1+t1; c2 += (t2<t1)?1:0; \ 584 BN_ULONG ta = (a), tb = (b), tt; \
568 t1 = t0+t0; t2 += (t1<t0)?1:0; \ 585 BN_ULONG lo = ta * tb; \
569 c0 += t1; t2 += (c0<t1)?1:0; \ 586 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
570 c1 += t2; c2 += (c1<t2)?1:0; \ 587 c0 += lo; tt = hi + ((c0<lo)?1:0); \
571 } 588 c1 += tt; c2 += (c1<tt)?1:0; \
572 589 c0 += lo; hi += (c0<lo)?1:0; \
573#define sqr_add_c(a,i,c0,c1,c2) { \ 590 c1 += hi; c2 += (c1<hi)?1:0; \
574 BN_ULONG ta=(a)[i]; \ 591 } while(0)
575 t1 = ta * ta; \ 592
576 t2 = BN_UMULT_HIGH(ta,ta); \ 593#define sqr_add_c(a,i,c0,c1,c2) do { \
577 c0 += t1; t2 += (c0<t1)?1:0; \ 594 BN_ULONG ta = (a)[i]; \
578 c1 += t2; c2 += (c1<t2)?1:0; \ 595 BN_ULONG lo = ta * ta; \
579 } 596 BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \
597 c0 += lo; hi += (c0<lo)?1:0; \
598 c1 += hi; c2 += (c1<hi)?1:0; \
599 } while(0)
580 600
581#define sqr_add_c2(a,i,j,c0,c1,c2) \ 601#define sqr_add_c2(a,i,j,c0,c1,c2) \
582 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 602 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
583 603
584#else /* !BN_LLONG */ 604#else /* !BN_LLONG */
585#define mul_add_c(a,b,c0,c1,c2) \ 605/*
586 t1=LBITS(a); t2=HBITS(a); \ 606 * Keep in mind that additions to hi can not overflow, because
587 bl=LBITS(b); bh=HBITS(b); \ 607 * the high word of a multiplication result cannot be all-ones.
588 mul64(t1,t2,bl,bh); \ 608 */
589 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 609#define mul_add_c(a,b,c0,c1,c2) do { \
590 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 610 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
591 611 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
592#define mul_add_c2(a,b,c0,c1,c2) \ 612 mul64(lo,hi,bl,bh); \
593 t1=LBITS(a); t2=HBITS(a); \ 613 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
594 bl=LBITS(b); bh=HBITS(b); \ 614 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
595 mul64(t1,t2,bl,bh); \ 615 } while(0)
596 if (t2 & BN_TBIT) c2++; \ 616
597 t2=(t2+t2)&BN_MASK2; \ 617#define mul_add_c2(a,b,c0,c1,c2) do { \
598 if (t1 & BN_TBIT) t2++; \ 618 BN_ULONG tt; \
599 t1=(t1+t1)&BN_MASK2; \ 619 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
600 c0=(c0+t1)&BN_MASK2; \ 620 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
601 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 621 mul64(lo,hi,bl,bh); \
602 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 622 tt = hi; \
603 623 c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
604#define sqr_add_c(a,i,c0,c1,c2) \ 624 c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
605 sqr64(t1,t2,(a)[i]); \ 625 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
606 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 626 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
607 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 627 } while(0)
628
629#define sqr_add_c(a,i,c0,c1,c2) do { \
630 BN_ULONG lo, hi; \
631 sqr64(lo,hi,(a)[i]); \
632 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
633 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
634 } while(0)
608 635
609#define sqr_add_c2(a,i,j,c0,c1,c2) \ 636#define sqr_add_c2(a,i,j,c0,c1,c2) \
610 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 637 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
@@ -613,12 +640,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
613void 640void
614bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 641bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
615{ 642{
616#ifdef BN_LLONG
617 BN_ULLONG t;
618#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
619 BN_ULONG bl, bh;
620#endif
621 BN_ULONG t1, t2;
622 BN_ULONG c1, c2, c3; 643 BN_ULONG c1, c2, c3;
623 644
624 c1 = 0; 645 c1 = 0;
@@ -723,12 +744,6 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
723void 744void
724bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 745bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
725{ 746{
726#ifdef BN_LLONG
727 BN_ULLONG t;
728#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
729 BN_ULONG bl, bh;
730#endif
731 BN_ULONG t1, t2;
732 BN_ULONG c1, c2, c3; 747 BN_ULONG c1, c2, c3;
733 748
734 c1 = 0; 749 c1 = 0;
@@ -769,12 +784,6 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
769void 784void
770bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 785bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
771{ 786{
772#ifdef BN_LLONG
773 BN_ULLONG t, tt;
774#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
775 BN_ULONG bl, bh;
776#endif
777 BN_ULONG t1, t2;
778 BN_ULONG c1, c2, c3; 787 BN_ULONG c1, c2, c3;
779 788
780 c1 = 0; 789 c1 = 0;
@@ -851,12 +860,6 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
851void 860void
852bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 861bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
853{ 862{
854#ifdef BN_LLONG
855 BN_ULLONG t, tt;
856#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
857 BN_ULONG bl, bh;
858#endif
859 BN_ULONG t1, t2;
860 BN_ULONG c1, c2, c3; 863 BN_ULONG c1, c2, c3;
861 864
862 c1 = 0; 865 c1 = 0;
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips.pl b/src/lib/libssl/src/crypto/bn/asm/mips.pl
index d2f3ef7bbf..215c9a7483 100644
--- a/src/lib/libssl/src/crypto/bn/asm/mips.pl
+++ b/src/lib/libssl/src/crypto/bn/asm/mips.pl
@@ -1872,6 +1872,41 @@ ___
1872 1872
1873($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); 1873($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3);
1874 1874
1875sub add_c2 () {
1876my ($hi,$lo,$c0,$c1,$c2,
1877 $warm, # !$warm denotes first call with specific sequence of
1878 # $c_[XYZ] when there is no Z-carry to accumulate yet;
1879 $an,$bn # these two are arguments for multiplication which
1880 # result is used in *next* step [which is why it's
1881 # commented as "forward multiplication" below];
1882 )=@_;
1883$code.=<<___;
1884 mflo $lo
1885 mfhi $hi
1886 $ADDU $c0,$lo
1887 sltu $at,$c0,$lo
1888 $MULTU $an,$bn # forward multiplication
1889 $ADDU $c0,$lo
1890 $ADDU $at,$hi
1891 sltu $lo,$c0,$lo
1892 $ADDU $c1,$at
1893 $ADDU $hi,$lo
1894___
1895$code.=<<___ if (!$warm);
1896 sltu $c2,$c1,$at
1897 $ADDU $c1,$hi
1898 sltu $hi,$c1,$hi
1899 $ADDU $c2,$hi
1900___
1901$code.=<<___ if ($warm);
1902 sltu $at,$c1,$at
1903 $ADDU $c1,$hi
1904 $ADDU $c2,$at
1905 sltu $hi,$c1,$hi
1906 $ADDU $c2,$hi
1907___
1908}
1909
1875$code.=<<___; 1910$code.=<<___;
1876 1911
1877.align 5 1912.align 5
@@ -1920,21 +1955,10 @@ $code.=<<___;
1920 sltu $at,$c_2,$t_1 1955 sltu $at,$c_2,$t_1
1921 $ADDU $c_3,$t_2,$at 1956 $ADDU $c_3,$t_2,$at
1922 $ST $c_2,$BNSZ($a0) 1957 $ST $c_2,$BNSZ($a0)
1923 1958___
1924 mflo $t_1 1959 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
1925 mfhi $t_2 1960 $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
1926 slt $c_2,$t_2,$zero 1961$code.=<<___;
1927 $SLL $t_2,1
1928 $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
1929 slt $a2,$t_1,$zero
1930 $ADDU $t_2,$a2
1931 $SLL $t_1,1
1932 $ADDU $c_3,$t_1
1933 sltu $at,$c_3,$t_1
1934 $ADDU $t_2,$at
1935 $ADDU $c_1,$t_2
1936 sltu $at,$c_1,$t_2
1937 $ADDU $c_2,$at
1938 mflo $t_1 1962 mflo $t_1
1939 mfhi $t_2 1963 mfhi $t_2
1940 $ADDU $c_3,$t_1 1964 $ADDU $c_3,$t_1
@@ -1945,67 +1969,19 @@ $code.=<<___;
1945 sltu $at,$c_1,$t_2 1969 sltu $at,$c_1,$t_2
1946 $ADDU $c_2,$at 1970 $ADDU $c_2,$at
1947 $ST $c_3,2*$BNSZ($a0) 1971 $ST $c_3,2*$BNSZ($a0)
1948 1972___
1949 mflo $t_1 1973 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
1950 mfhi $t_2 1974 $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3);
1951 slt $c_3,$t_2,$zero 1975 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
1952 $SLL $t_2,1 1976 $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1);
1953 $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); 1977$code.=<<___;
1954 slt $a2,$t_1,$zero
1955 $ADDU $t_2,$a2
1956 $SLL $t_1,1
1957 $ADDU $c_1,$t_1
1958 sltu $at,$c_1,$t_1
1959 $ADDU $t_2,$at
1960 $ADDU $c_2,$t_2
1961 sltu $at,$c_2,$t_2
1962 $ADDU $c_3,$at
1963 mflo $t_1
1964 mfhi $t_2
1965 slt $at,$t_2,$zero
1966 $ADDU $c_3,$at
1967 $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1);
1968 $SLL $t_2,1
1969 slt $a2,$t_1,$zero
1970 $ADDU $t_2,$a2
1971 $SLL $t_1,1
1972 $ADDU $c_1,$t_1
1973 sltu $at,$c_1,$t_1
1974 $ADDU $t_2,$at
1975 $ADDU $c_2,$t_2
1976 sltu $at,$c_2,$t_2
1977 $ADDU $c_3,$at
1978 $ST $c_1,3*$BNSZ($a0) 1978 $ST $c_1,3*$BNSZ($a0)
1979 1979___
1980 mflo $t_1 1980 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
1981 mfhi $t_2 1981 $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
1982 slt $c_1,$t_2,$zero 1982 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
1983 $SLL $t_2,1 1983 $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
1984 $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); 1984$code.=<<___;
1985 slt $a2,$t_1,$zero
1986 $ADDU $t_2,$a2
1987 $SLL $t_1,1
1988 $ADDU $c_2,$t_1
1989 sltu $at,$c_2,$t_1
1990 $ADDU $t_2,$at
1991 $ADDU $c_3,$t_2
1992 sltu $at,$c_3,$t_2
1993 $ADDU $c_1,$at
1994 mflo $t_1
1995 mfhi $t_2
1996 slt $at,$t_2,$zero
1997 $ADDU $c_1,$at
1998 $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
1999 $SLL $t_2,1
2000 slt $a2,$t_1,$zero
2001 $ADDU $t_2,$a2
2002 $SLL $t_1,1
2003 $ADDU $c_2,$t_1
2004 sltu $at,$c_2,$t_1
2005 $ADDU $t_2,$at
2006 $ADDU $c_3,$t_2
2007 sltu $at,$c_3,$t_2
2008 $ADDU $c_1,$at
2009 mflo $t_1 1985 mflo $t_1
2010 mfhi $t_2 1986 mfhi $t_2
2011 $ADDU $c_2,$t_1 1987 $ADDU $c_2,$t_1
@@ -2016,97 +1992,23 @@ $code.=<<___;
2016 sltu $at,$c_3,$t_2 1992 sltu $at,$c_3,$t_2
2017 $ADDU $c_1,$at 1993 $ADDU $c_1,$at
2018 $ST $c_2,4*$BNSZ($a0) 1994 $ST $c_2,4*$BNSZ($a0)
2019 1995___
2020 mflo $t_1 1996 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2021 mfhi $t_2 1997 $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2);
2022 slt $c_2,$t_2,$zero 1998 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2023 $SLL $t_2,1 1999 $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2);
2024 $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); 2000 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2025 slt $a2,$t_1,$zero 2001 $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3);
2026 $ADDU $t_2,$a2 2002$code.=<<___;
2027 $SLL $t_1,1
2028 $ADDU $c_3,$t_1
2029 sltu $at,$c_3,$t_1
2030 $ADDU $t_2,$at
2031 $ADDU $c_1,$t_2
2032 sltu $at,$c_1,$t_2
2033 $ADDU $c_2,$at
2034 mflo $t_1
2035 mfhi $t_2
2036 slt $at,$t_2,$zero
2037 $ADDU $c_2,$at
2038 $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2);
2039 $SLL $t_2,1
2040 slt $a2,$t_1,$zero
2041 $ADDU $t_2,$a2
2042 $SLL $t_1,1
2043 $ADDU $c_3,$t_1
2044 sltu $at,$c_3,$t_1
2045 $ADDU $t_2,$at
2046 $ADDU $c_1,$t_2
2047 sltu $at,$c_1,$t_2
2048 $ADDU $c_2,$at
2049 mflo $t_1
2050 mfhi $t_2
2051 slt $at,$t_2,$zero
2052 $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3);
2053 $ADDU $c_2,$at
2054 $SLL $t_2,1
2055 slt $a2,$t_1,$zero
2056 $ADDU $t_2,$a2
2057 $SLL $t_1,1
2058 $ADDU $c_3,$t_1
2059 sltu $at,$c_3,$t_1
2060 $ADDU $t_2,$at
2061 $ADDU $c_1,$t_2
2062 sltu $at,$c_1,$t_2
2063 $ADDU $c_2,$at
2064 $ST $c_3,5*$BNSZ($a0) 2003 $ST $c_3,5*$BNSZ($a0)
2065 2004___
2066 mflo $t_1 2005 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2067 mfhi $t_2 2006 $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3);
2068 slt $c_3,$t_2,$zero 2007 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2069 $SLL $t_2,1 2008 $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3);
2070 $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); 2009 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2071 slt $a2,$t_1,$zero 2010 $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
2072 $ADDU $t_2,$a2 2011$code.=<<___;
2073 $SLL $t_1,1
2074 $ADDU $c_1,$t_1
2075 sltu $at,$c_1,$t_1
2076 $ADDU $t_2,$at
2077 $ADDU $c_2,$t_2
2078 sltu $at,$c_2,$t_2
2079 $ADDU $c_3,$at
2080 mflo $t_1
2081 mfhi $t_2
2082 slt $at,$t_2,$zero
2083 $ADDU $c_3,$at
2084 $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3);
2085 $SLL $t_2,1
2086 slt $a2,$t_1,$zero
2087 $ADDU $t_2,$a2
2088 $SLL $t_1,1
2089 $ADDU $c_1,$t_1
2090 sltu $at,$c_1,$t_1
2091 $ADDU $t_2,$at
2092 $ADDU $c_2,$t_2
2093 sltu $at,$c_2,$t_2
2094 $ADDU $c_3,$at
2095 mflo $t_1
2096 mfhi $t_2
2097 slt $at,$t_2,$zero
2098 $ADDU $c_3,$at
2099 $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
2100 $SLL $t_2,1
2101 slt $a2,$t_1,$zero
2102 $ADDU $t_2,$a2
2103 $SLL $t_1,1
2104 $ADDU $c_1,$t_1
2105 sltu $at,$c_1,$t_1
2106 $ADDU $t_2,$at
2107 $ADDU $c_2,$t_2
2108 sltu $at,$c_2,$t_2
2109 $ADDU $c_3,$at
2110 mflo $t_1 2012 mflo $t_1
2111 mfhi $t_2 2013 mfhi $t_2
2112 $ADDU $c_1,$t_1 2014 $ADDU $c_1,$t_1
@@ -2117,112 +2019,25 @@ $code.=<<___;
2117 sltu $at,$c_2,$t_2 2019 sltu $at,$c_2,$t_2
2118 $ADDU $c_3,$at 2020 $ADDU $c_3,$at
2119 $ST $c_1,6*$BNSZ($a0) 2021 $ST $c_1,6*$BNSZ($a0)
2120 2022___
2121 mflo $t_1 2023 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2122 mfhi $t_2 2024 $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1);
2123 slt $c_1,$t_2,$zero 2025 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2124 $SLL $t_2,1 2026 $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1);
2125 $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); 2027 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2126 slt $a2,$t_1,$zero 2028 $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1);
2127 $ADDU $t_2,$a2 2029 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2128 $SLL $t_1,1 2030 $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2);
2129 $ADDU $c_2,$t_1 2031$code.=<<___;
2130 sltu $at,$c_2,$t_1
2131 $ADDU $t_2,$at
2132 $ADDU $c_3,$t_2
2133 sltu $at,$c_3,$t_2
2134 $ADDU $c_1,$at
2135 mflo $t_1
2136 mfhi $t_2
2137 slt $at,$t_2,$zero
2138 $ADDU $c_1,$at
2139 $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1);
2140 $SLL $t_2,1
2141 slt $a2,$t_1,$zero
2142 $ADDU $t_2,$a2
2143 $SLL $t_1,1
2144 $ADDU $c_2,$t_1
2145 sltu $at,$c_2,$t_1
2146 $ADDU $t_2,$at
2147 $ADDU $c_3,$t_2
2148 sltu $at,$c_3,$t_2
2149 $ADDU $c_1,$at
2150 mflo $t_1
2151 mfhi $t_2
2152 slt $at,$t_2,$zero
2153 $ADDU $c_1,$at
2154 $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1);
2155 $SLL $t_2,1
2156 slt $a2,$t_1,$zero
2157 $ADDU $t_2,$a2
2158 $SLL $t_1,1
2159 $ADDU $c_2,$t_1
2160 sltu $at,$c_2,$t_1
2161 $ADDU $t_2,$at
2162 $ADDU $c_3,$t_2
2163 sltu $at,$c_3,$t_2
2164 $ADDU $c_1,$at
2165 mflo $t_1
2166 mfhi $t_2
2167 slt $at,$t_2,$zero
2168 $ADDU $c_1,$at
2169 $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2);
2170 $SLL $t_2,1
2171 slt $a2,$t_1,$zero
2172 $ADDU $t_2,$a2
2173 $SLL $t_1,1
2174 $ADDU $c_2,$t_1
2175 sltu $at,$c_2,$t_1
2176 $ADDU $t_2,$at
2177 $ADDU $c_3,$t_2
2178 sltu $at,$c_3,$t_2
2179 $ADDU $c_1,$at
2180 $ST $c_2,7*$BNSZ($a0) 2032 $ST $c_2,7*$BNSZ($a0)
2181 2033___
2182 mflo $t_1 2034 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2183 mfhi $t_2 2035 $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2);
2184 slt $c_2,$t_2,$zero 2036 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2185 $SLL $t_2,1 2037 $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2);
2186 $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); 2038 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2187 slt $a2,$t_1,$zero 2039 $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2);
2188 $ADDU $t_2,$a2 2040$code.=<<___;
2189 $SLL $t_1,1
2190 $ADDU $c_3,$t_1
2191 sltu $at,$c_3,$t_1
2192 $ADDU $t_2,$at
2193 $ADDU $c_1,$t_2
2194 sltu $at,$c_1,$t_2
2195 $ADDU $c_2,$at
2196 mflo $t_1
2197 mfhi $t_2
2198 slt $at,$t_2,$zero
2199 $ADDU $c_2,$at
2200 $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2);
2201 $SLL $t_2,1
2202 slt $a2,$t_1,$zero
2203 $ADDU $t_2,$a2
2204 $SLL $t_1,1
2205 $ADDU $c_3,$t_1
2206 sltu $at,$c_3,$t_1
2207 $ADDU $t_2,$at
2208 $ADDU $c_1,$t_2
2209 sltu $at,$c_1,$t_2
2210 $ADDU $c_2,$at
2211 mflo $t_1
2212 mfhi $t_2
2213 slt $at,$t_2,$zero
2214 $ADDU $c_2,$at
2215 $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2);
2216 $SLL $t_2,1
2217 slt $a2,$t_1,$zero
2218 $ADDU $t_2,$a2
2219 $SLL $t_1,1
2220 $ADDU $c_3,$t_1
2221 sltu $at,$c_3,$t_1
2222 $ADDU $t_2,$at
2223 $ADDU $c_1,$t_2
2224 sltu $at,$c_1,$t_2
2225 $ADDU $c_2,$at
2226 mflo $t_1 2041 mflo $t_1
2227 mfhi $t_2 2042 mfhi $t_2
2228 $ADDU $c_3,$t_1 2043 $ADDU $c_3,$t_1
@@ -2233,82 +2048,21 @@ $code.=<<___;
2233 sltu $at,$c_1,$t_2 2048 sltu $at,$c_1,$t_2
2234 $ADDU $c_2,$at 2049 $ADDU $c_2,$at
2235 $ST $c_3,8*$BNSZ($a0) 2050 $ST $c_3,8*$BNSZ($a0)
2236 2051___
2237 mflo $t_1 2052 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2238 mfhi $t_2 2053 $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3);
2239 slt $c_3,$t_2,$zero 2054 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2240 $SLL $t_2,1 2055 $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3);
2241 $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); 2056 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2242 slt $a2,$t_1,$zero 2057 $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1);
2243 $ADDU $t_2,$a2 2058$code.=<<___;
2244 $SLL $t_1,1
2245 $ADDU $c_1,$t_1
2246 sltu $at,$c_1,$t_1
2247 $ADDU $t_2,$at
2248 $ADDU $c_2,$t_2
2249 sltu $at,$c_2,$t_2
2250 $ADDU $c_3,$at
2251 mflo $t_1
2252 mfhi $t_2
2253 slt $at,$t_2,$zero
2254 $ADDU $c_3,$at
2255 $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3);
2256 $SLL $t_2,1
2257 slt $a2,$t_1,$zero
2258 $ADDU $t_2,$a2
2259 $SLL $t_1,1
2260 $ADDU $c_1,$t_1
2261 sltu $at,$c_1,$t_1
2262 $ADDU $t_2,$at
2263 $ADDU $c_2,$t_2
2264 sltu $at,$c_2,$t_2
2265 $ADDU $c_3,$at
2266 mflo $t_1
2267 mfhi $t_2
2268 slt $at,$t_2,$zero
2269 $ADDU $c_3,$at
2270 $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1);
2271 $SLL $t_2,1
2272 slt $a2,$t_1,$zero
2273 $ADDU $t_2,$a2
2274 $SLL $t_1,1
2275 $ADDU $c_1,$t_1
2276 sltu $at,$c_1,$t_1
2277 $ADDU $t_2,$at
2278 $ADDU $c_2,$t_2
2279 sltu $at,$c_2,$t_2
2280 $ADDU $c_3,$at
2281 $ST $c_1,9*$BNSZ($a0) 2059 $ST $c_1,9*$BNSZ($a0)
2282 2060___
2283 mflo $t_1 2061 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2284 mfhi $t_2 2062 $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1);
2285 slt $c_1,$t_2,$zero 2063 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1,
2286 $SLL $t_2,1 2064 $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1);
2287 $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); 2065$code.=<<___;
2288 slt $a2,$t_1,$zero
2289 $ADDU $t_2,$a2
2290 $SLL $t_1,1
2291 $ADDU $c_2,$t_1
2292 sltu $at,$c_2,$t_1
2293 $ADDU $t_2,$at
2294 $ADDU $c_3,$t_2
2295 sltu $at,$c_3,$t_2
2296 $ADDU $c_1,$at
2297 mflo $t_1
2298 mfhi $t_2
2299 slt $at,$t_2,$zero
2300 $ADDU $c_1,$at
2301 $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1);
2302 $SLL $t_2,1
2303 slt $a2,$t_1,$zero
2304 $ADDU $t_2,$a2
2305 $SLL $t_1,1
2306 $ADDU $c_2,$t_1
2307 sltu $at,$c_2,$t_1
2308 $ADDU $t_2,$at
2309 $ADDU $c_3,$t_2
2310 sltu $at,$c_3,$t_2
2311 $ADDU $c_1,$at
2312 mflo $t_1 2066 mflo $t_1
2313 mfhi $t_2 2067 mfhi $t_2
2314 $ADDU $c_2,$t_1 2068 $ADDU $c_2,$t_1
@@ -2319,52 +2073,17 @@ $code.=<<___;
2319 sltu $at,$c_3,$t_2 2073 sltu $at,$c_3,$t_2
2320 $ADDU $c_1,$at 2074 $ADDU $c_1,$at
2321 $ST $c_2,10*$BNSZ($a0) 2075 $ST $c_2,10*$BNSZ($a0)
2322 2076___
2323 mflo $t_1 2077 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2324 mfhi $t_2 2078 $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2);
2325 slt $c_2,$t_2,$zero 2079 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1,
2326 $SLL $t_2,1 2080 $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3);
2327 $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); 2081$code.=<<___;
2328 slt $a2,$t_1,$zero
2329 $ADDU $t_2,$a2
2330 $SLL $t_1,1
2331 $ADDU $c_3,$t_1
2332 sltu $at,$c_3,$t_1
2333 $ADDU $t_2,$at
2334 $ADDU $c_1,$t_2
2335 sltu $at,$c_1,$t_2
2336 $ADDU $c_2,$at
2337 mflo $t_1
2338 mfhi $t_2
2339 slt $at,$t_2,$zero
2340 $ADDU $c_2,$at
2341 $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3);
2342 $SLL $t_2,1
2343 slt $a2,$t_1,$zero
2344 $ADDU $t_2,$a2
2345 $SLL $t_1,1
2346 $ADDU $c_3,$t_1
2347 sltu $at,$c_3,$t_1
2348 $ADDU $t_2,$at
2349 $ADDU $c_1,$t_2
2350 sltu $at,$c_1,$t_2
2351 $ADDU $c_2,$at
2352 $ST $c_3,11*$BNSZ($a0) 2082 $ST $c_3,11*$BNSZ($a0)
2353 2083___
2354 mflo $t_1 2084 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2355 mfhi $t_2 2085 $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3);
2356 slt $c_3,$t_2,$zero 2086$code.=<<___;
2357 $SLL $t_2,1
2358 $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3);
2359 slt $a2,$t_1,$zero
2360 $ADDU $t_2,$a2
2361 $SLL $t_1,1
2362 $ADDU $c_1,$t_1
2363 sltu $at,$c_1,$t_1
2364 $ADDU $t_2,$at
2365 $ADDU $c_2,$t_2
2366 sltu $at,$c_2,$t_2
2367 $ADDU $c_3,$at
2368 mflo $t_1 2087 mflo $t_1
2369 mfhi $t_2 2088 mfhi $t_2
2370 $ADDU $c_1,$t_1 2089 $ADDU $c_1,$t_1
@@ -2375,21 +2094,10 @@ $code.=<<___;
2375 sltu $at,$c_2,$t_2 2094 sltu $at,$c_2,$t_2
2376 $ADDU $c_3,$at 2095 $ADDU $c_3,$at
2377 $ST $c_1,12*$BNSZ($a0) 2096 $ST $c_1,12*$BNSZ($a0)
2378 2097___
2379 mflo $t_1 2098 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2380 mfhi $t_2 2099 $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2);
2381 slt $c_1,$t_2,$zero 2100$code.=<<___;
2382 $SLL $t_2,1
2383 $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2);
2384 slt $a2,$t_1,$zero
2385 $ADDU $t_2,$a2
2386 $SLL $t_1,1
2387 $ADDU $c_2,$t_1
2388 sltu $at,$c_2,$t_1
2389 $ADDU $t_2,$at
2390 $ADDU $c_3,$t_2
2391 sltu $at,$c_3,$t_2
2392 $ADDU $c_1,$at
2393 $ST $c_2,13*$BNSZ($a0) 2101 $ST $c_2,13*$BNSZ($a0)
2394 2102
2395 mflo $t_1 2103 mflo $t_1
@@ -2457,21 +2165,10 @@ $code.=<<___;
2457 sltu $at,$c_2,$t_1 2165 sltu $at,$c_2,$t_1
2458 $ADDU $c_3,$t_2,$at 2166 $ADDU $c_3,$t_2,$at
2459 $ST $c_2,$BNSZ($a0) 2167 $ST $c_2,$BNSZ($a0)
2460 2168___
2461 mflo $t_1 2169 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2462 mfhi $t_2 2170 $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2);
2463 slt $c_2,$t_2,$zero 2171$code.=<<___;
2464 $SLL $t_2,1
2465 $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2);
2466 slt $a2,$t_1,$zero
2467 $ADDU $t_2,$a2
2468 $SLL $t_1,1
2469 $ADDU $c_3,$t_1
2470 sltu $at,$c_3,$t_1
2471 $ADDU $t_2,$at
2472 $ADDU $c_1,$t_2
2473 sltu $at,$c_1,$t_2
2474 $ADDU $c_2,$at
2475 mflo $t_1 2172 mflo $t_1
2476 mfhi $t_2 2173 mfhi $t_2
2477 $ADDU $c_3,$t_1 2174 $ADDU $c_3,$t_1
@@ -2482,52 +2179,17 @@ $code.=<<___;
2482 sltu $at,$c_1,$t_2 2179 sltu $at,$c_1,$t_2
2483 $ADDU $c_2,$at 2180 $ADDU $c_2,$at
2484 $ST $c_3,2*$BNSZ($a0) 2181 $ST $c_3,2*$BNSZ($a0)
2485 2182___
2486 mflo $t_1 2183 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0,
2487 mfhi $t_2 2184 $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3);
2488 slt $c_3,$t_2,$zero 2185 &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1,
2489 $SLL $t_2,1 2186 $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1);
2490 $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); 2187$code.=<<___;
2491 slt $a2,$t_1,$zero
2492 $ADDU $t_2,$a2
2493 $SLL $t_1,1
2494 $ADDU $c_1,$t_1
2495 sltu $at,$c_1,$t_1
2496 $ADDU $t_2,$at
2497 $ADDU $c_2,$t_2
2498 sltu $at,$c_2,$t_2
2499 $ADDU $c_3,$at
2500 mflo $t_1
2501 mfhi $t_2
2502 slt $at,$t_2,$zero
2503 $ADDU $c_3,$at
2504 $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1);
2505 $SLL $t_2,1
2506 slt $a2,$t_1,$zero
2507 $ADDU $t_2,$a2
2508 $SLL $t_1,1
2509 $ADDU $c_1,$t_1
2510 sltu $at,$c_1,$t_1
2511 $ADDU $t_2,$at
2512 $ADDU $c_2,$t_2
2513 sltu $at,$c_2,$t_2
2514 $ADDU $c_3,$at
2515 $ST $c_1,3*$BNSZ($a0) 2188 $ST $c_1,3*$BNSZ($a0)
2516 2189___
2517 mflo $t_1 2190 &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0,
2518 mfhi $t_2 2191 $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1);
2519 slt $c_1,$t_2,$zero 2192$code.=<<___;
2520 $SLL $t_2,1
2521 $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1);
2522 slt $a2,$t_1,$zero
2523 $ADDU $t_2,$a2
2524 $SLL $t_1,1
2525 $ADDU $c_2,$t_1
2526 sltu $at,$c_2,$t_1
2527 $ADDU $t_2,$at
2528 $ADDU $c_3,$t_2
2529 sltu $at,$c_3,$t_2
2530 $ADDU $c_1,$at
2531 mflo $t_1 2193 mflo $t_1
2532 mfhi $t_2 2194 mfhi $t_2
2533 $ADDU $c_2,$t_1 2195 $ADDU $c_2,$t_1
@@ -2538,21 +2200,10 @@ $code.=<<___;
2538 sltu $at,$c_3,$t_2 2200 sltu $at,$c_3,$t_2
2539 $ADDU $c_1,$at 2201 $ADDU $c_1,$at
2540 $ST $c_2,4*$BNSZ($a0) 2202 $ST $c_2,4*$BNSZ($a0)
2541 2203___
2542 mflo $t_1 2204 &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0,
2543 mfhi $t_2 2205 $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3);
2544 slt $c_2,$t_2,$zero 2206$code.=<<___;
2545 $SLL $t_2,1
2546 $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3);
2547 slt $a2,$t_1,$zero
2548 $ADDU $t_2,$a2
2549 $SLL $t_1,1
2550 $ADDU $c_3,$t_1
2551 sltu $at,$c_3,$t_1
2552 $ADDU $t_2,$at
2553 $ADDU $c_1,$t_2
2554 sltu $at,$c_1,$t_2
2555 $ADDU $c_2,$at
2556 $ST $c_3,5*$BNSZ($a0) 2207 $ST $c_3,5*$BNSZ($a0)
2557 2208
2558 mflo $t_1 2209 mflo $t_1
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c b/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c
index c9a2b6be73..9deffa71f1 100644
--- a/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c
+++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: x86_64-gcc.c,v 1.4 2014/10/28 07:35:58 jsg Exp $ */ 1/* $OpenBSD: x86_64-gcc.c,v 1.5 2015/02/25 15:39:49 bcook Exp $ */
2#include "../bn_lcl.h" 2#include "../bn_lcl.h"
3#if !(defined(__GNUC__) && __GNUC__>=2) 3#if !(defined(__GNUC__) && __GNUC__>=2)
4# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ 4# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
@@ -270,77 +270,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
270/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ 270/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
271/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 271/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
272 272
273/*
274 * Keep in mind that carrying into high part of multiplication result
275 * can not overflow, because it cannot be all-ones.
276 */
273#if 0 277#if 0
274/* original macros are kept for reference purposes */ 278/* original macros are kept for reference purposes */
275#define mul_add_c(a,b,c0,c1,c2) { \ 279#define mul_add_c(a,b,c0,c1,c2) do { \
276 BN_ULONG ta=(a),tb=(b); \ 280 BN_ULONG ta = (a), tb = (b); \
277 t1 = ta * tb; \ 281 BN_ULONG lo, hi; \
278 t2 = BN_UMULT_HIGH(ta,tb); \ 282 BN_UMULT_LOHI(lo,hi,ta,tb); \
279 c0 += t1; t2 += (c0<t1)?1:0; \ 283 c0 += lo; hi += (c0<lo)?1:0; \
280 c1 += t2; c2 += (c1<t2)?1:0; \ 284 c1 += hi; c2 += (c1<hi)?1:0; \
281 } 285 } while(0)
282 286
283#define mul_add_c2(a,b,c0,c1,c2) { \ 287#define mul_add_c2(a,b,c0,c1,c2) do { \
284 BN_ULONG ta=(a),tb=(b),t0; \ 288 BN_ULONG ta = (a), tb = (b); \
285 t1 = BN_UMULT_HIGH(ta,tb); \ 289 BN_ULONG lo, hi, tt; \
286 t0 = ta * tb; \ 290 BN_UMULT_LOHI(lo,hi,ta,tb); \
287 t2 = t1+t1; c2 += (t2<t1)?1:0; \ 291 c0 += lo; tt = hi+((c0<lo)?1:0); \
288 t1 = t0+t0; t2 += (t1<t0)?1:0; \ 292 c1 += tt; c2 += (c1<tt)?1:0; \
289 c0 += t1; t2 += (c0<t1)?1:0; \ 293 c0 += lo; hi += (c0<lo)?1:0; \
290 c1 += t2; c2 += (c1<t2)?1:0; \ 294 c1 += hi; c2 += (c1<hi)?1:0; \
291 } 295 } while(0)
296
297#define sqr_add_c(a,i,c0,c1,c2) do { \
298 BN_ULONG ta = (a)[i]; \
299 BN_ULONG lo, hi; \
300 BN_UMULT_LOHI(lo,hi,ta,ta); \
301 c0 += lo; hi += (c0<lo)?1:0; \
302 c1 += hi; c2 += (c1<hi)?1:0; \
303 } while(0)
292#else 304#else
293#define mul_add_c(a,b,c0,c1,c2) do { \ 305#define mul_add_c(a,b,c0,c1,c2) do { \
306 BN_ULONG t1,t2; \
294 asm ("mulq %3" \ 307 asm ("mulq %3" \
295 : "=a"(t1),"=d"(t2) \ 308 : "=a"(t1),"=d"(t2) \
296 : "a"(a),"m"(b) \ 309 : "a"(a),"m"(b) \
297 : "cc"); \ 310 : "cc"); \
298 asm ("addq %2,%0; adcq %3,%1" \ 311 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
299 : "+r"(c0),"+d"(t2) \ 312 : "+r"(c0),"+r"(c1),"+r"(c2) \
300 : "a"(t1),"g"(0) \ 313 : "r"(t1),"r"(t2),"g"(0) \
301 : "cc"); \ 314 : "cc"); \
302 asm ("addq %2,%0; adcq %3,%1" \
303 : "+r"(c1),"+r"(c2) \
304 : "d"(t2),"g"(0) \
305 : "cc"); \
306 } while (0) 315 } while (0)
307 316
308#define sqr_add_c(a,i,c0,c1,c2) do { \ 317#define sqr_add_c(a,i,c0,c1,c2) do { \
318 BN_ULONG t1,t2; \
309 asm ("mulq %2" \ 319 asm ("mulq %2" \
310 : "=a"(t1),"=d"(t2) \ 320 : "=a"(t1),"=d"(t2) \
311 : "a"(a[i]) \ 321 : "a"(a[i]) \
312 : "cc"); \ 322 : "cc"); \
313 asm ("addq %2,%0; adcq %3,%1" \ 323 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
314 : "+r"(c0),"+d"(t2) \ 324 : "+r"(c0),"+r"(c1),"+r"(c2) \
315 : "a"(t1),"g"(0) \ 325 : "r"(t1),"r"(t2),"g"(0) \
316 : "cc"); \ 326 : "cc"); \
317 asm ("addq %2,%0; adcq %3,%1" \
318 : "+r"(c1),"+r"(c2) \
319 : "d"(t2),"g"(0) \
320 : "cc"); \
321 } while (0) 327 } while (0)
322 328
323#define mul_add_c2(a,b,c0,c1,c2) do { \ 329#define mul_add_c2(a,b,c0,c1,c2) do { \
330 BN_ULONG t1,t2; \
324 asm ("mulq %3" \ 331 asm ("mulq %3" \
325 : "=a"(t1),"=d"(t2) \ 332 : "=a"(t1),"=d"(t2) \
326 : "a"(a),"m"(b) \ 333 : "a"(a),"m"(b) \
327 : "cc"); \ 334 : "cc"); \
328 asm ("addq %0,%0; adcq %2,%1" \ 335 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
329 : "+d"(t2),"+r"(c2) \ 336 : "+r"(c0),"+r"(c1),"+r"(c2) \
330 : "g"(0) \ 337 : "r"(t1),"r"(t2),"g"(0) \
331 : "cc"); \ 338 : "cc"); \
332 asm ("addq %0,%0; adcq %2,%1" \ 339 asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
333 : "+a"(t1),"+d"(t2) \ 340 : "+r"(c0),"+r"(c1),"+r"(c2) \
334 : "g"(0) \ 341 : "r"(t1),"r"(t2),"g"(0) \
335 : "cc"); \ 342 : "cc"); \
336 asm ("addq %2,%0; adcq %3,%1" \
337 : "+r"(c0),"+d"(t2) \
338 : "a"(t1),"g"(0) \
339 : "cc"); \
340 asm ("addq %2,%0; adcq %3,%1" \
341 : "+r"(c1),"+r"(c2) \
342 : "d"(t2),"g"(0) \
343 : "cc"); \
344 } while (0) 343 } while (0)
345#endif 344#endif
346 345
@@ -349,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
349 348
350void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 349void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
351 { 350 {
352 BN_ULONG t1,t2;
353 BN_ULONG c1,c2,c3; 351 BN_ULONG c1,c2,c3;
354 352
355 c1=0; 353 c1=0;
@@ -453,7 +451,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
453 451
454void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 452void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
455 { 453 {
456 BN_ULONG t1,t2;
457 BN_ULONG c1,c2,c3; 454 BN_ULONG c1,c2,c3;
458 455
459 c1=0; 456 c1=0;
@@ -493,7 +490,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
493 490
494void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 491void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
495 { 492 {
496 BN_ULONG t1,t2;
497 BN_ULONG c1,c2,c3; 493 BN_ULONG c1,c2,c3;
498 494
499 c1=0; 495 c1=0;
@@ -569,7 +565,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
569 565
570void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 566void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
571 { 567 {
572 BN_ULONG t1,t2;
573 BN_ULONG c1,c2,c3; 568 BN_ULONG c1,c2,c3;
574 569
575 c1=0; 570 c1=0;
diff --git a/src/lib/libssl/src/crypto/bn/bn_asm.c b/src/lib/libssl/src/crypto/bn/bn_asm.c
index c6efd2513a..49f0ba5d7b 100644
--- a/src/lib/libssl/src/crypto/bn/bn_asm.c
+++ b/src/lib/libssl/src/crypto/bn/bn_asm.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_asm.c,v 1.13 2014/07/11 08:44:47 jsing Exp $ */ 1/* $OpenBSD: bn_asm.c,v 1.14 2015/02/25 15:39:49 bcook Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -495,116 +495,143 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
495/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 495/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
496 496
497#ifdef BN_LLONG 497#ifdef BN_LLONG
498#define mul_add_c(a,b,c0,c1,c2) \ 498/*
499 t=(BN_ULLONG)a*b; \ 499 * Keep in mind that additions to multiplication result can not
500 t1=(BN_ULONG)Lw(t); \ 500 * overflow, because its high half cannot be all-ones.
501 t2=(BN_ULONG)Hw(t); \ 501 */
502 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 502#define mul_add_c(a,b,c0,c1,c2) do { \
503 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 503 BN_ULONG hi; \
504 504 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
505#define mul_add_c2(a,b,c0,c1,c2) \ 505 t += c0; /* no carry */ \
506 t=(BN_ULLONG)a*b; \ 506 c0 = (BN_ULONG)Lw(t); \
507 tt=(t+t)&BN_MASK; \ 507 hi = (BN_ULONG)Hw(t); \
508 if (tt < t) c2++; \ 508 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
509 t1=(BN_ULONG)Lw(tt); \ 509 } while(0)
510 t2=(BN_ULONG)Hw(tt); \ 510
511 c0=(c0+t1)&BN_MASK2; \ 511#define mul_add_c2(a,b,c0,c1,c2) do { \
512 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 512 BN_ULONG hi; \
513 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 513 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
514 514 BN_ULLONG tt = t+c0; /* no carry */ \
515#define sqr_add_c(a,i,c0,c1,c2) \ 515 c0 = (BN_ULONG)Lw(tt); \
516 t=(BN_ULLONG)a[i]*a[i]; \ 516 hi = (BN_ULONG)Hw(tt); \
517 t1=(BN_ULONG)Lw(t); \ 517 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
518 t2=(BN_ULONG)Hw(t); \ 518 t += c0; /* no carry */ \
519 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 519 c0 = (BN_ULONG)Lw(t); \
520 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 520 hi = (BN_ULONG)Hw(t); \
521 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
522 } while(0)
523
524#define sqr_add_c(a,i,c0,c1,c2) do { \
525 BN_ULONG hi; \
526 BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \
527 t += c0; /* no carry */ \
528 c0 = (BN_ULONG)Lw(t); \
529 hi = (BN_ULONG)Hw(t); \
530 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
531 } while(0)
521 532
522#define sqr_add_c2(a,i,j,c0,c1,c2) \ 533#define sqr_add_c2(a,i,j,c0,c1,c2) \
523 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 534 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
524 535
525#elif defined(BN_UMULT_LOHI) 536#elif defined(BN_UMULT_LOHI)
526 537/*
527#define mul_add_c(a,b,c0,c1,c2) { \ 538 * Keep in mind that additions to hi can not overflow, because
528 BN_ULONG ta=(a),tb=(b); \ 539 * the high word of a multiplication result cannot be all-ones.
529 BN_UMULT_LOHI(t1,t2,ta,tb); \ 540 */
530 c0 += t1; t2 += (c0<t1)?1:0; \ 541#define mul_add_c(a,b,c0,c1,c2) do { \
531 c1 += t2; c2 += (c1<t2)?1:0; \ 542 BN_ULONG ta = (a), tb = (b); \
532 } 543 BN_ULONG lo, hi; \
533 544 BN_UMULT_LOHI(lo,hi,ta,tb); \
534#define mul_add_c2(a,b,c0,c1,c2) { \ 545 c0 += lo; hi += (c0<lo)?1:0; \
535 BN_ULONG ta=(a),tb=(b),t0; \ 546 c1 += hi; c2 += (c1<hi)?1:0; \
536 BN_UMULT_LOHI(t0,t1,ta,tb); \ 547 } while(0)
537 t2 = t1+t1; c2 += (t2<t1)?1:0; \ 548
538 t1 = t0+t0; t2 += (t1<t0)?1:0; \ 549#define mul_add_c2(a,b,c0,c1,c2) do { \
539 c0 += t1; t2 += (c0<t1)?1:0; \ 550 BN_ULONG ta = (a), tb = (b); \
540 c1 += t2; c2 += (c1<t2)?1:0; \ 551 BN_ULONG lo, hi, tt; \
541 } 552 BN_UMULT_LOHI(lo,hi,ta,tb); \
542 553 c0 += lo; tt = hi+((c0<lo)?1:0); \
543#define sqr_add_c(a,i,c0,c1,c2) { \ 554 c1 += tt; c2 += (c1<tt)?1:0; \
544 BN_ULONG ta=(a)[i]; \ 555 c0 += lo; hi += (c0<lo)?1:0; \
545 BN_UMULT_LOHI(t1,t2,ta,ta); \ 556 c1 += hi; c2 += (c1<hi)?1:0; \
546 c0 += t1; t2 += (c0<t1)?1:0; \ 557 } while(0)
547 c1 += t2; c2 += (c1<t2)?1:0; \ 558
548 } 559#define sqr_add_c(a,i,c0,c1,c2) do { \
560 BN_ULONG ta = (a)[i]; \
561 BN_ULONG lo, hi; \
562 BN_UMULT_LOHI(lo,hi,ta,ta); \
563 c0 += lo; hi += (c0<lo)?1:0; \
564 c1 += hi; c2 += (c1<hi)?1:0; \
565 } while(0)
549 566
550#define sqr_add_c2(a,i,j,c0,c1,c2) \ 567#define sqr_add_c2(a,i,j,c0,c1,c2) \
551 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 568 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
552 569
553#elif defined(BN_UMULT_HIGH) 570#elif defined(BN_UMULT_HIGH)
554 571/*
555#define mul_add_c(a,b,c0,c1,c2) { \ 572 * Keep in mind that additions to hi can not overflow, because
556 BN_ULONG ta=(a),tb=(b); \ 573 * the high word of a multiplication result cannot be all-ones.
557 t1 = ta * tb; \ 574 */
558 t2 = BN_UMULT_HIGH(ta,tb); \ 575#define mul_add_c(a,b,c0,c1,c2) do { \
559 c0 += t1; t2 += (c0<t1)?1:0; \ 576 BN_ULONG ta = (a), tb = (b); \
560 c1 += t2; c2 += (c1<t2)?1:0; \ 577 BN_ULONG lo = ta * tb; \
561 } 578 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
562 579 c0 += lo; hi += (c0<lo)?1:0; \
563#define mul_add_c2(a,b,c0,c1,c2) { \ 580 c1 += hi; c2 += (c1<hi)?1:0; \
564 BN_ULONG ta=(a),tb=(b),t0; \ 581 } while(0)
565 t1 = BN_UMULT_HIGH(ta,tb); \ 582
566 t0 = ta * tb; \ 583#define mul_add_c2(a,b,c0,c1,c2) do { \
567 t2 = t1+t1; c2 += (t2<t1)?1:0; \ 584 BN_ULONG ta = (a), tb = (b), tt; \
568 t1 = t0+t0; t2 += (t1<t0)?1:0; \ 585 BN_ULONG lo = ta * tb; \
569 c0 += t1; t2 += (c0<t1)?1:0; \ 586 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
570 c1 += t2; c2 += (c1<t2)?1:0; \ 587 c0 += lo; tt = hi + ((c0<lo)?1:0); \
571 } 588 c1 += tt; c2 += (c1<tt)?1:0; \
572 589 c0 += lo; hi += (c0<lo)?1:0; \
573#define sqr_add_c(a,i,c0,c1,c2) { \ 590 c1 += hi; c2 += (c1<hi)?1:0; \
574 BN_ULONG ta=(a)[i]; \ 591 } while(0)
575 t1 = ta * ta; \ 592
576 t2 = BN_UMULT_HIGH(ta,ta); \ 593#define sqr_add_c(a,i,c0,c1,c2) do { \
577 c0 += t1; t2 += (c0<t1)?1:0; \ 594 BN_ULONG ta = (a)[i]; \
578 c1 += t2; c2 += (c1<t2)?1:0; \ 595 BN_ULONG lo = ta * ta; \
579 } 596 BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \
597 c0 += lo; hi += (c0<lo)?1:0; \
598 c1 += hi; c2 += (c1<hi)?1:0; \
599 } while(0)
580 600
581#define sqr_add_c2(a,i,j,c0,c1,c2) \ 601#define sqr_add_c2(a,i,j,c0,c1,c2) \
582 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 602 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
583 603
584#else /* !BN_LLONG */ 604#else /* !BN_LLONG */
585#define mul_add_c(a,b,c0,c1,c2) \ 605/*
586 t1=LBITS(a); t2=HBITS(a); \ 606 * Keep in mind that additions to hi can not overflow, because
587 bl=LBITS(b); bh=HBITS(b); \ 607 * the high word of a multiplication result cannot be all-ones.
588 mul64(t1,t2,bl,bh); \ 608 */
589 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 609#define mul_add_c(a,b,c0,c1,c2) do { \
590 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 610 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
591 611 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
592#define mul_add_c2(a,b,c0,c1,c2) \ 612 mul64(lo,hi,bl,bh); \
593 t1=LBITS(a); t2=HBITS(a); \ 613 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
594 bl=LBITS(b); bh=HBITS(b); \ 614 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
595 mul64(t1,t2,bl,bh); \ 615 } while(0)
596 if (t2 & BN_TBIT) c2++; \ 616
597 t2=(t2+t2)&BN_MASK2; \ 617#define mul_add_c2(a,b,c0,c1,c2) do { \
598 if (t1 & BN_TBIT) t2++; \ 618 BN_ULONG tt; \
599 t1=(t1+t1)&BN_MASK2; \ 619 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
600 c0=(c0+t1)&BN_MASK2; \ 620 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
601 if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 621 mul64(lo,hi,bl,bh); \
602 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 622 tt = hi; \
603 623 c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
604#define sqr_add_c(a,i,c0,c1,c2) \ 624 c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
605 sqr64(t1,t2,(a)[i]); \ 625 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
606 c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 626 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
607 c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 627 } while(0)
628
629#define sqr_add_c(a,i,c0,c1,c2) do { \
630 BN_ULONG lo, hi; \
631 sqr64(lo,hi,(a)[i]); \
632 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
633 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
634 } while(0)
608 635
609#define sqr_add_c2(a,i,j,c0,c1,c2) \ 636#define sqr_add_c2(a,i,j,c0,c1,c2) \
610 mul_add_c2((a)[i],(a)[j],c0,c1,c2) 637 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
@@ -613,12 +640,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
613void 640void
614bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 641bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
615{ 642{
616#ifdef BN_LLONG
617 BN_ULLONG t;
618#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
619 BN_ULONG bl, bh;
620#endif
621 BN_ULONG t1, t2;
622 BN_ULONG c1, c2, c3; 643 BN_ULONG c1, c2, c3;
623 644
624 c1 = 0; 645 c1 = 0;
@@ -723,12 +744,6 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
723void 744void
724bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 745bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
725{ 746{
726#ifdef BN_LLONG
727 BN_ULLONG t;
728#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
729 BN_ULONG bl, bh;
730#endif
731 BN_ULONG t1, t2;
732 BN_ULONG c1, c2, c3; 747 BN_ULONG c1, c2, c3;
733 748
734 c1 = 0; 749 c1 = 0;
@@ -769,12 +784,6 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
769void 784void
770bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 785bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
771{ 786{
772#ifdef BN_LLONG
773 BN_ULLONG t, tt;
774#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
775 BN_ULONG bl, bh;
776#endif
777 BN_ULONG t1, t2;
778 BN_ULONG c1, c2, c3; 787 BN_ULONG c1, c2, c3;
779 788
780 c1 = 0; 789 c1 = 0;
@@ -851,12 +860,6 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
851void 860void
852bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 861bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
853{ 862{
854#ifdef BN_LLONG
855 BN_ULLONG t, tt;
856#elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH)
857 BN_ULONG bl, bh;
858#endif
859 BN_ULONG t1, t2;
860 BN_ULONG c1, c2, c3; 863 BN_ULONG c1, c2, c3;
861 864
862 c1 = 0; 865 c1 = 0;