diff options
| author | bcook <> | 2015-02-25 15:39:49 +0000 |
|---|---|---|
| committer | bcook <> | 2015-02-25 15:39:49 +0000 |
| commit | 432e1d553bd75841b5b29f1a8008b519d538f765 (patch) | |
| tree | fca56e3d23c024e7f0d0132456914f4f3181e5df /src/lib/libcrypto/bn/asm | |
| parent | c95a8d3fbea64773cc8d6de4314c26a413e58a60 (diff) | |
| download | openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.tar.gz openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.tar.bz2 openbsd-432e1d553bd75841b5b29f1a8008b519d538f765.zip | |
Fix CVE-2014-3570: properly calculate the square of a BIGNUM value.
See https://www.openssl.org/news/secadv_20150108.txt for a more detailed
discussion.
Original OpenSSL patch here:
https://github.com/openssl/openssl/commit/a7a44ba55cb4f884c6bc9ceac90072dea38e66d0
The regression test is modified a little for KNF.
ok miod@
Diffstat (limited to 'src/lib/libcrypto/bn/asm')
| -rw-r--r-- | src/lib/libcrypto/bn/asm/mips.pl | 611 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/x86_64-gcc.c | 103 |
2 files changed, 180 insertions, 534 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl index d2f3ef7bbf..215c9a7483 100644 --- a/src/lib/libcrypto/bn/asm/mips.pl +++ b/src/lib/libcrypto/bn/asm/mips.pl | |||
| @@ -1872,6 +1872,41 @@ ___ | |||
| 1872 | 1872 | ||
| 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); |
| 1874 | 1874 | ||
| 1875 | sub add_c2 () { | ||
| 1876 | my ($hi,$lo,$c0,$c1,$c2, | ||
| 1877 | $warm, # !$warm marks the first call for a given sequence of | ||
| 1878 | # $c_[XYZ], when there is no Z-carry to accumulate yet; | ||
| 1879 | $an,$bn # these two are the operands of the multiplication whose | ||
| 1880 | # result is used in the *next* step [which is why it is | ||
| 1881 | # commented as "forward multiplication" below]; | ||
| 1882 | )=@_; | ||
| 1883 | $code.=<<___; | ||
| 1884 | mflo $lo | ||
| 1885 | mfhi $hi | ||
| 1886 | $ADDU $c0,$lo | ||
| 1887 | sltu $at,$c0,$lo | ||
| 1888 | $MULTU $an,$bn # forward multiplication | ||
| 1889 | $ADDU $c0,$lo | ||
| 1890 | $ADDU $at,$hi | ||
| 1891 | sltu $lo,$c0,$lo | ||
| 1892 | $ADDU $c1,$at | ||
| 1893 | $ADDU $hi,$lo | ||
| 1894 | ___ | ||
| 1895 | $code.=<<___ if (!$warm); | ||
| 1896 | sltu $c2,$c1,$at | ||
| 1897 | $ADDU $c1,$hi | ||
| 1898 | sltu $hi,$c1,$hi | ||
| 1899 | $ADDU $c2,$hi | ||
| 1900 | ___ | ||
| 1901 | $code.=<<___ if ($warm); | ||
| 1902 | sltu $at,$c1,$at | ||
| 1903 | $ADDU $c1,$hi | ||
| 1904 | $ADDU $c2,$at | ||
| 1905 | sltu $hi,$c1,$hi | ||
| 1906 | $ADDU $c2,$hi | ||
| 1907 | ___ | ||
| 1908 | } | ||
| 1909 | |||
| 1875 | $code.=<<___; | 1910 | $code.=<<___; |
| 1876 | 1911 | ||
| 1877 | .align 5 | 1912 | .align 5 |
| @@ -1920,21 +1955,10 @@ $code.=<<___; | |||
| 1920 | sltu $at,$c_2,$t_1 | 1955 | sltu $at,$c_2,$t_1 |
| 1921 | $ADDU $c_3,$t_2,$at | 1956 | $ADDU $c_3,$t_2,$at |
| 1922 | $ST $c_2,$BNSZ($a0) | 1957 | $ST $c_2,$BNSZ($a0) |
| 1923 | 1958 | ___ | |
| 1924 | mflo $t_1 | 1959 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 1925 | mfhi $t_2 | 1960 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
| 1926 | slt $c_2,$t_2,$zero | 1961 | $code.=<<___; |
| 1927 | $SLL $t_2,1 | ||
| 1928 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
| 1929 | slt $a2,$t_1,$zero | ||
| 1930 | $ADDU $t_2,$a2 | ||
| 1931 | $SLL $t_1,1 | ||
| 1932 | $ADDU $c_3,$t_1 | ||
| 1933 | sltu $at,$c_3,$t_1 | ||
| 1934 | $ADDU $t_2,$at | ||
| 1935 | $ADDU $c_1,$t_2 | ||
| 1936 | sltu $at,$c_1,$t_2 | ||
| 1937 | $ADDU $c_2,$at | ||
| 1938 | mflo $t_1 | 1962 | mflo $t_1 |
| 1939 | mfhi $t_2 | 1963 | mfhi $t_2 |
| 1940 | $ADDU $c_3,$t_1 | 1964 | $ADDU $c_3,$t_1 |
| @@ -1945,67 +1969,19 @@ $code.=<<___; | |||
| 1945 | sltu $at,$c_1,$t_2 | 1969 | sltu $at,$c_1,$t_2 |
| 1946 | $ADDU $c_2,$at | 1970 | $ADDU $c_2,$at |
| 1947 | $ST $c_3,2*$BNSZ($a0) | 1971 | $ST $c_3,2*$BNSZ($a0) |
| 1948 | 1972 | ___ | |
| 1949 | mflo $t_1 | 1973 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 1950 | mfhi $t_2 | 1974 | $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); |
| 1951 | slt $c_3,$t_2,$zero | 1975 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 1952 | $SLL $t_2,1 | 1976 | $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); |
| 1953 | $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | 1977 | $code.=<<___; |
| 1954 | slt $a2,$t_1,$zero | ||
| 1955 | $ADDU $t_2,$a2 | ||
| 1956 | $SLL $t_1,1 | ||
| 1957 | $ADDU $c_1,$t_1 | ||
| 1958 | sltu $at,$c_1,$t_1 | ||
| 1959 | $ADDU $t_2,$at | ||
| 1960 | $ADDU $c_2,$t_2 | ||
| 1961 | sltu $at,$c_2,$t_2 | ||
| 1962 | $ADDU $c_3,$at | ||
| 1963 | mflo $t_1 | ||
| 1964 | mfhi $t_2 | ||
| 1965 | slt $at,$t_2,$zero | ||
| 1966 | $ADDU $c_3,$at | ||
| 1967 | $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
| 1968 | $SLL $t_2,1 | ||
| 1969 | slt $a2,$t_1,$zero | ||
| 1970 | $ADDU $t_2,$a2 | ||
| 1971 | $SLL $t_1,1 | ||
| 1972 | $ADDU $c_1,$t_1 | ||
| 1973 | sltu $at,$c_1,$t_1 | ||
| 1974 | $ADDU $t_2,$at | ||
| 1975 | $ADDU $c_2,$t_2 | ||
| 1976 | sltu $at,$c_2,$t_2 | ||
| 1977 | $ADDU $c_3,$at | ||
| 1978 | $ST $c_1,3*$BNSZ($a0) | 1978 | $ST $c_1,3*$BNSZ($a0) |
| 1979 | 1979 | ___ | |
| 1980 | mflo $t_1 | 1980 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 1981 | mfhi $t_2 | 1981 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
| 1982 | slt $c_1,$t_2,$zero | 1982 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 1983 | $SLL $t_2,1 | 1983 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
| 1984 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | 1984 | $code.=<<___; |
| 1985 | slt $a2,$t_1,$zero | ||
| 1986 | $ADDU $t_2,$a2 | ||
| 1987 | $SLL $t_1,1 | ||
| 1988 | $ADDU $c_2,$t_1 | ||
| 1989 | sltu $at,$c_2,$t_1 | ||
| 1990 | $ADDU $t_2,$at | ||
| 1991 | $ADDU $c_3,$t_2 | ||
| 1992 | sltu $at,$c_3,$t_2 | ||
| 1993 | $ADDU $c_1,$at | ||
| 1994 | mflo $t_1 | ||
| 1995 | mfhi $t_2 | ||
| 1996 | slt $at,$t_2,$zero | ||
| 1997 | $ADDU $c_1,$at | ||
| 1998 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
| 1999 | $SLL $t_2,1 | ||
| 2000 | slt $a2,$t_1,$zero | ||
| 2001 | $ADDU $t_2,$a2 | ||
| 2002 | $SLL $t_1,1 | ||
| 2003 | $ADDU $c_2,$t_1 | ||
| 2004 | sltu $at,$c_2,$t_1 | ||
| 2005 | $ADDU $t_2,$at | ||
| 2006 | $ADDU $c_3,$t_2 | ||
| 2007 | sltu $at,$c_3,$t_2 | ||
| 2008 | $ADDU $c_1,$at | ||
| 2009 | mflo $t_1 | 1985 | mflo $t_1 |
| 2010 | mfhi $t_2 | 1986 | mfhi $t_2 |
| 2011 | $ADDU $c_2,$t_1 | 1987 | $ADDU $c_2,$t_1 |
| @@ -2016,97 +1992,23 @@ $code.=<<___; | |||
| 2016 | sltu $at,$c_3,$t_2 | 1992 | sltu $at,$c_3,$t_2 |
| 2017 | $ADDU $c_1,$at | 1993 | $ADDU $c_1,$at |
| 2018 | $ST $c_2,4*$BNSZ($a0) | 1994 | $ST $c_2,4*$BNSZ($a0) |
| 2019 | 1995 | ___ | |
| 2020 | mflo $t_1 | 1996 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2021 | mfhi $t_2 | 1997 | $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); |
| 2022 | slt $c_2,$t_2,$zero | 1998 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2023 | $SLL $t_2,1 | 1999 | $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); |
| 2024 | $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | 2000 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2025 | slt $a2,$t_1,$zero | 2001 | $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); |
| 2026 | $ADDU $t_2,$a2 | 2002 | $code.=<<___; |
| 2027 | $SLL $t_1,1 | ||
| 2028 | $ADDU $c_3,$t_1 | ||
| 2029 | sltu $at,$c_3,$t_1 | ||
| 2030 | $ADDU $t_2,$at | ||
| 2031 | $ADDU $c_1,$t_2 | ||
| 2032 | sltu $at,$c_1,$t_2 | ||
| 2033 | $ADDU $c_2,$at | ||
| 2034 | mflo $t_1 | ||
| 2035 | mfhi $t_2 | ||
| 2036 | slt $at,$t_2,$zero | ||
| 2037 | $ADDU $c_2,$at | ||
| 2038 | $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
| 2039 | $SLL $t_2,1 | ||
| 2040 | slt $a2,$t_1,$zero | ||
| 2041 | $ADDU $t_2,$a2 | ||
| 2042 | $SLL $t_1,1 | ||
| 2043 | $ADDU $c_3,$t_1 | ||
| 2044 | sltu $at,$c_3,$t_1 | ||
| 2045 | $ADDU $t_2,$at | ||
| 2046 | $ADDU $c_1,$t_2 | ||
| 2047 | sltu $at,$c_1,$t_2 | ||
| 2048 | $ADDU $c_2,$at | ||
| 2049 | mflo $t_1 | ||
| 2050 | mfhi $t_2 | ||
| 2051 | slt $at,$t_2,$zero | ||
| 2052 | $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
| 2053 | $ADDU $c_2,$at | ||
| 2054 | $SLL $t_2,1 | ||
| 2055 | slt $a2,$t_1,$zero | ||
| 2056 | $ADDU $t_2,$a2 | ||
| 2057 | $SLL $t_1,1 | ||
| 2058 | $ADDU $c_3,$t_1 | ||
| 2059 | sltu $at,$c_3,$t_1 | ||
| 2060 | $ADDU $t_2,$at | ||
| 2061 | $ADDU $c_1,$t_2 | ||
| 2062 | sltu $at,$c_1,$t_2 | ||
| 2063 | $ADDU $c_2,$at | ||
| 2064 | $ST $c_3,5*$BNSZ($a0) | 2003 | $ST $c_3,5*$BNSZ($a0) |
| 2065 | 2004 | ___ | |
| 2066 | mflo $t_1 | 2005 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2067 | mfhi $t_2 | 2006 | $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); |
| 2068 | slt $c_3,$t_2,$zero | 2007 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2069 | $SLL $t_2,1 | 2008 | $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); |
| 2070 | $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | 2009 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2071 | slt $a2,$t_1,$zero | 2010 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
| 2072 | $ADDU $t_2,$a2 | 2011 | $code.=<<___; |
| 2073 | $SLL $t_1,1 | ||
| 2074 | $ADDU $c_1,$t_1 | ||
| 2075 | sltu $at,$c_1,$t_1 | ||
| 2076 | $ADDU $t_2,$at | ||
| 2077 | $ADDU $c_2,$t_2 | ||
| 2078 | sltu $at,$c_2,$t_2 | ||
| 2079 | $ADDU $c_3,$at | ||
| 2080 | mflo $t_1 | ||
| 2081 | mfhi $t_2 | ||
| 2082 | slt $at,$t_2,$zero | ||
| 2083 | $ADDU $c_3,$at | ||
| 2084 | $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
| 2085 | $SLL $t_2,1 | ||
| 2086 | slt $a2,$t_1,$zero | ||
| 2087 | $ADDU $t_2,$a2 | ||
| 2088 | $SLL $t_1,1 | ||
| 2089 | $ADDU $c_1,$t_1 | ||
| 2090 | sltu $at,$c_1,$t_1 | ||
| 2091 | $ADDU $t_2,$at | ||
| 2092 | $ADDU $c_2,$t_2 | ||
| 2093 | sltu $at,$c_2,$t_2 | ||
| 2094 | $ADDU $c_3,$at | ||
| 2095 | mflo $t_1 | ||
| 2096 | mfhi $t_2 | ||
| 2097 | slt $at,$t_2,$zero | ||
| 2098 | $ADDU $c_3,$at | ||
| 2099 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
| 2100 | $SLL $t_2,1 | ||
| 2101 | slt $a2,$t_1,$zero | ||
| 2102 | $ADDU $t_2,$a2 | ||
| 2103 | $SLL $t_1,1 | ||
| 2104 | $ADDU $c_1,$t_1 | ||
| 2105 | sltu $at,$c_1,$t_1 | ||
| 2106 | $ADDU $t_2,$at | ||
| 2107 | $ADDU $c_2,$t_2 | ||
| 2108 | sltu $at,$c_2,$t_2 | ||
| 2109 | $ADDU $c_3,$at | ||
| 2110 | mflo $t_1 | 2012 | mflo $t_1 |
| 2111 | mfhi $t_2 | 2013 | mfhi $t_2 |
| 2112 | $ADDU $c_1,$t_1 | 2014 | $ADDU $c_1,$t_1 |
| @@ -2117,112 +2019,25 @@ $code.=<<___; | |||
| 2117 | sltu $at,$c_2,$t_2 | 2019 | sltu $at,$c_2,$t_2 |
| 2118 | $ADDU $c_3,$at | 2020 | $ADDU $c_3,$at |
| 2119 | $ST $c_1,6*$BNSZ($a0) | 2021 | $ST $c_1,6*$BNSZ($a0) |
| 2120 | 2022 | ___ | |
| 2121 | mflo $t_1 | 2023 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2122 | mfhi $t_2 | 2024 | $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); |
| 2123 | slt $c_1,$t_2,$zero | 2025 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2124 | $SLL $t_2,1 | 2026 | $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); |
| 2125 | $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | 2027 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2126 | slt $a2,$t_1,$zero | 2028 | $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); |
| 2127 | $ADDU $t_2,$a2 | 2029 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2128 | $SLL $t_1,1 | 2030 | $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); |
| 2129 | $ADDU $c_2,$t_1 | 2031 | $code.=<<___; |
| 2130 | sltu $at,$c_2,$t_1 | ||
| 2131 | $ADDU $t_2,$at | ||
| 2132 | $ADDU $c_3,$t_2 | ||
| 2133 | sltu $at,$c_3,$t_2 | ||
| 2134 | $ADDU $c_1,$at | ||
| 2135 | mflo $t_1 | ||
| 2136 | mfhi $t_2 | ||
| 2137 | slt $at,$t_2,$zero | ||
| 2138 | $ADDU $c_1,$at | ||
| 2139 | $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
| 2140 | $SLL $t_2,1 | ||
| 2141 | slt $a2,$t_1,$zero | ||
| 2142 | $ADDU $t_2,$a2 | ||
| 2143 | $SLL $t_1,1 | ||
| 2144 | $ADDU $c_2,$t_1 | ||
| 2145 | sltu $at,$c_2,$t_1 | ||
| 2146 | $ADDU $t_2,$at | ||
| 2147 | $ADDU $c_3,$t_2 | ||
| 2148 | sltu $at,$c_3,$t_2 | ||
| 2149 | $ADDU $c_1,$at | ||
| 2150 | mflo $t_1 | ||
| 2151 | mfhi $t_2 | ||
| 2152 | slt $at,$t_2,$zero | ||
| 2153 | $ADDU $c_1,$at | ||
| 2154 | $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
| 2155 | $SLL $t_2,1 | ||
| 2156 | slt $a2,$t_1,$zero | ||
| 2157 | $ADDU $t_2,$a2 | ||
| 2158 | $SLL $t_1,1 | ||
| 2159 | $ADDU $c_2,$t_1 | ||
| 2160 | sltu $at,$c_2,$t_1 | ||
| 2161 | $ADDU $t_2,$at | ||
| 2162 | $ADDU $c_3,$t_2 | ||
| 2163 | sltu $at,$c_3,$t_2 | ||
| 2164 | $ADDU $c_1,$at | ||
| 2165 | mflo $t_1 | ||
| 2166 | mfhi $t_2 | ||
| 2167 | slt $at,$t_2,$zero | ||
| 2168 | $ADDU $c_1,$at | ||
| 2169 | $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
| 2170 | $SLL $t_2,1 | ||
| 2171 | slt $a2,$t_1,$zero | ||
| 2172 | $ADDU $t_2,$a2 | ||
| 2173 | $SLL $t_1,1 | ||
| 2174 | $ADDU $c_2,$t_1 | ||
| 2175 | sltu $at,$c_2,$t_1 | ||
| 2176 | $ADDU $t_2,$at | ||
| 2177 | $ADDU $c_3,$t_2 | ||
| 2178 | sltu $at,$c_3,$t_2 | ||
| 2179 | $ADDU $c_1,$at | ||
| 2180 | $ST $c_2,7*$BNSZ($a0) | 2032 | $ST $c_2,7*$BNSZ($a0) |
| 2181 | 2033 | ___ | |
| 2182 | mflo $t_1 | 2034 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2183 | mfhi $t_2 | 2035 | $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); |
| 2184 | slt $c_2,$t_2,$zero | 2036 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2185 | $SLL $t_2,1 | 2037 | $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); |
| 2186 | $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | 2038 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2187 | slt $a2,$t_1,$zero | 2039 | $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); |
| 2188 | $ADDU $t_2,$a2 | 2040 | $code.=<<___; |
| 2189 | $SLL $t_1,1 | ||
| 2190 | $ADDU $c_3,$t_1 | ||
| 2191 | sltu $at,$c_3,$t_1 | ||
| 2192 | $ADDU $t_2,$at | ||
| 2193 | $ADDU $c_1,$t_2 | ||
| 2194 | sltu $at,$c_1,$t_2 | ||
| 2195 | $ADDU $c_2,$at | ||
| 2196 | mflo $t_1 | ||
| 2197 | mfhi $t_2 | ||
| 2198 | slt $at,$t_2,$zero | ||
| 2199 | $ADDU $c_2,$at | ||
| 2200 | $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
| 2201 | $SLL $t_2,1 | ||
| 2202 | slt $a2,$t_1,$zero | ||
| 2203 | $ADDU $t_2,$a2 | ||
| 2204 | $SLL $t_1,1 | ||
| 2205 | $ADDU $c_3,$t_1 | ||
| 2206 | sltu $at,$c_3,$t_1 | ||
| 2207 | $ADDU $t_2,$at | ||
| 2208 | $ADDU $c_1,$t_2 | ||
| 2209 | sltu $at,$c_1,$t_2 | ||
| 2210 | $ADDU $c_2,$at | ||
| 2211 | mflo $t_1 | ||
| 2212 | mfhi $t_2 | ||
| 2213 | slt $at,$t_2,$zero | ||
| 2214 | $ADDU $c_2,$at | ||
| 2215 | $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
| 2216 | $SLL $t_2,1 | ||
| 2217 | slt $a2,$t_1,$zero | ||
| 2218 | $ADDU $t_2,$a2 | ||
| 2219 | $SLL $t_1,1 | ||
| 2220 | $ADDU $c_3,$t_1 | ||
| 2221 | sltu $at,$c_3,$t_1 | ||
| 2222 | $ADDU $t_2,$at | ||
| 2223 | $ADDU $c_1,$t_2 | ||
| 2224 | sltu $at,$c_1,$t_2 | ||
| 2225 | $ADDU $c_2,$at | ||
| 2226 | mflo $t_1 | 2041 | mflo $t_1 |
| 2227 | mfhi $t_2 | 2042 | mfhi $t_2 |
| 2228 | $ADDU $c_3,$t_1 | 2043 | $ADDU $c_3,$t_1 |
| @@ -2233,82 +2048,21 @@ $code.=<<___; | |||
| 2233 | sltu $at,$c_1,$t_2 | 2048 | sltu $at,$c_1,$t_2 |
| 2234 | $ADDU $c_2,$at | 2049 | $ADDU $c_2,$at |
| 2235 | $ST $c_3,8*$BNSZ($a0) | 2050 | $ST $c_3,8*$BNSZ($a0) |
| 2236 | 2051 | ___ | |
| 2237 | mflo $t_1 | 2052 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2238 | mfhi $t_2 | 2053 | $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); |
| 2239 | slt $c_3,$t_2,$zero | 2054 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2240 | $SLL $t_2,1 | 2055 | $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); |
| 2241 | $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | 2056 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2242 | slt $a2,$t_1,$zero | 2057 | $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); |
| 2243 | $ADDU $t_2,$a2 | 2058 | $code.=<<___; |
| 2244 | $SLL $t_1,1 | ||
| 2245 | $ADDU $c_1,$t_1 | ||
| 2246 | sltu $at,$c_1,$t_1 | ||
| 2247 | $ADDU $t_2,$at | ||
| 2248 | $ADDU $c_2,$t_2 | ||
| 2249 | sltu $at,$c_2,$t_2 | ||
| 2250 | $ADDU $c_3,$at | ||
| 2251 | mflo $t_1 | ||
| 2252 | mfhi $t_2 | ||
| 2253 | slt $at,$t_2,$zero | ||
| 2254 | $ADDU $c_3,$at | ||
| 2255 | $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
| 2256 | $SLL $t_2,1 | ||
| 2257 | slt $a2,$t_1,$zero | ||
| 2258 | $ADDU $t_2,$a2 | ||
| 2259 | $SLL $t_1,1 | ||
| 2260 | $ADDU $c_1,$t_1 | ||
| 2261 | sltu $at,$c_1,$t_1 | ||
| 2262 | $ADDU $t_2,$at | ||
| 2263 | $ADDU $c_2,$t_2 | ||
| 2264 | sltu $at,$c_2,$t_2 | ||
| 2265 | $ADDU $c_3,$at | ||
| 2266 | mflo $t_1 | ||
| 2267 | mfhi $t_2 | ||
| 2268 | slt $at,$t_2,$zero | ||
| 2269 | $ADDU $c_3,$at | ||
| 2270 | $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
| 2271 | $SLL $t_2,1 | ||
| 2272 | slt $a2,$t_1,$zero | ||
| 2273 | $ADDU $t_2,$a2 | ||
| 2274 | $SLL $t_1,1 | ||
| 2275 | $ADDU $c_1,$t_1 | ||
| 2276 | sltu $at,$c_1,$t_1 | ||
| 2277 | $ADDU $t_2,$at | ||
| 2278 | $ADDU $c_2,$t_2 | ||
| 2279 | sltu $at,$c_2,$t_2 | ||
| 2280 | $ADDU $c_3,$at | ||
| 2281 | $ST $c_1,9*$BNSZ($a0) | 2059 | $ST $c_1,9*$BNSZ($a0) |
| 2282 | 2060 | ___ | |
| 2283 | mflo $t_1 | 2061 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2284 | mfhi $t_2 | 2062 | $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); |
| 2285 | slt $c_1,$t_2,$zero | 2063 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2286 | $SLL $t_2,1 | 2064 | $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); |
| 2287 | $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | 2065 | $code.=<<___; |
| 2288 | slt $a2,$t_1,$zero | ||
| 2289 | $ADDU $t_2,$a2 | ||
| 2290 | $SLL $t_1,1 | ||
| 2291 | $ADDU $c_2,$t_1 | ||
| 2292 | sltu $at,$c_2,$t_1 | ||
| 2293 | $ADDU $t_2,$at | ||
| 2294 | $ADDU $c_3,$t_2 | ||
| 2295 | sltu $at,$c_3,$t_2 | ||
| 2296 | $ADDU $c_1,$at | ||
| 2297 | mflo $t_1 | ||
| 2298 | mfhi $t_2 | ||
| 2299 | slt $at,$t_2,$zero | ||
| 2300 | $ADDU $c_1,$at | ||
| 2301 | $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
| 2302 | $SLL $t_2,1 | ||
| 2303 | slt $a2,$t_1,$zero | ||
| 2304 | $ADDU $t_2,$a2 | ||
| 2305 | $SLL $t_1,1 | ||
| 2306 | $ADDU $c_2,$t_1 | ||
| 2307 | sltu $at,$c_2,$t_1 | ||
| 2308 | $ADDU $t_2,$at | ||
| 2309 | $ADDU $c_3,$t_2 | ||
| 2310 | sltu $at,$c_3,$t_2 | ||
| 2311 | $ADDU $c_1,$at | ||
| 2312 | mflo $t_1 | 2066 | mflo $t_1 |
| 2313 | mfhi $t_2 | 2067 | mfhi $t_2 |
| 2314 | $ADDU $c_2,$t_1 | 2068 | $ADDU $c_2,$t_1 |
| @@ -2319,52 +2073,17 @@ $code.=<<___; | |||
| 2319 | sltu $at,$c_3,$t_2 | 2073 | sltu $at,$c_3,$t_2 |
| 2320 | $ADDU $c_1,$at | 2074 | $ADDU $c_1,$at |
| 2321 | $ST $c_2,10*$BNSZ($a0) | 2075 | $ST $c_2,10*$BNSZ($a0) |
| 2322 | 2076 | ___ | |
| 2323 | mflo $t_1 | 2077 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2324 | mfhi $t_2 | 2078 | $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); |
| 2325 | slt $c_2,$t_2,$zero | 2079 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2326 | $SLL $t_2,1 | 2080 | $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); |
| 2327 | $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | 2081 | $code.=<<___; |
| 2328 | slt $a2,$t_1,$zero | ||
| 2329 | $ADDU $t_2,$a2 | ||
| 2330 | $SLL $t_1,1 | ||
| 2331 | $ADDU $c_3,$t_1 | ||
| 2332 | sltu $at,$c_3,$t_1 | ||
| 2333 | $ADDU $t_2,$at | ||
| 2334 | $ADDU $c_1,$t_2 | ||
| 2335 | sltu $at,$c_1,$t_2 | ||
| 2336 | $ADDU $c_2,$at | ||
| 2337 | mflo $t_1 | ||
| 2338 | mfhi $t_2 | ||
| 2339 | slt $at,$t_2,$zero | ||
| 2340 | $ADDU $c_2,$at | ||
| 2341 | $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
| 2342 | $SLL $t_2,1 | ||
| 2343 | slt $a2,$t_1,$zero | ||
| 2344 | $ADDU $t_2,$a2 | ||
| 2345 | $SLL $t_1,1 | ||
| 2346 | $ADDU $c_3,$t_1 | ||
| 2347 | sltu $at,$c_3,$t_1 | ||
| 2348 | $ADDU $t_2,$at | ||
| 2349 | $ADDU $c_1,$t_2 | ||
| 2350 | sltu $at,$c_1,$t_2 | ||
| 2351 | $ADDU $c_2,$at | ||
| 2352 | $ST $c_3,11*$BNSZ($a0) | 2082 | $ST $c_3,11*$BNSZ($a0) |
| 2353 | 2083 | ___ | |
| 2354 | mflo $t_1 | 2084 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2355 | mfhi $t_2 | 2085 | $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); |
| 2356 | slt $c_3,$t_2,$zero | 2086 | $code.=<<___; |
| 2357 | $SLL $t_2,1 | ||
| 2358 | $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
| 2359 | slt $a2,$t_1,$zero | ||
| 2360 | $ADDU $t_2,$a2 | ||
| 2361 | $SLL $t_1,1 | ||
| 2362 | $ADDU $c_1,$t_1 | ||
| 2363 | sltu $at,$c_1,$t_1 | ||
| 2364 | $ADDU $t_2,$at | ||
| 2365 | $ADDU $c_2,$t_2 | ||
| 2366 | sltu $at,$c_2,$t_2 | ||
| 2367 | $ADDU $c_3,$at | ||
| 2368 | mflo $t_1 | 2087 | mflo $t_1 |
| 2369 | mfhi $t_2 | 2088 | mfhi $t_2 |
| 2370 | $ADDU $c_1,$t_1 | 2089 | $ADDU $c_1,$t_1 |
| @@ -2375,21 +2094,10 @@ $code.=<<___; | |||
| 2375 | sltu $at,$c_2,$t_2 | 2094 | sltu $at,$c_2,$t_2 |
| 2376 | $ADDU $c_3,$at | 2095 | $ADDU $c_3,$at |
| 2377 | $ST $c_1,12*$BNSZ($a0) | 2096 | $ST $c_1,12*$BNSZ($a0) |
| 2378 | 2097 | ___ | |
| 2379 | mflo $t_1 | 2098 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2380 | mfhi $t_2 | 2099 | $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); |
| 2381 | slt $c_1,$t_2,$zero | 2100 | $code.=<<___; |
| 2382 | $SLL $t_2,1 | ||
| 2383 | $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
| 2384 | slt $a2,$t_1,$zero | ||
| 2385 | $ADDU $t_2,$a2 | ||
| 2386 | $SLL $t_1,1 | ||
| 2387 | $ADDU $c_2,$t_1 | ||
| 2388 | sltu $at,$c_2,$t_1 | ||
| 2389 | $ADDU $t_2,$at | ||
| 2390 | $ADDU $c_3,$t_2 | ||
| 2391 | sltu $at,$c_3,$t_2 | ||
| 2392 | $ADDU $c_1,$at | ||
| 2393 | $ST $c_2,13*$BNSZ($a0) | 2101 | $ST $c_2,13*$BNSZ($a0) |
| 2394 | 2102 | ||
| 2395 | mflo $t_1 | 2103 | mflo $t_1 |
| @@ -2457,21 +2165,10 @@ $code.=<<___; | |||
| 2457 | sltu $at,$c_2,$t_1 | 2165 | sltu $at,$c_2,$t_1 |
| 2458 | $ADDU $c_3,$t_2,$at | 2166 | $ADDU $c_3,$t_2,$at |
| 2459 | $ST $c_2,$BNSZ($a0) | 2167 | $ST $c_2,$BNSZ($a0) |
| 2460 | 2168 | ___ | |
| 2461 | mflo $t_1 | 2169 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2462 | mfhi $t_2 | 2170 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
| 2463 | slt $c_2,$t_2,$zero | 2171 | $code.=<<___; |
| 2464 | $SLL $t_2,1 | ||
| 2465 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
| 2466 | slt $a2,$t_1,$zero | ||
| 2467 | $ADDU $t_2,$a2 | ||
| 2468 | $SLL $t_1,1 | ||
| 2469 | $ADDU $c_3,$t_1 | ||
| 2470 | sltu $at,$c_3,$t_1 | ||
| 2471 | $ADDU $t_2,$at | ||
| 2472 | $ADDU $c_1,$t_2 | ||
| 2473 | sltu $at,$c_1,$t_2 | ||
| 2474 | $ADDU $c_2,$at | ||
| 2475 | mflo $t_1 | 2172 | mflo $t_1 |
| 2476 | mfhi $t_2 | 2173 | mfhi $t_2 |
| 2477 | $ADDU $c_3,$t_1 | 2174 | $ADDU $c_3,$t_1 |
| @@ -2482,52 +2179,17 @@ $code.=<<___; | |||
| 2482 | sltu $at,$c_1,$t_2 | 2179 | sltu $at,$c_1,$t_2 |
| 2483 | $ADDU $c_2,$at | 2180 | $ADDU $c_2,$at |
| 2484 | $ST $c_3,2*$BNSZ($a0) | 2181 | $ST $c_3,2*$BNSZ($a0) |
| 2485 | 2182 | ___ | |
| 2486 | mflo $t_1 | 2183 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2487 | mfhi $t_2 | 2184 | $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); |
| 2488 | slt $c_3,$t_2,$zero | 2185 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2489 | $SLL $t_2,1 | 2186 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
| 2490 | $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); | 2187 | $code.=<<___; |
| 2491 | slt $a2,$t_1,$zero | ||
| 2492 | $ADDU $t_2,$a2 | ||
| 2493 | $SLL $t_1,1 | ||
| 2494 | $ADDU $c_1,$t_1 | ||
| 2495 | sltu $at,$c_1,$t_1 | ||
| 2496 | $ADDU $t_2,$at | ||
| 2497 | $ADDU $c_2,$t_2 | ||
| 2498 | sltu $at,$c_2,$t_2 | ||
| 2499 | $ADDU $c_3,$at | ||
| 2500 | mflo $t_1 | ||
| 2501 | mfhi $t_2 | ||
| 2502 | slt $at,$t_2,$zero | ||
| 2503 | $ADDU $c_3,$at | ||
| 2504 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
| 2505 | $SLL $t_2,1 | ||
| 2506 | slt $a2,$t_1,$zero | ||
| 2507 | $ADDU $t_2,$a2 | ||
| 2508 | $SLL $t_1,1 | ||
| 2509 | $ADDU $c_1,$t_1 | ||
| 2510 | sltu $at,$c_1,$t_1 | ||
| 2511 | $ADDU $t_2,$at | ||
| 2512 | $ADDU $c_2,$t_2 | ||
| 2513 | sltu $at,$c_2,$t_2 | ||
| 2514 | $ADDU $c_3,$at | ||
| 2515 | $ST $c_1,3*$BNSZ($a0) | 2188 | $ST $c_1,3*$BNSZ($a0) |
| 2516 | 2189 | ___ | |
| 2517 | mflo $t_1 | 2190 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2518 | mfhi $t_2 | 2191 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
| 2519 | slt $c_1,$t_2,$zero | 2192 | $code.=<<___; |
| 2520 | $SLL $t_2,1 | ||
| 2521 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
| 2522 | slt $a2,$t_1,$zero | ||
| 2523 | $ADDU $t_2,$a2 | ||
| 2524 | $SLL $t_1,1 | ||
| 2525 | $ADDU $c_2,$t_1 | ||
| 2526 | sltu $at,$c_2,$t_1 | ||
| 2527 | $ADDU $t_2,$at | ||
| 2528 | $ADDU $c_3,$t_2 | ||
| 2529 | sltu $at,$c_3,$t_2 | ||
| 2530 | $ADDU $c_1,$at | ||
| 2531 | mflo $t_1 | 2193 | mflo $t_1 |
| 2532 | mfhi $t_2 | 2194 | mfhi $t_2 |
| 2533 | $ADDU $c_2,$t_1 | 2195 | $ADDU $c_2,$t_1 |
| @@ -2538,21 +2200,10 @@ $code.=<<___; | |||
| 2538 | sltu $at,$c_3,$t_2 | 2200 | sltu $at,$c_3,$t_2 |
| 2539 | $ADDU $c_1,$at | 2201 | $ADDU $c_1,$at |
| 2540 | $ST $c_2,4*$BNSZ($a0) | 2202 | $ST $c_2,4*$BNSZ($a0) |
| 2541 | 2203 | ___ | |
| 2542 | mflo $t_1 | 2204 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2543 | mfhi $t_2 | 2205 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
| 2544 | slt $c_2,$t_2,$zero | 2206 | $code.=<<___; |
| 2545 | $SLL $t_2,1 | ||
| 2546 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
| 2547 | slt $a2,$t_1,$zero | ||
| 2548 | $ADDU $t_2,$a2 | ||
| 2549 | $SLL $t_1,1 | ||
| 2550 | $ADDU $c_3,$t_1 | ||
| 2551 | sltu $at,$c_3,$t_1 | ||
| 2552 | $ADDU $t_2,$at | ||
| 2553 | $ADDU $c_1,$t_2 | ||
| 2554 | sltu $at,$c_1,$t_2 | ||
| 2555 | $ADDU $c_2,$at | ||
| 2556 | $ST $c_3,5*$BNSZ($a0) | 2207 | $ST $c_3,5*$BNSZ($a0) |
| 2557 | 2208 | ||
| 2558 | mflo $t_1 | 2209 | mflo $t_1 |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c index c9a2b6be73..9deffa71f1 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c +++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: x86_64-gcc.c,v 1.4 2014/10/28 07:35:58 jsg Exp $ */ | 1 | /* $OpenBSD: x86_64-gcc.c,v 1.5 2015/02/25 15:39:49 bcook Exp $ */ |
| 2 | #include "../bn_lcl.h" | 2 | #include "../bn_lcl.h" |
| 3 | #if !(defined(__GNUC__) && __GNUC__>=2) | 3 | #if !(defined(__GNUC__) && __GNUC__>=2) |
| 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
| @@ -270,77 +270,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ |
| 271 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 271 | /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
| 272 | 272 | ||
| 273 | /* | ||
| 274 | * Keep in mind that carrying into high part of multiplication result | ||
| 275 | * can not overflow, because it cannot be all-ones. | ||
| 276 | */ | ||
| 273 | #if 0 | 277 | #if 0 |
| 274 | /* original macros are kept for reference purposes */ | 278 | /* original macros are kept for reference purposes */ |
| 275 | #define mul_add_c(a,b,c0,c1,c2) { \ | 279 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 276 | BN_ULONG ta=(a),tb=(b); \ | 280 | BN_ULONG ta = (a), tb = (b); \ |
| 277 | t1 = ta * tb; \ | 281 | BN_ULONG lo, hi; \ |
| 278 | t2 = BN_UMULT_HIGH(ta,tb); \ | 282 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 279 | c0 += t1; t2 += (c0<t1)?1:0; \ | 283 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 280 | c1 += t2; c2 += (c1<t2)?1:0; \ | 284 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 281 | } | 285 | } while(0) |
| 282 | 286 | ||
| 283 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 287 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 284 | BN_ULONG ta=(a),tb=(b),t0; \ | 288 | BN_ULONG ta = (a), tb = (b); \ |
| 285 | t1 = BN_UMULT_HIGH(ta,tb); \ | 289 | BN_ULONG lo, hi, tt; \ |
| 286 | t0 = ta * tb; \ | 290 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 287 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 291 | c0 += lo; tt = hi+((c0<lo)?1:0); \ |
| 288 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 292 | c1 += tt; c2 += (c1<tt)?1:0; \ |
| 289 | c0 += t1; t2 += (c0<t1)?1:0; \ | 293 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 290 | c1 += t2; c2 += (c1<t2)?1:0; \ | 294 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 291 | } | 295 | } while(0) |
| 296 | |||
| 297 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 298 | BN_ULONG ta = (a)[i]; \ | ||
| 299 | BN_ULONG lo, hi; \ | ||
| 300 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
| 301 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 302 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 303 | } while(0) | ||
| 292 | #else | 304 | #else |
| 293 | #define mul_add_c(a,b,c0,c1,c2) do { \ | 305 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 306 | BN_ULONG t1,t2; \ | ||
| 294 | asm ("mulq %3" \ | 307 | asm ("mulq %3" \ |
| 295 | : "=a"(t1),"=d"(t2) \ | 308 | : "=a"(t1),"=d"(t2) \ |
| 296 | : "a"(a),"m"(b) \ | 309 | : "a"(a),"m"(b) \ |
| 297 | : "cc"); \ | 310 | : "cc"); \ |
| 298 | asm ("addq %2,%0; adcq %3,%1" \ | 311 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 299 | : "+r"(c0),"+d"(t2) \ | 312 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 300 | : "a"(t1),"g"(0) \ | 313 | : "r"(t1),"r"(t2),"g"(0) \ |
| 301 | : "cc"); \ | 314 | : "cc"); \ |
| 302 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 303 | : "+r"(c1),"+r"(c2) \ | ||
| 304 | : "d"(t2),"g"(0) \ | ||
| 305 | : "cc"); \ | ||
| 306 | } while (0) | 315 | } while (0) |
| 307 | 316 | ||
| 308 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | 317 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
| 318 | BN_ULONG t1,t2; \ | ||
| 309 | asm ("mulq %2" \ | 319 | asm ("mulq %2" \ |
| 310 | : "=a"(t1),"=d"(t2) \ | 320 | : "=a"(t1),"=d"(t2) \ |
| 311 | : "a"(a[i]) \ | 321 | : "a"(a[i]) \ |
| 312 | : "cc"); \ | 322 | : "cc"); \ |
| 313 | asm ("addq %2,%0; adcq %3,%1" \ | 323 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 314 | : "+r"(c0),"+d"(t2) \ | 324 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 315 | : "a"(t1),"g"(0) \ | 325 | : "r"(t1),"r"(t2),"g"(0) \ |
| 316 | : "cc"); \ | 326 | : "cc"); \ |
| 317 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 318 | : "+r"(c1),"+r"(c2) \ | ||
| 319 | : "d"(t2),"g"(0) \ | ||
| 320 | : "cc"); \ | ||
| 321 | } while (0) | 327 | } while (0) |
| 322 | 328 | ||
| 323 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | 329 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 330 | BN_ULONG t1,t2; \ | ||
| 324 | asm ("mulq %3" \ | 331 | asm ("mulq %3" \ |
| 325 | : "=a"(t1),"=d"(t2) \ | 332 | : "=a"(t1),"=d"(t2) \ |
| 326 | : "a"(a),"m"(b) \ | 333 | : "a"(a),"m"(b) \ |
| 327 | : "cc"); \ | 334 | : "cc"); \ |
| 328 | asm ("addq %0,%0; adcq %2,%1" \ | 335 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 329 | : "+d"(t2),"+r"(c2) \ | 336 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 330 | : "g"(0) \ | 337 | : "r"(t1),"r"(t2),"g"(0) \ |
| 331 | : "cc"); \ | 338 | : "cc"); \ |
| 332 | asm ("addq %0,%0; adcq %2,%1" \ | 339 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 333 | : "+a"(t1),"+d"(t2) \ | 340 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 334 | : "g"(0) \ | 341 | : "r"(t1),"r"(t2),"g"(0) \ |
| 335 | : "cc"); \ | 342 | : "cc"); \ |
| 336 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 337 | : "+r"(c0),"+d"(t2) \ | ||
| 338 | : "a"(t1),"g"(0) \ | ||
| 339 | : "cc"); \ | ||
| 340 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 341 | : "+r"(c1),"+r"(c2) \ | ||
| 342 | : "d"(t2),"g"(0) \ | ||
| 343 | : "cc"); \ | ||
| 344 | } while (0) | 343 | } while (0) |
| 345 | #endif | 344 | #endif |
| 346 | 345 | ||
| @@ -349,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 349 | 348 | ||
| 350 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 349 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 351 | { | 350 | { |
| 352 | BN_ULONG t1,t2; | ||
| 353 | BN_ULONG c1,c2,c3; | 351 | BN_ULONG c1,c2,c3; |
| 354 | 352 | ||
| 355 | c1=0; | 353 | c1=0; |
| @@ -453,7 +451,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 453 | 451 | ||
| 454 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 452 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 455 | { | 453 | { |
| 456 | BN_ULONG t1,t2; | ||
| 457 | BN_ULONG c1,c2,c3; | 454 | BN_ULONG c1,c2,c3; |
| 458 | 455 | ||
| 459 | c1=0; | 456 | c1=0; |
| @@ -493,7 +490,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 493 | 490 | ||
| 494 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 491 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 495 | { | 492 | { |
| 496 | BN_ULONG t1,t2; | ||
| 497 | BN_ULONG c1,c2,c3; | 493 | BN_ULONG c1,c2,c3; |
| 498 | 494 | ||
| 499 | c1=0; | 495 | c1=0; |
| @@ -569,7 +565,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
| 569 | 565 | ||
| 570 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 566 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 571 | { | 567 | { |
| 572 | BN_ULONG t1,t2; | ||
| 573 | BN_ULONG c1,c2,c3; | 568 | BN_ULONG c1,c2,c3; |
| 574 | 569 | ||
| 575 | c1=0; | 570 | c1=0; |
