diff options
| author | bcook <> | 2015-02-25 15:39:49 +0000 |
|---|---|---|
| committer | bcook <> | 2015-02-25 15:39:49 +0000 |
| commit | f3031aa7bff24911a8cae9bdd7cdcd88d8554f42 (patch) | |
| tree | fca56e3d23c024e7f0d0132456914f4f3181e5df /src | |
| parent | 2725a02f7a7b4932578ec02826b4501c29e21ddf (diff) | |
| download | openbsd-f3031aa7bff24911a8cae9bdd7cdcd88d8554f42.tar.gz openbsd-f3031aa7bff24911a8cae9bdd7cdcd88d8554f42.tar.bz2 openbsd-f3031aa7bff24911a8cae9bdd7cdcd88d8554f42.zip | |
Fix CVE-2014-3570: properly calculate the square of a BIGNUM value.
See https://www.openssl.org/news/secadv_20150108.txt for a more detailed
discussion.
Original OpenSSL patch here:
https://github.com/openssl/openssl/commit/a7a44ba55cb4f884c6bc9ceac90072dea38e66d0
The regression test is modified a little for KNF.
ok miod@
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/libcrypto/bn/asm/mips.pl | 611 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/x86_64-gcc.c | 103 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 243 | ||||
| -rw-r--r-- | src/lib/libssl/src/crypto/bn/asm/mips.pl | 611 | ||||
| -rw-r--r-- | src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c | 103 | ||||
| -rw-r--r-- | src/lib/libssl/src/crypto/bn/bn_asm.c | 243 | ||||
| -rw-r--r-- | src/regress/lib/libcrypto/bn/general/bntest.c | 89 |
7 files changed, 672 insertions, 1331 deletions
diff --git a/src/lib/libcrypto/bn/asm/mips.pl b/src/lib/libcrypto/bn/asm/mips.pl index d2f3ef7bbf..215c9a7483 100644 --- a/src/lib/libcrypto/bn/asm/mips.pl +++ b/src/lib/libcrypto/bn/asm/mips.pl | |||
| @@ -1872,6 +1872,41 @@ ___ | |||
| 1872 | 1872 | ||
| 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); |
| 1874 | 1874 | ||
| 1875 | sub add_c2 () { | ||
| 1876 | my ($hi,$lo,$c0,$c1,$c2, | ||
| 1877 | $warm, # !$warm denotes first call with specific sequence of | ||
| 1878 | # $c_[XYZ] when there is no Z-carry to accumulate yet; | ||
| 1879 | $an,$bn # these two are arguments for multiplication which | ||
| 1880 | # result is used in *next* step [which is why it's | ||
| 1881 | # commented as "forward multiplication" below]; | ||
| 1882 | )=@_; | ||
| 1883 | $code.=<<___; | ||
| 1884 | mflo $lo | ||
| 1885 | mfhi $hi | ||
| 1886 | $ADDU $c0,$lo | ||
| 1887 | sltu $at,$c0,$lo | ||
| 1888 | $MULTU $an,$bn # forward multiplication | ||
| 1889 | $ADDU $c0,$lo | ||
| 1890 | $ADDU $at,$hi | ||
| 1891 | sltu $lo,$c0,$lo | ||
| 1892 | $ADDU $c1,$at | ||
| 1893 | $ADDU $hi,$lo | ||
| 1894 | ___ | ||
| 1895 | $code.=<<___ if (!$warm); | ||
| 1896 | sltu $c2,$c1,$at | ||
| 1897 | $ADDU $c1,$hi | ||
| 1898 | sltu $hi,$c1,$hi | ||
| 1899 | $ADDU $c2,$hi | ||
| 1900 | ___ | ||
| 1901 | $code.=<<___ if ($warm); | ||
| 1902 | sltu $at,$c1,$at | ||
| 1903 | $ADDU $c1,$hi | ||
| 1904 | $ADDU $c2,$at | ||
| 1905 | sltu $hi,$c1,$hi | ||
| 1906 | $ADDU $c2,$hi | ||
| 1907 | ___ | ||
| 1908 | } | ||
| 1909 | |||
| 1875 | $code.=<<___; | 1910 | $code.=<<___; |
| 1876 | 1911 | ||
| 1877 | .align 5 | 1912 | .align 5 |
| @@ -1920,21 +1955,10 @@ $code.=<<___; | |||
| 1920 | sltu $at,$c_2,$t_1 | 1955 | sltu $at,$c_2,$t_1 |
| 1921 | $ADDU $c_3,$t_2,$at | 1956 | $ADDU $c_3,$t_2,$at |
| 1922 | $ST $c_2,$BNSZ($a0) | 1957 | $ST $c_2,$BNSZ($a0) |
| 1923 | 1958 | ___ | |
| 1924 | mflo $t_1 | 1959 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 1925 | mfhi $t_2 | 1960 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
| 1926 | slt $c_2,$t_2,$zero | 1961 | $code.=<<___; |
| 1927 | $SLL $t_2,1 | ||
| 1928 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
| 1929 | slt $a2,$t_1,$zero | ||
| 1930 | $ADDU $t_2,$a2 | ||
| 1931 | $SLL $t_1,1 | ||
| 1932 | $ADDU $c_3,$t_1 | ||
| 1933 | sltu $at,$c_3,$t_1 | ||
| 1934 | $ADDU $t_2,$at | ||
| 1935 | $ADDU $c_1,$t_2 | ||
| 1936 | sltu $at,$c_1,$t_2 | ||
| 1937 | $ADDU $c_2,$at | ||
| 1938 | mflo $t_1 | 1962 | mflo $t_1 |
| 1939 | mfhi $t_2 | 1963 | mfhi $t_2 |
| 1940 | $ADDU $c_3,$t_1 | 1964 | $ADDU $c_3,$t_1 |
| @@ -1945,67 +1969,19 @@ $code.=<<___; | |||
| 1945 | sltu $at,$c_1,$t_2 | 1969 | sltu $at,$c_1,$t_2 |
| 1946 | $ADDU $c_2,$at | 1970 | $ADDU $c_2,$at |
| 1947 | $ST $c_3,2*$BNSZ($a0) | 1971 | $ST $c_3,2*$BNSZ($a0) |
| 1948 | 1972 | ___ | |
| 1949 | mflo $t_1 | 1973 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 1950 | mfhi $t_2 | 1974 | $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); |
| 1951 | slt $c_3,$t_2,$zero | 1975 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 1952 | $SLL $t_2,1 | 1976 | $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); |
| 1953 | $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | 1977 | $code.=<<___; |
| 1954 | slt $a2,$t_1,$zero | ||
| 1955 | $ADDU $t_2,$a2 | ||
| 1956 | $SLL $t_1,1 | ||
| 1957 | $ADDU $c_1,$t_1 | ||
| 1958 | sltu $at,$c_1,$t_1 | ||
| 1959 | $ADDU $t_2,$at | ||
| 1960 | $ADDU $c_2,$t_2 | ||
| 1961 | sltu $at,$c_2,$t_2 | ||
| 1962 | $ADDU $c_3,$at | ||
| 1963 | mflo $t_1 | ||
| 1964 | mfhi $t_2 | ||
| 1965 | slt $at,$t_2,$zero | ||
| 1966 | $ADDU $c_3,$at | ||
| 1967 | $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
| 1968 | $SLL $t_2,1 | ||
| 1969 | slt $a2,$t_1,$zero | ||
| 1970 | $ADDU $t_2,$a2 | ||
| 1971 | $SLL $t_1,1 | ||
| 1972 | $ADDU $c_1,$t_1 | ||
| 1973 | sltu $at,$c_1,$t_1 | ||
| 1974 | $ADDU $t_2,$at | ||
| 1975 | $ADDU $c_2,$t_2 | ||
| 1976 | sltu $at,$c_2,$t_2 | ||
| 1977 | $ADDU $c_3,$at | ||
| 1978 | $ST $c_1,3*$BNSZ($a0) | 1978 | $ST $c_1,3*$BNSZ($a0) |
| 1979 | 1979 | ___ | |
| 1980 | mflo $t_1 | 1980 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 1981 | mfhi $t_2 | 1981 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
| 1982 | slt $c_1,$t_2,$zero | 1982 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 1983 | $SLL $t_2,1 | 1983 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
| 1984 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | 1984 | $code.=<<___; |
| 1985 | slt $a2,$t_1,$zero | ||
| 1986 | $ADDU $t_2,$a2 | ||
| 1987 | $SLL $t_1,1 | ||
| 1988 | $ADDU $c_2,$t_1 | ||
| 1989 | sltu $at,$c_2,$t_1 | ||
| 1990 | $ADDU $t_2,$at | ||
| 1991 | $ADDU $c_3,$t_2 | ||
| 1992 | sltu $at,$c_3,$t_2 | ||
| 1993 | $ADDU $c_1,$at | ||
| 1994 | mflo $t_1 | ||
| 1995 | mfhi $t_2 | ||
| 1996 | slt $at,$t_2,$zero | ||
| 1997 | $ADDU $c_1,$at | ||
| 1998 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
| 1999 | $SLL $t_2,1 | ||
| 2000 | slt $a2,$t_1,$zero | ||
| 2001 | $ADDU $t_2,$a2 | ||
| 2002 | $SLL $t_1,1 | ||
| 2003 | $ADDU $c_2,$t_1 | ||
| 2004 | sltu $at,$c_2,$t_1 | ||
| 2005 | $ADDU $t_2,$at | ||
| 2006 | $ADDU $c_3,$t_2 | ||
| 2007 | sltu $at,$c_3,$t_2 | ||
| 2008 | $ADDU $c_1,$at | ||
| 2009 | mflo $t_1 | 1985 | mflo $t_1 |
| 2010 | mfhi $t_2 | 1986 | mfhi $t_2 |
| 2011 | $ADDU $c_2,$t_1 | 1987 | $ADDU $c_2,$t_1 |
| @@ -2016,97 +1992,23 @@ $code.=<<___; | |||
| 2016 | sltu $at,$c_3,$t_2 | 1992 | sltu $at,$c_3,$t_2 |
| 2017 | $ADDU $c_1,$at | 1993 | $ADDU $c_1,$at |
| 2018 | $ST $c_2,4*$BNSZ($a0) | 1994 | $ST $c_2,4*$BNSZ($a0) |
| 2019 | 1995 | ___ | |
| 2020 | mflo $t_1 | 1996 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2021 | mfhi $t_2 | 1997 | $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); |
| 2022 | slt $c_2,$t_2,$zero | 1998 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2023 | $SLL $t_2,1 | 1999 | $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); |
| 2024 | $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | 2000 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2025 | slt $a2,$t_1,$zero | 2001 | $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); |
| 2026 | $ADDU $t_2,$a2 | 2002 | $code.=<<___; |
| 2027 | $SLL $t_1,1 | ||
| 2028 | $ADDU $c_3,$t_1 | ||
| 2029 | sltu $at,$c_3,$t_1 | ||
| 2030 | $ADDU $t_2,$at | ||
| 2031 | $ADDU $c_1,$t_2 | ||
| 2032 | sltu $at,$c_1,$t_2 | ||
| 2033 | $ADDU $c_2,$at | ||
| 2034 | mflo $t_1 | ||
| 2035 | mfhi $t_2 | ||
| 2036 | slt $at,$t_2,$zero | ||
| 2037 | $ADDU $c_2,$at | ||
| 2038 | $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
| 2039 | $SLL $t_2,1 | ||
| 2040 | slt $a2,$t_1,$zero | ||
| 2041 | $ADDU $t_2,$a2 | ||
| 2042 | $SLL $t_1,1 | ||
| 2043 | $ADDU $c_3,$t_1 | ||
| 2044 | sltu $at,$c_3,$t_1 | ||
| 2045 | $ADDU $t_2,$at | ||
| 2046 | $ADDU $c_1,$t_2 | ||
| 2047 | sltu $at,$c_1,$t_2 | ||
| 2048 | $ADDU $c_2,$at | ||
| 2049 | mflo $t_1 | ||
| 2050 | mfhi $t_2 | ||
| 2051 | slt $at,$t_2,$zero | ||
| 2052 | $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
| 2053 | $ADDU $c_2,$at | ||
| 2054 | $SLL $t_2,1 | ||
| 2055 | slt $a2,$t_1,$zero | ||
| 2056 | $ADDU $t_2,$a2 | ||
| 2057 | $SLL $t_1,1 | ||
| 2058 | $ADDU $c_3,$t_1 | ||
| 2059 | sltu $at,$c_3,$t_1 | ||
| 2060 | $ADDU $t_2,$at | ||
| 2061 | $ADDU $c_1,$t_2 | ||
| 2062 | sltu $at,$c_1,$t_2 | ||
| 2063 | $ADDU $c_2,$at | ||
| 2064 | $ST $c_3,5*$BNSZ($a0) | 2003 | $ST $c_3,5*$BNSZ($a0) |
| 2065 | 2004 | ___ | |
| 2066 | mflo $t_1 | 2005 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2067 | mfhi $t_2 | 2006 | $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); |
| 2068 | slt $c_3,$t_2,$zero | 2007 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2069 | $SLL $t_2,1 | 2008 | $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); |
| 2070 | $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | 2009 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2071 | slt $a2,$t_1,$zero | 2010 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
| 2072 | $ADDU $t_2,$a2 | 2011 | $code.=<<___; |
| 2073 | $SLL $t_1,1 | ||
| 2074 | $ADDU $c_1,$t_1 | ||
| 2075 | sltu $at,$c_1,$t_1 | ||
| 2076 | $ADDU $t_2,$at | ||
| 2077 | $ADDU $c_2,$t_2 | ||
| 2078 | sltu $at,$c_2,$t_2 | ||
| 2079 | $ADDU $c_3,$at | ||
| 2080 | mflo $t_1 | ||
| 2081 | mfhi $t_2 | ||
| 2082 | slt $at,$t_2,$zero | ||
| 2083 | $ADDU $c_3,$at | ||
| 2084 | $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
| 2085 | $SLL $t_2,1 | ||
| 2086 | slt $a2,$t_1,$zero | ||
| 2087 | $ADDU $t_2,$a2 | ||
| 2088 | $SLL $t_1,1 | ||
| 2089 | $ADDU $c_1,$t_1 | ||
| 2090 | sltu $at,$c_1,$t_1 | ||
| 2091 | $ADDU $t_2,$at | ||
| 2092 | $ADDU $c_2,$t_2 | ||
| 2093 | sltu $at,$c_2,$t_2 | ||
| 2094 | $ADDU $c_3,$at | ||
| 2095 | mflo $t_1 | ||
| 2096 | mfhi $t_2 | ||
| 2097 | slt $at,$t_2,$zero | ||
| 2098 | $ADDU $c_3,$at | ||
| 2099 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
| 2100 | $SLL $t_2,1 | ||
| 2101 | slt $a2,$t_1,$zero | ||
| 2102 | $ADDU $t_2,$a2 | ||
| 2103 | $SLL $t_1,1 | ||
| 2104 | $ADDU $c_1,$t_1 | ||
| 2105 | sltu $at,$c_1,$t_1 | ||
| 2106 | $ADDU $t_2,$at | ||
| 2107 | $ADDU $c_2,$t_2 | ||
| 2108 | sltu $at,$c_2,$t_2 | ||
| 2109 | $ADDU $c_3,$at | ||
| 2110 | mflo $t_1 | 2012 | mflo $t_1 |
| 2111 | mfhi $t_2 | 2013 | mfhi $t_2 |
| 2112 | $ADDU $c_1,$t_1 | 2014 | $ADDU $c_1,$t_1 |
| @@ -2117,112 +2019,25 @@ $code.=<<___; | |||
| 2117 | sltu $at,$c_2,$t_2 | 2019 | sltu $at,$c_2,$t_2 |
| 2118 | $ADDU $c_3,$at | 2020 | $ADDU $c_3,$at |
| 2119 | $ST $c_1,6*$BNSZ($a0) | 2021 | $ST $c_1,6*$BNSZ($a0) |
| 2120 | 2022 | ___ | |
| 2121 | mflo $t_1 | 2023 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2122 | mfhi $t_2 | 2024 | $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); |
| 2123 | slt $c_1,$t_2,$zero | 2025 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2124 | $SLL $t_2,1 | 2026 | $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); |
| 2125 | $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | 2027 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2126 | slt $a2,$t_1,$zero | 2028 | $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); |
| 2127 | $ADDU $t_2,$a2 | 2029 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2128 | $SLL $t_1,1 | 2030 | $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); |
| 2129 | $ADDU $c_2,$t_1 | 2031 | $code.=<<___; |
| 2130 | sltu $at,$c_2,$t_1 | ||
| 2131 | $ADDU $t_2,$at | ||
| 2132 | $ADDU $c_3,$t_2 | ||
| 2133 | sltu $at,$c_3,$t_2 | ||
| 2134 | $ADDU $c_1,$at | ||
| 2135 | mflo $t_1 | ||
| 2136 | mfhi $t_2 | ||
| 2137 | slt $at,$t_2,$zero | ||
| 2138 | $ADDU $c_1,$at | ||
| 2139 | $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
| 2140 | $SLL $t_2,1 | ||
| 2141 | slt $a2,$t_1,$zero | ||
| 2142 | $ADDU $t_2,$a2 | ||
| 2143 | $SLL $t_1,1 | ||
| 2144 | $ADDU $c_2,$t_1 | ||
| 2145 | sltu $at,$c_2,$t_1 | ||
| 2146 | $ADDU $t_2,$at | ||
| 2147 | $ADDU $c_3,$t_2 | ||
| 2148 | sltu $at,$c_3,$t_2 | ||
| 2149 | $ADDU $c_1,$at | ||
| 2150 | mflo $t_1 | ||
| 2151 | mfhi $t_2 | ||
| 2152 | slt $at,$t_2,$zero | ||
| 2153 | $ADDU $c_1,$at | ||
| 2154 | $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
| 2155 | $SLL $t_2,1 | ||
| 2156 | slt $a2,$t_1,$zero | ||
| 2157 | $ADDU $t_2,$a2 | ||
| 2158 | $SLL $t_1,1 | ||
| 2159 | $ADDU $c_2,$t_1 | ||
| 2160 | sltu $at,$c_2,$t_1 | ||
| 2161 | $ADDU $t_2,$at | ||
| 2162 | $ADDU $c_3,$t_2 | ||
| 2163 | sltu $at,$c_3,$t_2 | ||
| 2164 | $ADDU $c_1,$at | ||
| 2165 | mflo $t_1 | ||
| 2166 | mfhi $t_2 | ||
| 2167 | slt $at,$t_2,$zero | ||
| 2168 | $ADDU $c_1,$at | ||
| 2169 | $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
| 2170 | $SLL $t_2,1 | ||
| 2171 | slt $a2,$t_1,$zero | ||
| 2172 | $ADDU $t_2,$a2 | ||
| 2173 | $SLL $t_1,1 | ||
| 2174 | $ADDU $c_2,$t_1 | ||
| 2175 | sltu $at,$c_2,$t_1 | ||
| 2176 | $ADDU $t_2,$at | ||
| 2177 | $ADDU $c_3,$t_2 | ||
| 2178 | sltu $at,$c_3,$t_2 | ||
| 2179 | $ADDU $c_1,$at | ||
| 2180 | $ST $c_2,7*$BNSZ($a0) | 2032 | $ST $c_2,7*$BNSZ($a0) |
| 2181 | 2033 | ___ | |
| 2182 | mflo $t_1 | 2034 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2183 | mfhi $t_2 | 2035 | $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); |
| 2184 | slt $c_2,$t_2,$zero | 2036 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2185 | $SLL $t_2,1 | 2037 | $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); |
| 2186 | $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | 2038 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2187 | slt $a2,$t_1,$zero | 2039 | $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); |
| 2188 | $ADDU $t_2,$a2 | 2040 | $code.=<<___; |
| 2189 | $SLL $t_1,1 | ||
| 2190 | $ADDU $c_3,$t_1 | ||
| 2191 | sltu $at,$c_3,$t_1 | ||
| 2192 | $ADDU $t_2,$at | ||
| 2193 | $ADDU $c_1,$t_2 | ||
| 2194 | sltu $at,$c_1,$t_2 | ||
| 2195 | $ADDU $c_2,$at | ||
| 2196 | mflo $t_1 | ||
| 2197 | mfhi $t_2 | ||
| 2198 | slt $at,$t_2,$zero | ||
| 2199 | $ADDU $c_2,$at | ||
| 2200 | $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
| 2201 | $SLL $t_2,1 | ||
| 2202 | slt $a2,$t_1,$zero | ||
| 2203 | $ADDU $t_2,$a2 | ||
| 2204 | $SLL $t_1,1 | ||
| 2205 | $ADDU $c_3,$t_1 | ||
| 2206 | sltu $at,$c_3,$t_1 | ||
| 2207 | $ADDU $t_2,$at | ||
| 2208 | $ADDU $c_1,$t_2 | ||
| 2209 | sltu $at,$c_1,$t_2 | ||
| 2210 | $ADDU $c_2,$at | ||
| 2211 | mflo $t_1 | ||
| 2212 | mfhi $t_2 | ||
| 2213 | slt $at,$t_2,$zero | ||
| 2214 | $ADDU $c_2,$at | ||
| 2215 | $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
| 2216 | $SLL $t_2,1 | ||
| 2217 | slt $a2,$t_1,$zero | ||
| 2218 | $ADDU $t_2,$a2 | ||
| 2219 | $SLL $t_1,1 | ||
| 2220 | $ADDU $c_3,$t_1 | ||
| 2221 | sltu $at,$c_3,$t_1 | ||
| 2222 | $ADDU $t_2,$at | ||
| 2223 | $ADDU $c_1,$t_2 | ||
| 2224 | sltu $at,$c_1,$t_2 | ||
| 2225 | $ADDU $c_2,$at | ||
| 2226 | mflo $t_1 | 2041 | mflo $t_1 |
| 2227 | mfhi $t_2 | 2042 | mfhi $t_2 |
| 2228 | $ADDU $c_3,$t_1 | 2043 | $ADDU $c_3,$t_1 |
| @@ -2233,82 +2048,21 @@ $code.=<<___; | |||
| 2233 | sltu $at,$c_1,$t_2 | 2048 | sltu $at,$c_1,$t_2 |
| 2234 | $ADDU $c_2,$at | 2049 | $ADDU $c_2,$at |
| 2235 | $ST $c_3,8*$BNSZ($a0) | 2050 | $ST $c_3,8*$BNSZ($a0) |
| 2236 | 2051 | ___ | |
| 2237 | mflo $t_1 | 2052 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2238 | mfhi $t_2 | 2053 | $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); |
| 2239 | slt $c_3,$t_2,$zero | 2054 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2240 | $SLL $t_2,1 | 2055 | $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); |
| 2241 | $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | 2056 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2242 | slt $a2,$t_1,$zero | 2057 | $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); |
| 2243 | $ADDU $t_2,$a2 | 2058 | $code.=<<___; |
| 2244 | $SLL $t_1,1 | ||
| 2245 | $ADDU $c_1,$t_1 | ||
| 2246 | sltu $at,$c_1,$t_1 | ||
| 2247 | $ADDU $t_2,$at | ||
| 2248 | $ADDU $c_2,$t_2 | ||
| 2249 | sltu $at,$c_2,$t_2 | ||
| 2250 | $ADDU $c_3,$at | ||
| 2251 | mflo $t_1 | ||
| 2252 | mfhi $t_2 | ||
| 2253 | slt $at,$t_2,$zero | ||
| 2254 | $ADDU $c_3,$at | ||
| 2255 | $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
| 2256 | $SLL $t_2,1 | ||
| 2257 | slt $a2,$t_1,$zero | ||
| 2258 | $ADDU $t_2,$a2 | ||
| 2259 | $SLL $t_1,1 | ||
| 2260 | $ADDU $c_1,$t_1 | ||
| 2261 | sltu $at,$c_1,$t_1 | ||
| 2262 | $ADDU $t_2,$at | ||
| 2263 | $ADDU $c_2,$t_2 | ||
| 2264 | sltu $at,$c_2,$t_2 | ||
| 2265 | $ADDU $c_3,$at | ||
| 2266 | mflo $t_1 | ||
| 2267 | mfhi $t_2 | ||
| 2268 | slt $at,$t_2,$zero | ||
| 2269 | $ADDU $c_3,$at | ||
| 2270 | $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
| 2271 | $SLL $t_2,1 | ||
| 2272 | slt $a2,$t_1,$zero | ||
| 2273 | $ADDU $t_2,$a2 | ||
| 2274 | $SLL $t_1,1 | ||
| 2275 | $ADDU $c_1,$t_1 | ||
| 2276 | sltu $at,$c_1,$t_1 | ||
| 2277 | $ADDU $t_2,$at | ||
| 2278 | $ADDU $c_2,$t_2 | ||
| 2279 | sltu $at,$c_2,$t_2 | ||
| 2280 | $ADDU $c_3,$at | ||
| 2281 | $ST $c_1,9*$BNSZ($a0) | 2059 | $ST $c_1,9*$BNSZ($a0) |
| 2282 | 2060 | ___ | |
| 2283 | mflo $t_1 | 2061 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2284 | mfhi $t_2 | 2062 | $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); |
| 2285 | slt $c_1,$t_2,$zero | 2063 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2286 | $SLL $t_2,1 | 2064 | $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); |
| 2287 | $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | 2065 | $code.=<<___; |
| 2288 | slt $a2,$t_1,$zero | ||
| 2289 | $ADDU $t_2,$a2 | ||
| 2290 | $SLL $t_1,1 | ||
| 2291 | $ADDU $c_2,$t_1 | ||
| 2292 | sltu $at,$c_2,$t_1 | ||
| 2293 | $ADDU $t_2,$at | ||
| 2294 | $ADDU $c_3,$t_2 | ||
| 2295 | sltu $at,$c_3,$t_2 | ||
| 2296 | $ADDU $c_1,$at | ||
| 2297 | mflo $t_1 | ||
| 2298 | mfhi $t_2 | ||
| 2299 | slt $at,$t_2,$zero | ||
| 2300 | $ADDU $c_1,$at | ||
| 2301 | $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
| 2302 | $SLL $t_2,1 | ||
| 2303 | slt $a2,$t_1,$zero | ||
| 2304 | $ADDU $t_2,$a2 | ||
| 2305 | $SLL $t_1,1 | ||
| 2306 | $ADDU $c_2,$t_1 | ||
| 2307 | sltu $at,$c_2,$t_1 | ||
| 2308 | $ADDU $t_2,$at | ||
| 2309 | $ADDU $c_3,$t_2 | ||
| 2310 | sltu $at,$c_3,$t_2 | ||
| 2311 | $ADDU $c_1,$at | ||
| 2312 | mflo $t_1 | 2066 | mflo $t_1 |
| 2313 | mfhi $t_2 | 2067 | mfhi $t_2 |
| 2314 | $ADDU $c_2,$t_1 | 2068 | $ADDU $c_2,$t_1 |
| @@ -2319,52 +2073,17 @@ $code.=<<___; | |||
| 2319 | sltu $at,$c_3,$t_2 | 2073 | sltu $at,$c_3,$t_2 |
| 2320 | $ADDU $c_1,$at | 2074 | $ADDU $c_1,$at |
| 2321 | $ST $c_2,10*$BNSZ($a0) | 2075 | $ST $c_2,10*$BNSZ($a0) |
| 2322 | 2076 | ___ | |
| 2323 | mflo $t_1 | 2077 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2324 | mfhi $t_2 | 2078 | $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); |
| 2325 | slt $c_2,$t_2,$zero | 2079 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2326 | $SLL $t_2,1 | 2080 | $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); |
| 2327 | $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | 2081 | $code.=<<___; |
| 2328 | slt $a2,$t_1,$zero | ||
| 2329 | $ADDU $t_2,$a2 | ||
| 2330 | $SLL $t_1,1 | ||
| 2331 | $ADDU $c_3,$t_1 | ||
| 2332 | sltu $at,$c_3,$t_1 | ||
| 2333 | $ADDU $t_2,$at | ||
| 2334 | $ADDU $c_1,$t_2 | ||
| 2335 | sltu $at,$c_1,$t_2 | ||
| 2336 | $ADDU $c_2,$at | ||
| 2337 | mflo $t_1 | ||
| 2338 | mfhi $t_2 | ||
| 2339 | slt $at,$t_2,$zero | ||
| 2340 | $ADDU $c_2,$at | ||
| 2341 | $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
| 2342 | $SLL $t_2,1 | ||
| 2343 | slt $a2,$t_1,$zero | ||
| 2344 | $ADDU $t_2,$a2 | ||
| 2345 | $SLL $t_1,1 | ||
| 2346 | $ADDU $c_3,$t_1 | ||
| 2347 | sltu $at,$c_3,$t_1 | ||
| 2348 | $ADDU $t_2,$at | ||
| 2349 | $ADDU $c_1,$t_2 | ||
| 2350 | sltu $at,$c_1,$t_2 | ||
| 2351 | $ADDU $c_2,$at | ||
| 2352 | $ST $c_3,11*$BNSZ($a0) | 2082 | $ST $c_3,11*$BNSZ($a0) |
| 2353 | 2083 | ___ | |
| 2354 | mflo $t_1 | 2084 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2355 | mfhi $t_2 | 2085 | $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); |
| 2356 | slt $c_3,$t_2,$zero | 2086 | $code.=<<___; |
| 2357 | $SLL $t_2,1 | ||
| 2358 | $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
| 2359 | slt $a2,$t_1,$zero | ||
| 2360 | $ADDU $t_2,$a2 | ||
| 2361 | $SLL $t_1,1 | ||
| 2362 | $ADDU $c_1,$t_1 | ||
| 2363 | sltu $at,$c_1,$t_1 | ||
| 2364 | $ADDU $t_2,$at | ||
| 2365 | $ADDU $c_2,$t_2 | ||
| 2366 | sltu $at,$c_2,$t_2 | ||
| 2367 | $ADDU $c_3,$at | ||
| 2368 | mflo $t_1 | 2087 | mflo $t_1 |
| 2369 | mfhi $t_2 | 2088 | mfhi $t_2 |
| 2370 | $ADDU $c_1,$t_1 | 2089 | $ADDU $c_1,$t_1 |
| @@ -2375,21 +2094,10 @@ $code.=<<___; | |||
| 2375 | sltu $at,$c_2,$t_2 | 2094 | sltu $at,$c_2,$t_2 |
| 2376 | $ADDU $c_3,$at | 2095 | $ADDU $c_3,$at |
| 2377 | $ST $c_1,12*$BNSZ($a0) | 2096 | $ST $c_1,12*$BNSZ($a0) |
| 2378 | 2097 | ___ | |
| 2379 | mflo $t_1 | 2098 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2380 | mfhi $t_2 | 2099 | $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); |
| 2381 | slt $c_1,$t_2,$zero | 2100 | $code.=<<___; |
| 2382 | $SLL $t_2,1 | ||
| 2383 | $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
| 2384 | slt $a2,$t_1,$zero | ||
| 2385 | $ADDU $t_2,$a2 | ||
| 2386 | $SLL $t_1,1 | ||
| 2387 | $ADDU $c_2,$t_1 | ||
| 2388 | sltu $at,$c_2,$t_1 | ||
| 2389 | $ADDU $t_2,$at | ||
| 2390 | $ADDU $c_3,$t_2 | ||
| 2391 | sltu $at,$c_3,$t_2 | ||
| 2392 | $ADDU $c_1,$at | ||
| 2393 | $ST $c_2,13*$BNSZ($a0) | 2101 | $ST $c_2,13*$BNSZ($a0) |
| 2394 | 2102 | ||
| 2395 | mflo $t_1 | 2103 | mflo $t_1 |
| @@ -2457,21 +2165,10 @@ $code.=<<___; | |||
| 2457 | sltu $at,$c_2,$t_1 | 2165 | sltu $at,$c_2,$t_1 |
| 2458 | $ADDU $c_3,$t_2,$at | 2166 | $ADDU $c_3,$t_2,$at |
| 2459 | $ST $c_2,$BNSZ($a0) | 2167 | $ST $c_2,$BNSZ($a0) |
| 2460 | 2168 | ___ | |
| 2461 | mflo $t_1 | 2169 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2462 | mfhi $t_2 | 2170 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
| 2463 | slt $c_2,$t_2,$zero | 2171 | $code.=<<___; |
| 2464 | $SLL $t_2,1 | ||
| 2465 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
| 2466 | slt $a2,$t_1,$zero | ||
| 2467 | $ADDU $t_2,$a2 | ||
| 2468 | $SLL $t_1,1 | ||
| 2469 | $ADDU $c_3,$t_1 | ||
| 2470 | sltu $at,$c_3,$t_1 | ||
| 2471 | $ADDU $t_2,$at | ||
| 2472 | $ADDU $c_1,$t_2 | ||
| 2473 | sltu $at,$c_1,$t_2 | ||
| 2474 | $ADDU $c_2,$at | ||
| 2475 | mflo $t_1 | 2172 | mflo $t_1 |
| 2476 | mfhi $t_2 | 2173 | mfhi $t_2 |
| 2477 | $ADDU $c_3,$t_1 | 2174 | $ADDU $c_3,$t_1 |
| @@ -2482,52 +2179,17 @@ $code.=<<___; | |||
| 2482 | sltu $at,$c_1,$t_2 | 2179 | sltu $at,$c_1,$t_2 |
| 2483 | $ADDU $c_2,$at | 2180 | $ADDU $c_2,$at |
| 2484 | $ST $c_3,2*$BNSZ($a0) | 2181 | $ST $c_3,2*$BNSZ($a0) |
| 2485 | 2182 | ___ | |
| 2486 | mflo $t_1 | 2183 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2487 | mfhi $t_2 | 2184 | $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); |
| 2488 | slt $c_3,$t_2,$zero | 2185 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2489 | $SLL $t_2,1 | 2186 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
| 2490 | $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); | 2187 | $code.=<<___; |
| 2491 | slt $a2,$t_1,$zero | ||
| 2492 | $ADDU $t_2,$a2 | ||
| 2493 | $SLL $t_1,1 | ||
| 2494 | $ADDU $c_1,$t_1 | ||
| 2495 | sltu $at,$c_1,$t_1 | ||
| 2496 | $ADDU $t_2,$at | ||
| 2497 | $ADDU $c_2,$t_2 | ||
| 2498 | sltu $at,$c_2,$t_2 | ||
| 2499 | $ADDU $c_3,$at | ||
| 2500 | mflo $t_1 | ||
| 2501 | mfhi $t_2 | ||
| 2502 | slt $at,$t_2,$zero | ||
| 2503 | $ADDU $c_3,$at | ||
| 2504 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
| 2505 | $SLL $t_2,1 | ||
| 2506 | slt $a2,$t_1,$zero | ||
| 2507 | $ADDU $t_2,$a2 | ||
| 2508 | $SLL $t_1,1 | ||
| 2509 | $ADDU $c_1,$t_1 | ||
| 2510 | sltu $at,$c_1,$t_1 | ||
| 2511 | $ADDU $t_2,$at | ||
| 2512 | $ADDU $c_2,$t_2 | ||
| 2513 | sltu $at,$c_2,$t_2 | ||
| 2514 | $ADDU $c_3,$at | ||
| 2515 | $ST $c_1,3*$BNSZ($a0) | 2188 | $ST $c_1,3*$BNSZ($a0) |
| 2516 | 2189 | ___ | |
| 2517 | mflo $t_1 | 2190 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2518 | mfhi $t_2 | 2191 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
| 2519 | slt $c_1,$t_2,$zero | 2192 | $code.=<<___; |
| 2520 | $SLL $t_2,1 | ||
| 2521 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
| 2522 | slt $a2,$t_1,$zero | ||
| 2523 | $ADDU $t_2,$a2 | ||
| 2524 | $SLL $t_1,1 | ||
| 2525 | $ADDU $c_2,$t_1 | ||
| 2526 | sltu $at,$c_2,$t_1 | ||
| 2527 | $ADDU $t_2,$at | ||
| 2528 | $ADDU $c_3,$t_2 | ||
| 2529 | sltu $at,$c_3,$t_2 | ||
| 2530 | $ADDU $c_1,$at | ||
| 2531 | mflo $t_1 | 2193 | mflo $t_1 |
| 2532 | mfhi $t_2 | 2194 | mfhi $t_2 |
| 2533 | $ADDU $c_2,$t_1 | 2195 | $ADDU $c_2,$t_1 |
| @@ -2538,21 +2200,10 @@ $code.=<<___; | |||
| 2538 | sltu $at,$c_3,$t_2 | 2200 | sltu $at,$c_3,$t_2 |
| 2539 | $ADDU $c_1,$at | 2201 | $ADDU $c_1,$at |
| 2540 | $ST $c_2,4*$BNSZ($a0) | 2202 | $ST $c_2,4*$BNSZ($a0) |
| 2541 | 2203 | ___ | |
| 2542 | mflo $t_1 | 2204 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2543 | mfhi $t_2 | 2205 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
| 2544 | slt $c_2,$t_2,$zero | 2206 | $code.=<<___; |
| 2545 | $SLL $t_2,1 | ||
| 2546 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
| 2547 | slt $a2,$t_1,$zero | ||
| 2548 | $ADDU $t_2,$a2 | ||
| 2549 | $SLL $t_1,1 | ||
| 2550 | $ADDU $c_3,$t_1 | ||
| 2551 | sltu $at,$c_3,$t_1 | ||
| 2552 | $ADDU $t_2,$at | ||
| 2553 | $ADDU $c_1,$t_2 | ||
| 2554 | sltu $at,$c_1,$t_2 | ||
| 2555 | $ADDU $c_2,$at | ||
| 2556 | $ST $c_3,5*$BNSZ($a0) | 2207 | $ST $c_3,5*$BNSZ($a0) |
| 2557 | 2208 | ||
| 2558 | mflo $t_1 | 2209 | mflo $t_1 |
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c index c9a2b6be73..9deffa71f1 100644 --- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c +++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: x86_64-gcc.c,v 1.4 2014/10/28 07:35:58 jsg Exp $ */ | 1 | /* $OpenBSD: x86_64-gcc.c,v 1.5 2015/02/25 15:39:49 bcook Exp $ */ |
| 2 | #include "../bn_lcl.h" | 2 | #include "../bn_lcl.h" |
| 3 | #if !(defined(__GNUC__) && __GNUC__>=2) | 3 | #if !(defined(__GNUC__) && __GNUC__>=2) |
| 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
| @@ -270,77 +270,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ |
| 271 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 271 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
| 272 | 272 | ||
| 273 | /* | ||
| 274 | * Keep in mind that carrying into high part of multiplication result | ||
| 275 | * can not overflow, because it cannot be all-ones. | ||
| 276 | */ | ||
| 273 | #if 0 | 277 | #if 0 |
| 274 | /* original macros are kept for reference purposes */ | 278 | /* original macros are kept for reference purposes */ |
| 275 | #define mul_add_c(a,b,c0,c1,c2) { \ | 279 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 276 | BN_ULONG ta=(a),tb=(b); \ | 280 | BN_ULONG ta = (a), tb = (b); \ |
| 277 | t1 = ta * tb; \ | 281 | BN_ULONG lo, hi; \ |
| 278 | t2 = BN_UMULT_HIGH(ta,tb); \ | 282 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 279 | c0 += t1; t2 += (c0<t1)?1:0; \ | 283 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 280 | c1 += t2; c2 += (c1<t2)?1:0; \ | 284 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 281 | } | 285 | } while(0) |
| 282 | 286 | ||
| 283 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 287 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 284 | BN_ULONG ta=(a),tb=(b),t0; \ | 288 | BN_ULONG ta = (a), tb = (b); \ |
| 285 | t1 = BN_UMULT_HIGH(ta,tb); \ | 289 | BN_ULONG lo, hi, tt; \ |
| 286 | t0 = ta * tb; \ | 290 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 287 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 291 | c0 += lo; tt = hi+((c0<lo)?1:0); \ |
| 288 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 292 | c1 += tt; c2 += (c1<tt)?1:0; \ |
| 289 | c0 += t1; t2 += (c0<t1)?1:0; \ | 293 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 290 | c1 += t2; c2 += (c1<t2)?1:0; \ | 294 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 291 | } | 295 | } while(0) |
| 296 | |||
| 297 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 298 | BN_ULONG ta = (a)[i]; \ | ||
| 299 | BN_ULONG lo, hi; \ | ||
| 300 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
| 301 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 302 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 303 | } while(0) | ||
| 292 | #else | 304 | #else |
| 293 | #define mul_add_c(a,b,c0,c1,c2) do { \ | 305 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 306 | BN_ULONG t1,t2; \ | ||
| 294 | asm ("mulq %3" \ | 307 | asm ("mulq %3" \ |
| 295 | : "=a"(t1),"=d"(t2) \ | 308 | : "=a"(t1),"=d"(t2) \ |
| 296 | : "a"(a),"m"(b) \ | 309 | : "a"(a),"m"(b) \ |
| 297 | : "cc"); \ | 310 | : "cc"); \ |
| 298 | asm ("addq %2,%0; adcq %3,%1" \ | 311 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 299 | : "+r"(c0),"+d"(t2) \ | 312 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 300 | : "a"(t1),"g"(0) \ | 313 | : "r"(t1),"r"(t2),"g"(0) \ |
| 301 | : "cc"); \ | 314 | : "cc"); \ |
| 302 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 303 | : "+r"(c1),"+r"(c2) \ | ||
| 304 | : "d"(t2),"g"(0) \ | ||
| 305 | : "cc"); \ | ||
| 306 | } while (0) | 315 | } while (0) |
| 307 | 316 | ||
| 308 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | 317 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
| 318 | BN_ULONG t1,t2; \ | ||
| 309 | asm ("mulq %2" \ | 319 | asm ("mulq %2" \ |
| 310 | : "=a"(t1),"=d"(t2) \ | 320 | : "=a"(t1),"=d"(t2) \ |
| 311 | : "a"(a[i]) \ | 321 | : "a"(a[i]) \ |
| 312 | : "cc"); \ | 322 | : "cc"); \ |
| 313 | asm ("addq %2,%0; adcq %3,%1" \ | 323 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 314 | : "+r"(c0),"+d"(t2) \ | 324 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 315 | : "a"(t1),"g"(0) \ | 325 | : "r"(t1),"r"(t2),"g"(0) \ |
| 316 | : "cc"); \ | 326 | : "cc"); \ |
| 317 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 318 | : "+r"(c1),"+r"(c2) \ | ||
| 319 | : "d"(t2),"g"(0) \ | ||
| 320 | : "cc"); \ | ||
| 321 | } while (0) | 327 | } while (0) |
| 322 | 328 | ||
| 323 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | 329 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 330 | BN_ULONG t1,t2; \ | ||
| 324 | asm ("mulq %3" \ | 331 | asm ("mulq %3" \ |
| 325 | : "=a"(t1),"=d"(t2) \ | 332 | : "=a"(t1),"=d"(t2) \ |
| 326 | : "a"(a),"m"(b) \ | 333 | : "a"(a),"m"(b) \ |
| 327 | : "cc"); \ | 334 | : "cc"); \ |
| 328 | asm ("addq %0,%0; adcq %2,%1" \ | 335 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 329 | : "+d"(t2),"+r"(c2) \ | 336 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 330 | : "g"(0) \ | 337 | : "r"(t1),"r"(t2),"g"(0) \ |
| 331 | : "cc"); \ | 338 | : "cc"); \ |
| 332 | asm ("addq %0,%0; adcq %2,%1" \ | 339 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 333 | : "+a"(t1),"+d"(t2) \ | 340 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 334 | : "g"(0) \ | 341 | : "r"(t1),"r"(t2),"g"(0) \ |
| 335 | : "cc"); \ | 342 | : "cc"); \ |
| 336 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 337 | : "+r"(c0),"+d"(t2) \ | ||
| 338 | : "a"(t1),"g"(0) \ | ||
| 339 | : "cc"); \ | ||
| 340 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 341 | : "+r"(c1),"+r"(c2) \ | ||
| 342 | : "d"(t2),"g"(0) \ | ||
| 343 | : "cc"); \ | ||
| 344 | } while (0) | 343 | } while (0) |
| 345 | #endif | 344 | #endif |
| 346 | 345 | ||
| @@ -349,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 349 | 348 | ||
| 350 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 349 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 351 | { | 350 | { |
| 352 | BN_ULONG t1,t2; | ||
| 353 | BN_ULONG c1,c2,c3; | 351 | BN_ULONG c1,c2,c3; |
| 354 | 352 | ||
| 355 | c1=0; | 353 | c1=0; |
| @@ -453,7 +451,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 453 | 451 | ||
| 454 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 452 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 455 | { | 453 | { |
| 456 | BN_ULONG t1,t2; | ||
| 457 | BN_ULONG c1,c2,c3; | 454 | BN_ULONG c1,c2,c3; |
| 458 | 455 | ||
| 459 | c1=0; | 456 | c1=0; |
| @@ -493,7 +490,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 493 | 490 | ||
| 494 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 491 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 495 | { | 492 | { |
| 496 | BN_ULONG t1,t2; | ||
| 497 | BN_ULONG c1,c2,c3; | 493 | BN_ULONG c1,c2,c3; |
| 498 | 494 | ||
| 499 | c1=0; | 495 | c1=0; |
| @@ -569,7 +565,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
| 569 | 565 | ||
| 570 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 566 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 571 | { | 567 | { |
| 572 | BN_ULONG t1,t2; | ||
| 573 | BN_ULONG c1,c2,c3; | 568 | BN_ULONG c1,c2,c3; |
| 574 | 569 | ||
| 575 | c1=0; | 570 | c1=0; |
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index c6efd2513a..49f0ba5d7b 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_asm.c,v 1.13 2014/07/11 08:44:47 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.14 2015/02/25 15:39:49 bcook Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -495,116 +495,143 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 495 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 495 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
| 496 | 496 | ||
| 497 | #ifdef BN_LLONG | 497 | #ifdef BN_LLONG |
| 498 | #define mul_add_c(a,b,c0,c1,c2) \ | 498 | /* |
| 499 | t=(BN_ULLONG)a*b; \ | 499 | * Keep in mind that additions to multiplication result can not |
| 500 | t1=(BN_ULONG)Lw(t); \ | 500 | * overflow, because its high half cannot be all-ones. |
| 501 | t2=(BN_ULONG)Hw(t); \ | 501 | */ |
| 502 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 502 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 503 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 503 | BN_ULONG hi; \ |
| 504 | 504 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ | |
| 505 | #define mul_add_c2(a,b,c0,c1,c2) \ | 505 | t += c0; /* no carry */ \ |
| 506 | t=(BN_ULLONG)a*b; \ | 506 | c0 = (BN_ULONG)Lw(t); \ |
| 507 | tt=(t+t)&BN_MASK; \ | 507 | hi = (BN_ULONG)Hw(t); \ |
| 508 | if (tt < t) c2++; \ | 508 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 509 | t1=(BN_ULONG)Lw(tt); \ | 509 | } while(0) |
| 510 | t2=(BN_ULONG)Hw(tt); \ | 510 | |
| 511 | c0=(c0+t1)&BN_MASK2; \ | 511 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 512 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 512 | BN_ULONG hi; \ |
| 513 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 513 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ |
| 514 | 514 | BN_ULLONG tt = t+c0; /* no carry */ \ | |
| 515 | #define sqr_add_c(a,i,c0,c1,c2) \ | 515 | c0 = (BN_ULONG)Lw(tt); \ |
| 516 | t=(BN_ULLONG)a[i]*a[i]; \ | 516 | hi = (BN_ULONG)Hw(tt); \ |
| 517 | t1=(BN_ULONG)Lw(t); \ | 517 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 518 | t2=(BN_ULONG)Hw(t); \ | 518 | t += c0; /* no carry */ \ |
| 519 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 519 | c0 = (BN_ULONG)Lw(t); \ |
| 520 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 520 | hi = (BN_ULONG)Hw(t); \ |
| 521 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 522 | } while(0) | ||
| 523 | |||
| 524 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 525 | BN_ULONG hi; \ | ||
| 526 | BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \ | ||
| 527 | t += c0; /* no carry */ \ | ||
| 528 | c0 = (BN_ULONG)Lw(t); \ | ||
| 529 | hi = (BN_ULONG)Hw(t); \ | ||
| 530 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 531 | } while(0) | ||
| 521 | 532 | ||
| 522 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 533 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 523 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 534 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 524 | 535 | ||
| 525 | #elif defined(BN_UMULT_LOHI) | 536 | #elif defined(BN_UMULT_LOHI) |
| 526 | 537 | /* | |
| 527 | #define mul_add_c(a,b,c0,c1,c2) { \ | 538 | * Keep in mind that additions to hi can not overflow, because |
| 528 | BN_ULONG ta=(a),tb=(b); \ | 539 | * the high word of a multiplication result cannot be all-ones. |
| 529 | BN_UMULT_LOHI(t1,t2,ta,tb); \ | 540 | */ |
| 530 | c0 += t1; t2 += (c0<t1)?1:0; \ | 541 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 531 | c1 += t2; c2 += (c1<t2)?1:0; \ | 542 | BN_ULONG ta = (a), tb = (b); \ |
| 532 | } | 543 | BN_ULONG lo, hi; \ |
| 533 | 544 | BN_UMULT_LOHI(lo,hi,ta,tb); \ | |
| 534 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 545 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 535 | BN_ULONG ta=(a),tb=(b),t0; \ | 546 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 536 | BN_UMULT_LOHI(t0,t1,ta,tb); \ | 547 | } while(0) |
| 537 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 548 | |
| 538 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 549 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 539 | c0 += t1; t2 += (c0<t1)?1:0; \ | 550 | BN_ULONG ta = (a), tb = (b); \ |
| 540 | c1 += t2; c2 += (c1<t2)?1:0; \ | 551 | BN_ULONG lo, hi, tt; \ |
| 541 | } | 552 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 542 | 553 | c0 += lo; tt = hi+((c0<lo)?1:0); \ | |
| 543 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 554 | c1 += tt; c2 += (c1<tt)?1:0; \ |
| 544 | BN_ULONG ta=(a)[i]; \ | 555 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 545 | BN_UMULT_LOHI(t1,t2,ta,ta); \ | 556 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 546 | c0 += t1; t2 += (c0<t1)?1:0; \ | 557 | } while(0) |
| 547 | c1 += t2; c2 += (c1<t2)?1:0; \ | 558 | |
| 548 | } | 559 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
| 560 | BN_ULONG ta = (a)[i]; \ | ||
| 561 | BN_ULONG lo, hi; \ | ||
| 562 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
| 563 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 564 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 565 | } while(0) | ||
| 549 | 566 | ||
| 550 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 567 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 551 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 568 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 552 | 569 | ||
| 553 | #elif defined(BN_UMULT_HIGH) | 570 | #elif defined(BN_UMULT_HIGH) |
| 554 | 571 | /* | |
| 555 | #define mul_add_c(a,b,c0,c1,c2) { \ | 572 | * Keep in mind that additions to hi can not overflow, because |
| 556 | BN_ULONG ta=(a),tb=(b); \ | 573 | * the high word of a multiplication result cannot be all-ones. |
| 557 | t1 = ta * tb; \ | 574 | */ |
| 558 | t2 = BN_UMULT_HIGH(ta,tb); \ | 575 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 559 | c0 += t1; t2 += (c0<t1)?1:0; \ | 576 | BN_ULONG ta = (a), tb = (b); \ |
| 560 | c1 += t2; c2 += (c1<t2)?1:0; \ | 577 | BN_ULONG lo = ta * tb; \ |
| 561 | } | 578 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
| 562 | 579 | c0 += lo; hi += (c0<lo)?1:0; \ | |
| 563 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 580 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 564 | BN_ULONG ta=(a),tb=(b),t0; \ | 581 | } while(0) |
| 565 | t1 = BN_UMULT_HIGH(ta,tb); \ | 582 | |
| 566 | t0 = ta * tb; \ | 583 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 567 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 584 | BN_ULONG ta = (a), tb = (b), tt; \ |
| 568 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 585 | BN_ULONG lo = ta * tb; \ |
| 569 | c0 += t1; t2 += (c0<t1)?1:0; \ | 586 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
| 570 | c1 += t2; c2 += (c1<t2)?1:0; \ | 587 | c0 += lo; tt = hi + ((c0<lo)?1:0); \ |
| 571 | } | 588 | c1 += tt; c2 += (c1<tt)?1:0; \ |
| 572 | 589 | c0 += lo; hi += (c0<lo)?1:0; \ | |
| 573 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 590 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 574 | BN_ULONG ta=(a)[i]; \ | 591 | } while(0) |
| 575 | t1 = ta * ta; \ | 592 | |
| 576 | t2 = BN_UMULT_HIGH(ta,ta); \ | 593 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
| 577 | c0 += t1; t2 += (c0<t1)?1:0; \ | 594 | BN_ULONG ta = (a)[i]; \ |
| 578 | c1 += t2; c2 += (c1<t2)?1:0; \ | 595 | BN_ULONG lo = ta * ta; \ |
| 579 | } | 596 | BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \ |
| 597 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 598 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 599 | } while(0) | ||
| 580 | 600 | ||
| 581 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 601 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 582 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 602 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 583 | 603 | ||
| 584 | #else /* !BN_LLONG */ | 604 | #else /* !BN_LLONG */ |
| 585 | #define mul_add_c(a,b,c0,c1,c2) \ | 605 | /* |
| 586 | t1=LBITS(a); t2=HBITS(a); \ | 606 | * Keep in mind that additions to hi can not overflow, because |
| 587 | bl=LBITS(b); bh=HBITS(b); \ | 607 | * the high word of a multiplication result cannot be all-ones. |
| 588 | mul64(t1,t2,bl,bh); \ | 608 | */ |
| 589 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 609 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 590 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 610 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
| 591 | 611 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ | |
| 592 | #define mul_add_c2(a,b,c0,c1,c2) \ | 612 | mul64(lo,hi,bl,bh); \ |
| 593 | t1=LBITS(a); t2=HBITS(a); \ | 613 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
| 594 | bl=LBITS(b); bh=HBITS(b); \ | 614 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 595 | mul64(t1,t2,bl,bh); \ | 615 | } while(0) |
| 596 | if (t2 & BN_TBIT) c2++; \ | 616 | |
| 597 | t2=(t2+t2)&BN_MASK2; \ | 617 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 598 | if (t1 & BN_TBIT) t2++; \ | 618 | BN_ULONG tt; \ |
| 599 | t1=(t1+t1)&BN_MASK2; \ | 619 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
| 600 | c0=(c0+t1)&BN_MASK2; \ | 620 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ |
| 601 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 621 | mul64(lo,hi,bl,bh); \ |
| 602 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 622 | tt = hi; \ |
| 603 | 623 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \ | |
| 604 | #define sqr_add_c(a,i,c0,c1,c2) \ | 624 | c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \ |
| 605 | sqr64(t1,t2,(a)[i]); \ | 625 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
| 606 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 626 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 607 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 627 | } while(0) |
| 628 | |||
| 629 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 630 | BN_ULONG lo, hi; \ | ||
| 631 | sqr64(lo,hi,(a)[i]); \ | ||
| 632 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
| 633 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 634 | } while(0) | ||
| 608 | 635 | ||
| 609 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 636 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 610 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 637 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| @@ -613,12 +640,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 613 | void | 640 | void |
| 614 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 641 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 615 | { | 642 | { |
| 616 | #ifdef BN_LLONG | ||
| 617 | BN_ULLONG t; | ||
| 618 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 619 | BN_ULONG bl, bh; | ||
| 620 | #endif | ||
| 621 | BN_ULONG t1, t2; | ||
| 622 | BN_ULONG c1, c2, c3; | 643 | BN_ULONG c1, c2, c3; |
| 623 | 644 | ||
| 624 | c1 = 0; | 645 | c1 = 0; |
| @@ -723,12 +744,6 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 723 | void | 744 | void |
| 724 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 745 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 725 | { | 746 | { |
| 726 | #ifdef BN_LLONG | ||
| 727 | BN_ULLONG t; | ||
| 728 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 729 | BN_ULONG bl, bh; | ||
| 730 | #endif | ||
| 731 | BN_ULONG t1, t2; | ||
| 732 | BN_ULONG c1, c2, c3; | 747 | BN_ULONG c1, c2, c3; |
| 733 | 748 | ||
| 734 | c1 = 0; | 749 | c1 = 0; |
| @@ -769,12 +784,6 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 769 | void | 784 | void |
| 770 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 785 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 771 | { | 786 | { |
| 772 | #ifdef BN_LLONG | ||
| 773 | BN_ULLONG t, tt; | ||
| 774 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 775 | BN_ULONG bl, bh; | ||
| 776 | #endif | ||
| 777 | BN_ULONG t1, t2; | ||
| 778 | BN_ULONG c1, c2, c3; | 787 | BN_ULONG c1, c2, c3; |
| 779 | 788 | ||
| 780 | c1 = 0; | 789 | c1 = 0; |
| @@ -851,12 +860,6 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
| 851 | void | 860 | void |
| 852 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 861 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 853 | { | 862 | { |
| 854 | #ifdef BN_LLONG | ||
| 855 | BN_ULLONG t, tt; | ||
| 856 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 857 | BN_ULONG bl, bh; | ||
| 858 | #endif | ||
| 859 | BN_ULONG t1, t2; | ||
| 860 | BN_ULONG c1, c2, c3; | 863 | BN_ULONG c1, c2, c3; |
| 861 | 864 | ||
| 862 | c1 = 0; | 865 | c1 = 0; |
diff --git a/src/lib/libssl/src/crypto/bn/asm/mips.pl b/src/lib/libssl/src/crypto/bn/asm/mips.pl index d2f3ef7bbf..215c9a7483 100644 --- a/src/lib/libssl/src/crypto/bn/asm/mips.pl +++ b/src/lib/libssl/src/crypto/bn/asm/mips.pl | |||
| @@ -1872,6 +1872,41 @@ ___ | |||
| 1872 | 1872 | ||
| 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); | 1873 | ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); |
| 1874 | 1874 | ||
| 1875 | sub add_c2 () { | ||
| 1876 | my ($hi,$lo,$c0,$c1,$c2, | ||
| 1877 | $warm, # !$warm denotes first call with specific sequence of | ||
| 1878 | # $c_[XYZ] when there is no Z-carry to accumulate yet; | ||
| 1879 | $an,$bn # these two are arguments for multiplication which | ||
| 1880 | # result is used in *next* step [which is why it's | ||
| 1881 | # commented as "forward multiplication" below]; | ||
| 1882 | )=@_; | ||
| 1883 | $code.=<<___; | ||
| 1884 | mflo $lo | ||
| 1885 | mfhi $hi | ||
| 1886 | $ADDU $c0,$lo | ||
| 1887 | sltu $at,$c0,$lo | ||
| 1888 | $MULTU $an,$bn # forward multiplication | ||
| 1889 | $ADDU $c0,$lo | ||
| 1890 | $ADDU $at,$hi | ||
| 1891 | sltu $lo,$c0,$lo | ||
| 1892 | $ADDU $c1,$at | ||
| 1893 | $ADDU $hi,$lo | ||
| 1894 | ___ | ||
| 1895 | $code.=<<___ if (!$warm); | ||
| 1896 | sltu $c2,$c1,$at | ||
| 1897 | $ADDU $c1,$hi | ||
| 1898 | sltu $hi,$c1,$hi | ||
| 1899 | $ADDU $c2,$hi | ||
| 1900 | ___ | ||
| 1901 | $code.=<<___ if ($warm); | ||
| 1902 | sltu $at,$c1,$at | ||
| 1903 | $ADDU $c1,$hi | ||
| 1904 | $ADDU $c2,$at | ||
| 1905 | sltu $hi,$c1,$hi | ||
| 1906 | $ADDU $c2,$hi | ||
| 1907 | ___ | ||
| 1908 | } | ||
| 1909 | |||
| 1875 | $code.=<<___; | 1910 | $code.=<<___; |
| 1876 | 1911 | ||
| 1877 | .align 5 | 1912 | .align 5 |
| @@ -1920,21 +1955,10 @@ $code.=<<___; | |||
| 1920 | sltu $at,$c_2,$t_1 | 1955 | sltu $at,$c_2,$t_1 |
| 1921 | $ADDU $c_3,$t_2,$at | 1956 | $ADDU $c_3,$t_2,$at |
| 1922 | $ST $c_2,$BNSZ($a0) | 1957 | $ST $c_2,$BNSZ($a0) |
| 1923 | 1958 | ___ | |
| 1924 | mflo $t_1 | 1959 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 1925 | mfhi $t_2 | 1960 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
| 1926 | slt $c_2,$t_2,$zero | 1961 | $code.=<<___; |
| 1927 | $SLL $t_2,1 | ||
| 1928 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
| 1929 | slt $a2,$t_1,$zero | ||
| 1930 | $ADDU $t_2,$a2 | ||
| 1931 | $SLL $t_1,1 | ||
| 1932 | $ADDU $c_3,$t_1 | ||
| 1933 | sltu $at,$c_3,$t_1 | ||
| 1934 | $ADDU $t_2,$at | ||
| 1935 | $ADDU $c_1,$t_2 | ||
| 1936 | sltu $at,$c_1,$t_2 | ||
| 1937 | $ADDU $c_2,$at | ||
| 1938 | mflo $t_1 | 1962 | mflo $t_1 |
| 1939 | mfhi $t_2 | 1963 | mfhi $t_2 |
| 1940 | $ADDU $c_3,$t_1 | 1964 | $ADDU $c_3,$t_1 |
| @@ -1945,67 +1969,19 @@ $code.=<<___; | |||
| 1945 | sltu $at,$c_1,$t_2 | 1969 | sltu $at,$c_1,$t_2 |
| 1946 | $ADDU $c_2,$at | 1970 | $ADDU $c_2,$at |
| 1947 | $ST $c_3,2*$BNSZ($a0) | 1971 | $ST $c_3,2*$BNSZ($a0) |
| 1948 | 1972 | ___ | |
| 1949 | mflo $t_1 | 1973 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 1950 | mfhi $t_2 | 1974 | $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); |
| 1951 | slt $c_3,$t_2,$zero | 1975 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 1952 | $SLL $t_2,1 | 1976 | $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); |
| 1953 | $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); | 1977 | $code.=<<___; |
| 1954 | slt $a2,$t_1,$zero | ||
| 1955 | $ADDU $t_2,$a2 | ||
| 1956 | $SLL $t_1,1 | ||
| 1957 | $ADDU $c_1,$t_1 | ||
| 1958 | sltu $at,$c_1,$t_1 | ||
| 1959 | $ADDU $t_2,$at | ||
| 1960 | $ADDU $c_2,$t_2 | ||
| 1961 | sltu $at,$c_2,$t_2 | ||
| 1962 | $ADDU $c_3,$at | ||
| 1963 | mflo $t_1 | ||
| 1964 | mfhi $t_2 | ||
| 1965 | slt $at,$t_2,$zero | ||
| 1966 | $ADDU $c_3,$at | ||
| 1967 | $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); | ||
| 1968 | $SLL $t_2,1 | ||
| 1969 | slt $a2,$t_1,$zero | ||
| 1970 | $ADDU $t_2,$a2 | ||
| 1971 | $SLL $t_1,1 | ||
| 1972 | $ADDU $c_1,$t_1 | ||
| 1973 | sltu $at,$c_1,$t_1 | ||
| 1974 | $ADDU $t_2,$at | ||
| 1975 | $ADDU $c_2,$t_2 | ||
| 1976 | sltu $at,$c_2,$t_2 | ||
| 1977 | $ADDU $c_3,$at | ||
| 1978 | $ST $c_1,3*$BNSZ($a0) | 1978 | $ST $c_1,3*$BNSZ($a0) |
| 1979 | 1979 | ___ | |
| 1980 | mflo $t_1 | 1980 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 1981 | mfhi $t_2 | 1981 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
| 1982 | slt $c_1,$t_2,$zero | 1982 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 1983 | $SLL $t_2,1 | 1983 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
| 1984 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | 1984 | $code.=<<___; |
| 1985 | slt $a2,$t_1,$zero | ||
| 1986 | $ADDU $t_2,$a2 | ||
| 1987 | $SLL $t_1,1 | ||
| 1988 | $ADDU $c_2,$t_1 | ||
| 1989 | sltu $at,$c_2,$t_1 | ||
| 1990 | $ADDU $t_2,$at | ||
| 1991 | $ADDU $c_3,$t_2 | ||
| 1992 | sltu $at,$c_3,$t_2 | ||
| 1993 | $ADDU $c_1,$at | ||
| 1994 | mflo $t_1 | ||
| 1995 | mfhi $t_2 | ||
| 1996 | slt $at,$t_2,$zero | ||
| 1997 | $ADDU $c_1,$at | ||
| 1998 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
| 1999 | $SLL $t_2,1 | ||
| 2000 | slt $a2,$t_1,$zero | ||
| 2001 | $ADDU $t_2,$a2 | ||
| 2002 | $SLL $t_1,1 | ||
| 2003 | $ADDU $c_2,$t_1 | ||
| 2004 | sltu $at,$c_2,$t_1 | ||
| 2005 | $ADDU $t_2,$at | ||
| 2006 | $ADDU $c_3,$t_2 | ||
| 2007 | sltu $at,$c_3,$t_2 | ||
| 2008 | $ADDU $c_1,$at | ||
| 2009 | mflo $t_1 | 1985 | mflo $t_1 |
| 2010 | mfhi $t_2 | 1986 | mfhi $t_2 |
| 2011 | $ADDU $c_2,$t_1 | 1987 | $ADDU $c_2,$t_1 |
| @@ -2016,97 +1992,23 @@ $code.=<<___; | |||
| 2016 | sltu $at,$c_3,$t_2 | 1992 | sltu $at,$c_3,$t_2 |
| 2017 | $ADDU $c_1,$at | 1993 | $ADDU $c_1,$at |
| 2018 | $ST $c_2,4*$BNSZ($a0) | 1994 | $ST $c_2,4*$BNSZ($a0) |
| 2019 | 1995 | ___ | |
| 2020 | mflo $t_1 | 1996 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2021 | mfhi $t_2 | 1997 | $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); |
| 2022 | slt $c_2,$t_2,$zero | 1998 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2023 | $SLL $t_2,1 | 1999 | $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); |
| 2024 | $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); | 2000 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2025 | slt $a2,$t_1,$zero | 2001 | $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); |
| 2026 | $ADDU $t_2,$a2 | 2002 | $code.=<<___; |
| 2027 | $SLL $t_1,1 | ||
| 2028 | $ADDU $c_3,$t_1 | ||
| 2029 | sltu $at,$c_3,$t_1 | ||
| 2030 | $ADDU $t_2,$at | ||
| 2031 | $ADDU $c_1,$t_2 | ||
| 2032 | sltu $at,$c_1,$t_2 | ||
| 2033 | $ADDU $c_2,$at | ||
| 2034 | mflo $t_1 | ||
| 2035 | mfhi $t_2 | ||
| 2036 | slt $at,$t_2,$zero | ||
| 2037 | $ADDU $c_2,$at | ||
| 2038 | $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); | ||
| 2039 | $SLL $t_2,1 | ||
| 2040 | slt $a2,$t_1,$zero | ||
| 2041 | $ADDU $t_2,$a2 | ||
| 2042 | $SLL $t_1,1 | ||
| 2043 | $ADDU $c_3,$t_1 | ||
| 2044 | sltu $at,$c_3,$t_1 | ||
| 2045 | $ADDU $t_2,$at | ||
| 2046 | $ADDU $c_1,$t_2 | ||
| 2047 | sltu $at,$c_1,$t_2 | ||
| 2048 | $ADDU $c_2,$at | ||
| 2049 | mflo $t_1 | ||
| 2050 | mfhi $t_2 | ||
| 2051 | slt $at,$t_2,$zero | ||
| 2052 | $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); | ||
| 2053 | $ADDU $c_2,$at | ||
| 2054 | $SLL $t_2,1 | ||
| 2055 | slt $a2,$t_1,$zero | ||
| 2056 | $ADDU $t_2,$a2 | ||
| 2057 | $SLL $t_1,1 | ||
| 2058 | $ADDU $c_3,$t_1 | ||
| 2059 | sltu $at,$c_3,$t_1 | ||
| 2060 | $ADDU $t_2,$at | ||
| 2061 | $ADDU $c_1,$t_2 | ||
| 2062 | sltu $at,$c_1,$t_2 | ||
| 2063 | $ADDU $c_2,$at | ||
| 2064 | $ST $c_3,5*$BNSZ($a0) | 2003 | $ST $c_3,5*$BNSZ($a0) |
| 2065 | 2004 | ___ | |
| 2066 | mflo $t_1 | 2005 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2067 | mfhi $t_2 | 2006 | $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); |
| 2068 | slt $c_3,$t_2,$zero | 2007 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2069 | $SLL $t_2,1 | 2008 | $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); |
| 2070 | $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); | 2009 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2071 | slt $a2,$t_1,$zero | 2010 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
| 2072 | $ADDU $t_2,$a2 | 2011 | $code.=<<___; |
| 2073 | $SLL $t_1,1 | ||
| 2074 | $ADDU $c_1,$t_1 | ||
| 2075 | sltu $at,$c_1,$t_1 | ||
| 2076 | $ADDU $t_2,$at | ||
| 2077 | $ADDU $c_2,$t_2 | ||
| 2078 | sltu $at,$c_2,$t_2 | ||
| 2079 | $ADDU $c_3,$at | ||
| 2080 | mflo $t_1 | ||
| 2081 | mfhi $t_2 | ||
| 2082 | slt $at,$t_2,$zero | ||
| 2083 | $ADDU $c_3,$at | ||
| 2084 | $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); | ||
| 2085 | $SLL $t_2,1 | ||
| 2086 | slt $a2,$t_1,$zero | ||
| 2087 | $ADDU $t_2,$a2 | ||
| 2088 | $SLL $t_1,1 | ||
| 2089 | $ADDU $c_1,$t_1 | ||
| 2090 | sltu $at,$c_1,$t_1 | ||
| 2091 | $ADDU $t_2,$at | ||
| 2092 | $ADDU $c_2,$t_2 | ||
| 2093 | sltu $at,$c_2,$t_2 | ||
| 2094 | $ADDU $c_3,$at | ||
| 2095 | mflo $t_1 | ||
| 2096 | mfhi $t_2 | ||
| 2097 | slt $at,$t_2,$zero | ||
| 2098 | $ADDU $c_3,$at | ||
| 2099 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
| 2100 | $SLL $t_2,1 | ||
| 2101 | slt $a2,$t_1,$zero | ||
| 2102 | $ADDU $t_2,$a2 | ||
| 2103 | $SLL $t_1,1 | ||
| 2104 | $ADDU $c_1,$t_1 | ||
| 2105 | sltu $at,$c_1,$t_1 | ||
| 2106 | $ADDU $t_2,$at | ||
| 2107 | $ADDU $c_2,$t_2 | ||
| 2108 | sltu $at,$c_2,$t_2 | ||
| 2109 | $ADDU $c_3,$at | ||
| 2110 | mflo $t_1 | 2012 | mflo $t_1 |
| 2111 | mfhi $t_2 | 2013 | mfhi $t_2 |
| 2112 | $ADDU $c_1,$t_1 | 2014 | $ADDU $c_1,$t_1 |
| @@ -2117,112 +2019,25 @@ $code.=<<___; | |||
| 2117 | sltu $at,$c_2,$t_2 | 2019 | sltu $at,$c_2,$t_2 |
| 2118 | $ADDU $c_3,$at | 2020 | $ADDU $c_3,$at |
| 2119 | $ST $c_1,6*$BNSZ($a0) | 2021 | $ST $c_1,6*$BNSZ($a0) |
| 2120 | 2022 | ___ | |
| 2121 | mflo $t_1 | 2023 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2122 | mfhi $t_2 | 2024 | $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); |
| 2123 | slt $c_1,$t_2,$zero | 2025 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2124 | $SLL $t_2,1 | 2026 | $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); |
| 2125 | $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); | 2027 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2126 | slt $a2,$t_1,$zero | 2028 | $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); |
| 2127 | $ADDU $t_2,$a2 | 2029 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2128 | $SLL $t_1,1 | 2030 | $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); |
| 2129 | $ADDU $c_2,$t_1 | 2031 | $code.=<<___; |
| 2130 | sltu $at,$c_2,$t_1 | ||
| 2131 | $ADDU $t_2,$at | ||
| 2132 | $ADDU $c_3,$t_2 | ||
| 2133 | sltu $at,$c_3,$t_2 | ||
| 2134 | $ADDU $c_1,$at | ||
| 2135 | mflo $t_1 | ||
| 2136 | mfhi $t_2 | ||
| 2137 | slt $at,$t_2,$zero | ||
| 2138 | $ADDU $c_1,$at | ||
| 2139 | $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); | ||
| 2140 | $SLL $t_2,1 | ||
| 2141 | slt $a2,$t_1,$zero | ||
| 2142 | $ADDU $t_2,$a2 | ||
| 2143 | $SLL $t_1,1 | ||
| 2144 | $ADDU $c_2,$t_1 | ||
| 2145 | sltu $at,$c_2,$t_1 | ||
| 2146 | $ADDU $t_2,$at | ||
| 2147 | $ADDU $c_3,$t_2 | ||
| 2148 | sltu $at,$c_3,$t_2 | ||
| 2149 | $ADDU $c_1,$at | ||
| 2150 | mflo $t_1 | ||
| 2151 | mfhi $t_2 | ||
| 2152 | slt $at,$t_2,$zero | ||
| 2153 | $ADDU $c_1,$at | ||
| 2154 | $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); | ||
| 2155 | $SLL $t_2,1 | ||
| 2156 | slt $a2,$t_1,$zero | ||
| 2157 | $ADDU $t_2,$a2 | ||
| 2158 | $SLL $t_1,1 | ||
| 2159 | $ADDU $c_2,$t_1 | ||
| 2160 | sltu $at,$c_2,$t_1 | ||
| 2161 | $ADDU $t_2,$at | ||
| 2162 | $ADDU $c_3,$t_2 | ||
| 2163 | sltu $at,$c_3,$t_2 | ||
| 2164 | $ADDU $c_1,$at | ||
| 2165 | mflo $t_1 | ||
| 2166 | mfhi $t_2 | ||
| 2167 | slt $at,$t_2,$zero | ||
| 2168 | $ADDU $c_1,$at | ||
| 2169 | $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); | ||
| 2170 | $SLL $t_2,1 | ||
| 2171 | slt $a2,$t_1,$zero | ||
| 2172 | $ADDU $t_2,$a2 | ||
| 2173 | $SLL $t_1,1 | ||
| 2174 | $ADDU $c_2,$t_1 | ||
| 2175 | sltu $at,$c_2,$t_1 | ||
| 2176 | $ADDU $t_2,$at | ||
| 2177 | $ADDU $c_3,$t_2 | ||
| 2178 | sltu $at,$c_3,$t_2 | ||
| 2179 | $ADDU $c_1,$at | ||
| 2180 | $ST $c_2,7*$BNSZ($a0) | 2032 | $ST $c_2,7*$BNSZ($a0) |
| 2181 | 2033 | ___ | |
| 2182 | mflo $t_1 | 2034 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2183 | mfhi $t_2 | 2035 | $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); |
| 2184 | slt $c_2,$t_2,$zero | 2036 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2185 | $SLL $t_2,1 | 2037 | $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); |
| 2186 | $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); | 2038 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2187 | slt $a2,$t_1,$zero | 2039 | $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); |
| 2188 | $ADDU $t_2,$a2 | 2040 | $code.=<<___; |
| 2189 | $SLL $t_1,1 | ||
| 2190 | $ADDU $c_3,$t_1 | ||
| 2191 | sltu $at,$c_3,$t_1 | ||
| 2192 | $ADDU $t_2,$at | ||
| 2193 | $ADDU $c_1,$t_2 | ||
| 2194 | sltu $at,$c_1,$t_2 | ||
| 2195 | $ADDU $c_2,$at | ||
| 2196 | mflo $t_1 | ||
| 2197 | mfhi $t_2 | ||
| 2198 | slt $at,$t_2,$zero | ||
| 2199 | $ADDU $c_2,$at | ||
| 2200 | $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); | ||
| 2201 | $SLL $t_2,1 | ||
| 2202 | slt $a2,$t_1,$zero | ||
| 2203 | $ADDU $t_2,$a2 | ||
| 2204 | $SLL $t_1,1 | ||
| 2205 | $ADDU $c_3,$t_1 | ||
| 2206 | sltu $at,$c_3,$t_1 | ||
| 2207 | $ADDU $t_2,$at | ||
| 2208 | $ADDU $c_1,$t_2 | ||
| 2209 | sltu $at,$c_1,$t_2 | ||
| 2210 | $ADDU $c_2,$at | ||
| 2211 | mflo $t_1 | ||
| 2212 | mfhi $t_2 | ||
| 2213 | slt $at,$t_2,$zero | ||
| 2214 | $ADDU $c_2,$at | ||
| 2215 | $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); | ||
| 2216 | $SLL $t_2,1 | ||
| 2217 | slt $a2,$t_1,$zero | ||
| 2218 | $ADDU $t_2,$a2 | ||
| 2219 | $SLL $t_1,1 | ||
| 2220 | $ADDU $c_3,$t_1 | ||
| 2221 | sltu $at,$c_3,$t_1 | ||
| 2222 | $ADDU $t_2,$at | ||
| 2223 | $ADDU $c_1,$t_2 | ||
| 2224 | sltu $at,$c_1,$t_2 | ||
| 2225 | $ADDU $c_2,$at | ||
| 2226 | mflo $t_1 | 2041 | mflo $t_1 |
| 2227 | mfhi $t_2 | 2042 | mfhi $t_2 |
| 2228 | $ADDU $c_3,$t_1 | 2043 | $ADDU $c_3,$t_1 |
| @@ -2233,82 +2048,21 @@ $code.=<<___; | |||
| 2233 | sltu $at,$c_1,$t_2 | 2048 | sltu $at,$c_1,$t_2 |
| 2234 | $ADDU $c_2,$at | 2049 | $ADDU $c_2,$at |
| 2235 | $ST $c_3,8*$BNSZ($a0) | 2050 | $ST $c_3,8*$BNSZ($a0) |
| 2236 | 2051 | ___ | |
| 2237 | mflo $t_1 | 2052 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2238 | mfhi $t_2 | 2053 | $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); |
| 2239 | slt $c_3,$t_2,$zero | 2054 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2240 | $SLL $t_2,1 | 2055 | $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); |
| 2241 | $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); | 2056 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2242 | slt $a2,$t_1,$zero | 2057 | $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); |
| 2243 | $ADDU $t_2,$a2 | 2058 | $code.=<<___; |
| 2244 | $SLL $t_1,1 | ||
| 2245 | $ADDU $c_1,$t_1 | ||
| 2246 | sltu $at,$c_1,$t_1 | ||
| 2247 | $ADDU $t_2,$at | ||
| 2248 | $ADDU $c_2,$t_2 | ||
| 2249 | sltu $at,$c_2,$t_2 | ||
| 2250 | $ADDU $c_3,$at | ||
| 2251 | mflo $t_1 | ||
| 2252 | mfhi $t_2 | ||
| 2253 | slt $at,$t_2,$zero | ||
| 2254 | $ADDU $c_3,$at | ||
| 2255 | $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); | ||
| 2256 | $SLL $t_2,1 | ||
| 2257 | slt $a2,$t_1,$zero | ||
| 2258 | $ADDU $t_2,$a2 | ||
| 2259 | $SLL $t_1,1 | ||
| 2260 | $ADDU $c_1,$t_1 | ||
| 2261 | sltu $at,$c_1,$t_1 | ||
| 2262 | $ADDU $t_2,$at | ||
| 2263 | $ADDU $c_2,$t_2 | ||
| 2264 | sltu $at,$c_2,$t_2 | ||
| 2265 | $ADDU $c_3,$at | ||
| 2266 | mflo $t_1 | ||
| 2267 | mfhi $t_2 | ||
| 2268 | slt $at,$t_2,$zero | ||
| 2269 | $ADDU $c_3,$at | ||
| 2270 | $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); | ||
| 2271 | $SLL $t_2,1 | ||
| 2272 | slt $a2,$t_1,$zero | ||
| 2273 | $ADDU $t_2,$a2 | ||
| 2274 | $SLL $t_1,1 | ||
| 2275 | $ADDU $c_1,$t_1 | ||
| 2276 | sltu $at,$c_1,$t_1 | ||
| 2277 | $ADDU $t_2,$at | ||
| 2278 | $ADDU $c_2,$t_2 | ||
| 2279 | sltu $at,$c_2,$t_2 | ||
| 2280 | $ADDU $c_3,$at | ||
| 2281 | $ST $c_1,9*$BNSZ($a0) | 2059 | $ST $c_1,9*$BNSZ($a0) |
| 2282 | 2060 | ___ | |
| 2283 | mflo $t_1 | 2061 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2284 | mfhi $t_2 | 2062 | $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); |
| 2285 | slt $c_1,$t_2,$zero | 2063 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, |
| 2286 | $SLL $t_2,1 | 2064 | $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); |
| 2287 | $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); | 2065 | $code.=<<___; |
| 2288 | slt $a2,$t_1,$zero | ||
| 2289 | $ADDU $t_2,$a2 | ||
| 2290 | $SLL $t_1,1 | ||
| 2291 | $ADDU $c_2,$t_1 | ||
| 2292 | sltu $at,$c_2,$t_1 | ||
| 2293 | $ADDU $t_2,$at | ||
| 2294 | $ADDU $c_3,$t_2 | ||
| 2295 | sltu $at,$c_3,$t_2 | ||
| 2296 | $ADDU $c_1,$at | ||
| 2297 | mflo $t_1 | ||
| 2298 | mfhi $t_2 | ||
| 2299 | slt $at,$t_2,$zero | ||
| 2300 | $ADDU $c_1,$at | ||
| 2301 | $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); | ||
| 2302 | $SLL $t_2,1 | ||
| 2303 | slt $a2,$t_1,$zero | ||
| 2304 | $ADDU $t_2,$a2 | ||
| 2305 | $SLL $t_1,1 | ||
| 2306 | $ADDU $c_2,$t_1 | ||
| 2307 | sltu $at,$c_2,$t_1 | ||
| 2308 | $ADDU $t_2,$at | ||
| 2309 | $ADDU $c_3,$t_2 | ||
| 2310 | sltu $at,$c_3,$t_2 | ||
| 2311 | $ADDU $c_1,$at | ||
| 2312 | mflo $t_1 | 2066 | mflo $t_1 |
| 2313 | mfhi $t_2 | 2067 | mfhi $t_2 |
| 2314 | $ADDU $c_2,$t_1 | 2068 | $ADDU $c_2,$t_1 |
| @@ -2319,52 +2073,17 @@ $code.=<<___; | |||
| 2319 | sltu $at,$c_3,$t_2 | 2073 | sltu $at,$c_3,$t_2 |
| 2320 | $ADDU $c_1,$at | 2074 | $ADDU $c_1,$at |
| 2321 | $ST $c_2,10*$BNSZ($a0) | 2075 | $ST $c_2,10*$BNSZ($a0) |
| 2322 | 2076 | ___ | |
| 2323 | mflo $t_1 | 2077 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2324 | mfhi $t_2 | 2078 | $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); |
| 2325 | slt $c_2,$t_2,$zero | 2079 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, |
| 2326 | $SLL $t_2,1 | 2080 | $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); |
| 2327 | $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); | 2081 | $code.=<<___; |
| 2328 | slt $a2,$t_1,$zero | ||
| 2329 | $ADDU $t_2,$a2 | ||
| 2330 | $SLL $t_1,1 | ||
| 2331 | $ADDU $c_3,$t_1 | ||
| 2332 | sltu $at,$c_3,$t_1 | ||
| 2333 | $ADDU $t_2,$at | ||
| 2334 | $ADDU $c_1,$t_2 | ||
| 2335 | sltu $at,$c_1,$t_2 | ||
| 2336 | $ADDU $c_2,$at | ||
| 2337 | mflo $t_1 | ||
| 2338 | mfhi $t_2 | ||
| 2339 | slt $at,$t_2,$zero | ||
| 2340 | $ADDU $c_2,$at | ||
| 2341 | $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); | ||
| 2342 | $SLL $t_2,1 | ||
| 2343 | slt $a2,$t_1,$zero | ||
| 2344 | $ADDU $t_2,$a2 | ||
| 2345 | $SLL $t_1,1 | ||
| 2346 | $ADDU $c_3,$t_1 | ||
| 2347 | sltu $at,$c_3,$t_1 | ||
| 2348 | $ADDU $t_2,$at | ||
| 2349 | $ADDU $c_1,$t_2 | ||
| 2350 | sltu $at,$c_1,$t_2 | ||
| 2351 | $ADDU $c_2,$at | ||
| 2352 | $ST $c_3,11*$BNSZ($a0) | 2082 | $ST $c_3,11*$BNSZ($a0) |
| 2353 | 2083 | ___ | |
| 2354 | mflo $t_1 | 2084 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2355 | mfhi $t_2 | 2085 | $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); |
| 2356 | slt $c_3,$t_2,$zero | 2086 | $code.=<<___; |
| 2357 | $SLL $t_2,1 | ||
| 2358 | $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); | ||
| 2359 | slt $a2,$t_1,$zero | ||
| 2360 | $ADDU $t_2,$a2 | ||
| 2361 | $SLL $t_1,1 | ||
| 2362 | $ADDU $c_1,$t_1 | ||
| 2363 | sltu $at,$c_1,$t_1 | ||
| 2364 | $ADDU $t_2,$at | ||
| 2365 | $ADDU $c_2,$t_2 | ||
| 2366 | sltu $at,$c_2,$t_2 | ||
| 2367 | $ADDU $c_3,$at | ||
| 2368 | mflo $t_1 | 2087 | mflo $t_1 |
| 2369 | mfhi $t_2 | 2088 | mfhi $t_2 |
| 2370 | $ADDU $c_1,$t_1 | 2089 | $ADDU $c_1,$t_1 |
| @@ -2375,21 +2094,10 @@ $code.=<<___; | |||
| 2375 | sltu $at,$c_2,$t_2 | 2094 | sltu $at,$c_2,$t_2 |
| 2376 | $ADDU $c_3,$at | 2095 | $ADDU $c_3,$at |
| 2377 | $ST $c_1,12*$BNSZ($a0) | 2096 | $ST $c_1,12*$BNSZ($a0) |
| 2378 | 2097 | ___ | |
| 2379 | mflo $t_1 | 2098 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2380 | mfhi $t_2 | 2099 | $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); |
| 2381 | slt $c_1,$t_2,$zero | 2100 | $code.=<<___; |
| 2382 | $SLL $t_2,1 | ||
| 2383 | $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); | ||
| 2384 | slt $a2,$t_1,$zero | ||
| 2385 | $ADDU $t_2,$a2 | ||
| 2386 | $SLL $t_1,1 | ||
| 2387 | $ADDU $c_2,$t_1 | ||
| 2388 | sltu $at,$c_2,$t_1 | ||
| 2389 | $ADDU $t_2,$at | ||
| 2390 | $ADDU $c_3,$t_2 | ||
| 2391 | sltu $at,$c_3,$t_2 | ||
| 2392 | $ADDU $c_1,$at | ||
| 2393 | $ST $c_2,13*$BNSZ($a0) | 2101 | $ST $c_2,13*$BNSZ($a0) |
| 2394 | 2102 | ||
| 2395 | mflo $t_1 | 2103 | mflo $t_1 |
| @@ -2457,21 +2165,10 @@ $code.=<<___; | |||
| 2457 | sltu $at,$c_2,$t_1 | 2165 | sltu $at,$c_2,$t_1 |
| 2458 | $ADDU $c_3,$t_2,$at | 2166 | $ADDU $c_3,$t_2,$at |
| 2459 | $ST $c_2,$BNSZ($a0) | 2167 | $ST $c_2,$BNSZ($a0) |
| 2460 | 2168 | ___ | |
| 2461 | mflo $t_1 | 2169 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2462 | mfhi $t_2 | 2170 | $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); |
| 2463 | slt $c_2,$t_2,$zero | 2171 | $code.=<<___; |
| 2464 | $SLL $t_2,1 | ||
| 2465 | $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); | ||
| 2466 | slt $a2,$t_1,$zero | ||
| 2467 | $ADDU $t_2,$a2 | ||
| 2468 | $SLL $t_1,1 | ||
| 2469 | $ADDU $c_3,$t_1 | ||
| 2470 | sltu $at,$c_3,$t_1 | ||
| 2471 | $ADDU $t_2,$at | ||
| 2472 | $ADDU $c_1,$t_2 | ||
| 2473 | sltu $at,$c_1,$t_2 | ||
| 2474 | $ADDU $c_2,$at | ||
| 2475 | mflo $t_1 | 2172 | mflo $t_1 |
| 2476 | mfhi $t_2 | 2173 | mfhi $t_2 |
| 2477 | $ADDU $c_3,$t_1 | 2174 | $ADDU $c_3,$t_1 |
| @@ -2482,52 +2179,17 @@ $code.=<<___; | |||
| 2482 | sltu $at,$c_1,$t_2 | 2179 | sltu $at,$c_1,$t_2 |
| 2483 | $ADDU $c_2,$at | 2180 | $ADDU $c_2,$at |
| 2484 | $ST $c_3,2*$BNSZ($a0) | 2181 | $ST $c_3,2*$BNSZ($a0) |
| 2485 | 2182 | ___ | |
| 2486 | mflo $t_1 | 2183 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, |
| 2487 | mfhi $t_2 | 2184 | $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); |
| 2488 | slt $c_3,$t_2,$zero | 2185 | &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, |
| 2489 | $SLL $t_2,1 | 2186 | $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); |
| 2490 | $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); | 2187 | $code.=<<___; |
| 2491 | slt $a2,$t_1,$zero | ||
| 2492 | $ADDU $t_2,$a2 | ||
| 2493 | $SLL $t_1,1 | ||
| 2494 | $ADDU $c_1,$t_1 | ||
| 2495 | sltu $at,$c_1,$t_1 | ||
| 2496 | $ADDU $t_2,$at | ||
| 2497 | $ADDU $c_2,$t_2 | ||
| 2498 | sltu $at,$c_2,$t_2 | ||
| 2499 | $ADDU $c_3,$at | ||
| 2500 | mflo $t_1 | ||
| 2501 | mfhi $t_2 | ||
| 2502 | slt $at,$t_2,$zero | ||
| 2503 | $ADDU $c_3,$at | ||
| 2504 | $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); | ||
| 2505 | $SLL $t_2,1 | ||
| 2506 | slt $a2,$t_1,$zero | ||
| 2507 | $ADDU $t_2,$a2 | ||
| 2508 | $SLL $t_1,1 | ||
| 2509 | $ADDU $c_1,$t_1 | ||
| 2510 | sltu $at,$c_1,$t_1 | ||
| 2511 | $ADDU $t_2,$at | ||
| 2512 | $ADDU $c_2,$t_2 | ||
| 2513 | sltu $at,$c_2,$t_2 | ||
| 2514 | $ADDU $c_3,$at | ||
| 2515 | $ST $c_1,3*$BNSZ($a0) | 2188 | $ST $c_1,3*$BNSZ($a0) |
| 2516 | 2189 | ___ | |
| 2517 | mflo $t_1 | 2190 | &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, |
| 2518 | mfhi $t_2 | 2191 | $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); |
| 2519 | slt $c_1,$t_2,$zero | 2192 | $code.=<<___; |
| 2520 | $SLL $t_2,1 | ||
| 2521 | $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); | ||
| 2522 | slt $a2,$t_1,$zero | ||
| 2523 | $ADDU $t_2,$a2 | ||
| 2524 | $SLL $t_1,1 | ||
| 2525 | $ADDU $c_2,$t_1 | ||
| 2526 | sltu $at,$c_2,$t_1 | ||
| 2527 | $ADDU $t_2,$at | ||
| 2528 | $ADDU $c_3,$t_2 | ||
| 2529 | sltu $at,$c_3,$t_2 | ||
| 2530 | $ADDU $c_1,$at | ||
| 2531 | mflo $t_1 | 2193 | mflo $t_1 |
| 2532 | mfhi $t_2 | 2194 | mfhi $t_2 |
| 2533 | $ADDU $c_2,$t_1 | 2195 | $ADDU $c_2,$t_1 |
| @@ -2538,21 +2200,10 @@ $code.=<<___; | |||
| 2538 | sltu $at,$c_3,$t_2 | 2200 | sltu $at,$c_3,$t_2 |
| 2539 | $ADDU $c_1,$at | 2201 | $ADDU $c_1,$at |
| 2540 | $ST $c_2,4*$BNSZ($a0) | 2202 | $ST $c_2,4*$BNSZ($a0) |
| 2541 | 2203 | ___ | |
| 2542 | mflo $t_1 | 2204 | &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, |
| 2543 | mfhi $t_2 | 2205 | $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); |
| 2544 | slt $c_2,$t_2,$zero | 2206 | $code.=<<___; |
| 2545 | $SLL $t_2,1 | ||
| 2546 | $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); | ||
| 2547 | slt $a2,$t_1,$zero | ||
| 2548 | $ADDU $t_2,$a2 | ||
| 2549 | $SLL $t_1,1 | ||
| 2550 | $ADDU $c_3,$t_1 | ||
| 2551 | sltu $at,$c_3,$t_1 | ||
| 2552 | $ADDU $t_2,$at | ||
| 2553 | $ADDU $c_1,$t_2 | ||
| 2554 | sltu $at,$c_1,$t_2 | ||
| 2555 | $ADDU $c_2,$at | ||
| 2556 | $ST $c_3,5*$BNSZ($a0) | 2207 | $ST $c_3,5*$BNSZ($a0) |
| 2557 | 2208 | ||
| 2558 | mflo $t_1 | 2209 | mflo $t_1 |
diff --git a/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c b/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c index c9a2b6be73..9deffa71f1 100644 --- a/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c +++ b/src/lib/libssl/src/crypto/bn/asm/x86_64-gcc.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: x86_64-gcc.c,v 1.4 2014/10/28 07:35:58 jsg Exp $ */ | 1 | /* $OpenBSD: x86_64-gcc.c,v 1.5 2015/02/25 15:39:49 bcook Exp $ */ |
| 2 | #include "../bn_lcl.h" | 2 | #include "../bn_lcl.h" |
| 3 | #if !(defined(__GNUC__) && __GNUC__>=2) | 3 | #if !(defined(__GNUC__) && __GNUC__>=2) |
| 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 4 | # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
| @@ -270,77 +270,76 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | 270 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ |
| 271 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 271 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
| 272 | 272 | ||
| 273 | /* | ||
| 274 | * Keep in mind that carrying into high part of multiplication result | ||
| 275 | * can not overflow, because it cannot be all-ones. | ||
| 276 | */ | ||
| 273 | #if 0 | 277 | #if 0 |
| 274 | /* original macros are kept for reference purposes */ | 278 | /* original macros are kept for reference purposes */ |
| 275 | #define mul_add_c(a,b,c0,c1,c2) { \ | 279 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 276 | BN_ULONG ta=(a),tb=(b); \ | 280 | BN_ULONG ta = (a), tb = (b); \ |
| 277 | t1 = ta * tb; \ | 281 | BN_ULONG lo, hi; \ |
| 278 | t2 = BN_UMULT_HIGH(ta,tb); \ | 282 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 279 | c0 += t1; t2 += (c0<t1)?1:0; \ | 283 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 280 | c1 += t2; c2 += (c1<t2)?1:0; \ | 284 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 281 | } | 285 | } while(0) |
| 282 | 286 | ||
| 283 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 287 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 284 | BN_ULONG ta=(a),tb=(b),t0; \ | 288 | BN_ULONG ta = (a), tb = (b); \ |
| 285 | t1 = BN_UMULT_HIGH(ta,tb); \ | 289 | BN_ULONG lo, hi, tt; \ |
| 286 | t0 = ta * tb; \ | 290 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 287 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 291 | c0 += lo; tt = hi+((c0<lo)?1:0); \ |
| 288 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 292 | c1 += tt; c2 += (c1<tt)?1:0; \ |
| 289 | c0 += t1; t2 += (c0<t1)?1:0; \ | 293 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 290 | c1 += t2; c2 += (c1<t2)?1:0; \ | 294 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 291 | } | 295 | } while(0) |
| 296 | |||
| 297 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 298 | BN_ULONG ta = (a)[i]; \ | ||
| 299 | BN_ULONG lo, hi; \ | ||
| 300 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
| 301 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 302 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 303 | } while(0) | ||
| 292 | #else | 304 | #else |
| 293 | #define mul_add_c(a,b,c0,c1,c2) do { \ | 305 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 306 | BN_ULONG t1,t2; \ | ||
| 294 | asm ("mulq %3" \ | 307 | asm ("mulq %3" \ |
| 295 | : "=a"(t1),"=d"(t2) \ | 308 | : "=a"(t1),"=d"(t2) \ |
| 296 | : "a"(a),"m"(b) \ | 309 | : "a"(a),"m"(b) \ |
| 297 | : "cc"); \ | 310 | : "cc"); \ |
| 298 | asm ("addq %2,%0; adcq %3,%1" \ | 311 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 299 | : "+r"(c0),"+d"(t2) \ | 312 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 300 | : "a"(t1),"g"(0) \ | 313 | : "r"(t1),"r"(t2),"g"(0) \ |
| 301 | : "cc"); \ | 314 | : "cc"); \ |
| 302 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 303 | : "+r"(c1),"+r"(c2) \ | ||
| 304 | : "d"(t2),"g"(0) \ | ||
| 305 | : "cc"); \ | ||
| 306 | } while (0) | 315 | } while (0) |
| 307 | 316 | ||
| 308 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | 317 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
| 318 | BN_ULONG t1,t2; \ | ||
| 309 | asm ("mulq %2" \ | 319 | asm ("mulq %2" \ |
| 310 | : "=a"(t1),"=d"(t2) \ | 320 | : "=a"(t1),"=d"(t2) \ |
| 311 | : "a"(a[i]) \ | 321 | : "a"(a[i]) \ |
| 312 | : "cc"); \ | 322 | : "cc"); \ |
| 313 | asm ("addq %2,%0; adcq %3,%1" \ | 323 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 314 | : "+r"(c0),"+d"(t2) \ | 324 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 315 | : "a"(t1),"g"(0) \ | 325 | : "r"(t1),"r"(t2),"g"(0) \ |
| 316 | : "cc"); \ | 326 | : "cc"); \ |
| 317 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 318 | : "+r"(c1),"+r"(c2) \ | ||
| 319 | : "d"(t2),"g"(0) \ | ||
| 320 | : "cc"); \ | ||
| 321 | } while (0) | 327 | } while (0) |
| 322 | 328 | ||
| 323 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | 329 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 330 | BN_ULONG t1,t2; \ | ||
| 324 | asm ("mulq %3" \ | 331 | asm ("mulq %3" \ |
| 325 | : "=a"(t1),"=d"(t2) \ | 332 | : "=a"(t1),"=d"(t2) \ |
| 326 | : "a"(a),"m"(b) \ | 333 | : "a"(a),"m"(b) \ |
| 327 | : "cc"); \ | 334 | : "cc"); \ |
| 328 | asm ("addq %0,%0; adcq %2,%1" \ | 335 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 329 | : "+d"(t2),"+r"(c2) \ | 336 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 330 | : "g"(0) \ | 337 | : "r"(t1),"r"(t2),"g"(0) \ |
| 331 | : "cc"); \ | 338 | : "cc"); \ |
| 332 | asm ("addq %0,%0; adcq %2,%1" \ | 339 | asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ |
| 333 | : "+a"(t1),"+d"(t2) \ | 340 | : "+r"(c0),"+r"(c1),"+r"(c2) \ |
| 334 | : "g"(0) \ | 341 | : "r"(t1),"r"(t2),"g"(0) \ |
| 335 | : "cc"); \ | 342 | : "cc"); \ |
| 336 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 337 | : "+r"(c0),"+d"(t2) \ | ||
| 338 | : "a"(t1),"g"(0) \ | ||
| 339 | : "cc"); \ | ||
| 340 | asm ("addq %2,%0; adcq %3,%1" \ | ||
| 341 | : "+r"(c1),"+r"(c2) \ | ||
| 342 | : "d"(t2),"g"(0) \ | ||
| 343 | : "cc"); \ | ||
| 344 | } while (0) | 343 | } while (0) |
| 345 | #endif | 344 | #endif |
| 346 | 345 | ||
| @@ -349,7 +348,6 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) | |||
| 349 | 348 | ||
| 350 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 349 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 351 | { | 350 | { |
| 352 | BN_ULONG t1,t2; | ||
| 353 | BN_ULONG c1,c2,c3; | 351 | BN_ULONG c1,c2,c3; |
| 354 | 352 | ||
| 355 | c1=0; | 353 | c1=0; |
| @@ -453,7 +451,6 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 453 | 451 | ||
| 454 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 452 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 455 | { | 453 | { |
| 456 | BN_ULONG t1,t2; | ||
| 457 | BN_ULONG c1,c2,c3; | 454 | BN_ULONG c1,c2,c3; |
| 458 | 455 | ||
| 459 | c1=0; | 456 | c1=0; |
| @@ -493,7 +490,6 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 493 | 490 | ||
| 494 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 491 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 495 | { | 492 | { |
| 496 | BN_ULONG t1,t2; | ||
| 497 | BN_ULONG c1,c2,c3; | 493 | BN_ULONG c1,c2,c3; |
| 498 | 494 | ||
| 499 | c1=0; | 495 | c1=0; |
| @@ -569,7 +565,6 @@ void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
| 569 | 565 | ||
| 570 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 566 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 571 | { | 567 | { |
| 572 | BN_ULONG t1,t2; | ||
| 573 | BN_ULONG c1,c2,c3; | 568 | BN_ULONG c1,c2,c3; |
| 574 | 569 | ||
| 575 | c1=0; | 570 | c1=0; |
diff --git a/src/lib/libssl/src/crypto/bn/bn_asm.c b/src/lib/libssl/src/crypto/bn/bn_asm.c index c6efd2513a..49f0ba5d7b 100644 --- a/src/lib/libssl/src/crypto/bn/bn_asm.c +++ b/src/lib/libssl/src/crypto/bn/bn_asm.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_asm.c,v 1.13 2014/07/11 08:44:47 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.14 2015/02/25 15:39:49 bcook Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -495,116 +495,143 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 495 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | 495 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ |
| 496 | 496 | ||
| 497 | #ifdef BN_LLONG | 497 | #ifdef BN_LLONG |
| 498 | #define mul_add_c(a,b,c0,c1,c2) \ | 498 | /* |
| 499 | t=(BN_ULLONG)a*b; \ | 499 | * Keep in mind that additions to multiplication result can not |
| 500 | t1=(BN_ULONG)Lw(t); \ | 500 | * overflow, because its high half cannot be all-ones. |
| 501 | t2=(BN_ULONG)Hw(t); \ | 501 | */ |
| 502 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 502 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 503 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 503 | BN_ULONG hi; \ |
| 504 | 504 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ | |
| 505 | #define mul_add_c2(a,b,c0,c1,c2) \ | 505 | t += c0; /* no carry */ \ |
| 506 | t=(BN_ULLONG)a*b; \ | 506 | c0 = (BN_ULONG)Lw(t); \ |
| 507 | tt=(t+t)&BN_MASK; \ | 507 | hi = (BN_ULONG)Hw(t); \ |
| 508 | if (tt < t) c2++; \ | 508 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 509 | t1=(BN_ULONG)Lw(tt); \ | 509 | } while(0) |
| 510 | t2=(BN_ULONG)Hw(tt); \ | 510 | |
| 511 | c0=(c0+t1)&BN_MASK2; \ | 511 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 512 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 512 | BN_ULONG hi; \ |
| 513 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 513 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ |
| 514 | 514 | BN_ULLONG tt = t+c0; /* no carry */ \ | |
| 515 | #define sqr_add_c(a,i,c0,c1,c2) \ | 515 | c0 = (BN_ULONG)Lw(tt); \ |
| 516 | t=(BN_ULLONG)a[i]*a[i]; \ | 516 | hi = (BN_ULONG)Hw(tt); \ |
| 517 | t1=(BN_ULONG)Lw(t); \ | 517 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 518 | t2=(BN_ULONG)Hw(t); \ | 518 | t += c0; /* no carry */ \ |
| 519 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 519 | c0 = (BN_ULONG)Lw(t); \ |
| 520 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 520 | hi = (BN_ULONG)Hw(t); \ |
| 521 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 522 | } while(0) | ||
| 523 | |||
| 524 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 525 | BN_ULONG hi; \ | ||
| 526 | BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \ | ||
| 527 | t += c0; /* no carry */ \ | ||
| 528 | c0 = (BN_ULONG)Lw(t); \ | ||
| 529 | hi = (BN_ULONG)Hw(t); \ | ||
| 530 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 531 | } while(0) | ||
| 521 | 532 | ||
| 522 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 533 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 523 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 534 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 524 | 535 | ||
| 525 | #elif defined(BN_UMULT_LOHI) | 536 | #elif defined(BN_UMULT_LOHI) |
| 526 | 537 | /* | |
| 527 | #define mul_add_c(a,b,c0,c1,c2) { \ | 538 | * Keep in mind that additions to hi can not overflow, because |
| 528 | BN_ULONG ta=(a),tb=(b); \ | 539 | * the high word of a multiplication result cannot be all-ones. |
| 529 | BN_UMULT_LOHI(t1,t2,ta,tb); \ | 540 | */ |
| 530 | c0 += t1; t2 += (c0<t1)?1:0; \ | 541 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 531 | c1 += t2; c2 += (c1<t2)?1:0; \ | 542 | BN_ULONG ta = (a), tb = (b); \ |
| 532 | } | 543 | BN_ULONG lo, hi; \ |
| 533 | 544 | BN_UMULT_LOHI(lo,hi,ta,tb); \ | |
| 534 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 545 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 535 | BN_ULONG ta=(a),tb=(b),t0; \ | 546 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 536 | BN_UMULT_LOHI(t0,t1,ta,tb); \ | 547 | } while(0) |
| 537 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 548 | |
| 538 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 549 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 539 | c0 += t1; t2 += (c0<t1)?1:0; \ | 550 | BN_ULONG ta = (a), tb = (b); \ |
| 540 | c1 += t2; c2 += (c1<t2)?1:0; \ | 551 | BN_ULONG lo, hi, tt; \ |
| 541 | } | 552 | BN_UMULT_LOHI(lo,hi,ta,tb); \ |
| 542 | 553 | c0 += lo; tt = hi+((c0<lo)?1:0); \ | |
| 543 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 554 | c1 += tt; c2 += (c1<tt)?1:0; \ |
| 544 | BN_ULONG ta=(a)[i]; \ | 555 | c0 += lo; hi += (c0<lo)?1:0; \ |
| 545 | BN_UMULT_LOHI(t1,t2,ta,ta); \ | 556 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 546 | c0 += t1; t2 += (c0<t1)?1:0; \ | 557 | } while(0) |
| 547 | c1 += t2; c2 += (c1<t2)?1:0; \ | 558 | |
| 548 | } | 559 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
| 560 | BN_ULONG ta = (a)[i]; \ | ||
| 561 | BN_ULONG lo, hi; \ | ||
| 562 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
| 563 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 564 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 565 | } while(0) | ||
| 549 | 566 | ||
| 550 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 567 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 551 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 568 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 552 | 569 | ||
| 553 | #elif defined(BN_UMULT_HIGH) | 570 | #elif defined(BN_UMULT_HIGH) |
| 554 | 571 | /* | |
| 555 | #define mul_add_c(a,b,c0,c1,c2) { \ | 572 | * Keep in mind that additions to hi can not overflow, because |
| 556 | BN_ULONG ta=(a),tb=(b); \ | 573 | * the high word of a multiplication result cannot be all-ones. |
| 557 | t1 = ta * tb; \ | 574 | */ |
| 558 | t2 = BN_UMULT_HIGH(ta,tb); \ | 575 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 559 | c0 += t1; t2 += (c0<t1)?1:0; \ | 576 | BN_ULONG ta = (a), tb = (b); \ |
| 560 | c1 += t2; c2 += (c1<t2)?1:0; \ | 577 | BN_ULONG lo = ta * tb; \ |
| 561 | } | 578 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
| 562 | 579 | c0 += lo; hi += (c0<lo)?1:0; \ | |
| 563 | #define mul_add_c2(a,b,c0,c1,c2) { \ | 580 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 564 | BN_ULONG ta=(a),tb=(b),t0; \ | 581 | } while(0) |
| 565 | t1 = BN_UMULT_HIGH(ta,tb); \ | 582 | |
| 566 | t0 = ta * tb; \ | 583 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 567 | t2 = t1+t1; c2 += (t2<t1)?1:0; \ | 584 | BN_ULONG ta = (a), tb = (b), tt; \ |
| 568 | t1 = t0+t0; t2 += (t1<t0)?1:0; \ | 585 | BN_ULONG lo = ta * tb; \ |
| 569 | c0 += t1; t2 += (c0<t1)?1:0; \ | 586 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ |
| 570 | c1 += t2; c2 += (c1<t2)?1:0; \ | 587 | c0 += lo; tt = hi + ((c0<lo)?1:0); \ |
| 571 | } | 588 | c1 += tt; c2 += (c1<tt)?1:0; \ |
| 572 | 589 | c0 += lo; hi += (c0<lo)?1:0; \ | |
| 573 | #define sqr_add_c(a,i,c0,c1,c2) { \ | 590 | c1 += hi; c2 += (c1<hi)?1:0; \ |
| 574 | BN_ULONG ta=(a)[i]; \ | 591 | } while(0) |
| 575 | t1 = ta * ta; \ | 592 | |
| 576 | t2 = BN_UMULT_HIGH(ta,ta); \ | 593 | #define sqr_add_c(a,i,c0,c1,c2) do { \ |
| 577 | c0 += t1; t2 += (c0<t1)?1:0; \ | 594 | BN_ULONG ta = (a)[i]; \ |
| 578 | c1 += t2; c2 += (c1<t2)?1:0; \ | 595 | BN_ULONG lo = ta * ta; \ |
| 579 | } | 596 | BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \ |
| 597 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 598 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 599 | } while(0) | ||
| 580 | 600 | ||
| 581 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 601 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 582 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 602 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| 583 | 603 | ||
| 584 | #else /* !BN_LLONG */ | 604 | #else /* !BN_LLONG */ |
| 585 | #define mul_add_c(a,b,c0,c1,c2) \ | 605 | /* |
| 586 | t1=LBITS(a); t2=HBITS(a); \ | 606 | * Keep in mind that additions to hi can not overflow, because |
| 587 | bl=LBITS(b); bh=HBITS(b); \ | 607 | * the high word of a multiplication result cannot be all-ones. |
| 588 | mul64(t1,t2,bl,bh); \ | 608 | */ |
| 589 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 609 | #define mul_add_c(a,b,c0,c1,c2) do { \ |
| 590 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 610 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
| 591 | 611 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ | |
| 592 | #define mul_add_c2(a,b,c0,c1,c2) \ | 612 | mul64(lo,hi,bl,bh); \ |
| 593 | t1=LBITS(a); t2=HBITS(a); \ | 613 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
| 594 | bl=LBITS(b); bh=HBITS(b); \ | 614 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 595 | mul64(t1,t2,bl,bh); \ | 615 | } while(0) |
| 596 | if (t2 & BN_TBIT) c2++; \ | 616 | |
| 597 | t2=(t2+t2)&BN_MASK2; \ | 617 | #define mul_add_c2(a,b,c0,c1,c2) do { \ |
| 598 | if (t1 & BN_TBIT) t2++; \ | 618 | BN_ULONG tt; \ |
| 599 | t1=(t1+t1)&BN_MASK2; \ | 619 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ |
| 600 | c0=(c0+t1)&BN_MASK2; \ | 620 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ |
| 601 | if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ | 621 | mul64(lo,hi,bl,bh); \ |
| 602 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 622 | tt = hi; \ |
| 603 | 623 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \ | |
| 604 | #define sqr_add_c(a,i,c0,c1,c2) \ | 624 | c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \ |
| 605 | sqr64(t1,t2,(a)[i]); \ | 625 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ |
| 606 | c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ | 626 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ |
| 607 | c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; | 627 | } while(0) |
| 628 | |||
| 629 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 630 | BN_ULONG lo, hi; \ | ||
| 631 | sqr64(lo,hi,(a)[i]); \ | ||
| 632 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
| 633 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 634 | } while(0) | ||
| 608 | 635 | ||
| 609 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | 636 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ |
| 610 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | 637 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) |
| @@ -613,12 +640,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 613 | void | 640 | void |
| 614 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 641 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 615 | { | 642 | { |
| 616 | #ifdef BN_LLONG | ||
| 617 | BN_ULLONG t; | ||
| 618 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 619 | BN_ULONG bl, bh; | ||
| 620 | #endif | ||
| 621 | BN_ULONG t1, t2; | ||
| 622 | BN_ULONG c1, c2, c3; | 643 | BN_ULONG c1, c2, c3; |
| 623 | 644 | ||
| 624 | c1 = 0; | 645 | c1 = 0; |
| @@ -723,12 +744,6 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 723 | void | 744 | void |
| 724 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 745 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
| 725 | { | 746 | { |
| 726 | #ifdef BN_LLONG | ||
| 727 | BN_ULLONG t; | ||
| 728 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 729 | BN_ULONG bl, bh; | ||
| 730 | #endif | ||
| 731 | BN_ULONG t1, t2; | ||
| 732 | BN_ULONG c1, c2, c3; | 747 | BN_ULONG c1, c2, c3; |
| 733 | 748 | ||
| 734 | c1 = 0; | 749 | c1 = 0; |
| @@ -769,12 +784,6 @@ bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | |||
| 769 | void | 784 | void |
| 770 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | 785 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) |
| 771 | { | 786 | { |
| 772 | #ifdef BN_LLONG | ||
| 773 | BN_ULLONG t, tt; | ||
| 774 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 775 | BN_ULONG bl, bh; | ||
| 776 | #endif | ||
| 777 | BN_ULONG t1, t2; | ||
| 778 | BN_ULONG c1, c2, c3; | 787 | BN_ULONG c1, c2, c3; |
| 779 | 788 | ||
| 780 | c1 = 0; | 789 | c1 = 0; |
| @@ -851,12 +860,6 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
| 851 | void | 860 | void |
| 852 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | 861 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) |
| 853 | { | 862 | { |
| 854 | #ifdef BN_LLONG | ||
| 855 | BN_ULLONG t, tt; | ||
| 856 | #elif !defined(BN_UMULT_LOHI) && !defined(BN_UMULT_HIGH) | ||
| 857 | BN_ULONG bl, bh; | ||
| 858 | #endif | ||
| 859 | BN_ULONG t1, t2; | ||
| 860 | BN_ULONG c1, c2, c3; | 863 | BN_ULONG c1, c2, c3; |
| 861 | 864 | ||
| 862 | c1 = 0; | 865 | c1 = 0; |
diff --git a/src/regress/lib/libcrypto/bn/general/bntest.c b/src/regress/lib/libcrypto/bn/general/bntest.c index 9debc00042..6a9c2e8a53 100644 --- a/src/regress/lib/libcrypto/bn/general/bntest.c +++ b/src/regress/lib/libcrypto/bn/general/bntest.c | |||
| @@ -690,42 +690,85 @@ test_mul(BIO *bp) | |||
| 690 | int | 690 | int |
| 691 | test_sqr(BIO *bp, BN_CTX *ctx) | 691 | test_sqr(BIO *bp, BN_CTX *ctx) |
| 692 | { | 692 | { |
| 693 | BIGNUM a, c,d, e; | 693 | BIGNUM *a, *c, *d, *e; |
| 694 | int i; | 694 | int i, ret = 0; |
| 695 | int rc = 1; | ||
| 696 | 695 | ||
| 697 | BN_init(&a); | 696 | a = BN_new(); |
| 698 | BN_init(&c); | 697 | c = BN_new(); |
| 699 | BN_init(&d); | 698 | d = BN_new(); |
| 700 | BN_init(&e); | 699 | e = BN_new(); |
| 701 | 700 | ||
| 702 | for (i = 0; i < num0; i++) { | 701 | for (i = 0; i < num0; i++) { |
| 703 | BN_bntest_rand(&a, 40 + i*10, 0, 0); | 702 | BN_bntest_rand(a, 40 + i * 10, 0, 0); |
| 704 | a.neg = rand_neg(); | 703 | a->neg = rand_neg(); |
| 705 | BN_sqr(&c, &a, ctx); | 704 | BN_sqr(c, a, ctx); |
| 706 | if (bp != NULL) { | 705 | if (bp != NULL) { |
| 707 | if (!results) { | 706 | if (!results) { |
| 708 | BN_print(bp, &a); | 707 | BN_print(bp, a); |
| 709 | BIO_puts(bp, " * "); | 708 | BIO_puts(bp, " * "); |
| 710 | BN_print(bp, &a); | 709 | BN_print(bp, a); |
| 711 | BIO_puts(bp, " - "); | 710 | BIO_puts(bp, " - "); |
| 712 | } | 711 | } |
| 713 | BN_print(bp, &c); | 712 | BN_print(bp, c); |
| 714 | BIO_puts(bp, "\n"); | 713 | BIO_puts(bp, "\n"); |
| 715 | } | 714 | } |
| 716 | BN_div(&d, &e, &c, &a, ctx); | 715 | BN_div(d, e, c, a, ctx); |
| 717 | BN_sub(&d, &d, &a); | 716 | BN_sub(d, d, a); |
| 718 | if (!BN_is_zero(&d) || !BN_is_zero(&e)) { | 717 | if (!BN_is_zero(d) || !BN_is_zero(e)) { |
| 719 | fprintf(stderr, "Square test failed!\n"); | 718 | fprintf(stderr, "Square test failed!\n"); |
| 720 | rc = 0; | 719 | goto err; |
| 721 | break; | ||
| 722 | } | 720 | } |
| 723 | } | 721 | } |
| 724 | BN_free(&a); | 722 | |
| 725 | BN_free(&c); | 723 | /* Regression test for a BN_sqr overflow bug. */ |
| 726 | BN_free(&d); | 724 | BN_hex2bn(&a, "80000000000000008000000000000001" |
| 727 | BN_free(&e); | 725 | "FFFFFFFFFFFFFFFE0000000000000000"); |
| 728 | return (rc); | 726 | BN_sqr(c, a, ctx); |
| 727 | if (bp != NULL) { | ||
| 728 | if (!results) { | ||
| 729 | BN_print(bp, a); | ||
| 730 | BIO_puts(bp, " * "); | ||
| 731 | BN_print(bp, a); | ||
| 732 | BIO_puts(bp, " - "); | ||
| 733 | } | ||
| 734 | BN_print(bp, c); | ||
| 735 | BIO_puts(bp, "\n"); | ||
| 736 | } | ||
| 737 | BN_mul(d, a, a, ctx); | ||
| 738 | if (BN_cmp(c, d)) { | ||
| 739 | fprintf(stderr, | ||
| 740 | "Square test failed: BN_sqr and BN_mul produce " | ||
| 741 | "different results!\n"); | ||
| 742 | goto err; | ||
| 743 | } | ||
| 744 | |||
| 745 | /* Regression test for a BN_sqr overflow bug. */ | ||
| 746 | BN_hex2bn(&a, "80000000000000000000000080000001" | ||
| 747 | "FFFFFFFE000000000000000000000000"); | ||
| 748 | BN_sqr(c, a, ctx); | ||
| 749 | if (bp != NULL) { | ||
| 750 | if (!results) { | ||
| 751 | BN_print(bp, a); | ||
| 752 | BIO_puts(bp, " * "); | ||
| 753 | BN_print(bp, a); | ||
| 754 | BIO_puts(bp, " - "); | ||
| 755 | } | ||
| 756 | BN_print(bp, c); | ||
| 757 | BIO_puts(bp, "\n"); | ||
| 758 | } | ||
| 759 | BN_mul(d, a, a, ctx); | ||
| 760 | if (BN_cmp(c, d)) { | ||
| 761 | fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " | ||
| 762 | "different results!\n"); | ||
| 763 | goto err; | ||
| 764 | } | ||
| 765 | ret = 1; | ||
| 766 | err: | ||
| 767 | BN_free(a); | ||
| 768 | BN_free(c); | ||
| 769 | BN_free(d); | ||
| 770 | BN_free(e); | ||
| 771 | return ret; | ||
| 729 | } | 772 | } |
| 730 | 773 | ||
| 731 | int | 774 | int |
