diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/libcrypto/bn/bn_local.h | 252 |
1 file changed, 1 insertion, 251 deletions
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
index 51582f9833..6d308218e7 100644
--- a/src/lib/libcrypto/bn/bn_local.h
+++ b/src/lib/libcrypto/bn/bn_local.h
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_local.h,v 1.10 2023/02/16 11:13:05 jsing Exp $ */ | 1 | /* $OpenBSD: bn_local.h,v 1.11 2023/02/17 05:30:20 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -241,256 +241,6 @@ struct bn_gencb_st { | |||
| 241 | #define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */ | 241 | #define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */ |
| 242 | #define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */ | 242 | #define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */ |
| 243 | 243 | ||
| 244 | #if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) | ||
| 245 | /* | ||
| 246 | * BN_UMULT_HIGH section. | ||
| 247 | * | ||
| 248 | * No, I'm not trying to overwhelm you when stating that the | ||
| 249 | * product of N-bit numbers is 2*N bits wide:-) No, I don't expect | ||
| 250 | * you to be impressed when I say that if the compiler doesn't | ||
| 251 | * support 2*N integer type, then you have to replace every N*N | ||
| 252 | * multiplication with 4 (N/2)*(N/2) accompanied by some shifts | ||
| 253 | * and additions which unavoidably results in severe performance | ||
| 254 | * penalties. Of course provided that the hardware is capable of | ||
| 255 | * producing 2*N result... That's when you normally start | ||
| 256 | * considering assembler implementation. However! It should be | ||
| 257 | * pointed out that some CPUs (most notably Alpha, PowerPC and | ||
| 258 | * upcoming IA-64 family:-) provide *separate* instruction | ||
| 259 | * calculating the upper half of the product placing the result | ||
| 260 | * into a general purpose register. Now *if* the compiler supports | ||
| 261 | * inline assembler, then it's not impossible to implement the | ||
| 262 | * "bignum" routines (and have the compiler optimize 'em) | ||
| 263 | * exhibiting "native" performance in C. That's what BN_UMULT_HIGH | ||
| 264 | * macro is about:-) | ||
| 265 | * | ||
| 266 | * <appro@fy.chalmers.se> | ||
| 267 | */ | ||
| 268 | # if defined(__alpha) | ||
| 269 | # if defined(__GNUC__) && __GNUC__>=2 | ||
| 270 | # define BN_UMULT_HIGH(a,b) ({ \ | ||
| 271 | BN_ULONG ret; \ | ||
| 272 | asm ("umulh %1,%2,%0" \ | ||
| 273 | : "=r"(ret) \ | ||
| 274 | : "r"(a), "r"(b)); \ | ||
| 275 | ret; }) | ||
| 276 | # endif /* compiler */ | ||
| 277 | # elif defined(_ARCH_PPC) && defined(_LP64) | ||
| 278 | # if defined(__GNUC__) && __GNUC__>=2 | ||
| 279 | # define BN_UMULT_HIGH(a,b) ({ \ | ||
| 280 | BN_ULONG ret; \ | ||
| 281 | asm ("mulhdu %0,%1,%2" \ | ||
| 282 | : "=r"(ret) \ | ||
| 283 | : "r"(a), "r"(b)); \ | ||
| 284 | ret; }) | ||
| 285 | # endif /* compiler */ | ||
| 286 | # elif defined(__x86_64) || defined(__x86_64__) | ||
| 287 | # if defined(__GNUC__) && __GNUC__>=2 | ||
| 288 | # define BN_UMULT_HIGH(a,b) ({ \ | ||
| 289 | BN_ULONG ret,discard; \ | ||
| 290 | asm ("mulq %3" \ | ||
| 291 | : "=a"(discard),"=d"(ret) \ | ||
| 292 | : "a"(a), "g"(b) \ | ||
| 293 | : "cc"); \ | ||
| 294 | ret; }) | ||
| 295 | # define BN_UMULT_LOHI(low,high,a,b) \ | ||
| 296 | asm ("mulq %3" \ | ||
| 297 | : "=a"(low),"=d"(high) \ | ||
| 298 | : "a"(a),"g"(b) \ | ||
| 299 | : "cc"); | ||
| 300 | # endif | ||
| 301 | # elif defined(__mips) && defined(_LP64) | ||
| 302 | # if defined(__GNUC__) && __GNUC__>=2 | ||
| 303 | # if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) /* "h" constraint is no more since 4.4 */ | ||
| 304 | # define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) | ||
| 305 | # define BN_UMULT_LOHI(low,high,a,b) ({ \ | ||
| 306 | __uint128_t ret=(__uint128_t)(a)*(b); \ | ||
| 307 | (high)=ret>>64; (low)=ret; }) | ||
| 308 | # else | ||
| 309 | # define BN_UMULT_HIGH(a,b) ({ \ | ||
| 310 | BN_ULONG ret; \ | ||
| 311 | asm ("dmultu %1,%2" \ | ||
| 312 | : "=h"(ret) \ | ||
| 313 | : "r"(a), "r"(b) : "l"); \ | ||
| 314 | ret; }) | ||
| 315 | # define BN_UMULT_LOHI(low,high,a,b)\ | ||
| 316 | asm ("dmultu %2,%3" \ | ||
| 317 | : "=l"(low),"=h"(high) \ | ||
| 318 | : "r"(a), "r"(b)); | ||
| 319 | # endif | ||
| 320 | # endif | ||
| 321 | # endif /* cpu */ | ||
| 322 | #endif /* OPENSSL_NO_ASM */ | ||
| 323 | |||
| 324 | /************************************************************* | ||
| 325 | * Using the long long type | ||
| 326 | */ | ||
| 327 | #define Lw(t) (((BN_ULONG)(t))&BN_MASK2) | ||
| 328 | #define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2) | ||
| 329 | |||
| 330 | #ifndef BN_LLONG | ||
| 331 | /************************************************************* | ||
| 332 | * No long long type | ||
| 333 | */ | ||
| 334 | |||
| 335 | #define LBITS(a) ((a)&BN_MASK2l) | ||
| 336 | #define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) | ||
| 337 | #define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2) | ||
| 338 | |||
| 339 | #define mul64(l,h,bl,bh) \ | ||
| 340 | { \ | ||
| 341 | BN_ULONG m,m1,lt,ht; \ | ||
| 342 | \ | ||
| 343 | lt=l; \ | ||
| 344 | ht=h; \ | ||
| 345 | m =(bh)*(lt); \ | ||
| 346 | lt=(bl)*(lt); \ | ||
| 347 | m1=(bl)*(ht); \ | ||
| 348 | ht =(bh)*(ht); \ | ||
| 349 | m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \ | ||
| 350 | ht+=HBITS(m); \ | ||
| 351 | m1=L2HBITS(m); \ | ||
| 352 | lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \ | ||
| 353 | (l)=lt; \ | ||
| 354 | (h)=ht; \ | ||
| 355 | } | ||
| 356 | |||
| 357 | #define sqr64(lo,ho,in) \ | ||
| 358 | { \ | ||
| 359 | BN_ULONG l,h,m; \ | ||
| 360 | \ | ||
| 361 | h=(in); \ | ||
| 362 | l=LBITS(h); \ | ||
| 363 | h=HBITS(h); \ | ||
| 364 | m =(l)*(h); \ | ||
| 365 | l*=l; \ | ||
| 366 | h*=h; \ | ||
| 367 | h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \ | ||
| 368 | m =(m&BN_MASK2l)<<(BN_BITS4+1); \ | ||
| 369 | l=(l+m)&BN_MASK2; if (l < m) h++; \ | ||
| 370 | (lo)=l; \ | ||
| 371 | (ho)=h; \ | ||
| 372 | } | ||
| 373 | |||
| 374 | #endif /* !BN_LLONG */ | ||
| 375 | |||
| 376 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ | ||
| 377 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ | ||
| 378 | /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ | ||
| 379 | |||
| 380 | #ifdef BN_LLONG | ||
| 381 | /* | ||
| 382 | * Keep in mind that additions to multiplication result can not | ||
| 383 | * overflow, because its high half cannot be all-ones. | ||
| 384 | */ | ||
| 385 | |||
| 386 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
| 387 | BN_ULONG hi; \ | ||
| 388 | BN_ULLONG t = (BN_ULLONG)(a)*(b); \ | ||
| 389 | BN_ULLONG tt = t+c0; /* no carry */ \ | ||
| 390 | c0 = (BN_ULONG)Lw(tt); \ | ||
| 391 | hi = (BN_ULONG)Hw(tt); \ | ||
| 392 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 393 | t += c0; /* no carry */ \ | ||
| 394 | c0 = (BN_ULONG)Lw(t); \ | ||
| 395 | hi = (BN_ULONG)Hw(t); \ | ||
| 396 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 397 | } while(0) | ||
| 398 | |||
| 399 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 400 | BN_ULONG hi; \ | ||
| 401 | BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \ | ||
| 402 | t += c0; /* no carry */ \ | ||
| 403 | c0 = (BN_ULONG)Lw(t); \ | ||
| 404 | hi = (BN_ULONG)Hw(t); \ | ||
| 405 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 406 | } while(0) | ||
| 407 | |||
| 408 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
| 409 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
| 410 | |||
| 411 | #elif defined(BN_UMULT_LOHI) | ||
| 412 | /* | ||
| 413 | * Keep in mind that additions to hi can not overflow, because | ||
| 414 | * the high word of a multiplication result cannot be all-ones. | ||
| 415 | */ | ||
| 416 | |||
| 417 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
| 418 | BN_ULONG ta = (a), tb = (b); \ | ||
| 419 | BN_ULONG lo, hi, tt; \ | ||
| 420 | BN_UMULT_LOHI(lo,hi,ta,tb); \ | ||
| 421 | c0 += lo; tt = hi+((c0<lo)?1:0); \ | ||
| 422 | c1 += tt; c2 += (c1<tt)?1:0; \ | ||
| 423 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 424 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 425 | } while(0) | ||
| 426 | |||
| 427 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 428 | BN_ULONG ta = (a)[i]; \ | ||
| 429 | BN_ULONG lo, hi; \ | ||
| 430 | BN_UMULT_LOHI(lo,hi,ta,ta); \ | ||
| 431 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 432 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 433 | } while(0) | ||
| 434 | |||
| 435 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
| 436 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
| 437 | |||
| 438 | #elif defined(BN_UMULT_HIGH) | ||
| 439 | /* | ||
| 440 | * Keep in mind that additions to hi can not overflow, because | ||
| 441 | * the high word of a multiplication result cannot be all-ones. | ||
| 442 | */ | ||
| 443 | |||
| 444 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
| 445 | BN_ULONG ta = (a), tb = (b), tt; \ | ||
| 446 | BN_ULONG lo = ta * tb; \ | ||
| 447 | BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \ | ||
| 448 | c0 += lo; tt = hi + ((c0<lo)?1:0); \ | ||
| 449 | c1 += tt; c2 += (c1<tt)?1:0; \ | ||
| 450 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 451 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 452 | } while(0) | ||
| 453 | |||
| 454 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 455 | BN_ULONG ta = (a)[i]; \ | ||
| 456 | BN_ULONG lo = ta * ta; \ | ||
| 457 | BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \ | ||
| 458 | c0 += lo; hi += (c0<lo)?1:0; \ | ||
| 459 | c1 += hi; c2 += (c1<hi)?1:0; \ | ||
| 460 | } while(0) | ||
| 461 | |||
| 462 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
| 463 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
| 464 | |||
| 465 | #else /* !BN_LLONG */ | ||
| 466 | /* | ||
| 467 | * Keep in mind that additions to hi can not overflow, because | ||
| 468 | * the high word of a multiplication result cannot be all-ones. | ||
| 469 | */ | ||
| 470 | |||
| 471 | #define mul_add_c2(a,b,c0,c1,c2) do { \ | ||
| 472 | BN_ULONG tt; \ | ||
| 473 | BN_ULONG lo = LBITS(a), hi = HBITS(a); \ | ||
| 474 | BN_ULONG bl = LBITS(b), bh = HBITS(b); \ | ||
| 475 | mul64(lo,hi,bl,bh); \ | ||
| 476 | tt = hi; \ | ||
| 477 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \ | ||
| 478 | c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \ | ||
| 479 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
| 480 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 481 | } while(0) | ||
| 482 | |||
| 483 | #define sqr_add_c(a,i,c0,c1,c2) do { \ | ||
| 484 | BN_ULONG lo, hi; \ | ||
| 485 | sqr64(lo,hi,(a)[i]); \ | ||
| 486 | c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \ | ||
| 487 | c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \ | ||
| 488 | } while(0) | ||
| 489 | |||
| 490 | #define sqr_add_c2(a,i,j,c0,c1,c2) \ | ||
| 491 | mul_add_c2((a)[i],(a)[j],c0,c1,c2) | ||
| 492 | #endif /* !BN_LLONG */ | ||
| 493 | |||
| 494 | /* The least significant word of a BIGNUM. */ | 244 | /* The least significant word of a BIGNUM. */ |
| 495 | #define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0]) | 245 | #define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0]) |
| 496 | 246 | ||
