Disable FMA by default. Use -Ofma or jit.opt.start("+fma") to enable.

See the discussion in #918 for the rationale.
author: Mike Pall <mike> 2022-12-07 18:38:22 +0100
committer: Mike Pall <mike> 2022-12-07 18:38:22 +0100
commit: de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b (patch)
tree: c6dd3a9575b77c9f019c9d8627a814a955227acf /src/lj_vmmath.c
parent: 7d5d4a1b1a690d9fc87253868ba967bf25f4df6e (diff)
download: luajit-de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b.tar.gz
luajit-de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b.tar.bz2
luajit-de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b.zip
1 file changed, 12 insertions, 1 deletion
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index b6cc60ba..d0febd81 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -36,6 +36,17 @@ LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
 /* -- Helper functions ---------------------------------------------------- */
+/* Required to prevent the C compiler from applying FMA optimizations.
+**
+** Yes, there's -ffp-contract and the FP_CONTRACT pragma ... in theory.
+** But the current state of C compilers is a mess in this regard.
+** Also, this function is not performance sensitive at all.
+*/
+LJ_NOINLINE static double lj_vm_floormul(double x, double y)
+{
+  return lj_vm_floor(x / y) * y;
+}
 double lj_vm_foldarith(double x, double y, int op)
 {
  switch (op) {
@@ -43,7 +54,7 @@ double lj_vm_foldarith(double x, double y, int op)
  case IR_SUB - IR_ADD: return x-y; break;
  case IR_MUL - IR_ADD: return x*y; break;
  case IR_DIV - IR_ADD: return x/y; break;
-  case IR_MOD - IR_ADD: return x-lj_vm_floor(x/y)*y; break;
+  case IR_MOD - IR_ADD: return x-lj_vm_floormul(x, y); break;
  case IR_POW - IR_ADD: return pow(x, y); break;
  case IR_NEG - IR_ADD: return -x; break;
  case IR_ABS - IR_ADD: return fabs(x); break;
author	Mike Pall <mike>	2022-12-07 18:38:22 +0100
committer	Mike Pall <mike>	2022-12-07 18:38:22 +0100
commit	de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b (patch)
tree	c6dd3a9575b77c9f019c9d8627a814a955227acf /src/lj_vmmath.c
parent	7d5d4a1b1a690d9fc87253868ba967bf25f4df6e (diff)
download	luajit-de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b.tar.gz luajit-de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b.tar.bz2 luajit-de2e1ca9d3d87e74c0c20c1e4ad3c32b31a5875b.zip