From 138f54352ad604ef50f77cbcc15abec6dbd883c0 Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Sun, 22 May 2011 17:41:59 +0200
Subject: Split up FP IR instructions with SPLIT pass for soft-float targets.

---
 lib/dump.lua       |  10 +-
 src/Makefile.dep   |   2 +-
 src/lj_asm.c       | 112 ++++++++++++++++------
 src/lj_ffrecord.c  |   7 +-
 src/lj_ir.c        |   1 +
 src/lj_ir.h        |   2 +-
 src/lj_ircall.h    | 123 ++++++++++++++++++++++++
 src/lj_iropt.h     |   2 +-
 src/lj_jit.h       |   4 +-
 src/lj_opt_split.c | 271 +++++++++++++++++++++++++++++++++++++++++++++++------
 src/lj_record.c    |  10 +-
 src/lj_snap.c      |   4 +-
 src/lj_snap.h      |   3 +-
 13 files changed, 479 insertions(+), 72 deletions(-)

diff --git a/lib/dump.lua b/lib/dump.lua
index 0f9f7b2b..a6b61f53 100644
--- a/lib/dump.lua
+++ b/lib/dump.lua
@@ -147,6 +147,7 @@ local irtype_text = {
   "u32",
   "i64",
   "u64",
+  "sfp",
 }
 
 local colortype_ansi = {
@@ -173,6 +174,7 @@ local colortype_ansi = {
   "\027[35m%s\027[m",
   "\027[35m%s\027[m",
   "\027[35m%s\027[m",
+  "\027[35m%s\027[m",
 }
 
 local function colorize_text(s, t)
@@ -318,11 +320,11 @@ local function printsnap(tr, snap)
       if ref < 0 then
 	out:write(formatk(tr, ref))
       else
-	local m, ot, op1, op2 = traceir(tr, ref)
-	out:write(colorize(format("%04d", ref), band(ot, 31)))
 	if band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
-	  local m, ot, op1, op2 = traceir(tr, ref+1)
-	  out:write(colorize(format("/%04d", ref+1), band(ot, 31)))
+	  out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
+	else
+	  local m, ot, op1, op2 = traceir(tr, ref)
+	  out:write(colorize(format("%04d", ref), band(ot, 31)))
 	end
       end
       out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 89ac79e8..8e0d7a1c 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -108,7 +108,7 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
 lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
  lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
- lj_lib.h
+ lj_vm.h lj_lib.h
 lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
  lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
  lj_state.h lj_lex.h lj_parse.h lj_char.h
diff --git a/src/lj_asm.c b/src/lj_asm.c
index f33dc790..18383bcc 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -85,6 +85,9 @@ typedef struct ASMState {
 
   IRRef1 phireg[RID_MAX];  /* PHI register references. */
   uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent slot to RegSP map. */
+#if LJ_SOFTFP
+  uint16_t parentmaphi[LJ_MAX_JSLOTS];  /* Parent slot to hi RegSP map. */
+#endif
 } ASMState;
 
 #define IR(ref)			(&as->ir[(ref)])
@@ -273,9 +276,12 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
   ra_modified(as, r);
   ir->r = RID_INIT;  /* Do not keep any hint. */
   RA_DBGX((as, "remat     $i $r", ir, r));
+#if !LJ_SOFTFP
   if (ir->o == IR_KNUM) {
     emit_loadn(as, r, ir_knum(ir));
-  } else if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
+  } else
+#endif
+  if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
     ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
     emit_getgl(as, r, jit_base);
   } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
@@ -596,31 +602,40 @@ static int asm_snap_canremat(ASMState *as)
   return 0;
 }
 
-/* Allocate registers or spill slots for refs escaping to a snapshot. */
+/* Allocate a register or spill slot for one ref escaping to a snapshot. */
+static void asm_snap_alloc1(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (!ra_used(ir)) {  /* Only for refs not yet marked as used. */
+    RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR;
+    /* No FPRs for soft-float. Get a weak reg if one is free or can remat. */
+    if ((as->freeset & allow) ||
+	(allow == RSET_FPR && asm_snap_canremat(as))) {
+      Reg r = ra_allocref(as, ref, allow);  /* Allocate a register. */
+      if (!irt_isphi(ir->t))
+	ra_weak(as, r);  /* But mark it as weakly referenced. */
+      checkmclim(as);
+      RA_DBGX((as, "snapreg   $f $r", ref, ir->r));
+    } else {
+      ra_spill(as, ir);  /* Otherwise force a spill slot. */
+      RA_DBGX((as, "snapspill $f $s", ref, ir->s));
+    }
+  }
+}
+
+/* Allocate all non-constant refs escaping to the current snapshot. */
 static void asm_snap_alloc(ASMState *as)
 {
   SnapShot *snap = &as->T->snap[as->snapno];
   SnapEntry *map = &as->T->snapmap[snap->mapofs];
   MSize n, nent = snap->nent;
   for (n = 0; n < nent; n++) {
-    IRRef ref = snap_ref(map[n]);
+    SnapEntry sn = map[n];
+    IRRef ref = snap_ref(sn);
     if (!irref_isk(ref)) {
-      IRIns *ir = IR(ref);
-      if (!ra_used(ir)) {
-	RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
-	/* Get a weak register if we have a free one or can rematerialize. */
-	if ((as->freeset & allow) ||
-	    (allow == RSET_FPR && asm_snap_canremat(as))) {
-	  Reg r = ra_allocref(as, ref, allow);  /* Allocate a register. */
-	  if (!irt_isphi(ir->t))
-	    ra_weak(as, r);  /* But mark it as weakly referenced. */
-	  checkmclim(as);
-	  RA_DBGX((as, "snapreg   $f $r", ref, ir->r));
-	} else {
-	  ra_spill(as, ir);  /* Otherwise force a spill slot. */
-	  RA_DBGX((as, "snapspill $f $s", ref, ir->s));
-	}
-      }
+      asm_snap_alloc1(as, ref);
+      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM))  /* Hiword is at ref+1. */
+	asm_snap_alloc1(as, ref+1);
     }
   }
 }
@@ -997,6 +1012,15 @@ static void asm_head_root(ASMState *as)
   as->T->topslot = gcref(as->T->startpt)->pt.framesize;
 }
 
+/* Get RegSP for parent slot. HIOP: hi map, slot taken from preceding ins. */
+static LJ_AINLINE RegSP asm_head_parentrs(ASMState *as, IRIns *ir)
+{
+#if LJ_SOFTFP
+  if (ir->o == IR_HIOP) return as->parentmaphi[(ir-1)->op1];
+#endif
+  return as->parentmap[ir->op1];
+}
+
 /* Head of a side trace.
 **
 ** The current simplistic algorithm requires that all slots inherited
@@ -1022,8 +1046,9 @@ static void asm_head_side(ASMState *as)
   for (i = as->stopins; i > REF_BASE; i--) {
     IRIns *ir = IR(i);
     RegSP rs;
-    lua_assert(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT));
-    rs = as->parentmap[ir->op1];
+    lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+	       (LJ_SOFTFP && ir->o == IR_HIOP));
+    rs = asm_head_parentrs(as, ir);
     if (ra_hasreg(ir->r)) {
       rset_clear(allow, ir->r);
       if (ra_hasspill(ir->s))
@@ -1052,6 +1077,12 @@ static void asm_head_side(ASMState *as)
   }
   as->T->spadjust = (uint16_t)spadj;
 
+#if !LJ_TARGET_X86ORX64
+  /* Restore BASE register from parent spill slot. */
+  if (ra_hasspill(irp->s))
+    emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, spdelta + sps_scale(irp->s));
+#endif
+
   /* Reload spilled target registers. */
   if (pass2) {
     for (i = as->stopins; i > REF_BASE; i--) {
@@ -1061,12 +1092,12 @@ static void asm_head_side(ASMState *as)
 	Reg r;
 	RegSP rs;
 	irt_clearmark(ir->t);
-	rs = as->parentmap[ir->op1];
+	rs = asm_head_parentrs(as, ir);
 	if (!ra_hasspill(regsp_spill(rs)))
 	  ra_sethint(ir->r, rs);  /* Hint may be gone, set it again. */
 	else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
 	  continue;  /* Same spill slot, do nothing. */
-	mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow;
+	mask = ((!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
 	if (mask == RSET_EMPTY)
 	  lj_trace_err(as->J, LJ_TRERR_NYICOAL);
 	r = ra_allocref(as, i, mask);
@@ -1093,7 +1124,7 @@ static void asm_head_side(ASMState *as)
     while (work) {
       Reg r = rset_pickbot(work);
       IRIns *ir = IR(regcost_ref(as->cost[r]));
-      RegSP rs = as->parentmap[ir->op1];
+      RegSP rs = asm_head_parentrs(as, ir);
       rset_clear(work, r);
       if (ra_hasspill(regsp_spill(rs))) {
 	int32_t ofs = sps_scale(regsp_spill(rs));
@@ -1262,13 +1293,37 @@ static void asm_setup_regsp(ASMState *as)
 		      (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
       continue;
       }
-#if LJ_32 && LJ_HASFFI
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
     case IR_HIOP:
-      if ((ir-1)->o == IR_CALLN) {
+      switch ((ir-1)->o) {
+#if LJ_SOFTFP
+      case IR_SLOAD:
+	if (((ir-1)->op2 & IRSLOAD_PARENT)) {
+	  RegSP rs = as->parentmaphi[(ir-1)->op1];
+	  lua_assert(regsp_used(rs));
+	  as->stopins = i;
+	  if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) {
+	    ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
+	    continue;
+	  }
+	}
+	break;
+#endif
+      case IR_CALLN: case IR_CALLXS:
+#if LJ_SOFTFP
+      case IR_MIN: case IR_MAX:
+#endif
 	ir->prev = REGSP_HINT(RID_RETHI);
 	continue;
+      default:
+	break;
       }
       break;
+#endif
+#if LJ_SOFTFP
+    case IR_MIN: case IR_MAX:
+      if ((ir+1)->o != IR_HIOP) break;
+      /* fallthrough */
 #endif
     /* C calls evict all scratch regs and return results in RID_RET. */
     case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
@@ -1387,7 +1442,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
   as->loopinv = 0;
   if (J->parent) {
     as->parent = traceref(J, J->parent);
-    lj_snap_regspmap(as->parentmap, as->parent, J->exitno);
+    lj_snap_regspmap(as->parentmap, as->parent, J->exitno, 0);
+#if LJ_SOFTFP
+    lj_snap_regspmap(as->parentmaphi, as->parent, J->exitno, 1);
+#endif
   } else {
     as->parent = NULL;
   }
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 91d31b29..0de54f04 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -438,7 +438,12 @@ static void LJ_FASTCALL recff_math_unary(jit_State *J, RecordFFData *rd)
 static void LJ_FASTCALL recff_math_binary(jit_State *J, RecordFFData *rd)
 {
   TRef tr = lj_ir_tonum(J, J->base[0]);
-  J->base[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, J->base[1]));
+#if LJ_TARGET_X86ORX64
+  TRef tr2 = lj_ir_tonum(J, J->base[1]);
+#else  /* NOTE(review): int 2nd operand; confirm it suits all rd->data ops. */
+  TRef tr2 = lj_opt_narrow_toint(J, J->base[1]);
+#endif
+  J->base[0] = emitir(IRTN(rd->data), tr, tr2);
 }
 
 /* Record math.asin, math.acos, math.atan. */
diff --git a/src/lj_ir.c b/src/lj_ir.c
index b7d1e7a7..59ffcfde 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -27,6 +27,7 @@
 #include "lj_cdata.h"
 #include "lj_carith.h"
 #endif
+#include "lj_vm.h"
 #include "lj_lib.h"
 
 /* Some local macros to save typing. Undef'd at the end. */
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 1bc6c332..aac34350 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -283,7 +283,7 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
   _(NIL) _(FALSE) _(TRUE) _(LIGHTUD) _(STR) _(P32) _(THREAD) \
   _(PROTO) _(FUNC) _(P64) _(CDATA) _(TAB) _(UDATA) \
   _(FLOAT) _(NUM) _(I8) _(U8) _(I16) _(U16) _(INT) _(U32) _(I64) _(U64) \
-  /* There is room for 10 more types. */
+  _(SOFTFP)  /* There is room for 9 more types. */
 
 /* IR result type and flags (8 bit). */
 typedef enum {
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 3131b15d..b83a0a81 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -38,6 +38,72 @@ typedef struct CCallInfo {
 #define CCI_FASTCALL		0x0800	/* Fastcall convention. */
 
 /* Function definitions for CALL* instructions. */
+#if LJ_SOFTFP
+#if LJ_HASFFI
+#define IRCALLDEF_SOFTFP_FFI(_) \
+  _(softfp_ui2d,	1,   N, NUM, 0) \
+  _(softfp_l2d,		2,   N, NUM, 0) \
+  _(softfp_ul2d,	2,   N, NUM, 0) \
+  _(softfp_f2d,		1,   N, NUM, 0) \
+  _(softfp_d2ui,	2,   N, INT, 0) \
+  _(softfp_d2l,		2,   N, I64, 0) \
+  _(softfp_d2ul,	2,   N, U64, 0) \
+  _(softfp_d2f,		2,   N, FLOAT, 0) \
+  _(softfp_i2f,		1,   N, FLOAT, 0) \
+  _(softfp_ui2f,	1,   N, FLOAT, 0) \
+  _(softfp_l2f,		2,   N, FLOAT, 0) \
+  _(softfp_ul2f,	2,   N, FLOAT, 0) \
+  _(softfp_f2i,		1,   N, INT, 0) \
+  _(softfp_f2ui,	1,   N, INT, 0) \
+  _(softfp_f2l,		1,   N, I64, 0) \
+  _(softfp_f2ul,	1,   N, U64, 0)
+#else
+#define IRCALLDEF_SOFTFP_FFI(_)
+#endif
+#define IRCALLDEF_SOFTFP(_) \
+  _(lj_vm_tobit,	2,   N, INT, 0) \
+  _(softfp_add,		4,   N, NUM, 0) \
+  _(softfp_sub,		4,   N, NUM, 0) \
+  _(softfp_mul,		4,   N, NUM, 0) \
+  _(softfp_div,		4,   N, NUM, 0) \
+  _(softfp_cmp,		4,   N, NIL, 0) \
+  _(softfp_i2d,		1,   N, NUM, 0) \
+  _(softfp_d2i,		2,   N, INT, 0) \
+  IRCALLDEF_SOFTFP_FFI(_)
+#else
+#define IRCALLDEF_SOFTFP(_)
+#endif
+
+#if LJ_TARGET_X86ORX64
+/* Use lj_vm_* helpers and x87 ops. */
+#define IRCALLDEF_FPMATH(_)
+#else
+/* Use standard math library calls. */
+#if LJ_SOFTFP
+#define ARG1_FP		2	/* Treat as 2 32 bit arguments. */
+#else
+#define ARG1_FP		1
+#endif
+/* ORDER FPM */
+#define IRCALLDEF_FPMATH(_) \
+  _(lj_vm_floor,	ARG1_FP,   N, NUM, 0) \
+  _(lj_vm_ceil,		ARG1_FP,   N, NUM, 0) \
+  _(lj_vm_trunc,	ARG1_FP,   N, NUM, 0) \
+  _(sqrt,		ARG1_FP,   N, NUM, 0) \
+  _(exp,		ARG1_FP,   N, NUM, 0) \
+  _(exp2,		ARG1_FP,   N, NUM, 0) \
+  _(log,		ARG1_FP,   N, NUM, 0) \
+  _(log2,		ARG1_FP,   N, NUM, 0) \
+  _(log10,		ARG1_FP,   N, NUM, 0) \
+  _(sin,		ARG1_FP,   N, NUM, 0) \
+  _(cos,		ARG1_FP,   N, NUM, 0) \
+  _(tan,		ARG1_FP,   N, NUM, 0) \
+  _(lj_vm_powi,		ARG1_FP+1, N, NUM, 0) \
+  _(pow,		ARG1_FP*2, N, NUM, 0) \
+  _(atan2,		ARG1_FP*2, N, NUM, 0) \
+  _(ldexp,		ARG1_FP+1, N, NUM, 0)
+#endif
+
 #if LJ_HASFFI
 #if LJ_32
 #define ARG2_64		4	/* Treat as 4 32 bit arguments. */
@@ -62,6 +128,7 @@ typedef struct CCallInfo {
 #else
 #define IRCALLDEF_FFI(_)
 #endif
+
 #define IRCALLDEF(_) \
   _(lj_str_cmp,		2,  FN, INT, CCI_NOFPRCLOBBER) \
   _(lj_str_new,		3,   S, STR, CCI_L) \
@@ -76,6 +143,8 @@ typedef struct CCallInfo {
   _(lj_gc_barrieruv,	2,  FS, NIL, 0) \
   _(lj_mem_newgco,	2,  FS, P32, CCI_L) \
   _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
+  IRCALLDEF_SOFTFP(_) \
+  IRCALLDEF_FPMATH(_) \
   IRCALLDEF_FFI(_) \
   _(sinh,		1,  N, NUM, 0) \
   _(cosh,		1,  N, NUM, 0) \
@@ -97,4 +166,58 @@ LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...);
 
 LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
 
+/* Soft-float declarations. */
+#if LJ_SOFTFP
+#if LJ_TARGET_ARM
+#define softfp_add __aeabi_dadd
+#define softfp_sub __aeabi_dsub
+#define softfp_mul __aeabi_dmul
+#define softfp_div __aeabi_ddiv
+#define softfp_cmp __aeabi_cdcmple
+#define softfp_i2d __aeabi_i2d
+#define softfp_ui2d __aeabi_ui2d
+#define softfp_l2d __aeabi_l2d
+#define softfp_ul2d __aeabi_ul2d
+#define softfp_f2d __aeabi_f2d
+#define softfp_d2i __aeabi_d2iz
+#define softfp_d2ui __aeabi_d2uiz
+#define softfp_d2l __aeabi_d2lz
+#define softfp_d2ul __aeabi_d2ulz
+#define softfp_d2f __aeabi_d2f
+#define softfp_i2f __aeabi_i2f
+#define softfp_ui2f __aeabi_ui2f
+#define softfp_l2f __aeabi_l2f
+#define softfp_ul2f __aeabi_ul2f
+#define softfp_f2i __aeabi_f2iz
+#define softfp_f2ui __aeabi_f2uiz
+#define softfp_f2l __aeabi_f2lz
+#define softfp_f2ul __aeabi_f2ulz
+#else
+#error "Missing soft-float definitions for target architecture"
+#endif
+extern double softfp_add(double a, double b);
+extern double softfp_sub(double a, double b);
+extern double softfp_mul(double a, double b);
+extern double softfp_div(double a, double b);
+extern void softfp_cmp(double a, double b);
+extern double softfp_i2d(int32_t a);
+extern double softfp_ui2d(uint32_t a);
+extern double softfp_l2d(int64_t a);
+extern double softfp_ul2d(uint64_t a);
+extern double softfp_f2d(float a);
+extern int32_t softfp_d2i(double a);
+extern uint32_t softfp_d2ui(double a);
+extern int64_t softfp_d2l(double a);
+extern uint64_t softfp_d2ul(double a);
+extern float softfp_d2f(double a);
+extern float softfp_i2f(int32_t a);
+extern float softfp_ui2f(uint32_t a);
+extern float softfp_l2f(int64_t a);
+extern float softfp_ul2f(uint64_t a);
+extern int32_t softfp_f2i(float a);
+extern uint32_t softfp_f2ui(float a);
+extern int64_t softfp_f2l(float a);
+extern uint64_t softfp_f2ul(float a);
+#endif
+
 #endif
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index daba5296..7ab42b7a 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -148,7 +148,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
 /* Optimization passes. */
 LJ_FUNC void lj_opt_dce(jit_State *J);
 LJ_FUNC int lj_opt_loop(jit_State *J);
-#if LJ_HASFFI && LJ_32
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
 LJ_FUNC void lj_opt_split(jit_State *J);
 #else
 #define lj_opt_split(J)		UNUSED(J)
diff --git a/src/lj_jit.h b/src/lj_jit.h
index dd74dedb..63584355 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -250,7 +250,7 @@ enum {
   ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
 
 /* Set/reset flag to activate the SPLIT pass for the current trace. */
-#if LJ_32 && LJ_HASFFI
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
 #define lj_needsplit(J)		(J->needsplit = 1)
 #define lj_resetsplit(J)	(J->needsplit = 0)
 #else
@@ -311,7 +311,7 @@ typedef struct jit_State {
   MSize sizesnapmap;	/* Size of temp. snapshot map buffer. */
 
   PostProc postproc;	/* Required post-processing after execution. */
-#if LJ_32 && LJ_HASFFI
+#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
   int needsplit;	/* Need SPLIT pass. */
 #endif
 
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 2f8b1e9c..67436a65 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -8,7 +8,7 @@
 
 #include "lj_obj.h"
 
-#if LJ_HASJIT && LJ_HASFFI && LJ_32
+#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
 
 #include "lj_err.h"
 #include "lj_str.h"
@@ -21,9 +21,9 @@
 /* SPLIT pass:
 **
 ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
-** instructions. It's only active for 32 bit CPUs which lack native 64 bit
-** operations. The FFI is currently the only emitter for 64 bit
-** instructions, so this pass is disabled if the FFI is disabled.
+** instructions. It's only active for soft-float targets or for 32 bit CPUs
+** which lack native 64 bit integer operations (the FFI is currently the
+** only emitter for 64 bit integer instructions).
 **
 ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
 ** backend simple. Only a small amount of extra functionality needs to be
@@ -41,14 +41,19 @@
 ** The operands of HIOP hold the hiword input references. The output of HIOP
 ** is the hiword output reference, which is also used to hold the hiword
 ** register or spill slot information. The register allocator treats this
-** instruction independent of any other instruction, which improves code
+** instruction independently of any other instruction, which improves code
 ** quality compared to using fixed register pairs.
 **
 ** It's easier to split up some instructions into two regular 32 bit
 ** instructions. E.g. XLOAD is split up into two XLOADs with two different
 ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
 ** constants, too. Some hiword instructions can be entirely omitted, e.g.
-** when zero-extending a 32 bit value to 64 bits.
+** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
+** are split up into two 32 bit arguments each.
+**
+** On soft-float targets, floating-point instructions are directly converted
+** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
+** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
 **
 ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
 ** two int64_t fields:
@@ -101,9 +106,42 @@ static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
   return nref;
 }
 
-/* Emit a CALLN with two split 64 bit arguments. */
-static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
+#if LJ_SOFTFP
+/* Emit a CALLN with one split 64 bit argument. Returns the HIOP ref. */
+static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
 			  IRIns *ir, IRCallID id)
+{
+  IRRef tmp, op1 = ir->op1;
+  J->cur.nins--;  /* Drops the latest ins; presumably the caller's copy. */
+#if LJ_LE
+  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
+#else
+  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
+#endif
+  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);  /* Loword ref. */
+  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
+}
+
+/* Emit a CALLN with a split 64 bit and a 32 bit arg. Returns HIOP ref. */
+static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
+			   IRIns *ir, IRCallID id)
+{
+  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
+  J->cur.nins--;  /* Drops the latest ins; presumably the caller's copy. */
+#if LJ_LE
+  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
+#else
+  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
+#endif
+  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);  /* 32 bit. */
+  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);  /* Loword ref. */
+  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
+}
+#endif
+
+/* Emit a CALLN with two split 64 bit arguments. */
+static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
+			   IRIns *ir, IRCallID id)
 {
   IRRef tmp, op1 = ir->op1, op2 = ir->op2;
   J->cur.nins--;
@@ -117,7 +155,9 @@ static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
 #endif
   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
-  return split_emit(J, IRTI(IR_HIOP), tmp, tmp);
+  return split_emit(J,
+    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
+    tmp, tmp);
 }
 
 /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
@@ -155,7 +195,8 @@ static void split_ir(jit_State *J)
   /* Process constants and fixed references. */
   for (ref = nk; ref <= REF_BASE; ref++) {
     IRIns *ir = &oir[ref];
-    if (ir->o == IR_KINT64) {  /* Split up 64 bit constant. */
+    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
+      /* Split up 64 bit constant. */
       TValue tv = *ir_k64(ir);
       ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
       hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
@@ -181,6 +222,106 @@ static void split_ir(jit_State *J)
     hisubst[ref] = 0;
 
     /* Split 64 bit instructions. */
+#if LJ_SOFTFP
+    if (irt_isnum(ir->t)) {
+      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
+      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
+      switch (ir->o) {
+      case IR_ADD:
+	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
+	break;
+      case IR_SUB:
+	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
+	break;
+      case IR_MUL:
+	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
+	break;
+      case IR_DIV:
+	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
+	break;
+      case IR_POW:
+	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
+	break;
+      case IR_FPMATH:
+	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
+	break;
+      case IR_ATAN2:
+	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
+	break;
+      case IR_LDEXP:
+	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
+	break;
+      case IR_NEG: case IR_ABS:
+	nir->o = IR_CONV;  /* Pass through loword. */
+	nir->op2 = (IRT_INT << 5) | IRT_INT;
+	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
+			hisubst[ir->op1], hisubst[ir->op2]);
+	break;
+      case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+      case IR_MIN: case IR_MAX:
+	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
+	break;
+      case IR_XLOAD:
+	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP),
+			split_ptr(J, nir->op1), ir->op2);
+#if LJ_BE
+	ir->prev = hi; hi = nref;
+#endif
+	break;
+      case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
+	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
+	break;
+      case IR_XSTORE: {
+#if LJ_LE
+	IRRef hiref = hisubst[ir->op2];
+#else
+	IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
+#endif
+	split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
+		   split_ptr(J, nir->op1), hiref);
+	break;
+	}
+      case IR_CONV: {  /* Conversion to number. Others handled below. */
+	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if LJ_32 && LJ_HASFFI
+	if (st == IRT_I64 || st == IRT_U64) {
+	  hi = split_call_l(J, hisubst, oir, ir,
+		 st == IRT_I64 ? IRCALL_softfp_l2d : IRCALL_softfp_ul2d);
+	  break;
+	}
+#endif
+	lua_assert(st == IRT_INT ||
+		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
+	nir->o = IR_CALLN;
+#if LJ_32 && LJ_HASFFI
+	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
+		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
+		   IRCALL_softfp_ui2d;
+#else
+	nir->op2 = IRCALL_softfp_i2d;
+#endif
+	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
+	break;
+	}
+      case IR_CALLS:
+      case IR_CALLXS:
+	goto split_call;
+      case IR_PHI:
+	if (nir->op1 == nir->op2)
+	  J->cur.nins--;  /* Drop useless PHIs. */
+	if (hisubst[ir->op1] != hisubst[ir->op2])
+	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
+		     hisubst[ir->op1], hisubst[ir->op2]);
+	break;
+      default:
+	lua_assert(ir->o <= IR_NE);
+	split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
+		   hisubst[ir->op1], hisubst[ir->op2]);
+	break;
+      }
+    } else
+#endif
+#if LJ_32 && LJ_HASFFI
     if (irt_isint64(ir->t)) {
       IRRef hiref = hisubst[ir->op1];
       nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
@@ -199,22 +340,22 @@ static void split_ir(jit_State *J)
 	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
 	break;
       case IR_MUL:
-	hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
+	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
 	break;
       case IR_DIV:
-	hi = split_call64(J, hisubst, oir, ir,
-			  irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
-					     IRCALL_lj_carith_divu64);
+	hi = split_call_ll(J, hisubst, oir, ir,
+			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+					      IRCALL_lj_carith_divu64);
 	break;
       case IR_MOD:
-	hi = split_call64(J, hisubst, oir, ir,
-			  irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
-					     IRCALL_lj_carith_modu64);
+	hi = split_call_ll(J, hisubst, oir, ir,
+			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+					      IRCALL_lj_carith_modu64);
 	break;
       case IR_POW:
-	hi = split_call64(J, hisubst, oir, ir,
-			  irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
-					     IRCALL_lj_carith_powu64);
+	hi = split_call_ll(J, hisubst, oir, ir,
+			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+					      IRCALL_lj_carith_powu64);
 	break;
       case IR_FLOAD:
 	lua_assert(ir->op2 == IRFL_CDATA_INT64);
@@ -239,9 +380,21 @@ static void split_ir(jit_State *J)
 	break;
       case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
 	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if LJ_SOFTFP
+	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
+	  hi = split_call_l(J, hisubst, oir, ir,  /* Keep hiword of result. */
+	    irt_isi64(ir->t) ? IRCALL_softfp_d2l : IRCALL_softfp_d2ul);
+	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
+	  nir->o = IR_CALLN;
+	  nir->op2 = irt_isi64(ir->t) ? IRCALL_softfp_f2l : IRCALL_softfp_f2ul;
+	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
+	}
+#else
 	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
 	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
-	} else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
+	}
+#endif
+	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
 	  /* Drop cast, since assembler doesn't care. */
 	  goto fwdlo;
 	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
@@ -274,13 +427,37 @@ static void split_ir(jit_State *J)
 	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
 	break;
       }
-    } else if (ir->o == IR_CONV) {  /* See above, too. */
+    } else
+#endif
+#if LJ_SOFTFP
+    if (ir->o == IR_TOBIT) {
+      IRRef tmp, op1 = ir->op1;
+      J->cur.nins--;
+#if LJ_LE
+      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
+#else
+      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
+#endif
+      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
+    } else
+#endif
+    if (ir->o == IR_CONV) {  /* See above, too. */
       IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if LJ_32 && LJ_HASFFI
       if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
+#if LJ_SOFTFP
+	if (irt_isfloat(ir->t)) {
+	  split_call_l(J, hisubst, oir, ir,
+		       st == IRT_I64 ? IRCALL_softfp_l2f : IRCALL_softfp_ul2f);
+	  J->cur.nins--;  /* Drop unused HIOP. */
+	}
+#else
 	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
 	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
 				hisubst[ir->op1], nref);
-	} else {  /* Truncate to lower 32 bits. */
+	}
+#endif
+	else {  /* Truncate to lower 32 bits. */
 	fwdlo:
 	  ir->prev = nir->op1;  /* Forward loword. */
 	  /* Replace with NOP to avoid messing up the snapshot logic. */
@@ -288,6 +465,36 @@ static void split_ir(jit_State *J)
 	  nir->op1 = nir->op2 = 0;
 	}
       }
+#endif
+#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
+      else if (irt_isfloat(ir->t)) {
+	if (st == IRT_NUM) {
+	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
+	  J->cur.nins--;  /* Drop unused HIOP. */
+	} else {
+	  nir->o = IR_CALLN;
+	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
+	}
+      } else if (st == IRT_FLOAT) {
+	nir->o = IR_CALLN;
+	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
+      } else
+#endif
+#if LJ_SOFTFP
+      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
+	if (irt_isguard(ir->t)) {
+	  lua_assert(0);  /* NYI: missing check. */
+	}
+	split_call_l(J, hisubst, oir, ir,
+#if LJ_32 && LJ_HASFFI
+		     st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
+#else
+		     IRCALL_softfp_d2i
+#endif
+		     );
+	J->cur.nins--;  /* Drop unused HIOP. */
+      }
+#endif
     } else if (ir->o == IR_CALLXS) {
       IRRef hiref;
     split_call:
@@ -303,8 +510,10 @@ static void split_ir(jit_State *J)
 #endif
 	ir->prev = nref = split_emit(J, ot, nref, op2);
       }
-      if (irt_isint64(ir->t))
-	hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
+      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
+	hi = split_emit(J,
+	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
+	  nref, nref);
     } else if (ir->o == IR_CARG) {
       IRRef hiref = hisubst[ir->op1];
       if (hiref) {
@@ -367,17 +576,18 @@ static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
   return NULL;
 }
 
-#ifdef LUA_USE_ASSERT
+#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
 /* Slow, but sure way to check whether a SPLIT pass is needed. */
 static int split_needsplit(jit_State *J)
 {
   IRIns *ir, *irend;
   IRRef ref;
   for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
-    if (irt_isint64(ir->t))
+    if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
       return 1;
   for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev)
-    if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
+    if ((LJ_SOFTFP && (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_NUM) ||
+	(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
 	(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64)
       return 1;
   return 0;  /* Nope. */
@@ -387,7 +597,12 @@ static int split_needsplit(jit_State *J)
 /* SPLIT pass. */
 void lj_opt_split(jit_State *J)
 {
+#if LJ_SOFTFP
+  if (!J->needsplit)
+    J->needsplit = split_needsplit(J);
+#else
   lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
+#endif
   if (J->needsplit) {
     int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
     if (errcode) {
diff --git a/src/lj_record.c b/src/lj_record.c
index 64eba291..b712ec56 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1992,6 +1992,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
     IRRef ref = snap_ref(sn);
     BCReg s = snap_slot(sn);
     IRIns *ir = &T->ir[ref];
+    IRType t = irt_type(ir->t);
     TRef tr;
     /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
     if (bloomtest(seen, ref)) {
@@ -2005,7 +2006,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
     bloomset(seen, ref);
     switch ((IROp)ir->o) {
     /* Only have to deal with constants that can occur in stack slots. */
-    case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
+    case IR_KPRI: tr = TREF_PRI(t); break;
     case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
     case IR_KGC:  tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
     case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break;
@@ -2013,13 +2014,14 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
     case IR_KPTR:  tr = lj_ir_kptr(J, ir_kptr(ir)); break;  /* Continuation. */
     /* Inherited SLOADs don't need a guard or type check. */
     case IR_SLOAD:
-      tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
+      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
+      tr = emitir_raw(IRT(IR_SLOAD, t), s,
 	     (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
       break;
     /* Parent refs are already typed and don't need a guard. */
     default:
-      tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
-		      IRSLOAD_INHERIT|IRSLOAD_PARENT);
+      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
+      tr = emitir_raw(IRT(IR_SLOAD, t), s, IRSLOAD_INHERIT|IRSLOAD_PARENT);
       break;
     }
   setslot:
diff --git a/src/lj_snap.c b/src/lj_snap.c
index dd70ece1..1af7ef85 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -307,7 +307,7 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
 /* Convert a snapshot into a linear slot -> RegSP map.
 ** Note: unused slots are not initialized!
 */
-void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno)
+void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, int hi)
 {
   SnapShot *snap = &T->snap[snapno];
   MSize n, nent = snap->nent;
@@ -316,7 +316,7 @@ void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno)
   for (n = 0; n < nent; n++) {
     SnapEntry sn = map[n];
     IRRef ref = snap_ref(sn);
-    if (!irref_isk(ref)) {
+    if ((LJ_SOFTFP && hi) ? (ref++, (sn & SNAP_SOFTFPNUM)) : !irref_isk(ref)) {
       IRIns *ir = &T->ir[ref];
       uint32_t rs = ir->prev;
       if (bloomtest(rfilt, ref))
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 031b0ac3..da9813b9 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -13,7 +13,8 @@
 LJ_FUNC void lj_snap_add(jit_State *J);
 LJ_FUNC void lj_snap_purge(jit_State *J);
 LJ_FUNC void lj_snap_shrink(jit_State *J);
-LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno);
+LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno,
+			      int hi);
 LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
 LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
 LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);
-- 
cgit v1.2.3-55-g6feb