about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2011-05-22 17:41:59 +0200
committer: Mike Pall <mike> 2011-05-22 17:44:58 +0200
commit: 138f54352ad604ef50f77cbcc15abec6dbd883c0 (patch)
tree: f8ac2d3599cec2af1007da1c894847d3d0e2ff95
parent: d0115c65f5ad80af2a113332906a0c5a010f9812 (diff)
download: luajit-138f54352ad604ef50f77cbcc15abec6dbd883c0.tar.gz
luajit-138f54352ad604ef50f77cbcc15abec6dbd883c0.tar.bz2
luajit-138f54352ad604ef50f77cbcc15abec6dbd883c0.zip
Split up FP IR instructions with SPLIT pass for soft-float targets.
-rw-r--r-- lib/dump.lua 10
-rw-r--r-- src/Makefile.dep 2
-rw-r--r-- src/lj_asm.c 112
-rw-r--r-- src/lj_ffrecord.c 7
-rw-r--r-- src/lj_ir.c 1
-rw-r--r-- src/lj_ir.h 2
-rw-r--r-- src/lj_ircall.h 123
-rw-r--r-- src/lj_iropt.h 2
-rw-r--r-- src/lj_jit.h 4
-rw-r--r-- src/lj_opt_split.c 271
-rw-r--r-- src/lj_record.c 10
-rw-r--r-- src/lj_snap.c 4
-rw-r--r-- src/lj_snap.h 3
13 files changed, 479 insertions, 72 deletions
diff --git a/lib/dump.lua b/lib/dump.lua
index 0f9f7b2b..a6b61f53 100644
--- a/lib/dump.lua
+++ b/lib/dump.lua
@@ -147,6 +147,7 @@ local irtype_text = {
147 "u32", 147 "u32",
148 "i64", 148 "i64",
149 "u64", 149 "u64",
150 "sfp",
150} 151}
151 152
152local colortype_ansi = { 153local colortype_ansi = {
@@ -173,6 +174,7 @@ local colortype_ansi = {
173 "\027[35m%s\027[m", 174 "\027[35m%s\027[m",
174 "\027[35m%s\027[m", 175 "\027[35m%s\027[m",
175 "\027[35m%s\027[m", 176 "\027[35m%s\027[m",
177 "\027[35m%s\027[m",
176} 178}
177 179
178local function colorize_text(s, t) 180local function colorize_text(s, t)
@@ -318,11 +320,11 @@ local function printsnap(tr, snap)
318 if ref < 0 then 320 if ref < 0 then
319 out:write(formatk(tr, ref)) 321 out:write(formatk(tr, ref))
320 else 322 else
321 local m, ot, op1, op2 = traceir(tr, ref)
322 out:write(colorize(format("%04d", ref), band(ot, 31)))
323 if band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM 323 if band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
324 local m, ot, op1, op2 = traceir(tr, ref+1) 324 out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
325 out:write(colorize(format("/%04d", ref+1), band(ot, 31))) 325 else
326 local m, ot, op1, op2 = traceir(tr, ref)
327 out:write(colorize(format("%04d", ref), band(ot, 31)))
326 end 328 end
327 end 329 end
328 out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME 330 out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 89ac79e8..8e0d7a1c 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -108,7 +108,7 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
108lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 108lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 109 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
110 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 110 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
111 lj_lib.h 111 lj_vm.h lj_lib.h
112lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 112lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
113 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 113 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
114 lj_state.h lj_lex.h lj_parse.h lj_char.h 114 lj_state.h lj_lex.h lj_parse.h lj_char.h
diff --git a/src/lj_asm.c b/src/lj_asm.c
index f33dc790..18383bcc 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -85,6 +85,9 @@ typedef struct ASMState {
85 85
86 IRRef1 phireg[RID_MAX]; /* PHI register references. */ 86 IRRef1 phireg[RID_MAX]; /* PHI register references. */
87 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */ 87 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */
88#if LJ_SOFTFP
89 uint16_t parentmaphi[LJ_MAX_JSLOTS]; /* Parent slot to hi RegSP map. */
90#endif
88} ASMState; 91} ASMState;
89 92
90#define IR(ref) (&as->ir[(ref)]) 93#define IR(ref) (&as->ir[(ref)])
@@ -273,9 +276,12 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
273 ra_modified(as, r); 276 ra_modified(as, r);
274 ir->r = RID_INIT; /* Do not keep any hint. */ 277 ir->r = RID_INIT; /* Do not keep any hint. */
275 RA_DBGX((as, "remat $i $r", ir, r)); 278 RA_DBGX((as, "remat $i $r", ir, r));
279#if !LJ_SOFTFP
276 if (ir->o == IR_KNUM) { 280 if (ir->o == IR_KNUM) {
277 emit_loadn(as, r, ir_knum(ir)); 281 emit_loadn(as, r, ir_knum(ir));
278 } else if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { 282 } else
283#endif
284 if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
279 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ 285 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
280 emit_getgl(as, r, jit_base); 286 emit_getgl(as, r, jit_base);
281 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { 287 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
@@ -596,31 +602,40 @@ static int asm_snap_canremat(ASMState *as)
596 return 0; 602 return 0;
597} 603}
598 604
599/* Allocate registers or spill slots for refs escaping to a snapshot. */ 605/* Allocate register or spill slot for a ref that escapes to a snapshot. */
606static void asm_snap_alloc1(ASMState *as, IRRef ref)
607{
608 IRIns *ir = IR(ref);
609 if (!ra_used(ir)) {
610 RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR;
611 /* Get a weak register if we have a free one or can rematerialize. */
612 if ((as->freeset & allow) ||
613 (allow == RSET_FPR && asm_snap_canremat(as))) {
614 Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
615 if (!irt_isphi(ir->t))
616 ra_weak(as, r); /* But mark it as weakly referenced. */
617 checkmclim(as);
618 RA_DBGX((as, "snapreg $f $r", ref, ir->r));
619 } else {
620 ra_spill(as, ir); /* Otherwise force a spill slot. */
621 RA_DBGX((as, "snapspill $f $s", ref, ir->s));
622 }
623 }
624}
625
626/* Allocate refs escaping to a snapshot. */
600static void asm_snap_alloc(ASMState *as) 627static void asm_snap_alloc(ASMState *as)
601{ 628{
602 SnapShot *snap = &as->T->snap[as->snapno]; 629 SnapShot *snap = &as->T->snap[as->snapno];
603 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 630 SnapEntry *map = &as->T->snapmap[snap->mapofs];
604 MSize n, nent = snap->nent; 631 MSize n, nent = snap->nent;
605 for (n = 0; n < nent; n++) { 632 for (n = 0; n < nent; n++) {
606 IRRef ref = snap_ref(map[n]); 633 SnapEntry sn = map[n];
634 IRRef ref = snap_ref(sn);
607 if (!irref_isk(ref)) { 635 if (!irref_isk(ref)) {
608 IRIns *ir = IR(ref); 636 asm_snap_alloc1(as, ref);
609 if (!ra_used(ir)) { 637 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM))
610 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 638 asm_snap_alloc1(as, ref+1);
611 /* Get a weak register if we have a free one or can rematerialize. */
612 if ((as->freeset & allow) ||
613 (allow == RSET_FPR && asm_snap_canremat(as))) {
614 Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
615 if (!irt_isphi(ir->t))
616 ra_weak(as, r); /* But mark it as weakly referenced. */
617 checkmclim(as);
618 RA_DBGX((as, "snapreg $f $r", ref, ir->r));
619 } else {
620 ra_spill(as, ir); /* Otherwise force a spill slot. */
621 RA_DBGX((as, "snapspill $f $s", ref, ir->s));
622 }
623 }
624 } 639 }
625 } 640 }
626} 641}
@@ -997,6 +1012,15 @@ static void asm_head_root(ASMState *as)
997 as->T->topslot = gcref(as->T->startpt)->pt.framesize; 1012 as->T->topslot = gcref(as->T->startpt)->pt.framesize;
998} 1013}
999 1014
1015/* Get RegSP for parent slot. */
1016static LJ_AINLINE RegSP asm_head_parentrs(ASMState *as, IRIns *ir)
1017{
1018#if LJ_SOFTFP
1019 if (ir->o == IR_HIOP) return as->parentmaphi[(ir-1)->op1];
1020#endif
1021 return as->parentmap[ir->op1];
1022}
1023
1000/* Head of a side trace. 1024/* Head of a side trace.
1001** 1025**
1002** The current simplistic algorithm requires that all slots inherited 1026** The current simplistic algorithm requires that all slots inherited
@@ -1022,8 +1046,9 @@ static void asm_head_side(ASMState *as)
1022 for (i = as->stopins; i > REF_BASE; i--) { 1046 for (i = as->stopins; i > REF_BASE; i--) {
1023 IRIns *ir = IR(i); 1047 IRIns *ir = IR(i);
1024 RegSP rs; 1048 RegSP rs;
1025 lua_assert(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)); 1049 lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
1026 rs = as->parentmap[ir->op1]; 1050 (LJ_SOFTFP && ir->o == IR_HIOP));
1051 rs = asm_head_parentrs(as, ir);
1027 if (ra_hasreg(ir->r)) { 1052 if (ra_hasreg(ir->r)) {
1028 rset_clear(allow, ir->r); 1053 rset_clear(allow, ir->r);
1029 if (ra_hasspill(ir->s)) 1054 if (ra_hasspill(ir->s))
@@ -1052,6 +1077,12 @@ static void asm_head_side(ASMState *as)
1052 } 1077 }
1053 as->T->spadjust = (uint16_t)spadj; 1078 as->T->spadjust = (uint16_t)spadj;
1054 1079
1080#if !LJ_TARGET_X86ORX64
1081 /* Restore BASE register from parent spill slot. */
1082 if (ra_hasspill(irp->s))
1083 emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, spdelta + sps_scale(irp->s));
1084#endif
1085
1055 /* Reload spilled target registers. */ 1086 /* Reload spilled target registers. */
1056 if (pass2) { 1087 if (pass2) {
1057 for (i = as->stopins; i > REF_BASE; i--) { 1088 for (i = as->stopins; i > REF_BASE; i--) {
@@ -1061,12 +1092,12 @@ static void asm_head_side(ASMState *as)
1061 Reg r; 1092 Reg r;
1062 RegSP rs; 1093 RegSP rs;
1063 irt_clearmark(ir->t); 1094 irt_clearmark(ir->t);
1064 rs = as->parentmap[ir->op1]; 1095 rs = asm_head_parentrs(as, ir);
1065 if (!ra_hasspill(regsp_spill(rs))) 1096 if (!ra_hasspill(regsp_spill(rs)))
1066 ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */ 1097 ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */
1067 else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s)) 1098 else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
1068 continue; /* Same spill slot, do nothing. */ 1099 continue; /* Same spill slot, do nothing. */
1069 mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow; 1100 mask = ((!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR) & allow;
1070 if (mask == RSET_EMPTY) 1101 if (mask == RSET_EMPTY)
1071 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 1102 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1072 r = ra_allocref(as, i, mask); 1103 r = ra_allocref(as, i, mask);
@@ -1093,7 +1124,7 @@ static void asm_head_side(ASMState *as)
1093 while (work) { 1124 while (work) {
1094 Reg r = rset_pickbot(work); 1125 Reg r = rset_pickbot(work);
1095 IRIns *ir = IR(regcost_ref(as->cost[r])); 1126 IRIns *ir = IR(regcost_ref(as->cost[r]));
1096 RegSP rs = as->parentmap[ir->op1]; 1127 RegSP rs = asm_head_parentrs(as, ir);
1097 rset_clear(work, r); 1128 rset_clear(work, r);
1098 if (ra_hasspill(regsp_spill(rs))) { 1129 if (ra_hasspill(regsp_spill(rs))) {
1099 int32_t ofs = sps_scale(regsp_spill(rs)); 1130 int32_t ofs = sps_scale(regsp_spill(rs));
@@ -1262,14 +1293,38 @@ static void asm_setup_regsp(ASMState *as)
1262 (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; 1293 (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
1263 continue; 1294 continue;
1264 } 1295 }
1265#if LJ_32 && LJ_HASFFI 1296#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
1266 case IR_HIOP: 1297 case IR_HIOP:
1267 if ((ir-1)->o == IR_CALLN) { 1298 switch ((ir-1)->o) {
1299#if LJ_SOFTFP
1300 case IR_SLOAD:
1301 if (((ir-1)->op2 & IRSLOAD_PARENT)) {
1302 RegSP rs = as->parentmaphi[(ir-1)->op1];
1303 lua_assert(regsp_used(rs));
1304 as->stopins = i;
1305 if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) {
1306 ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
1307 continue;
1308 }
1309 }
1310 break;
1311#endif
1312 case IR_CALLN: case IR_CALLXS:
1313#if LJ_SOFTFP
1314 case IR_MIN: case IR_MAX:
1315#endif
1268 ir->prev = REGSP_HINT(RID_RETHI); 1316 ir->prev = REGSP_HINT(RID_RETHI);
1269 continue; 1317 continue;
1318 default:
1319 break;
1270 } 1320 }
1271 break; 1321 break;
1272#endif 1322#endif
1323#if LJ_SOFTFP
1324 case IR_MIN: case IR_MAX:
1325 if ((ir+1)->o != IR_HIOP) break;
1326 /* fallthrough */
1327#endif
1273 /* C calls evict all scratch regs and return results in RID_RET. */ 1328 /* C calls evict all scratch regs and return results in RID_RET. */
1274 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 1329 case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
1275 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 1330 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
@@ -1387,7 +1442,10 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1387 as->loopinv = 0; 1442 as->loopinv = 0;
1388 if (J->parent) { 1443 if (J->parent) {
1389 as->parent = traceref(J, J->parent); 1444 as->parent = traceref(J, J->parent);
1390 lj_snap_regspmap(as->parentmap, as->parent, J->exitno); 1445 lj_snap_regspmap(as->parentmap, as->parent, J->exitno, 0);
1446#if LJ_SOFTFP
1447 lj_snap_regspmap(as->parentmaphi, as->parent, J->exitno, 1);
1448#endif
1391 } else { 1449 } else {
1392 as->parent = NULL; 1450 as->parent = NULL;
1393 } 1451 }
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 91d31b29..0de54f04 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -438,7 +438,12 @@ static void LJ_FASTCALL recff_math_unary(jit_State *J, RecordFFData *rd)
438static void LJ_FASTCALL recff_math_binary(jit_State *J, RecordFFData *rd) 438static void LJ_FASTCALL recff_math_binary(jit_State *J, RecordFFData *rd)
439{ 439{
440 TRef tr = lj_ir_tonum(J, J->base[0]); 440 TRef tr = lj_ir_tonum(J, J->base[0]);
441 J->base[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, J->base[1])); 441#if LJ_TARGET_X86ORX64
442 TRef tr2 = lj_ir_tonum(J, J->base[1]);
443#else
444 TRef tr2 = lj_opt_narrow_toint(J, J->base[1]);
445#endif
446 J->base[0] = emitir(IRTN(rd->data), tr, tr2);
442} 447}
443 448
444/* Record math.asin, math.acos, math.atan. */ 449/* Record math.asin, math.acos, math.atan. */
diff --git a/src/lj_ir.c b/src/lj_ir.c
index b7d1e7a7..59ffcfde 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -27,6 +27,7 @@
27#include "lj_cdata.h" 27#include "lj_cdata.h"
28#include "lj_carith.h" 28#include "lj_carith.h"
29#endif 29#endif
30#include "lj_vm.h"
30#include "lj_lib.h" 31#include "lj_lib.h"
31 32
32/* Some local macros to save typing. Undef'd at the end. */ 33/* Some local macros to save typing. Undef'd at the end. */
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 1bc6c332..aac34350 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -283,7 +283,7 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
283 _(NIL) _(FALSE) _(TRUE) _(LIGHTUD) _(STR) _(P32) _(THREAD) \ 283 _(NIL) _(FALSE) _(TRUE) _(LIGHTUD) _(STR) _(P32) _(THREAD) \
284 _(PROTO) _(FUNC) _(P64) _(CDATA) _(TAB) _(UDATA) \ 284 _(PROTO) _(FUNC) _(P64) _(CDATA) _(TAB) _(UDATA) \
285 _(FLOAT) _(NUM) _(I8) _(U8) _(I16) _(U16) _(INT) _(U32) _(I64) _(U64) \ 285 _(FLOAT) _(NUM) _(I8) _(U8) _(I16) _(U16) _(INT) _(U32) _(I64) _(U64) \
286 /* There is room for 10 more types. */ 286 _(SOFTFP) /* There is room for 9 more types. */
287 287
288/* IR result type and flags (8 bit). */ 288/* IR result type and flags (8 bit). */
289typedef enum { 289typedef enum {
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 3131b15d..b83a0a81 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -38,6 +38,72 @@ typedef struct CCallInfo {
38#define CCI_FASTCALL 0x0800 /* Fastcall convention. */ 38#define CCI_FASTCALL 0x0800 /* Fastcall convention. */
39 39
40/* Function definitions for CALL* instructions. */ 40/* Function definitions for CALL* instructions. */
41#if LJ_SOFTFP
42#if LJ_HASFFI
43#define IRCALLDEF_SOFTFP_FFI(_) \
44 _(softfp_ui2d, 1, N, NUM, 0) \
45 _(softfp_l2d, 2, N, NUM, 0) \
46 _(softfp_ul2d, 2, N, NUM, 0) \
47 _(softfp_f2d, 1, N, NUM, 0) \
48 _(softfp_d2ui, 2, N, INT, 0) \
49 _(softfp_d2l, 2, N, I64, 0) \
50 _(softfp_d2ul, 2, N, U64, 0) \
51 _(softfp_d2f, 2, N, FLOAT, 0) \
52 _(softfp_i2f, 1, N, FLOAT, 0) \
53 _(softfp_ui2f, 1, N, FLOAT, 0) \
54 _(softfp_l2f, 2, N, FLOAT, 0) \
55 _(softfp_ul2f, 2, N, FLOAT, 0) \
56 _(softfp_f2i, 1, N, INT, 0) \
57 _(softfp_f2ui, 1, N, INT, 0) \
58 _(softfp_f2l, 1, N, I64, 0) \
59 _(softfp_f2ul, 1, N, U64, 0)
60#else
61#define IRCALLDEF_SOFTFP_FFI(_)
62#endif
63#define IRCALLDEF_SOFTFP(_) \
64 _(lj_vm_tobit, 2, N, INT, 0) \
65 _(softfp_add, 4, N, NUM, 0) \
66 _(softfp_sub, 4, N, NUM, 0) \
67 _(softfp_mul, 4, N, NUM, 0) \
68 _(softfp_div, 4, N, NUM, 0) \
69 _(softfp_cmp, 4, N, NIL, 0) \
70 _(softfp_i2d, 1, N, NUM, 0) \
71 _(softfp_d2i, 2, N, INT, 0) \
72 IRCALLDEF_SOFTFP_FFI(_)
73#else
74#define IRCALLDEF_SOFTFP(_)
75#endif
76
77#if LJ_TARGET_X86ORX64
78/* Use lj_vm_* helpers and x87 ops. */
79#define IRCALLDEF_FPMATH(_)
80#else
81/* Use standard math library calls. */
82#if LJ_SOFTFP
83#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */
84#else
85#define ARG1_FP 1
86#endif
87/* ORDER FPM */
88#define IRCALLDEF_FPMATH(_) \
89 _(lj_vm_floor, ARG1_FP, N, NUM, 0) \
90 _(lj_vm_ceil, ARG1_FP, N, NUM, 0) \
91 _(lj_vm_trunc, ARG1_FP, N, NUM, 0) \
92 _(sqrt, ARG1_FP, N, NUM, 0) \
93 _(exp, ARG1_FP, N, NUM, 0) \
94 _(exp2, ARG1_FP, N, NUM, 0) \
95 _(log, ARG1_FP, N, NUM, 0) \
96 _(log2, ARG1_FP, N, NUM, 0) \
97 _(log10, ARG1_FP, N, NUM, 0) \
98 _(sin, ARG1_FP, N, NUM, 0) \
99 _(cos, ARG1_FP, N, NUM, 0) \
100 _(tan, ARG1_FP, N, NUM, 0) \
101 _(lj_vm_powi, ARG1_FP+1, N, NUM, 0) \
102 _(pow, ARG1_FP*2, N, NUM, 0) \
103 _(atan2, ARG1_FP*2, N, NUM, 0) \
104 _(ldexp, ARG1_FP+1, N, NUM, 0)
105#endif
106
41#if LJ_HASFFI 107#if LJ_HASFFI
42#if LJ_32 108#if LJ_32
43#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ 109#define ARG2_64 4 /* Treat as 4 32 bit arguments. */
@@ -62,6 +128,7 @@ typedef struct CCallInfo {
62#else 128#else
63#define IRCALLDEF_FFI(_) 129#define IRCALLDEF_FFI(_)
64#endif 130#endif
131
65#define IRCALLDEF(_) \ 132#define IRCALLDEF(_) \
66 _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ 133 _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
67 _(lj_str_new, 3, S, STR, CCI_L) \ 134 _(lj_str_new, 3, S, STR, CCI_L) \
@@ -76,6 +143,8 @@ typedef struct CCallInfo {
76 _(lj_gc_barrieruv, 2, FS, NIL, 0) \ 143 _(lj_gc_barrieruv, 2, FS, NIL, 0) \
77 _(lj_mem_newgco, 2, FS, P32, CCI_L) \ 144 _(lj_mem_newgco, 2, FS, P32, CCI_L) \
78 _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ 145 _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
146 IRCALLDEF_SOFTFP(_) \
147 IRCALLDEF_FPMATH(_) \
79 IRCALLDEF_FFI(_) \ 148 IRCALLDEF_FFI(_) \
80 _(sinh, 1, N, NUM, 0) \ 149 _(sinh, 1, N, NUM, 0) \
81 _(cosh, 1, N, NUM, 0) \ 150 _(cosh, 1, N, NUM, 0) \
@@ -97,4 +166,58 @@ LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...);
97 166
98LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; 167LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
99 168
169/* Soft-float declarations. */
170#if LJ_SOFTFP
171#if LJ_TARGET_ARM
172#define softfp_add __aeabi_dadd
173#define softfp_sub __aeabi_dsub
174#define softfp_mul __aeabi_dmul
175#define softfp_div __aeabi_ddiv
176#define softfp_cmp __aeabi_cdcmple
177#define softfp_i2d __aeabi_i2d
178#define softfp_ui2d __aeabi_ui2d
179#define softfp_l2d __aeabi_l2d
180#define softfp_ul2d __aeabi_ul2d
181#define softfp_f2d __aeabi_f2d
182#define softfp_d2i __aeabi_d2iz
183#define softfp_d2ui __aeabi_d2uiz
184#define softfp_d2l __aeabi_d2lz
185#define softfp_d2ul __aeabi_d2ulz
186#define softfp_d2f __aeabi_d2f
187#define softfp_i2f __aeabi_i2f
188#define softfp_ui2f __aeabi_ui2f
189#define softfp_l2f __aeabi_l2f
190#define softfp_ul2f __aeabi_ul2f
191#define softfp_f2i __aeabi_f2iz
192#define softfp_f2ui __aeabi_f2uiz
193#define softfp_f2l __aeabi_f2lz
194#define softfp_f2ul __aeabi_f2ulz
195#else
196#error "Missing soft-float definitions for target architecture"
197#endif
198extern double softfp_add(double a, double b);
199extern double softfp_sub(double a, double b);
200extern double softfp_mul(double a, double b);
201extern double softfp_div(double a, double b);
202extern void softfp_cmp(double a, double b);
203extern double softfp_i2d(int32_t a);
204extern double softfp_ui2d(uint32_t a);
205extern double softfp_l2d(int64_t a);
206extern double softfp_ul2d(uint64_t a);
207extern double softfp_f2d(float a);
208extern int32_t softfp_d2i(double a);
209extern uint32_t softfp_d2ui(double a);
210extern int64_t softfp_d2l(double a);
211extern uint64_t softfp_d2ul(double a);
212extern float softfp_d2f(double a);
213extern float softfp_i2f(int32_t a);
214extern float softfp_ui2f(uint32_t a);
215extern float softfp_l2f(int64_t a);
216extern float softfp_ul2f(uint64_t a);
217extern int32_t softfp_f2i(float a);
218extern uint32_t softfp_f2ui(float a);
219extern int64_t softfp_f2l(float a);
220extern uint64_t softfp_f2ul(float a);
221#endif
222
100#endif 223#endif
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index daba5296..7ab42b7a 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -148,7 +148,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
148/* Optimization passes. */ 148/* Optimization passes. */
149LJ_FUNC void lj_opt_dce(jit_State *J); 149LJ_FUNC void lj_opt_dce(jit_State *J);
150LJ_FUNC int lj_opt_loop(jit_State *J); 150LJ_FUNC int lj_opt_loop(jit_State *J);
151#if LJ_HASFFI && LJ_32 151#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
152LJ_FUNC void lj_opt_split(jit_State *J); 152LJ_FUNC void lj_opt_split(jit_State *J);
153#else 153#else
154#define lj_opt_split(J) UNUSED(J) 154#define lj_opt_split(J) UNUSED(J)
diff --git a/src/lj_jit.h b/src/lj_jit.h
index dd74dedb..63584355 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -250,7 +250,7 @@ enum {
250 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) 250 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
251 251
252/* Set/reset flag to activate the SPLIT pass for the current trace. */ 252/* Set/reset flag to activate the SPLIT pass for the current trace. */
253#if LJ_32 && LJ_HASFFI 253#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
254#define lj_needsplit(J) (J->needsplit = 1) 254#define lj_needsplit(J) (J->needsplit = 1)
255#define lj_resetsplit(J) (J->needsplit = 0) 255#define lj_resetsplit(J) (J->needsplit = 0)
256#else 256#else
@@ -311,7 +311,7 @@ typedef struct jit_State {
311 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ 311 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
312 312
313 PostProc postproc; /* Required post-processing after execution. */ 313 PostProc postproc; /* Required post-processing after execution. */
314#if LJ_32 && LJ_HASFFI 314#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
315 int needsplit; /* Need SPLIT pass. */ 315 int needsplit; /* Need SPLIT pass. */
316#endif 316#endif
317 317
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 2f8b1e9c..67436a65 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -8,7 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10 10
11#if LJ_HASJIT && LJ_HASFFI && LJ_32 11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_str.h"
@@ -21,9 +21,9 @@
21/* SPLIT pass: 21/* SPLIT pass:
22** 22**
23** This pass splits up 64 bit IR instructions into multiple 32 bit IR 23** This pass splits up 64 bit IR instructions into multiple 32 bit IR
24** instructions. It's only active for 32 bit CPUs which lack native 64 bit 24** instructions. It's only active for soft-float targets or for 32 bit CPUs
25** operations. The FFI is currently the only emitter for 64 bit 25** which lack native 64 bit integer operations (the FFI is currently the
26** instructions, so this pass is disabled if the FFI is disabled. 26** only emitter for 64 bit integer instructions).
27** 27**
28** Splitting the IR in a separate pass keeps each 32 bit IR assembler 28** Splitting the IR in a separate pass keeps each 32 bit IR assembler
29** backend simple. Only a small amount of extra functionality needs to be 29** backend simple. Only a small amount of extra functionality needs to be
@@ -41,14 +41,19 @@
41** The operands of HIOP hold the hiword input references. The output of HIOP 41** The operands of HIOP hold the hiword input references. The output of HIOP
42** is the hiword output reference, which is also used to hold the hiword 42** is the hiword output reference, which is also used to hold the hiword
43** register or spill slot information. The register allocator treats this 43** register or spill slot information. The register allocator treats this
44** instruction independent of any other instruction, which improves code 44** instruction independently of any other instruction, which improves code
45** quality compared to using fixed register pairs. 45** quality compared to using fixed register pairs.
46** 46**
47** It's easier to split up some instructions into two regular 32 bit 47** It's easier to split up some instructions into two regular 32 bit
48** instructions. E.g. XLOAD is split up into two XLOADs with two different 48** instructions. E.g. XLOAD is split up into two XLOADs with two different
49** addresses. Obviously 64 bit constants need to be split up into two 32 bit 49** addresses. Obviously 64 bit constants need to be split up into two 32 bit
50** constants, too. Some hiword instructions can be entirely omitted, e.g. 50** constants, too. Some hiword instructions can be entirely omitted, e.g.
51** when zero-extending a 32 bit value to 64 bits. 51** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
52** are split up into two 32 bit arguments each.
53**
54** On soft-float targets, floating-point instructions are directly converted
55** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
56** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
52** 57**
53** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with 58** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
54** two int64_t fields: 59** two int64_t fields:
@@ -101,10 +106,43 @@ static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
101 return nref; 106 return nref;
102} 107}
103 108
104/* Emit a CALLN with two split 64 bit arguments. */ 109#if LJ_SOFTFP
105static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir, 110/* Emit a CALLN with one split 64 bit argument. */
111static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
106 IRIns *ir, IRCallID id) 112 IRIns *ir, IRCallID id)
107{ 113{
114 IRRef tmp, op1 = ir->op1;
115 J->cur.nins--;
116#if LJ_LE
117 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
118#else
119 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
120#endif
121 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
122 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
123}
124
125/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
126static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
127 IRIns *ir, IRCallID id)
128{
129 IRRef tmp, op1 = ir->op1, op2 = ir->op2;
130 J->cur.nins--;
131#if LJ_LE
132 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
133#else
134 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
135#endif
136 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
137 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
138 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
139}
140#endif
141
142/* Emit a CALLN with two split 64 bit arguments. */
143static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
144 IRIns *ir, IRCallID id)
145{
108 IRRef tmp, op1 = ir->op1, op2 = ir->op2; 146 IRRef tmp, op1 = ir->op1, op2 = ir->op2;
109 J->cur.nins--; 147 J->cur.nins--;
110#if LJ_LE 148#if LJ_LE
@@ -117,7 +155,9 @@ static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
117 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev); 155 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
118#endif 156#endif
119 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 157 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
120 return split_emit(J, IRTI(IR_HIOP), tmp, tmp); 158 return split_emit(J,
159 IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
160 tmp, tmp);
121} 161}
122 162
123/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */ 163/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
@@ -155,7 +195,8 @@ static void split_ir(jit_State *J)
155 /* Process constants and fixed references. */ 195 /* Process constants and fixed references. */
156 for (ref = nk; ref <= REF_BASE; ref++) { 196 for (ref = nk; ref <= REF_BASE; ref++) {
157 IRIns *ir = &oir[ref]; 197 IRIns *ir = &oir[ref];
158 if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */ 198 if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
199 /* Split up 64 bit constant. */
159 TValue tv = *ir_k64(ir); 200 TValue tv = *ir_k64(ir);
160 ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); 201 ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
161 hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); 202 hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
@@ -181,6 +222,106 @@ static void split_ir(jit_State *J)
181 hisubst[ref] = 0; 222 hisubst[ref] = 0;
182 223
183 /* Split 64 bit instructions. */ 224 /* Split 64 bit instructions. */
225#if LJ_SOFTFP
226 if (irt_isnum(ir->t)) {
227 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
228 /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
229 switch (ir->o) {
230 case IR_ADD:
231 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
232 break;
233 case IR_SUB:
234 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
235 break;
236 case IR_MUL:
237 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
238 break;
239 case IR_DIV:
240 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
241 break;
242 case IR_POW:
243 hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
244 break;
245 case IR_FPMATH:
246 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
247 break;
248 case IR_ATAN2:
249 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
250 break;
251 case IR_LDEXP:
252 hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
253 break;
254 case IR_NEG: case IR_ABS:
255 nir->o = IR_CONV; /* Pass through loword. */
256 nir->op2 = (IRT_INT << 5) | IRT_INT;
257 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
258 hisubst[ir->op1], hisubst[ir->op2]);
259 break;
260 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
261 case IR_MIN: case IR_MAX:
262 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
263 break;
264 case IR_XLOAD:
265 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP),
266 split_ptr(J, nir->op1), ir->op2);
267#if LJ_BE
268 ir->prev = hi; hi = nref;
269#endif
270 break;
271 case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
272 split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
273 break;
274 case IR_XSTORE: {
275#if LJ_LE
276 IRRef hiref = hisubst[ir->op2];
277#else
278 IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
279#endif
280 split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
281 split_ptr(J, nir->op1), hiref);
282 break;
283 }
284 case IR_CONV: { /* Conversion to number. Others handled below. */
285 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
286#if LJ_32 && LJ_HASFFI
287 if (st == IRT_I64 || st == IRT_U64) {
288 hi = split_call_l(J, hisubst, oir, ir,
289 st == IRT_I64 ? IRCALL_softfp_l2d : IRCALL_softfp_ul2d);
290 break;
291 }
292#endif
293 lua_assert(st == IRT_INT ||
294 (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
295 nir->o = IR_CALLN;
296#if LJ_32 && LJ_HASFFI
297 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
298 st == IRT_FLOAT ? IRCALL_softfp_f2d :
299 IRCALL_softfp_ui2d;
300#else
301 nir->op2 = IRCALL_softfp_i2d;
302#endif
303 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
304 break;
305 }
306 case IR_CALLS:
307 case IR_CALLXS:
308 goto split_call;
309 case IR_PHI:
310 if (nir->op1 == nir->op2)
311 J->cur.nins--; /* Drop useless PHIs. */
312 if (hisubst[ir->op1] != hisubst[ir->op2])
313 split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
314 hisubst[ir->op1], hisubst[ir->op2]);
315 break;
316 default:
317 lua_assert(ir->o <= IR_NE);
318 split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
319 hisubst[ir->op1], hisubst[ir->op2]);
320 break;
321 }
322 } else
323#endif
324#if LJ_32 && LJ_HASFFI
184 if (irt_isint64(ir->t)) { 325 if (irt_isint64(ir->t)) {
185 IRRef hiref = hisubst[ir->op1]; 326 IRRef hiref = hisubst[ir->op1];
186 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ 327 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
@@ -199,22 +340,22 @@ static void split_ir(jit_State *J)
199 hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); 340 hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
200 break; 341 break;
201 case IR_MUL: 342 case IR_MUL:
202 hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); 343 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
203 break; 344 break;
204 case IR_DIV: 345 case IR_DIV:
205 hi = split_call64(J, hisubst, oir, ir, 346 hi = split_call_ll(J, hisubst, oir, ir,
206 irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : 347 irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
207 IRCALL_lj_carith_divu64); 348 IRCALL_lj_carith_divu64);
208 break; 349 break;
209 case IR_MOD: 350 case IR_MOD:
210 hi = split_call64(J, hisubst, oir, ir, 351 hi = split_call_ll(J, hisubst, oir, ir,
211 irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : 352 irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
212 IRCALL_lj_carith_modu64); 353 IRCALL_lj_carith_modu64);
213 break; 354 break;
214 case IR_POW: 355 case IR_POW:
215 hi = split_call64(J, hisubst, oir, ir, 356 hi = split_call_ll(J, hisubst, oir, ir,
216 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 357 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
217 IRCALL_lj_carith_powu64); 358 IRCALL_lj_carith_powu64);
218 break; 359 break;
219 case IR_FLOAD: 360 case IR_FLOAD:
220 lua_assert(ir->op2 == IRFL_CDATA_INT64); 361 lua_assert(ir->op2 == IRFL_CDATA_INT64);
@@ -239,9 +380,21 @@ static void split_ir(jit_State *J)
239 break; 380 break;
240 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ 381 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
241 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 382 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
383#if LJ_SOFTFP
384 if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
385 split_call_l(J, hisubst, oir, ir,
386 irt_isi64(ir->t) ? IRCALL_softfp_d2l : IRCALL_softfp_d2ul);
387 } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
388 nir->o = IR_CALLN;
389 nir->op2 = irt_isi64(ir->t) ? IRCALL_softfp_f2l : IRCALL_softfp_f2ul;
390 hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
391 }
392#else
242 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ 393 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
243 hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); 394 hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
244 } else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ 395 }
396#endif
397 else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
245 /* Drop cast, since assembler doesn't care. */ 398 /* Drop cast, since assembler doesn't care. */
246 goto fwdlo; 399 goto fwdlo;
247 } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ 400 } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
@@ -274,13 +427,37 @@ static void split_ir(jit_State *J)
274 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); 427 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
275 break; 428 break;
276 } 429 }
277 } else if (ir->o == IR_CONV) { /* See above, too. */ 430 } else
431#endif
432#if LJ_SOFTFP
433 if (ir->o == IR_TOBIT) {
434 IRRef tmp, op1 = ir->op1;
435 J->cur.nins--;
436#if LJ_LE
437 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
438#else
439 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
440#endif
441 ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
442 } else
443#endif
444 if (ir->o == IR_CONV) { /* See above, too. */
278 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 445 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
446#if LJ_32 && LJ_HASFFI
279 if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ 447 if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */
448#if LJ_SOFTFP
449 if (irt_isfloat(ir->t)) {
450 split_call_l(J, hisubst, oir, ir,
451 st == IRT_I64 ? IRCALL_softfp_l2f : IRCALL_softfp_ul2f);
452 J->cur.nins--; /* Drop unused HIOP. */
453 }
454#else
280 if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ 455 if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */
281 ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), 456 ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
282 hisubst[ir->op1], nref); 457 hisubst[ir->op1], nref);
283 } else { /* Truncate to lower 32 bits. */ 458 }
459#endif
460 else { /* Truncate to lower 32 bits. */
284 fwdlo: 461 fwdlo:
285 ir->prev = nir->op1; /* Forward loword. */ 462 ir->prev = nir->op1; /* Forward loword. */
286 /* Replace with NOP to avoid messing up the snapshot logic. */ 463 /* Replace with NOP to avoid messing up the snapshot logic. */
@@ -288,6 +465,36 @@ static void split_ir(jit_State *J)
288 nir->op1 = nir->op2 = 0; 465 nir->op1 = nir->op2 = 0;
289 } 466 }
290 } 467 }
468#endif
469#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
470 else if (irt_isfloat(ir->t)) {
471 if (st == IRT_NUM) {
472 split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
473 J->cur.nins--; /* Drop unused HIOP. */
474 } else {
475 nir->o = IR_CALLN;
476 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
477 }
478 } else if (st == IRT_FLOAT) {
479 nir->o = IR_CALLN;
480 nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
481 } else
482#endif
483#if LJ_SOFTFP
484 if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
485 if (irt_isguard(ir->t)) {
486 lua_assert(0); /* NYI: missing check. */
487 }
488 split_call_l(J, hisubst, oir, ir,
489#if LJ_32 && LJ_HASFFI
490 st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
491#else
492 IRCALL_softfp_d2i
493#endif
494 );
495 J->cur.nins--; /* Drop unused HIOP. */
496 }
497#endif
291 } else if (ir->o == IR_CALLXS) { 498 } else if (ir->o == IR_CALLXS) {
292 IRRef hiref; 499 IRRef hiref;
293 split_call: 500 split_call:
@@ -303,8 +510,10 @@ static void split_ir(jit_State *J)
303#endif 510#endif
304 ir->prev = nref = split_emit(J, ot, nref, op2); 511 ir->prev = nref = split_emit(J, ot, nref, op2);
305 } 512 }
306 if (irt_isint64(ir->t)) 513 if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
307 hi = split_emit(J, IRTI(IR_HIOP), nref, nref); 514 hi = split_emit(J,
515 IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
516 nref, nref);
308 } else if (ir->o == IR_CARG) { 517 } else if (ir->o == IR_CARG) {
309 IRRef hiref = hisubst[ir->op1]; 518 IRRef hiref = hisubst[ir->op1];
310 if (hiref) { 519 if (hiref) {
@@ -367,17 +576,18 @@ static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
367 return NULL; 576 return NULL;
368} 577}
369 578
370#ifdef LUA_USE_ASSERT 579#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
371/* Slow, but sure way to check whether a SPLIT pass is needed. */ 580/* Slow, but sure way to check whether a SPLIT pass is needed. */
372static int split_needsplit(jit_State *J) 581static int split_needsplit(jit_State *J)
373{ 582{
374 IRIns *ir, *irend; 583 IRIns *ir, *irend;
375 IRRef ref; 584 IRRef ref;
376 for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) 585 for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
377 if (irt_isint64(ir->t)) 586 if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
378 return 1; 587 return 1;
379 for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) 588 for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev)
380 if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 || 589 if ((LJ_SOFTFP && (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_NUM) ||
590 (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
381 (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64) 591 (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64)
382 return 1; 592 return 1;
383 return 0; /* Nope. */ 593 return 0; /* Nope. */
@@ -387,7 +597,12 @@ static int split_needsplit(jit_State *J)
387/* SPLIT pass. */ 597/* SPLIT pass. */
388void lj_opt_split(jit_State *J) 598void lj_opt_split(jit_State *J)
389{ 599{
600#if LJ_SOFTFP
601 if (!J->needsplit)
602 J->needsplit = split_needsplit(J);
603#else
390 lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ 604 lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */
605#endif
391 if (J->needsplit) { 606 if (J->needsplit) {
392 int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); 607 int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
393 if (errcode) { 608 if (errcode) {
diff --git a/src/lj_record.c b/src/lj_record.c
index 64eba291..b712ec56 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1992,6 +1992,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
1992 IRRef ref = snap_ref(sn); 1992 IRRef ref = snap_ref(sn);
1993 BCReg s = snap_slot(sn); 1993 BCReg s = snap_slot(sn);
1994 IRIns *ir = &T->ir[ref]; 1994 IRIns *ir = &T->ir[ref];
1995 IRType t = irt_type(ir->t);
1995 TRef tr; 1996 TRef tr;
1996 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ 1997 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
1997 if (bloomtest(seen, ref)) { 1998 if (bloomtest(seen, ref)) {
@@ -2005,7 +2006,7 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
2005 bloomset(seen, ref); 2006 bloomset(seen, ref);
2006 switch ((IROp)ir->o) { 2007 switch ((IROp)ir->o) {
2007 /* Only have to deal with constants that can occur in stack slots. */ 2008 /* Only have to deal with constants that can occur in stack slots. */
2008 case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; 2009 case IR_KPRI: tr = TREF_PRI(t); break;
2009 case IR_KINT: tr = lj_ir_kint(J, ir->i); break; 2010 case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
2010 case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; 2011 case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
2011 case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break; 2012 case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break;
@@ -2013,13 +2014,14 @@ static void rec_setup_side(jit_State *J, GCtrace *T)
2013 case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break; /* Continuation. */ 2014 case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break; /* Continuation. */
2014 /* Inherited SLOADs don't need a guard or type check. */ 2015 /* Inherited SLOADs don't need a guard or type check. */
2015 case IR_SLOAD: 2016 case IR_SLOAD:
2016 tr = emitir_raw(ir->ot & ~IRT_GUARD, s, 2017 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
2018 tr = emitir_raw(IRT(IR_SLOAD, t), s,
2017 (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); 2019 (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
2018 break; 2020 break;
2019 /* Parent refs are already typed and don't need a guard. */ 2021 /* Parent refs are already typed and don't need a guard. */
2020 default: 2022 default:
2021 tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, 2023 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
2022 IRSLOAD_INHERIT|IRSLOAD_PARENT); 2024 tr = emitir_raw(IRT(IR_SLOAD, t), s, IRSLOAD_INHERIT|IRSLOAD_PARENT);
2023 break; 2025 break;
2024 } 2026 }
2025 setslot: 2027 setslot:
diff --git a/src/lj_snap.c b/src/lj_snap.c
index dd70ece1..1af7ef85 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -307,7 +307,7 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
307/* Convert a snapshot into a linear slot -> RegSP map. 307/* Convert a snapshot into a linear slot -> RegSP map.
308** Note: unused slots are not initialized! 308** Note: unused slots are not initialized!
309*/ 309*/
310void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno) 310void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno, int hi)
311{ 311{
312 SnapShot *snap = &T->snap[snapno]; 312 SnapShot *snap = &T->snap[snapno];
313 MSize n, nent = snap->nent; 313 MSize n, nent = snap->nent;
@@ -316,7 +316,7 @@ void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno)
316 for (n = 0; n < nent; n++) { 316 for (n = 0; n < nent; n++) {
317 SnapEntry sn = map[n]; 317 SnapEntry sn = map[n];
318 IRRef ref = snap_ref(sn); 318 IRRef ref = snap_ref(sn);
319 if (!irref_isk(ref)) { 319 if ((LJ_SOFTFP && hi) ? (ref++, (sn & SNAP_SOFTFPNUM)) : !irref_isk(ref)) {
320 IRIns *ir = &T->ir[ref]; 320 IRIns *ir = &T->ir[ref];
321 uint32_t rs = ir->prev; 321 uint32_t rs = ir->prev;
322 if (bloomtest(rfilt, ref)) 322 if (bloomtest(rfilt, ref))
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 031b0ac3..da9813b9 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -13,7 +13,8 @@
13LJ_FUNC void lj_snap_add(jit_State *J); 13LJ_FUNC void lj_snap_add(jit_State *J);
14LJ_FUNC void lj_snap_purge(jit_State *J); 14LJ_FUNC void lj_snap_purge(jit_State *J);
15LJ_FUNC void lj_snap_shrink(jit_State *J); 15LJ_FUNC void lj_snap_shrink(jit_State *J);
16LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno); 16LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, GCtrace *T, SnapNo snapno,
17 int hi);
17LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); 18LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
18LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); 19LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
19LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need); 20LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);