diff options
author | Mike Pall <mike> | 2016-05-21 00:02:45 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2016-05-21 00:02:45 +0200 |
commit | cfa188f1349ba4c843394b53f270cb64635b9805 (patch) | |
tree | e16e643dfa2567fd52506702b79a4b851c3db63e | |
parent | 1931b38da5a9ea075df73a966630308d3988bb96 (diff) | |
download | luajit-cfa188f1349ba4c843394b53f270cb64635b9805.tar.gz luajit-cfa188f1349ba4c843394b53f270cb64635b9805.tar.bz2 luajit-cfa188f1349ba4c843394b53f270cb64635b9805.zip |
Move common 32/64 bit in-memory FP constants to jit_State.
Prerequisite for immovable IR.
Contributed by Peter Cawley.
-rw-r--r-- | src/lj_asm_mips.h | 12 | ||||
-rw-r--r-- | src/lj_asm_ppc.h | 14 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 16 | ||||
-rw-r--r-- | src/lj_ir.c | 10 | ||||
-rw-r--r-- | src/lj_jit.h | 35 | ||||
-rw-r--r-- | src/lj_trace.c | 24 |
6 files changed, 77 insertions, 34 deletions
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index ecb38c5d..d37bc132 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
@@ -459,12 +459,10 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
459 | dest, dest); | 459 | dest, dest); |
460 | if (irt_isfloat(ir->t)) | 460 | if (irt_isfloat(ir->t)) |
461 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | 461 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), |
462 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), | 462 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
463 | RSET_GPR); | ||
464 | else | 463 | else |
465 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | 464 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), |
466 | (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), | 465 | (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); |
467 | RSET_GPR); | ||
468 | emit_tg(as, MIPSI_MTC1, RID_TMP, dest); | 466 | emit_tg(as, MIPSI_MTC1, RID_TMP, dest); |
469 | emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); | 467 | emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); |
470 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); | 468 | emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); |
@@ -494,12 +492,10 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
494 | tmp, left, tmp); | 492 | tmp, left, tmp); |
495 | if (st == IRT_FLOAT) | 493 | if (st == IRT_FLOAT) |
496 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), | 494 | emit_lsptr(as, MIPSI_LWC1, (tmp & 31), |
497 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), | 495 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
498 | RSET_GPR); | ||
499 | else | 496 | else |
500 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), | 497 | emit_lsptr(as, MIPSI_LDC1, (tmp & 31), |
501 | (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), | 498 | (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); |
502 | RSET_GPR); | ||
503 | } else { | 499 | } else { |
504 | emit_tg(as, MIPSI_MFC1, dest, tmp); | 500 | emit_tg(as, MIPSI_MFC1, dest, tmp); |
505 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, | 501 | emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, |
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 4cf1649a..e270b36c 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -393,8 +393,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |||
393 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); | 393 | emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); |
394 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 394 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
395 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 395 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
396 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 396 | (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); |
397 | RSET_GPR); | ||
398 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 397 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
399 | emit_fb(as, PPCI_FCTIWZ, tmp, left); | 398 | emit_fb(as, PPCI_FCTIWZ, tmp, left); |
400 | } | 399 | } |
@@ -433,13 +432,11 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
433 | Reg left = ra_alloc1(as, lref, allow); | 432 | Reg left = ra_alloc1(as, lref, allow); |
434 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); | 433 | Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); |
435 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 434 | Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
436 | const float *kbias; | ||
437 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); | 435 | if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); |
438 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 436 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
439 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 437 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
440 | kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); | 438 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
441 | if (st == IRT_U32) kbias++; | 439 | &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], |
442 | emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, | ||
443 | rset_clear(allow, hibias)); | 440 | rset_clear(allow, hibias)); |
444 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, | 441 | emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, |
445 | RID_SP, SPOFS_TMPLO); | 442 | RID_SP, SPOFS_TMPLO); |
@@ -472,8 +469,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
472 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); | 469 | emit_fb(as, PPCI_FCTIWZ, tmp, tmp); |
473 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); | 470 | emit_fab(as, PPCI_FSUB, tmp, left, tmp); |
474 | emit_lsptr(as, PPCI_LFS, (tmp & 31), | 471 | emit_lsptr(as, PPCI_LFS, (tmp & 31), |
475 | (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), | 472 | (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); |
476 | RSET_GPR); | ||
477 | } else { | 473 | } else { |
478 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); | 474 | emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); |
479 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | 475 | emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); |
@@ -974,7 +970,7 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
974 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); | 970 | emit_fab(as, PPCI_FSUB, dest, dest, fbias); |
975 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | 971 | emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); |
976 | emit_lsptr(as, PPCI_LFS, (fbias & 31), | 972 | emit_lsptr(as, PPCI_LFS, (fbias & 31), |
977 | (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), | 973 | (void *)&as->J->k32[LJ_K32_2P52_2P31], |
978 | rset_clear(allow, hibias)); | 974 | rset_clear(allow, hibias)); |
979 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); | 975 | emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); |
980 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); | 976 | emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 6cd3800d..e3ed7554 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -696,7 +696,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
696 | if (left == dest) return; /* Avoid the XO_XORPS. */ | 696 | if (left == dest) return; /* Avoid the XO_XORPS. */ |
697 | } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ | 697 | } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ |
698 | /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ | 698 | /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ |
699 | cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); | 699 | cTValue *k = &as->J->k64[LJ_K64_TOBIT]; |
700 | Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | 700 | Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); |
701 | if (irt_isfloat(ir->t)) | 701 | if (irt_isfloat(ir->t)) |
702 | emit_rr(as, XO_CVTSD2SS, dest, dest); | 702 | emit_rr(as, XO_CVTSD2SS, dest, dest); |
@@ -711,7 +711,7 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
711 | asm_fuseloadm(as, lref, RSET_GPR, st64); | 711 | asm_fuseloadm(as, lref, RSET_GPR, st64); |
712 | if (LJ_64 && st == IRT_U64) { | 712 | if (LJ_64 && st == IRT_U64) { |
713 | MCLabel l_end = emit_label(as); | 713 | MCLabel l_end = emit_label(as); |
714 | const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); | 714 | cTValue *k = &as->J->k64[LJ_K64_2P64]; |
715 | emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ | 715 | emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ |
716 | emit_sjcc(as, CC_NS, l_end); | 716 | emit_sjcc(as, CC_NS, l_end); |
717 | emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ | 717 | emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ |
@@ -738,11 +738,9 @@ static void asm_conv(ASMState *as, IRIns *ir) | |||
738 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); | 738 | emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); |
739 | emit_rr(as, op, dest|REX_64, tmp); | 739 | emit_rr(as, op, dest|REX_64, tmp); |
740 | if (st == IRT_NUM) | 740 | if (st == IRT_NUM) |
741 | emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, | 741 | emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); |
742 | LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000))); | ||
743 | else | 742 | else |
744 | emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, | 743 | emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); |
745 | LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000))); | ||
746 | emit_sjcc(as, CC_NS, l_end); | 744 | emit_sjcc(as, CC_NS, l_end); |
747 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ | 745 | emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ |
748 | emit_rr(as, op, dest|REX_64, tmp); | 746 | emit_rr(as, op, dest|REX_64, tmp); |
@@ -828,8 +826,7 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) | |||
828 | if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { | 826 | if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { |
829 | /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ | 827 | /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ |
830 | MCLabel l_end = emit_label(as); | 828 | MCLabel l_end = emit_label(as); |
831 | emit_rma(as, XO_FADDq, XOg_FADDq, | 829 | emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); |
832 | lj_ir_k64_find(as->J, U64x(43f00000,00000000))); | ||
833 | emit_sjcc(as, CC_NS, l_end); | 830 | emit_sjcc(as, CC_NS, l_end); |
834 | emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ | 831 | emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ |
835 | } else { | 832 | } else { |
@@ -869,8 +866,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) | |||
869 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); | 866 | emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); |
870 | else | 867 | else |
871 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); | 868 | emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); |
872 | emit_rma(as, XO_FADDq, XOg_FADDq, | 869 | emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); |
873 | lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); | ||
874 | emit_sjcc(as, CC_NS, l_pop); | 870 | emit_sjcc(as, CC_NS, l_pop); |
875 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ | 871 | emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ |
876 | } | 872 | } |
diff --git a/src/lj_ir.c b/src/lj_ir.c index b4087aa7..6a1ecc13 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
@@ -204,12 +204,12 @@ typedef struct K64Array { | |||
204 | void lj_ir_k64_freeall(jit_State *J) | 204 | void lj_ir_k64_freeall(jit_State *J) |
205 | { | 205 | { |
206 | K64Array *k; | 206 | K64Array *k; |
207 | for (k = mref(J->k64, K64Array); k; ) { | 207 | for (k = mref(J->k64p, K64Array); k; ) { |
208 | K64Array *next = mref(k->next, K64Array); | 208 | K64Array *next = mref(k->next, K64Array); |
209 | lj_mem_free(J2G(J), k, sizeof(K64Array)); | 209 | lj_mem_free(J2G(J), k, sizeof(K64Array)); |
210 | k = next; | 210 | k = next; |
211 | } | 211 | } |
212 | setmref(J->k64, NULL); | 212 | setmref(J->k64p, NULL); |
213 | } | 213 | } |
214 | 214 | ||
215 | /* Get new 64 bit constant slot. */ | 215 | /* Get new 64 bit constant slot. */ |
@@ -223,7 +223,7 @@ static TValue *ir_k64_add(jit_State *J, K64Array *kp, uint64_t u64) | |||
223 | if (kp) | 223 | if (kp) |
224 | setmref(kp->next, kn); /* Chain to the end of the list. */ | 224 | setmref(kp->next, kn); /* Chain to the end of the list. */ |
225 | else | 225 | else |
226 | setmref(J->k64, kn); /* Link first array. */ | 226 | setmref(J->k64p, kn); /* Link first array. */ |
227 | kp = kn; | 227 | kp = kn; |
228 | } | 228 | } |
229 | ntv = &kp->k[kp->numk++]; /* Add to current array. */ | 229 | ntv = &kp->k[kp->numk++]; /* Add to current array. */ |
@@ -237,7 +237,7 @@ cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) | |||
237 | K64Array *k, *kp = NULL; | 237 | K64Array *k, *kp = NULL; |
238 | MSize idx; | 238 | MSize idx; |
239 | /* Search for the constant in the whole chain of arrays. */ | 239 | /* Search for the constant in the whole chain of arrays. */ |
240 | for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) { | 240 | for (k = mref(J->k64p, K64Array); k; k = mref(k->next, K64Array)) { |
241 | kp = k; /* Remember previous element in list. */ | 241 | kp = k; /* Remember previous element in list. */ |
242 | for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ | 242 | for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ |
243 | TValue *tv = &k->k[idx]; | 243 | TValue *tv = &k->k[idx]; |
@@ -254,7 +254,7 @@ TValue *lj_ir_k64_reserve(jit_State *J) | |||
254 | K64Array *k, *kp = NULL; | 254 | K64Array *k, *kp = NULL; |
255 | lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */ | 255 | lj_ir_k64_find(J, 0); /* Intern dummy 0 to protect the reserved slot. */ |
256 | /* Find last K64Array, if any. */ | 256 | /* Find last K64Array, if any. */ |
257 | for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) kp = k; | 257 | for (k = mref(J->k64p, K64Array); k; k = mref(k->next, K64Array)) kp = k; |
258 | return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */ | 258 | return ir_k64_add(J, kp, 0); /* Set to 0. Final value is set later. */ |
259 | } | 259 | } |
260 | 260 | ||
diff --git a/src/lj_jit.h b/src/lj_jit.h index 2d2e833a..6a47961b 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -308,6 +308,37 @@ enum { | |||
308 | LJ_KSIMD__MAX | 308 | LJ_KSIMD__MAX |
309 | }; | 309 | }; |
310 | 310 | ||
311 | enum { | ||
312 | #if LJ_TARGET_X86ORX64 | ||
313 | LJ_K64_TOBIT, /* 2^52 + 2^51 */ | ||
314 | LJ_K64_2P64, /* 2^64 */ | ||
315 | LJ_K64_M2P64, /* -2^64 */ | ||
316 | #if LJ_32 | ||
317 | LJ_K64_M2P64_31, /* -2^64 or -2^31 */ | ||
318 | #else | ||
319 | LJ_K64_M2P64_31 = LJ_K64_M2P64, | ||
320 | #endif | ||
321 | #endif | ||
322 | #if LJ_TARGET_MIPS | ||
323 | LJ_K64_2P31, /* 2^31 */ | ||
324 | #endif | ||
325 | LJ_K64__MAX, | ||
326 | }; | ||
327 | |||
328 | enum { | ||
329 | #if LJ_TARGET_X86ORX64 | ||
330 | LJ_K32_M2P64_31, /* -2^64 or -2^31 */ | ||
331 | #endif | ||
332 | #if LJ_TARGET_PPC | ||
333 | LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ | ||
334 | LJ_K32_2P52, /* 2^52 */ | ||
335 | #endif | ||
336 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
337 | LJ_K32_2P31, /* 2^31 */ | ||
338 | #endif | ||
339 | LJ_K32__MAX | ||
340 | }; | ||
341 | |||
311 | /* Get 16 byte aligned pointer to SIMD constant. */ | 342 | /* Get 16 byte aligned pointer to SIMD constant. */ |
312 | #define LJ_KSIMD(J, n) \ | 343 | #define LJ_KSIMD(J, n) \ |
313 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) | 344 | ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) |
@@ -360,8 +391,10 @@ typedef struct jit_State { | |||
360 | int32_t framedepth; /* Current frame depth. */ | 391 | int32_t framedepth; /* Current frame depth. */ |
361 | int32_t retdepth; /* Return frame depth (count of RETF). */ | 392 | int32_t retdepth; /* Return frame depth (count of RETF). */ |
362 | 393 | ||
363 | MRef k64; /* Pointer to chained array of 64 bit constants. */ | 394 | MRef k64p; /* Pointer to chained array of 64 bit constants. */ |
364 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ | 395 | TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ |
396 | TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */ | ||
397 | uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */ | ||
365 | 398 | ||
366 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ | 399 | IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ |
367 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ | 400 | IRRef irtoplim; /* Upper limit of instruction buffer (biased). */ |
diff --git a/src/lj_trace.c b/src/lj_trace.c index 7970aba6..0d54c0af 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -297,13 +297,35 @@ void lj_trace_initstate(global_State *g) | |||
297 | { | 297 | { |
298 | jit_State *J = G2J(g); | 298 | jit_State *J = G2J(g); |
299 | TValue *tv; | 299 | TValue *tv; |
300 | /* Initialize SIMD constants. */ | 300 | |
301 | /* Initialize aligned SIMD constants. */ | ||
301 | tv = LJ_KSIMD(J, LJ_KSIMD_ABS); | 302 | tv = LJ_KSIMD(J, LJ_KSIMD_ABS); |
302 | tv[0].u64 = U64x(7fffffff,ffffffff); | 303 | tv[0].u64 = U64x(7fffffff,ffffffff); |
303 | tv[1].u64 = U64x(7fffffff,ffffffff); | 304 | tv[1].u64 = U64x(7fffffff,ffffffff); |
304 | tv = LJ_KSIMD(J, LJ_KSIMD_NEG); | 305 | tv = LJ_KSIMD(J, LJ_KSIMD_NEG); |
305 | tv[0].u64 = U64x(80000000,00000000); | 306 | tv[0].u64 = U64x(80000000,00000000); |
306 | tv[1].u64 = U64x(80000000,00000000); | 307 | tv[1].u64 = U64x(80000000,00000000); |
308 | |||
309 | /* Initialize 32/64 bit constants. */ | ||
310 | #if LJ_TARGET_X86ORX64 | ||
311 | J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); | ||
312 | J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); | ||
313 | J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); | ||
314 | #if LJ_32 | ||
315 | J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); | ||
316 | #endif | ||
317 | J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; | ||
318 | #endif | ||
319 | #if LJ_TARGET_PPC | ||
320 | J->k32[LJ_K32_2P52_2P31] = 0x59800004; | ||
321 | J->k32[LJ_K32_2P52] = 0x59800000; | ||
322 | #endif | ||
323 | #if LJ_TARGET_PPC || LJ_TARGET_MIPS | ||
324 | J->k32[LJ_K32_2P31] = 0x4f000000; | ||
325 | #endif | ||
326 | #if LJ_TARGET_MIPS | ||
327 | J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); | ||
328 | #endif | ||
307 | } | 329 | } |
308 | 330 | ||
309 | /* Free everything associated with the JIT compiler state. */ | 331 | /* Free everything associated with the JIT compiler state. */ |