diff options
| author | Mike Pall <mike> | 2012-08-09 13:11:43 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2012-08-09 13:13:11 +0200 |
| commit | 4dc9e22def294492d0aaab1064d2805c2f721c8d (patch) | |
| tree | cba2ab9e659690ca88e07b1bd7b36ba2634dd63b /src | |
| parent | e6fac91d01eff614314ce577ac75f80fadf17517 (diff) | |
| download | luajit-4dc9e22def294492d0aaab1064d2805c2f721c8d.tar.gz luajit-4dc9e22def294492d0aaab1064d2805c2f721c8d.tar.bz2 luajit-4dc9e22def294492d0aaab1064d2805c2f721c8d.zip | |
ARM: Add VFP and hard-float ABI variants to JIT compiler.
Diffstat (limited to 'src')
| -rw-r--r-- | src/jit/dis_arm.lua | 3 | ||||
| -rw-r--r-- | src/lj_arch.h | 3 | ||||
| -rw-r--r-- | src/lj_asm.c | 3 | ||||
| -rw-r--r-- | src/lj_asm_arm.h | 735 | ||||
| -rw-r--r-- | src/lj_emit_arm.h | 55 | ||||
| -rw-r--r-- | src/lj_ircall.h | 4 | ||||
| -rw-r--r-- | src/lj_target_arm.h | 63 | ||||
| -rw-r--r-- | src/lj_vm.h | 7 |
8 files changed, 760 insertions, 113 deletions
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index ecca392f..d37a9750 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
| @@ -676,7 +676,8 @@ end | |||
| 676 | 676 | ||
| 677 | -- Return register name for RID. | 677 | -- Return register name for RID. |
| 678 | local function regname_(r) | 678 | local function regname_(r) |
| 679 | return map_gpr[r] | 679 | if r < 16 then return map_gpr[r] end |
| 680 | return "d"..(r-16) | ||
| 680 | end | 681 | end |
| 681 | 682 | ||
| 682 | -- Public module functions. | 683 | -- Public module functions. |
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 8850f8a9..c0c2006d 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
| @@ -362,6 +362,9 @@ | |||
| 362 | #ifndef LJ_ARCH_HASFPU | 362 | #ifndef LJ_ARCH_HASFPU |
| 363 | #define LJ_ARCH_HASFPU 1 | 363 | #define LJ_ARCH_HASFPU 1 |
| 364 | #endif | 364 | #endif |
| 365 | #ifndef LJ_ABI_SOFTFP | ||
| 366 | #define LJ_ABI_SOFTFP 0 | ||
| 367 | #endif | ||
| 365 | #define LJ_SOFTFP (!LJ_ARCH_HASFPU) | 368 | #define LJ_SOFTFP (!LJ_ARCH_HASFPU) |
| 366 | 369 | ||
| 367 | #if LJ_ARCH_ENDIAN == LUAJIT_BE | 370 | #if LJ_ARCH_ENDIAN == LUAJIT_BE |
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 2e6b1745..8d2cafb3 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
| @@ -1610,6 +1610,7 @@ static void asm_setup_regsp(ASMState *as) | |||
| 1610 | break; | 1610 | break; |
| 1611 | /* fallthrough */ | 1611 | /* fallthrough */ |
| 1612 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | 1612 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: |
| 1613 | if (!LJ_SOFTFP && irt_isnum(ir->t)) break; | ||
| 1613 | ir->prev = (uint16_t)REGSP_HINT((rload & 15)); | 1614 | ir->prev = (uint16_t)REGSP_HINT((rload & 15)); |
| 1614 | rload = lj_ror(rload, 4); | 1615 | rload = lj_ror(rload, 4); |
| 1615 | continue; | 1616 | continue; |
| @@ -1641,7 +1642,7 @@ static void asm_setup_regsp(ASMState *as) | |||
| 1641 | } | 1642 | } |
| 1642 | break; | 1643 | break; |
| 1643 | #endif | 1644 | #endif |
| 1644 | #if LJ_NEED_FP64 | 1645 | #if !LJ_SOFTFP && LJ_NEED_FP64 |
| 1645 | case IR_CONV: | 1646 | case IR_CONV: |
| 1646 | if (irt_isfp((ir-1)->t)) { | 1647 | if (irt_isfp((ir-1)->t)) { |
| 1647 | ir->prev = REGSP_HINT(RID_FPRET); | 1648 | ir->prev = REGSP_HINT(RID_FPRET); |
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 6a44e5ef..c380a6e8 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
| @@ -48,6 +48,32 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow) | |||
| 48 | return r; | 48 | return r; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | #if !LJ_SOFTFP | ||
| 52 | /* Allocate two source registers for three-operand instructions. */ | ||
| 53 | static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) | ||
| 54 | { | ||
| 55 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
| 56 | Reg left = irl->r, right = irr->r; | ||
| 57 | if (ra_hasreg(left)) { | ||
| 58 | ra_noweak(as, left); | ||
| 59 | if (ra_noreg(right)) | ||
| 60 | right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); | ||
| 61 | else | ||
| 62 | ra_noweak(as, right); | ||
| 63 | } else if (ra_hasreg(right)) { | ||
| 64 | ra_noweak(as, right); | ||
| 65 | left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); | ||
| 66 | } else if (ra_hashint(right)) { | ||
| 67 | right = ra_allocref(as, ir->op2, allow); | ||
| 68 | left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); | ||
| 69 | } else { | ||
| 70 | left = ra_allocref(as, ir->op1, allow); | ||
| 71 | right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); | ||
| 72 | } | ||
| 73 | return left | (right << 8); | ||
| 74 | } | ||
| 75 | #endif | ||
| 76 | |||
| 51 | /* -- Guard handling ------------------------------------------------------ */ | 77 | /* -- Guard handling ------------------------------------------------------ */ |
| 52 | 78 | ||
| 53 | /* Generate an exit stub group at the bottom of the reserved MCode memory. */ | 79 | /* Generate an exit stub group at the bottom of the reserved MCode memory. */ |
| @@ -125,7 +151,8 @@ static int32_t asm_fuseabase(ASMState *as, IRRef ref) | |||
| 125 | } | 151 | } |
| 126 | 152 | ||
| 127 | /* Fuse array/hash/upvalue reference into register+offset operand. */ | 153 | /* Fuse array/hash/upvalue reference into register+offset operand. */ |
| 128 | static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | 154 | static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, |
| 155 | int lim) | ||
| 129 | { | 156 | { |
| 130 | IRIns *ir = IR(ref); | 157 | IRIns *ir = IR(ref); |
| 131 | if (ra_noreg(ir->r)) { | 158 | if (ra_noreg(ir->r)) { |
| @@ -136,7 +163,7 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |||
| 136 | int32_t ofs = asm_fuseabase(as, tab); | 163 | int32_t ofs = asm_fuseabase(as, tab); |
| 137 | IRRef refa = ofs ? tab : ir->op1; | 164 | IRRef refa = ofs ? tab : ir->op1; |
| 138 | ofs += 8*IR(ir->op2)->i; | 165 | ofs += 8*IR(ir->op2)->i; |
| 139 | if (ofs > -4096 && ofs < 4096) { | 166 | if (ofs > -lim && ofs < lim) { |
| 140 | *ofsp = ofs; | 167 | *ofsp = ofs; |
| 141 | return ra_alloc1(as, refa, allow); | 168 | return ra_alloc1(as, refa, allow); |
| 142 | } | 169 | } |
| @@ -145,7 +172,7 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |||
| 145 | } else if (ir->o == IR_HREFK) { | 172 | } else if (ir->o == IR_HREFK) { |
| 146 | if (mayfuse(as, ref)) { | 173 | if (mayfuse(as, ref)) { |
| 147 | int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); | 174 | int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); |
| 148 | if (ofs < 4096) { | 175 | if (ofs < lim) { |
| 149 | *ofsp = ofs; | 176 | *ofsp = ofs; |
| 150 | return ra_alloc1(as, ir->op1, allow); | 177 | return ra_alloc1(as, ir->op1, allow); |
| 151 | } | 178 | } |
| @@ -211,14 +238,16 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, | |||
| 211 | IRIns *ir = IR(ref); | 238 | IRIns *ir = IR(ref); |
| 212 | Reg base; | 239 | Reg base; |
| 213 | if (ra_noreg(ir->r) && mayfuse(as, ref)) { | 240 | if (ra_noreg(ir->r) && mayfuse(as, ref)) { |
| 214 | int32_t lim = (ai & 0x04000000) ? 4096 : 256; | 241 | int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 : |
| 242 | (ai & 0x04000000) ? 4096 : 256; | ||
| 215 | if (ir->o == IR_ADD) { | 243 | if (ir->o == IR_ADD) { |
| 216 | int32_t ofs2; | 244 | int32_t ofs2; |
| 217 | if (irref_isk(ir->op2) && | 245 | if (irref_isk(ir->op2) && |
| 218 | (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim) { | 246 | (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim && |
| 247 | (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) { | ||
| 219 | ofs = ofs2; | 248 | ofs = ofs2; |
| 220 | ref = ir->op1; | 249 | ref = ir->op1; |
| 221 | } else if (ofs == 0) { | 250 | } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) { |
| 222 | IRRef lref = ir->op1, rref = ir->op2; | 251 | IRRef lref = ir->op1, rref = ir->op2; |
| 223 | Reg rn, rm; | 252 | Reg rn, rm; |
| 224 | if ((ai & 0x04000000)) { | 253 | if ((ai & 0x04000000)) { |
| @@ -238,7 +267,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, | |||
| 238 | emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); | 267 | emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); |
| 239 | return; | 268 | return; |
| 240 | } | 269 | } |
| 241 | } else if (ir->o == IR_STRREF) { | 270 | } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { |
| 242 | lua_assert(ofs == 0); | 271 | lua_assert(ofs == 0); |
| 243 | ofs = (int32_t)sizeof(GCstr); | 272 | ofs = (int32_t)sizeof(GCstr); |
| 244 | if (irref_isk(ir->op2)) { | 273 | if (irref_isk(ir->op2)) { |
| @@ -268,12 +297,41 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, | |||
| 268 | } | 297 | } |
| 269 | } | 298 | } |
| 270 | base = ra_alloc1(as, ref, allow); | 299 | base = ra_alloc1(as, ref, allow); |
| 300 | #if !LJ_SOFTFP | ||
| 301 | if ((ai & 0x08000000)) | ||
| 302 | emit_vlso(as, ai, rd, base, ofs); | ||
| 303 | else | ||
| 304 | #endif | ||
| 271 | if ((ai & 0x04000000)) | 305 | if ((ai & 0x04000000)) |
| 272 | emit_lso(as, ai, rd, base, ofs); | 306 | emit_lso(as, ai, rd, base, ofs); |
| 273 | else | 307 | else |
| 274 | emit_lsox(as, ai, rd, base, ofs); | 308 | emit_lsox(as, ai, rd, base, ofs); |
| 275 | } | 309 | } |
| 276 | 310 | ||
| 311 | #if !LJ_SOFTFP | ||
| 312 | /* Fuse to multiply-add/sub instruction. */ | ||
| 313 | static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) | ||
| 314 | { | ||
| 315 | IRRef lref = ir->op1, rref = ir->op2; | ||
| 316 | IRIns *irm; | ||
| 317 | if (lref != rref && | ||
| 318 | ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && | ||
| 319 | ra_noreg(irm->r)) || | ||
| 320 | (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && | ||
| 321 | (rref = lref, ai = air, ra_noreg(irm->r))))) { | ||
| 322 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
| 323 | Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); | ||
| 324 | Reg right, left = ra_alloc2(as, irm, | ||
| 325 | rset_exclude(rset_exclude(RSET_FPR, dest), add)); | ||
| 326 | right = (left >> 8); left &= 255; | ||
| 327 | emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); | ||
| 328 | if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); | ||
| 329 | return 1; | ||
| 330 | } | ||
| 331 | return 0; | ||
| 332 | } | ||
| 333 | #endif | ||
| 334 | |||
| 277 | /* -- Calls --------------------------------------------------------------- */ | 335 | /* -- Calls --------------------------------------------------------------- */ |
| 278 | 336 | ||
| 279 | /* Generate a call to a C function. */ | 337 | /* Generate a call to a C function. */ |
| @@ -282,21 +340,69 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |||
| 282 | uint32_t n, nargs = CCI_NARGS(ci); | 340 | uint32_t n, nargs = CCI_NARGS(ci); |
| 283 | int32_t ofs = 0; | 341 | int32_t ofs = 0; |
| 284 | Reg gpr = REGARG_FIRSTGPR; | 342 | Reg gpr = REGARG_FIRSTGPR; |
| 343 | #if !LJ_SOFTFP | ||
| 344 | Reg fpr = REGARG_FIRSTFPR, fprodd = 0; | ||
| 345 | #endif | ||
| 285 | if ((void *)ci->func) | 346 | if ((void *)ci->func) |
| 286 | emit_call(as, (void *)ci->func); | 347 | emit_call(as, (void *)ci->func); |
| 287 | for (n = 0; n < nargs; n++) { /* Setup args. */ | 348 | for (n = 0; n < nargs; n++) { /* Setup args. */ |
| 288 | IRRef ref = args[n]; | 349 | IRRef ref = args[n]; |
| 289 | IRIns *ir = IR(ref); | 350 | IRIns *ir = IR(ref); |
| 290 | if (gpr <= REGARG_LASTGPR) { | 351 | #if !LJ_SOFTFP |
| 291 | lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ | 352 | if (irt_isfp(ir->t)) { |
| 292 | if (ref) ra_leftov(as, gpr, ref); | 353 | Reg src; |
| 293 | gpr++; | 354 | if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { |
| 294 | } else { | 355 | if (irt_isnum(ir->t)) { |
| 295 | if (ref) { | 356 | if (fpr <= REGARG_LASTFPR) { |
| 296 | Reg r = ra_alloc1(as, ref, RSET_GPR); | 357 | ra_leftov(as, fpr, ref); |
| 297 | emit_spstore(as, ir, r, ofs); | 358 | fpr++; |
| 359 | continue; | ||
| 360 | } | ||
| 361 | } else if (fprodd) { /* Ick. */ | ||
| 362 | src = ra_alloc1(as, ref, RSET_FPR); | ||
| 363 | emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000); | ||
| 364 | fprodd = 0; | ||
| 365 | continue; | ||
| 366 | } else if (fpr <= REGARG_LASTFPR) { | ||
| 367 | ra_leftov(as, fpr, ref); | ||
| 368 | fprodd = fpr++; | ||
| 369 | continue; | ||
| 370 | } | ||
| 371 | src = ra_alloc1(as, ref, RSET_FPR); | ||
| 372 | fprodd = 0; | ||
| 373 | goto stackfp; | ||
| 374 | } | ||
| 375 | src = ra_alloc1(as, ref, RSET_FPR); | ||
| 376 | if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; | ||
| 377 | if (gpr <= REGARG_LASTGPR) { | ||
| 378 | lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ | ||
| 379 | if (irt_isnum(ir->t)) { | ||
| 380 | emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); | ||
| 381 | gpr += 2; | ||
| 382 | } else { | ||
| 383 | emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15)); | ||
| 384 | gpr++; | ||
| 385 | } | ||
| 386 | } else { | ||
| 387 | stackfp: | ||
| 388 | if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; | ||
| 389 | emit_spstore(as, ir, src, ofs); | ||
| 390 | ofs += irt_isnum(ir->t) ? 8 : 4; | ||
| 391 | } | ||
| 392 | } else | ||
| 393 | #endif | ||
| 394 | { | ||
| 395 | if (gpr <= REGARG_LASTGPR) { | ||
| 396 | lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ | ||
| 397 | if (ref) ra_leftov(as, gpr, ref); | ||
| 398 | gpr++; | ||
| 399 | } else { | ||
| 400 | if (ref) { | ||
| 401 | Reg r = ra_alloc1(as, ref, RSET_GPR); | ||
| 402 | emit_spstore(as, ir, r, ofs); | ||
| 403 | } | ||
| 404 | ofs += 4; | ||
| 298 | } | 405 | } |
| 299 | ofs += 4; | ||
| 300 | } | 406 | } |
| 301 | } | 407 | } |
| 302 | } | 408 | } |
| @@ -313,10 +419,21 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
| 313 | ra_evictset(as, drop); /* Evictions must be performed first. */ | 419 | ra_evictset(as, drop); /* Evictions must be performed first. */ |
| 314 | if (ra_used(ir)) { | 420 | if (ra_used(ir)) { |
| 315 | lua_assert(!irt_ispri(ir->t)); | 421 | lua_assert(!irt_ispri(ir->t)); |
| 316 | if (hiop) | 422 | if (!LJ_SOFTFP && irt_isfp(ir->t)) { |
| 423 | if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { | ||
| 424 | Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); | ||
| 425 | if (irt_isnum(ir->t)) | ||
| 426 | emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest); | ||
| 427 | else | ||
| 428 | emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest); | ||
| 429 | } else { | ||
| 430 | ra_destreg(as, ir, RID_FPRET); | ||
| 431 | } | ||
| 432 | } else if (hiop) { | ||
| 317 | ra_destpair(as, ir); | 433 | ra_destpair(as, ir); |
| 318 | else | 434 | } else { |
| 319 | ra_destreg(as, ir, RID_RET); | 435 | ra_destreg(as, ir, RID_RET); |
| 436 | } | ||
| 320 | } | 437 | } |
| 321 | UNUSED(ci); | 438 | UNUSED(ci); |
| 322 | } | 439 | } |
| @@ -373,33 +490,125 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
| 373 | 490 | ||
| 374 | /* -- Type conversions ---------------------------------------------------- */ | 491 | /* -- Type conversions ---------------------------------------------------- */ |
| 375 | 492 | ||
| 376 | static void asm_conv(ASMState *as, IRIns *ir) | 493 | #if !LJ_SOFTFP |
| 494 | static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | ||
| 495 | { | ||
| 496 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | ||
| 497 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
| 498 | asm_guardcc(as, CC_NE); | ||
| 499 | emit_d(as, ARMI_VMRS, 0); | ||
| 500 | emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); | ||
| 501 | emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); | ||
| 502 | emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); | ||
| 503 | emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); | ||
| 504 | } | ||
| 505 | |||
| 506 | static void asm_tobit(ASMState *as, IRIns *ir) | ||
| 377 | { | 507 | { |
| 508 | RegSet allow = RSET_FPR; | ||
| 378 | Reg dest = ra_dest(as, ir, RSET_GPR); | 509 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 510 | Reg left = ra_alloc1(as, ir->op1, allow); | ||
| 511 | Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); | ||
| 512 | Reg tmp = ra_scratch(as, rset_clear(allow, right)); | ||
| 513 | emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); | ||
| 514 | emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); | ||
| 515 | } | ||
| 516 | #endif | ||
| 517 | |||
| 518 | static void asm_conv(ASMState *as, IRIns *ir) | ||
| 519 | { | ||
| 379 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | 520 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); |
| 380 | /* FP conversions and 64 bit integer conversions are handled by SPLIT. */ | 521 | #if !LJ_SOFTFP |
| 381 | lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); | 522 | int stfp = (st == IRT_NUM || st == IRT_FLOAT); |
| 523 | #endif | ||
| 524 | IRRef lref = ir->op1; | ||
| 525 | /* 64 bit integer conversions are handled by SPLIT. */ | ||
| 382 | lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); | 526 | lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); |
| 383 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | 527 | #if LJ_SOFTFP |
| 384 | Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | 528 | /* FP conversions are handled by SPLIT. */ |
| 385 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | 529 | lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); |
| 386 | if ((as->flags & JIT_F_ARMV6)) { | 530 | /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ |
| 387 | ARMIns ai = st == IRT_I8 ? ARMI_SXTB : | 531 | #else |
| 388 | st == IRT_U8 ? ARMI_UXTB : | 532 | lua_assert(irt_type(ir->t) != st); |
| 389 | st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; | 533 | if (irt_isfp(ir->t)) { |
| 390 | emit_dm(as, ai, dest, left); | 534 | Reg dest = ra_dest(as, ir, RSET_FPR); |
| 391 | } else if (st == IRT_U8) { | 535 | if (stfp) { /* FP to FP conversion. */ |
| 392 | emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left); | 536 | emit_dm(as, st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32, |
| 537 | (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15)); | ||
| 538 | } else { /* Integer to FP conversion. */ | ||
| 539 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
| 540 | ARMIns ai = irt_isfloat(ir->t) ? | ||
| 541 | (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) : | ||
| 542 | (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32); | ||
| 543 | emit_dm(as, ai, (dest & 15), (dest & 15)); | ||
| 544 | emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15)); | ||
| 545 | } | ||
| 546 | } else if (stfp) { /* FP to integer conversion. */ | ||
| 547 | if (irt_isguard(ir->t)) { | ||
| 548 | /* Checked conversions are only supported from number to int. */ | ||
| 549 | lua_assert(irt_isint(ir->t) && st == IRT_NUM); | ||
| 550 | asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | ||
| 393 | } else { | 551 | } else { |
| 394 | uint32_t shift = st == IRT_I8 ? 24 : 16; | 552 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 395 | ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; | 553 | Reg left = ra_alloc1(as, lref, RSET_FPR); |
| 396 | emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP); | 554 | Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); |
| 397 | emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); | 555 | ARMIns ai; |
| 556 | emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); | ||
| 557 | ai = irt_isint(ir->t) ? | ||
| 558 | (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : | ||
| 559 | (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); | ||
| 560 | emit_dm(as, ai, (tmp & 15), (left & 15)); | ||
| 398 | } | 561 | } |
| 399 | } else { /* Handle 32/32 bit no-op (cast). */ | 562 | } else |
| 400 | ra_leftov(as, dest, ir->op1); /* Do nothing, but may need to move regs. */ | 563 | #endif |
| 564 | { | ||
| 565 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
| 566 | if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | ||
| 567 | Reg left = ra_alloc1(as, lref, RSET_GPR); | ||
| 568 | lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); | ||
| 569 | if ((as->flags & JIT_F_ARMV6)) { | ||
| 570 | ARMIns ai = st == IRT_I8 ? ARMI_SXTB : | ||
| 571 | st == IRT_U8 ? ARMI_UXTB : | ||
| 572 | st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; | ||
| 573 | emit_dm(as, ai, dest, left); | ||
| 574 | } else if (st == IRT_U8) { | ||
| 575 | emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left); | ||
| 576 | } else { | ||
| 577 | uint32_t shift = st == IRT_I8 ? 24 : 16; | ||
| 578 | ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; | ||
| 579 | emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP); | ||
| 580 | emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); | ||
| 581 | } | ||
| 582 | } else { /* Handle 32/32 bit no-op (cast). */ | ||
| 583 | ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | ||
| 584 | } | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 588 | #if !LJ_SOFTFP && LJ_HASFFI | ||
| 589 | static void asm_conv64(ASMState *as, IRIns *ir) | ||
| 590 | { | ||
| 591 | IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); | ||
| 592 | IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); | ||
| 593 | IRCallID id; | ||
| 594 | CCallInfo ci; | ||
| 595 | IRRef args[2]; | ||
| 596 | args[0] = (ir-1)->op1; | ||
| 597 | args[1] = ir->op1; | ||
| 598 | if (st == IRT_NUM || st == IRT_FLOAT) { | ||
| 599 | id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); | ||
| 600 | ir--; | ||
| 601 | } else { | ||
| 602 | id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); | ||
| 401 | } | 603 | } |
| 604 | ci = lj_ir_callinfo[id]; | ||
| 605 | #if !LJ_ABI_SOFTFP | ||
| 606 | ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ | ||
| 607 | #endif | ||
| 608 | asm_setupresult(as, ir, &ci); | ||
| 609 | asm_gencall(as, &ci, args); | ||
| 402 | } | 610 | } |
| 611 | #endif | ||
| 403 | 612 | ||
| 404 | static void asm_strto(ASMState *as, IRIns *ir) | 613 | static void asm_strto(ASMState *as, IRIns *ir) |
| 405 | { | 614 | { |
| @@ -409,6 +618,7 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
| 409 | int destused = ra_used(ir); | 618 | int destused = ra_used(ir); |
| 410 | int32_t ofs = 0; | 619 | int32_t ofs = 0; |
| 411 | ra_evictset(as, RSET_SCRATCH); | 620 | ra_evictset(as, RSET_SCRATCH); |
| 621 | #if LJ_SOFTFP | ||
| 412 | if (destused) { | 622 | if (destused) { |
| 413 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | 623 | if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && |
| 414 | (ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) { | 624 | (ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) { |
| @@ -433,6 +643,25 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
| 433 | emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); | 643 | emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); |
| 434 | emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); | 644 | emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); |
| 435 | } | 645 | } |
| 646 | #else | ||
| 647 | UNUSED(rhi); | ||
| 648 | if (destused) { | ||
| 649 | if (ra_hasspill(ir->s)) { | ||
| 650 | ofs = sps_scale(ir->s); | ||
| 651 | destused = 0; | ||
| 652 | if (ra_hasreg(ir->r)) { | ||
| 653 | ra_free(as, ir->r); | ||
| 654 | ra_modified(as, ir->r); | ||
| 655 | emit_spload(as, ir, ir->r, ofs); | ||
| 656 | } | ||
| 657 | } else { | ||
| 658 | rlo = ra_dest(as, ir, RSET_FPR); | ||
| 659 | } | ||
| 660 | } | ||
| 661 | asm_guardcc(as, CC_EQ); | ||
| 662 | if (destused) | ||
| 663 | emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0); | ||
| 664 | #endif | ||
| 436 | emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); /* Test return status. */ | 665 | emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); /* Test return status. */ |
| 437 | args[0] = ir->op1; /* GCstr *str */ | 666 | args[0] = ir->op1; /* GCstr *str */ |
| 438 | args[1] = ASMREF_TMP1; /* TValue *n */ | 667 | args[1] = ASMREF_TMP1; /* TValue *n */ |
| @@ -448,9 +677,18 @@ static void asm_strto(ASMState *as, IRIns *ir) | |||
| 448 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) | 677 | static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) |
| 449 | { | 678 | { |
| 450 | IRIns *ir = IR(ref); | 679 | IRIns *ir = IR(ref); |
| 451 | if (irt_isnum(ir->t)) { /* Use the number constant itself as a TValue. */ | 680 | if (irt_isnum(ir->t)) { |
| 452 | lua_assert(irref_isk(ref)); | 681 | if (irref_isk(ref)) { |
| 453 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | 682 | /* Use the number constant itself as a TValue. */ |
| 683 | ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | ||
| 684 | } else { | ||
| 685 | #if LJ_SOFTFP | ||
| 686 | lua_assert(0); | ||
| 687 | #else | ||
| 688 | /* Otherwise force a spill and use the spill slot. */ | ||
| 689 | emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); | ||
| 690 | #endif | ||
| 691 | } | ||
| 454 | } else { | 692 | } else { |
| 455 | /* Otherwise use [sp] and [sp+4] to hold the TValue. */ | 693 | /* Otherwise use [sp] and [sp+4] to hold the TValue. */ |
| 456 | RegSet allow = rset_exclude(RSET_GPR, dest); | 694 | RegSet allow = rset_exclude(RSET_GPR, dest); |
| @@ -532,6 +770,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
| 532 | MCLabel l_end, l_loop; | 770 | MCLabel l_end, l_loop; |
| 533 | rset_clear(allow, tab); | 771 | rset_clear(allow, tab); |
| 534 | if (!irref_isk(refkey) || irt_isstr(kt)) { | 772 | if (!irref_isk(refkey) || irt_isstr(kt)) { |
| 773 | #if LJ_SOFTFP | ||
| 535 | key = ra_alloc1(as, refkey, allow); | 774 | key = ra_alloc1(as, refkey, allow); |
| 536 | rset_clear(allow, key); | 775 | rset_clear(allow, key); |
| 537 | if (irkey[1].o == IR_HIOP) { | 776 | if (irkey[1].o == IR_HIOP) { |
| @@ -545,6 +784,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
| 545 | rset_clear(allow, keynumhi); | 784 | rset_clear(allow, keynumhi); |
| 546 | khi = 0; | 785 | khi = 0; |
| 547 | } | 786 | } |
| 787 | #else | ||
| 788 | if (irt_isnum(kt)) { | ||
| 789 | key = ra_scratch(as, allow); | ||
| 790 | rset_clear(allow, key); | ||
| 791 | keyhi = keynumhi = ra_scratch(as, allow); | ||
| 792 | rset_clear(allow, keyhi); | ||
| 793 | khi = 0; | ||
| 794 | } else { | ||
| 795 | key = ra_alloc1(as, refkey, allow); | ||
| 796 | rset_clear(allow, key); | ||
| 797 | } | ||
| 798 | #endif | ||
| 548 | } else if (irt_isnum(kt)) { | 799 | } else if (irt_isnum(kt)) { |
| 549 | int32_t val = (int32_t)ir_knum(irkey)->u32.lo; | 800 | int32_t val = (int32_t)ir_knum(irkey)->u32.lo; |
| 550 | k = emit_isk12(ARMI_CMP, val); | 801 | k = emit_isk12(ARMI_CMP, val); |
| @@ -630,6 +881,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |||
| 630 | emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); | 881 | emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); |
| 631 | emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ | 882 | emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ |
| 632 | emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); | 883 | emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); |
| 884 | #if !LJ_SOFTFP | ||
| 885 | emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi, | ||
| 886 | (ra_alloc1(as, refkey, RSET_FPR) & 15)); | ||
| 887 | #endif | ||
| 633 | } else { | 888 | } else { |
| 634 | emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); | 889 | emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); |
| 635 | emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS, | 890 | emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS, |
| @@ -775,8 +1030,8 @@ static ARMIns asm_fxloadins(IRIns *ir) | |||
| 775 | case IRT_U8: return ARMI_LDRB; | 1030 | case IRT_U8: return ARMI_LDRB; |
| 776 | case IRT_I16: return ARMI_LDRSH; | 1031 | case IRT_I16: return ARMI_LDRSH; |
| 777 | case IRT_U16: return ARMI_LDRH; | 1032 | case IRT_U16: return ARMI_LDRH; |
| 778 | case IRT_NUM: lua_assert(0); | 1033 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; |
| 779 | case IRT_FLOAT: | 1034 | case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; |
| 780 | default: return ARMI_LDR; | 1035 | default: return ARMI_LDR; |
| 781 | } | 1036 | } |
| 782 | } | 1037 | } |
| @@ -786,8 +1041,8 @@ static ARMIns asm_fxstoreins(IRIns *ir) | |||
| 786 | switch (irt_type(ir->t)) { | 1041 | switch (irt_type(ir->t)) { |
| 787 | case IRT_I8: case IRT_U8: return ARMI_STRB; | 1042 | case IRT_I8: case IRT_U8: return ARMI_STRB; |
| 788 | case IRT_I16: case IRT_U16: return ARMI_STRH; | 1043 | case IRT_I16: case IRT_U16: return ARMI_STRH; |
| 789 | case IRT_NUM: lua_assert(0); | 1044 | case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; |
| 790 | case IRT_FLOAT: | 1045 | case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; |
| 791 | default: return ARMI_STR; | 1046 | default: return ARMI_STR; |
| 792 | } | 1047 | } |
| 793 | } | 1048 | } |
| @@ -829,7 +1084,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) | |||
| 829 | 1084 | ||
| 830 | static void asm_xload(ASMState *as, IRIns *ir) | 1085 | static void asm_xload(ASMState *as, IRIns *ir) |
| 831 | { | 1086 | { |
| 832 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1087 | Reg dest = ra_dest(as, ir, |
| 1088 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | ||
| 833 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); | 1089 | lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); |
| 834 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); | 1090 | asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); |
| 835 | } | 1091 | } |
| @@ -837,7 +1093,8 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
| 837 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 1093 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) |
| 838 | { | 1094 | { |
| 839 | if (ir->r != RID_SINK) { | 1095 | if (ir->r != RID_SINK) { |
| 840 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | 1096 | Reg src = ra_alloc1(as, ir->op2, |
| 1097 | (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); | ||
| 841 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 1098 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, |
| 842 | rset_exclude(RSET_GPR, src), ofs); | 1099 | rset_exclude(RSET_GPR, src), ofs); |
| 843 | } | 1100 | } |
| @@ -845,7 +1102,7 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | |||
| 845 | 1102 | ||
| 846 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 1103 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
| 847 | { | 1104 | { |
| 848 | int hiop = ((ir+1)->o == IR_HIOP); | 1105 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); |
| 849 | IRType t = hiop ? IRT_NUM : irt_type(ir->t); | 1106 | IRType t = hiop ? IRT_NUM : irt_type(ir->t); |
| 850 | Reg dest = RID_NONE, type = RID_NONE, idx; | 1107 | Reg dest = RID_NONE, type = RID_NONE, idx; |
| 851 | RegSet allow = RSET_GPR; | 1108 | RegSet allow = RSET_GPR; |
| @@ -855,11 +1112,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
| 855 | rset_clear(allow, type); | 1112 | rset_clear(allow, type); |
| 856 | } | 1113 | } |
| 857 | if (ra_used(ir)) { | 1114 | if (ra_used(ir)) { |
| 858 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | 1115 | lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
| 859 | dest = ra_dest(as, ir, allow); | 1116 | irt_isint(ir->t) || irt_isaddr(ir->t)); |
| 1117 | dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); | ||
| 860 | rset_clear(allow, dest); | 1118 | rset_clear(allow, dest); |
| 861 | } | 1119 | } |
| 862 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow); | 1120 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow, |
| 1121 | (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); | ||
| 863 | if (!hiop || type == RID_NONE) { | 1122 | if (!hiop || type == RID_NONE) { |
| 864 | rset_clear(allow, idx); | 1123 | rset_clear(allow, idx); |
| 865 | if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && | 1124 | if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && |
| @@ -872,7 +1131,14 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
| 872 | } | 1131 | } |
| 873 | asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); | 1132 | asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); |
| 874 | emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); | 1133 | emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); |
| 875 | if (ra_hasreg(dest)) emit_lso(as, ARMI_LDR, dest, idx, ofs); | 1134 | if (ra_hasreg(dest)) { |
| 1135 | #if !LJ_SOFTFP | ||
| 1136 | if (t == IRT_NUM) | ||
| 1137 | emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs); | ||
| 1138 | else | ||
| 1139 | #endif | ||
| 1140 | emit_lso(as, ARMI_LDR, dest, idx, ofs); | ||
| 1141 | } | ||
| 876 | emit_lso(as, ARMI_LDR, type, idx, ofs+4); | 1142 | emit_lso(as, ARMI_LDR, type, idx, ofs+4); |
| 877 | } | 1143 | } |
| 878 | 1144 | ||
| @@ -882,44 +1148,80 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
| 882 | RegSet allow = RSET_GPR; | 1148 | RegSet allow = RSET_GPR; |
| 883 | Reg idx, src = RID_NONE, type = RID_NONE; | 1149 | Reg idx, src = RID_NONE, type = RID_NONE; |
| 884 | int32_t ofs = 0; | 1150 | int32_t ofs = 0; |
| 885 | int hiop = ((ir+1)->o == IR_HIOP); | 1151 | #if !LJ_SOFTFP |
| 886 | if (!irt_ispri(ir->t)) { | 1152 | if (irt_isnum(ir->t)) { |
| 887 | src = ra_alloc1(as, ir->op2, allow); | 1153 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
| 888 | rset_clear(allow, src); | 1154 | idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); |
| 1155 | emit_vlso(as, ARMI_VSTR_D, src, idx, ofs); | ||
| 1156 | } else | ||
| 1157 | #endif | ||
| 1158 | { | ||
| 1159 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); | ||
| 1160 | if (!irt_ispri(ir->t)) { | ||
| 1161 | src = ra_alloc1(as, ir->op2, allow); | ||
| 1162 | rset_clear(allow, src); | ||
| 1163 | } | ||
| 1164 | if (hiop) | ||
| 1165 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
| 1166 | else | ||
| 1167 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
| 1168 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); | ||
| 1169 | if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); | ||
| 1170 | emit_lso(as, ARMI_STR, type, idx, ofs+4); | ||
| 889 | } | 1171 | } |
| 890 | if (hiop) | ||
| 891 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
| 892 | else | ||
| 893 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
| 894 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type)); | ||
| 895 | if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); | ||
| 896 | emit_lso(as, ARMI_STR, type, idx, ofs+4); | ||
| 897 | } | 1172 | } |
| 898 | } | 1173 | } |
| 899 | 1174 | ||
| 900 | static void asm_sload(ASMState *as, IRIns *ir) | 1175 | static void asm_sload(ASMState *as, IRIns *ir) |
| 901 | { | 1176 | { |
| 902 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); | 1177 | int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); |
| 903 | int hiop = ((ir+1)->o == IR_HIOP); | 1178 | int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); |
| 904 | IRType t = hiop ? IRT_NUM : irt_type(ir->t); | 1179 | IRType t = hiop ? IRT_NUM : irt_type(ir->t); |
| 905 | Reg dest = RID_NONE, type = RID_NONE, base; | 1180 | Reg dest = RID_NONE, type = RID_NONE, base; |
| 906 | RegSet allow = RSET_GPR; | 1181 | RegSet allow = RSET_GPR; |
| 907 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ | 1182 | lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ |
| 908 | lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); | 1183 | lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); |
| 1184 | #if LJ_SOFTFP | ||
| 909 | lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ | 1185 | lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ |
| 910 | if (hiop && ra_used(ir+1)) { | 1186 | if (hiop && ra_used(ir+1)) { |
| 911 | type = ra_dest(as, ir+1, allow); | 1187 | type = ra_dest(as, ir+1, allow); |
| 912 | rset_clear(allow, type); | 1188 | rset_clear(allow, type); |
| 913 | } | 1189 | } |
| 1190 | #else | ||
| 1191 | if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { | ||
| 1192 | dest = ra_scratch(as, RSET_FPR); | ||
| 1193 | asm_tointg(as, ir, dest); | ||
| 1194 | t = IRT_NUM; /* Continue with a regular number type check. */ | ||
| 1195 | } else | ||
| 1196 | #endif | ||
| 914 | if (ra_used(ir)) { | 1197 | if (ra_used(ir)) { |
| 915 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | 1198 | lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || |
| 916 | dest = ra_dest(as, ir, allow); | 1199 | irt_isint(ir->t) || irt_isaddr(ir->t)); |
| 1200 | dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); | ||
| 917 | rset_clear(allow, dest); | 1201 | rset_clear(allow, dest); |
| 1202 | base = ra_alloc1(as, REF_BASE, allow); | ||
| 1203 | if ((ir->op2 & IRSLOAD_CONVERT)) { | ||
| 1204 | if (t == IRT_INT) { | ||
| 1205 | Reg tmp = ra_scratch(as, RSET_FPR); | ||
| 1206 | emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); | ||
| 1207 | emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15)); | ||
| 1208 | dest = tmp; | ||
| 1209 | t = IRT_NUM; /* Check for original type. */ | ||
| 1210 | } else { | ||
| 1211 | Reg tmp = ra_scratch(as, RSET_GPR); | ||
| 1212 | emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15)); | ||
| 1213 | emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15)); | ||
| 1214 | dest = tmp; | ||
| 1215 | t = IRT_INT; /* Check for original type. */ | ||
| 1216 | } | ||
| 1217 | } | ||
| 1218 | goto dotypecheck; | ||
| 918 | } | 1219 | } |
| 919 | base = ra_alloc1(as, REF_BASE, allow); | 1220 | base = ra_alloc1(as, REF_BASE, allow); |
| 1221 | dotypecheck: | ||
| 1222 | rset_clear(allow, base); | ||
| 920 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { | 1223 | if ((ir->op2 & IRSLOAD_TYPECHECK)) { |
| 921 | if (ra_noreg(type)) { | 1224 | if (ra_noreg(type)) { |
| 922 | rset_clear(allow, base); | ||
| 923 | if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && | 1225 | if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && |
| 924 | rset_test((as->freeset & allow), dest+1)) { | 1226 | rset_test((as->freeset & allow), dest+1)) { |
| 925 | type = dest+1; | 1227 | type = dest+1; |
| @@ -931,7 +1233,21 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
| 931 | asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); | 1233 | asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); |
| 932 | emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); | 1234 | emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); |
| 933 | } | 1235 | } |
| 934 | if (ra_hasreg(dest)) emit_lso(as, ARMI_LDR, dest, base, ofs); | 1236 | if (ra_hasreg(dest)) { |
| 1237 | #if !LJ_SOFTFP | ||
| 1238 | if (t == IRT_NUM) { | ||
| 1239 | if (ofs < 1024) { | ||
| 1240 | emit_vlso(as, ARMI_VLDR_D, dest, base, ofs); | ||
| 1241 | } else { | ||
| 1242 | if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); | ||
| 1243 | emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0); | ||
| 1244 | emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow); | ||
| 1245 | return; | ||
| 1246 | } | ||
| 1247 | } else | ||
| 1248 | #endif | ||
| 1249 | emit_lso(as, ARMI_LDR, dest, base, ofs); | ||
| 1250 | } | ||
| 935 | if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); | 1251 | if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); |
| 936 | } | 1252 | } |
| 937 | 1253 | ||
| @@ -1045,6 +1361,42 @@ static void asm_obar(ASMState *as, IRIns *ir) | |||
| 1045 | 1361 | ||
| 1046 | /* -- Arithmetic and logic operations ------------------------------------- */ | 1362 | /* -- Arithmetic and logic operations ------------------------------------- */ |
| 1047 | 1363 | ||
| 1364 | #if !LJ_SOFTFP | ||
| 1365 | static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) | ||
| 1366 | { | ||
| 1367 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
| 1368 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
| 1369 | right = (left >> 8); left &= 255; | ||
| 1370 | emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) | ||
| 1374 | { | ||
| 1375 | Reg dest = ra_dest(as, ir, RSET_FPR); | ||
| 1376 | Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); | ||
| 1377 | emit_dm(as, ai, (dest & 15), (left & 15)); | ||
| 1378 | } | ||
| 1379 | |||
| 1380 | static int asm_fpjoin_pow(ASMState *as, IRIns *ir) | ||
| 1381 | { | ||
| 1382 | IRIns *irp = IR(ir->op1); | ||
| 1383 | if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { | ||
| 1384 | IRIns *irpp = IR(irp->op1); | ||
| 1385 | if (irpp == ir-2 && irpp->o == IR_FPMATH && | ||
| 1386 | irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { | ||
| 1387 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; | ||
| 1388 | IRRef args[2]; | ||
| 1389 | args[0] = irpp->op1; | ||
| 1390 | args[1] = irp->op2; | ||
| 1391 | asm_setupresult(as, ir, ci); | ||
| 1392 | asm_gencall(as, ci, args); | ||
| 1393 | return 1; | ||
| 1394 | } | ||
| 1395 | } | ||
| 1396 | return 0; | ||
| 1397 | } | ||
| 1398 | #endif | ||
| 1399 | |||
| 1048 | static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) | 1400 | static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) |
| 1049 | { | 1401 | { |
| 1050 | IRIns *ir; | 1402 | IRIns *ir; |
| @@ -1082,6 +1434,16 @@ static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) | |||
| 1082 | emit_dn(as, ai^m, dest, left); | 1434 | emit_dn(as, ai^m, dest, left); |
| 1083 | } | 1435 | } |
| 1084 | 1436 | ||
| 1437 | static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) | ||
| 1438 | { | ||
| 1439 | if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ | ||
| 1440 | as->flagmcp = NULL; | ||
| 1441 | as->mcp++; | ||
| 1442 | ai |= ARMI_S; | ||
| 1443 | } | ||
| 1444 | asm_intop(as, ir, ai); | ||
| 1445 | } | ||
| 1446 | |||
| 1085 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) | 1447 | static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) |
| 1086 | { | 1448 | { |
| 1087 | if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ | 1449 | if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ |
| @@ -1108,16 +1470,6 @@ static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) | |||
| 1108 | } | 1470 | } |
| 1109 | } | 1471 | } |
| 1110 | 1472 | ||
| 1111 | static void asm_arithop(ASMState *as, IRIns *ir, ARMIns ai) | ||
| 1112 | { | ||
| 1113 | if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ | ||
| 1114 | as->flagmcp = NULL; | ||
| 1115 | as->mcp++; | ||
| 1116 | ai |= ARMI_S; | ||
| 1117 | } | ||
| 1118 | asm_intop(as, ir, ai); | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) | 1473 | static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) |
| 1122 | { | 1474 | { |
| 1123 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1475 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| @@ -1148,9 +1500,55 @@ static void asm_intmul(ASMState *as, IRIns *ir) | |||
| 1148 | if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); | 1500 | if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); |
| 1149 | } | 1501 | } |
| 1150 | 1502 | ||
| 1151 | static void asm_intmod(ASMState *as, IRIns *ir) | 1503 | static void asm_add(ASMState *as, IRIns *ir) |
| 1504 | { | ||
| 1505 | #if !LJ_SOFTFP | ||
| 1506 | if (irt_isnum(ir->t)) { | ||
| 1507 | if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) | ||
| 1508 | asm_fparith(as, ir, ARMI_VADD_D); | ||
| 1509 | return; | ||
| 1510 | } | ||
| 1511 | #endif | ||
| 1512 | asm_intop_s(as, ir, ARMI_ADD); | ||
| 1513 | } | ||
| 1514 | |||
| 1515 | static void asm_sub(ASMState *as, IRIns *ir) | ||
| 1516 | { | ||
| 1517 | #if !LJ_SOFTFP | ||
| 1518 | if (irt_isnum(ir->t)) { | ||
| 1519 | if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) | ||
| 1520 | asm_fparith(as, ir, ARMI_VSUB_D); | ||
| 1521 | return; | ||
| 1522 | } | ||
| 1523 | #endif | ||
| 1524 | asm_intop_s(as, ir, ARMI_SUB); | ||
| 1525 | } | ||
| 1526 | |||
| 1527 | static void asm_mul(ASMState *as, IRIns *ir) | ||
| 1528 | { | ||
| 1529 | #if !LJ_SOFTFP | ||
| 1530 | if (irt_isnum(ir->t)) { | ||
| 1531 | asm_fparith(as, ir, ARMI_VMUL_D); | ||
| 1532 | return; | ||
| 1533 | } | ||
| 1534 | #endif | ||
| 1535 | asm_intmul(as, ir); | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | static void asm_neg(ASMState *as, IRIns *ir) | ||
| 1539 | { | ||
| 1540 | #if !LJ_SOFTFP | ||
| 1541 | if (irt_isnum(ir->t)) { | ||
| 1542 | asm_fpunary(as, ir, ARMI_VNEG_D); | ||
| 1543 | return; | ||
| 1544 | } | ||
| 1545 | #endif | ||
| 1546 | asm_intneg(as, ir, ARMI_RSB); | ||
| 1547 | } | ||
| 1548 | |||
| 1549 | static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) | ||
| 1152 | { | 1550 | { |
| 1153 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi]; | 1551 | const CCallInfo *ci = &lj_ir_callinfo[id]; |
| 1154 | IRRef args[2]; | 1552 | IRRef args[2]; |
| 1155 | args[0] = ir->op1; | 1553 | args[0] = ir->op1; |
| 1156 | args[1] = ir->op2; | 1554 | args[1] = ir->op2; |
| @@ -1158,6 +1556,21 @@ static void asm_intmod(ASMState *as, IRIns *ir) | |||
| 1158 | asm_gencall(as, ci, args); | 1556 | asm_gencall(as, ci, args); |
| 1159 | } | 1557 | } |
| 1160 | 1558 | ||
| 1559 | #if !LJ_SOFTFP | ||
| 1560 | static void asm_callround(ASMState *as, IRIns *ir, int id) | ||
| 1561 | { | ||
| 1562 | /* The modified regs must match with the *.dasc implementation. */ | ||
| 1563 | RegSet drop = RID2RSET(RID_D1)|RID2RSET(RID_D2)| | ||
| 1564 | RID2RSET(RID_R0)|RID2RSET(RID_R1); | ||
| 1565 | ra_evictset(as, drop); | ||
| 1566 | ra_destreg(as, ir, RID_FPRET); | ||
| 1567 | emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_hf : | ||
| 1568 | id == IRFPM_CEIL ? (void *)lj_vm_ceil_hf : | ||
| 1569 | (void *)lj_vm_trunc_hf); | ||
| 1570 | ra_leftov(as, RID_D0, ir->op1); | ||
| 1571 | } | ||
| 1572 | #endif | ||
| 1573 | |||
| 1161 | static void asm_bitswap(ASMState *as, IRIns *ir) | 1574 | static void asm_bitswap(ASMState *as, IRIns *ir) |
| 1162 | { | 1575 | { |
| 1163 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1576 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| @@ -1216,7 +1629,8 @@ static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) | |||
| 1216 | emit_nm(as, ARMI_CMP^kcmp, left, right); | 1629 | emit_nm(as, ARMI_CMP^kcmp, left, right); |
| 1217 | } | 1630 | } |
| 1218 | 1631 | ||
| 1219 | static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) | 1632 | #if LJ_SOFTFP |
| 1633 | static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc) | ||
| 1220 | { | 1634 | { |
| 1221 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | 1635 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; |
| 1222 | RegSet drop = RSET_SCRATCH; | 1636 | RegSet drop = RSET_SCRATCH; |
| @@ -1239,6 +1653,30 @@ static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) | |||
| 1239 | for (r = RID_R0; r <= RID_R3; r++) | 1653 | for (r = RID_R0; r <= RID_R3; r++) |
| 1240 | ra_leftov(as, r, args[r-RID_R0]); | 1654 | ra_leftov(as, r, args[r-RID_R0]); |
| 1241 | } | 1655 | } |
| 1656 | #else | ||
| 1657 | static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) | ||
| 1658 | { | ||
| 1659 | Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); | ||
| 1660 | Reg right, left = ra_alloc2(as, ir, RSET_FPR); | ||
| 1661 | right = ((left >> 8) & 15); left &= 15; | ||
| 1662 | if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left); | ||
| 1663 | if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right); | ||
| 1664 | emit_d(as, ARMI_VMRS, 0); | ||
| 1665 | emit_dm(as, ARMI_VCMP_D, left, right); | ||
| 1666 | } | ||
| 1667 | #endif | ||
| 1668 | |||
| 1669 | static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) | ||
| 1670 | { | ||
| 1671 | #if LJ_SOFTFP | ||
| 1672 | UNUSED(fcc); | ||
| 1673 | #else | ||
| 1674 | if (irt_isnum(ir->t)) | ||
| 1675 | asm_fpmin_max(as, ir, fcc); | ||
| 1676 | else | ||
| 1677 | #endif | ||
| 1678 | asm_intmin_max(as, ir, cc); | ||
| 1679 | } | ||
| 1242 | 1680 | ||
| 1243 | /* -- Comparisons --------------------------------------------------------- */ | 1681 | /* -- Comparisons --------------------------------------------------------- */ |
| 1244 | 1682 | ||
| @@ -1258,8 +1696,9 @@ static const uint8_t asm_compmap[IR_ABC+1] = { | |||
| 1258 | /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ | 1696 | /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ |
| 1259 | }; | 1697 | }; |
| 1260 | 1698 | ||
| 1699 | #if LJ_SOFTFP | ||
| 1261 | /* FP comparisons. */ | 1700 | /* FP comparisons. */ |
| 1262 | static void asm_fpcomp(ASMState *as, IRIns *ir) | 1701 | static void asm_sfpcomp(ASMState *as, IRIns *ir) |
| 1263 | { | 1702 | { |
| 1264 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; | 1703 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; |
| 1265 | RegSet drop = RSET_SCRATCH; | 1704 | RegSet drop = RSET_SCRATCH; |
| @@ -1278,6 +1717,31 @@ static void asm_fpcomp(ASMState *as, IRIns *ir) | |||
| 1278 | for (r = RID_R0; r <= RID_R3; r++) | 1717 | for (r = RID_R0; r <= RID_R3; r++) |
| 1279 | ra_leftov(as, r, args[r-RID_R0]); | 1718 | ra_leftov(as, r, args[r-RID_R0]); |
| 1280 | } | 1719 | } |
| 1720 | #else | ||
| 1721 | /* FP comparisons. */ | ||
| 1722 | static void asm_fpcomp(ASMState *as, IRIns *ir) | ||
| 1723 | { | ||
| 1724 | Reg left, right; | ||
| 1725 | ARMIns ai; | ||
| 1726 | int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); | ||
| 1727 | if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { | ||
| 1728 | left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15); | ||
| 1729 | right = 0; | ||
| 1730 | ai = ARMI_VCMPZ_D; | ||
| 1731 | } else { | ||
| 1732 | left = ra_alloc2(as, ir, RSET_FPR); | ||
| 1733 | if (swp) { | ||
| 1734 | right = (left & 15); left = ((left >> 8) & 15); | ||
| 1735 | } else { | ||
| 1736 | right = ((left >> 8) & 15); left &= 15; | ||
| 1737 | } | ||
| 1738 | ai = ARMI_VCMP_D; | ||
| 1739 | } | ||
| 1740 | asm_guardcc(as, (asm_compmap[ir->o] >> 4)); | ||
| 1741 | emit_d(as, ARMI_VMRS, 0); | ||
| 1742 | emit_dm(as, ai, left, right); | ||
| 1743 | } | ||
| 1744 | #endif | ||
| 1281 | 1745 | ||
| 1282 | /* Integer comparisons. */ | 1746 | /* Integer comparisons. */ |
| 1283 | static void asm_intcomp(ASMState *as, IRIns *ir) | 1747 | static void asm_intcomp(ASMState *as, IRIns *ir) |
| @@ -1370,21 +1834,32 @@ static void asm_int64comp(ASMState *as, IRIns *ir) | |||
| 1370 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ | 1834 | /* Hiword op of a split 64 bit op. Previous op must be the loword op. */ |
| 1371 | static void asm_hiop(ASMState *as, IRIns *ir) | 1835 | static void asm_hiop(ASMState *as, IRIns *ir) |
| 1372 | { | 1836 | { |
| 1837 | #if LJ_HASFFI || LJ_SOFTFP | ||
| 1373 | /* HIOP is marked as a store because it needs its own DCE logic. */ | 1838 | /* HIOP is marked as a store because it needs its own DCE logic. */ |
| 1374 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ | 1839 | int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ |
| 1375 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; | 1840 | if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; |
| 1376 | if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ | 1841 | if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ |
| 1377 | as->curins--; /* Always skip the loword comparison. */ | 1842 | as->curins--; /* Always skip the loword comparison. */ |
| 1378 | if (irt_isint(ir->t)) | 1843 | #if LJ_SOFTFP |
| 1379 | asm_int64comp(as, ir-1); | 1844 | if (!irt_isint(ir->t)) |
| 1845 | asm_sfpcomp(as, ir-1); | ||
| 1380 | else | 1846 | else |
| 1381 | asm_fpcomp(as, ir-1); | 1847 | #endif |
| 1848 | asm_int64comp(as, ir-1); | ||
| 1382 | return; | 1849 | return; |
| 1850 | #if LJ_SOFTFP | ||
| 1383 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { | 1851 | } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { |
| 1384 | as->curins--; /* Always skip the loword min/max. */ | 1852 | as->curins--; /* Always skip the loword min/max. */ |
| 1385 | if (uselo || usehi) | 1853 | if (uselo || usehi) |
| 1386 | asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); | 1854 | asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); |
| 1855 | return; | ||
| 1856 | #elif LJ_HASFFI | ||
| 1857 | } else if ((ir-1)->o == IR_CONV) { | ||
| 1858 | as->curins--; /* Always skip the CONV. */ | ||
| 1859 | if (usehi || uselo) | ||
| 1860 | asm_conv64(as, ir); | ||
| 1387 | return; | 1861 | return; |
| 1862 | #endif | ||
| 1388 | } else if ((ir-1)->o == IR_XSTORE) { | 1863 | } else if ((ir-1)->o == IR_XSTORE) { |
| 1389 | if ((ir-1)->r != RID_SINK) | 1864 | if ((ir-1)->r != RID_SINK) |
| 1390 | asm_xstore(as, ir, 4); | 1865 | asm_xstore(as, ir, 4); |
| @@ -1409,23 +1884,30 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
| 1409 | asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); | 1884 | asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); |
| 1410 | break; | 1885 | break; |
| 1411 | #endif | 1886 | #endif |
| 1887 | #if LJ_SOFTFP | ||
| 1412 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | 1888 | case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: |
| 1413 | case IR_STRTO: | 1889 | case IR_STRTO: |
| 1414 | if (!uselo) | 1890 | if (!uselo) |
| 1415 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ | 1891 | ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ |
| 1416 | break; | 1892 | break; |
| 1893 | #endif | ||
| 1417 | case IR_CALLN: | 1894 | case IR_CALLN: |
| 1418 | case IR_CALLS: | 1895 | case IR_CALLS: |
| 1419 | case IR_CALLXS: | 1896 | case IR_CALLXS: |
| 1420 | if (!uselo) | 1897 | if (!uselo) |
| 1421 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ | 1898 | ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ |
| 1422 | break; | 1899 | break; |
| 1423 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: | 1900 | #if LJ_SOFTFP |
| 1424 | case IR_TOSTR: case IR_CNEWI: | 1901 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: |
| 1902 | #endif | ||
| 1903 | case IR_CNEWI: | ||
| 1425 | /* Nothing to do here. Handled by lo op itself. */ | 1904 | /* Nothing to do here. Handled by lo op itself. */ |
| 1426 | break; | 1905 | break; |
| 1427 | default: lua_assert(0); break; | 1906 | default: lua_assert(0); break; |
| 1428 | } | 1907 | } |
| 1908 | #else | ||
| 1909 | UNUSED(as); UNUSED(ir); lua_assert(0); | ||
| 1910 | #endif | ||
| 1429 | } | 1911 | } |
| 1430 | 1912 | ||
| 1431 | /* -- Stack handling ------------------------------------------------------ */ | 1913 | /* -- Stack handling ------------------------------------------------------ */ |
| @@ -1485,6 +1967,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
| 1485 | if ((sn & SNAP_NORESTORE)) | 1967 | if ((sn & SNAP_NORESTORE)) |
| 1486 | continue; | 1968 | continue; |
| 1487 | if (irt_isnum(ir->t)) { | 1969 | if (irt_isnum(ir->t)) { |
| 1970 | #if LJ_SOFTFP | ||
| 1488 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); | 1971 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); |
| 1489 | Reg tmp; | 1972 | Reg tmp; |
| 1490 | lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ | 1973 | lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ |
| @@ -1494,6 +1977,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
| 1494 | if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1); | 1977 | if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1); |
| 1495 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd); | 1978 | tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd); |
| 1496 | emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); | 1979 | emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4); |
| 1980 | #else | ||
| 1981 | Reg src = ra_alloc1(as, ref, RSET_FPR); | ||
| 1982 | emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); | ||
| 1983 | #endif | ||
| 1497 | } else { | 1984 | } else { |
| 1498 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); | 1985 | RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); |
| 1499 | Reg type; | 1986 | Reg type; |
| @@ -1506,8 +1993,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) | |||
| 1506 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 1993 | if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
| 1507 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ | 1994 | if (s == 0) continue; /* Do not overwrite link to previous frame. */ |
| 1508 | type = ra_allock(as, (int32_t)(*flinks--), odd); | 1995 | type = ra_allock(as, (int32_t)(*flinks--), odd); |
| 1996 | #if LJ_SOFTFP | ||
| 1509 | } else if ((sn & SNAP_SOFTFPNUM)) { | 1997 | } else if ((sn & SNAP_SOFTFPNUM)) { |
| 1510 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); | 1998 | type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); |
| 1999 | #endif | ||
| 1511 | } else { | 2000 | } else { |
| 1512 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); | 2001 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); |
| 1513 | } | 2002 | } |
| @@ -1648,7 +2137,8 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 1648 | /* Miscellaneous ops. */ | 2137 | /* Miscellaneous ops. */ |
| 1649 | case IR_LOOP: asm_loop(as); break; | 2138 | case IR_LOOP: asm_loop(as); break; |
| 1650 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; | 2139 | case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; |
| 1651 | case IR_USE: ra_alloc1(as, ir->op1, RSET_GPR); break; | 2140 | case IR_USE: |
| 2141 | ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; | ||
| 1652 | case IR_PHI: asm_phi(as, ir); break; | 2142 | case IR_PHI: asm_phi(as, ir); break; |
| 1653 | case IR_HIOP: asm_hiop(as, ir); break; | 2143 | case IR_HIOP: asm_hiop(as, ir); break; |
| 1654 | case IR_GCSTEP: asm_gcstep(as, ir); break; | 2144 | case IR_GCSTEP: asm_gcstep(as, ir); break; |
| @@ -1664,6 +2154,9 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 1664 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | 2154 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: |
| 1665 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: | 2155 | case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: |
| 1666 | case IR_ABC: | 2156 | case IR_ABC: |
| 2157 | #if !LJ_SOFTFP | ||
| 2158 | if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; } | ||
| 2159 | #endif | ||
| 1667 | asm_intcomp(as, ir); | 2160 | asm_intcomp(as, ir); |
| 1668 | break; | 2161 | break; |
| 1669 | 2162 | ||
| @@ -1684,20 +2177,38 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 1684 | case IR_BROL: lua_assert(0); break; | 2177 | case IR_BROL: lua_assert(0); break; |
| 1685 | 2178 | ||
| 1686 | /* Arithmetic ops. */ | 2179 | /* Arithmetic ops. */ |
| 1687 | case IR_ADD: case IR_ADDOV: asm_arithop(as, ir, ARMI_ADD); break; | 2180 | case IR_ADD: case IR_ADDOV: asm_add(as, ir); break; |
| 1688 | case IR_SUB: case IR_SUBOV: asm_arithop(as, ir, ARMI_SUB); break; | 2181 | case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break; |
| 1689 | case IR_MUL: case IR_MULOV: asm_intmul(as, ir); break; | 2182 | case IR_MUL: case IR_MULOV: asm_mul(as, ir); break; |
| 1690 | case IR_MOD: asm_intmod(as, ir); break; | 2183 | case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; |
| 1691 | 2184 | case IR_NEG: asm_neg(as, ir); break; | |
| 1692 | case IR_NEG: asm_intneg(as, ir, ARMI_RSB); break; | 2185 | |
| 1693 | 2186 | #if LJ_SOFTFP | |
| 1694 | case IR_MIN: asm_intmin_max(as, ir, CC_GT); break; | 2187 | case IR_DIV: case IR_POW: case IR_ABS: |
| 1695 | case IR_MAX: asm_intmin_max(as, ir, CC_LT); break; | 2188 | case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: |
| 1696 | |||
| 1697 | case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: | ||
| 1698 | case IR_DIV: case IR_POW: case IR_ABS: case IR_TOBIT: | ||
| 1699 | lua_assert(0); /* Unused for LJ_SOFTFP. */ | 2189 | lua_assert(0); /* Unused for LJ_SOFTFP. */ |
| 1700 | break; | 2190 | break; |
| 2191 | #else | ||
| 2192 | case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break; | ||
| 2193 | case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; | ||
| 2194 | case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break; | ||
| 2195 | case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; | ||
| 2196 | case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; | ||
| 2197 | case IR_FPMATH: | ||
| 2198 | if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) | ||
| 2199 | break; | ||
| 2200 | if (ir->op2 <= IRFPM_TRUNC) | ||
| 2201 | asm_callround(as, ir, ir->op2); | ||
| 2202 | else if (ir->op2 == IRFPM_SQRT) | ||
| 2203 | asm_fpunary(as, ir, ARMI_VSQRT_D); | ||
| 2204 | else | ||
| 2205 | asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | ||
| 2206 | break; | ||
| 2207 | case IR_TOBIT: asm_tobit(as, ir); break; | ||
| 2208 | #endif | ||
| 2209 | |||
| 2210 | case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break; | ||
| 2211 | case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break; | ||
| 1701 | 2212 | ||
| 1702 | /* Memory references. */ | 2213 | /* Memory references. */ |
| 1703 | case IR_AREF: asm_aref(as, ir); break; | 2214 | case IR_AREF: asm_aref(as, ir); break; |
| @@ -1754,15 +2265,29 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) | |||
| 1754 | { | 2265 | { |
| 1755 | IRRef args[CCI_NARGS_MAX]; | 2266 | IRRef args[CCI_NARGS_MAX]; |
| 1756 | uint32_t i, nargs = (int)CCI_NARGS(ci); | 2267 | uint32_t i, nargs = (int)CCI_NARGS(ci); |
| 1757 | int nslots = 0, ngpr = REGARG_NUMGPR; | 2268 | int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; |
| 1758 | asm_collectargs(as, ir, ci, args); | 2269 | asm_collectargs(as, ir, ci, args); |
| 1759 | for (i = 0; i < nargs; i++) | 2270 | for (i = 0; i < nargs; i++) { |
| 1760 | if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { | 2271 | if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { |
| 1761 | ngpr &= ~1; | 2272 | if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { |
| 1762 | if (ngpr > 0) ngpr -= 2; else nslots += 2; | 2273 | if (irt_isnum(IR(args[i])->t)) { |
| 2274 | if (nfpr > 0) nfpr--; | ||
| 2275 | else fprodd = 0, nslots = (nslots + 3) & ~1; | ||
| 2276 | } else { | ||
| 2277 | if (fprodd) fprodd--; | ||
| 2278 | else if (nfpr > 0) fprodd = 1, nfpr--; | ||
| 2279 | else nslots++; | ||
| 2280 | } | ||
| 2281 | } else if (irt_isnum(IR(args[i])->t)) { | ||
| 2282 | ngpr &= ~1; | ||
| 2283 | if (ngpr > 0) ngpr -= 2; else nslots += 2; | ||
| 2284 | } else { | ||
| 2285 | if (ngpr > 0) ngpr--; else nslots++; | ||
| 2286 | } | ||
| 1763 | } else { | 2287 | } else { |
| 1764 | if (ngpr > 0) ngpr--; else nslots++; | 2288 | if (ngpr > 0) ngpr--; else nslots++; |
| 1765 | } | 2289 | } |
| 2290 | } | ||
| 1766 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ | 2291 | if (nslots > as->evenspill) /* Leave room for args in stack slots. */ |
| 1767 | as->evenspill = nslots; | 2292 | as->evenspill = nslots; |
| 1768 | return REGSP_HINT(RID_RET); | 2293 | return REGSP_HINT(RID_RET); |
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 27de6852..79ca2db7 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h | |||
| @@ -103,6 +103,15 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | |||
| 103 | *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs; | 103 | *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs; |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | #if !LJ_SOFTFP | ||
| 107 | static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | ||
| 108 | { | ||
| 109 | lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); | ||
| 110 | if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; | ||
| 111 | *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); | ||
| 112 | } | ||
| 113 | #endif | ||
| 114 | |||
| 106 | /* -- Emit loads/stores --------------------------------------------------- */ | 115 | /* -- Emit loads/stores --------------------------------------------------- */ |
| 107 | 116 | ||
| 108 | /* Prefer spills of BASE/L. */ | 117 | /* Prefer spills of BASE/L. */ |
| @@ -208,6 +217,28 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) | |||
| 208 | (i & 4095)); | 217 | (i & 4095)); |
| 209 | } | 218 | } |
| 210 | 219 | ||
| 220 | #if !LJ_SOFTFP | ||
| 221 | /* Load a number constant into an FPR. */ | ||
| 222 | static void emit_loadn(ASMState *as, Reg r, cTValue *tv) | ||
| 223 | { | ||
| 224 | int32_t i; | ||
| 225 | if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { | ||
| 226 | uint32_t hi = tv->u32.hi; | ||
| 227 | uint32_t b = ((hi >> 22) & 0x1ff); | ||
| 228 | if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { | ||
| 229 | *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) | | ||
| 230 | ((tv->u32.hi >> 12) & 0x00080000) | | ||
| 231 | ((tv->u32.hi >> 4) & 0x00070000) | | ||
| 232 | ((tv->u32.hi >> 16) & 0x0000000f); | ||
| 233 | return; | ||
| 234 | } | ||
| 235 | } | ||
| 236 | i = i32ptr(tv); | ||
| 237 | emit_vlso(as, ARMI_VLDR_D, r, | ||
| 238 | ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020)); | ||
| 239 | } | ||
| 240 | #endif | ||
| 241 | |||
| 211 | /* Get/set global_State fields. */ | 242 | /* Get/set global_State fields. */ |
| 212 | #define emit_getgl(as, r, field) \ | 243 | #define emit_getgl(as, r, field) \ |
| 213 | emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field) | 244 | emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field) |
| @@ -256,7 +287,15 @@ static void emit_call(ASMState *as, void *target) | |||
| 256 | /* Generic move between two regs. */ | 287 | /* Generic move between two regs. */ |
| 257 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | 288 | static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) |
| 258 | { | 289 | { |
| 290 | #if LJ_SOFTFP | ||
| 259 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 291 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); |
| 292 | #else | ||
| 293 | if (dst >= RID_MAX_GPR) { | ||
| 294 | emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, | ||
| 295 | (dst & 15), (src & 15)); | ||
| 296 | return; | ||
| 297 | } | ||
| 298 | #endif | ||
| 260 | if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ | 299 | if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ |
| 261 | MCode ins = *as->mcp, swp = (src^dst); | 300 | MCode ins = *as->mcp, swp = (src^dst); |
| 262 | if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) { | 301 | if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) { |
| @@ -272,15 +311,27 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) | |||
| 272 | /* Generic load of register from stack slot. */ | 311 | /* Generic load of register from stack slot. */ |
| 273 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 312 | static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) |
| 274 | { | 313 | { |
| 314 | #if LJ_SOFTFP | ||
| 275 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 315 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); |
| 276 | emit_lso(as, ARMI_LDR, r, RID_SP, ofs); | 316 | #else |
| 317 | if (r >= RID_MAX_GPR) | ||
| 318 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); | ||
| 319 | else | ||
| 320 | #endif | ||
| 321 | emit_lso(as, ARMI_LDR, r, RID_SP, ofs); | ||
| 277 | } | 322 | } |
| 278 | 323 | ||
| 279 | /* Generic store of register to stack slot. */ | 324 | /* Generic store of register to stack slot. */ |
| 280 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) | 325 | static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) |
| 281 | { | 326 | { |
| 327 | #if LJ_SOFTFP | ||
| 282 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); | 328 | lua_assert(!irt_isnum(ir->t)); UNUSED(ir); |
| 283 | emit_lso(as, ARMI_STR, r, RID_SP, ofs); | 329 | #else |
| 330 | if (r >= RID_MAX_GPR) | ||
| 331 | emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); | ||
| 332 | else | ||
| 333 | #endif | ||
| 334 | emit_lso(as, ARMI_STR, r, RID_SP, ofs); | ||
| 284 | } | 335 | } |
| 285 | 336 | ||
| 286 | /* Emit an arithmetic/logic operation with a constant operand. */ | 337 | /* Emit an arithmetic/logic operation with a constant operand. */ |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 7f08bc4a..8f481106 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
| @@ -66,7 +66,7 @@ typedef struct CCallInfo { | |||
| 66 | #define IRCALLCOND_SOFTFP_FFI(x) NULL | 66 | #define IRCALLCOND_SOFTFP_FFI(x) NULL |
| 67 | #endif | 67 | #endif |
| 68 | 68 | ||
| 69 | #define LJ_NEED_FP64 (LJ_TARGET_PPC || LJ_TARGET_MIPS) | 69 | #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) |
| 70 | 70 | ||
| 71 | #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) | 71 | #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) |
| 72 | #define IRCALLCOND_FP64_FFI(x) x | 72 | #define IRCALLCOND_FP64_FFI(x) x |
| @@ -242,7 +242,7 @@ extern uint32_t softfp_f2ui(float a); | |||
| 242 | #endif | 242 | #endif |
| 243 | #endif | 243 | #endif |
| 244 | 244 | ||
| 245 | #if LJ_HASFFI && LJ_NEED_FP64 | 245 | #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) |
| 246 | #ifdef __GNUC__ | 246 | #ifdef __GNUC__ |
| 247 | #define fp64_l2d __floatdidf | 247 | #define fp64_l2d __floatdidf |
| 248 | #define fp64_ul2d __floatundidf | 248 | #define fp64_ul2d __floatundidf |
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index 20e8ad36..ee920f0b 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h | |||
| @@ -32,7 +32,11 @@ enum { | |||
| 32 | RID_RET = RID_R0, | 32 | RID_RET = RID_R0, |
| 33 | RID_RETLO = RID_R0, | 33 | RID_RETLO = RID_R0, |
| 34 | RID_RETHI = RID_R1, | 34 | RID_RETHI = RID_R1, |
| 35 | #if LJ_SOFTFP | ||
| 35 | RID_FPRET = RID_R0, | 36 | RID_FPRET = RID_R0, |
| 37 | #else | ||
| 38 | RID_FPRET = RID_D0, | ||
| 39 | #endif | ||
| 36 | 40 | ||
| 37 | /* These definitions must match with the *.dasc file(s): */ | 41 | /* These definitions must match with the *.dasc file(s): */ |
| 38 | RID_BASE = RID_R9, /* Interpreter BASE. */ | 42 | RID_BASE = RID_R9, /* Interpreter BASE. */ |
| @@ -68,11 +72,10 @@ enum { | |||
| 68 | RID2RSET(RID_R9)|RID2RSET(RID_R11)) | 72 | RID2RSET(RID_R9)|RID2RSET(RID_R11)) |
| 69 | #if LJ_SOFTFP | 73 | #if LJ_SOFTFP |
| 70 | #define RSET_FPR 0 | 74 | #define RSET_FPR 0 |
| 71 | #define RSET_ALL RSET_GPR | ||
| 72 | #else | 75 | #else |
| 73 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) | 76 | #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) |
| 74 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
| 75 | #endif | 77 | #endif |
| 78 | #define RSET_ALL (RSET_GPR|RSET_FPR) | ||
| 76 | #define RSET_INIT RSET_ALL | 79 | #define RSET_INIT RSET_ALL |
| 77 | 80 | ||
| 78 | /* ABI-specific register sets. lr is an implicit scratch register. */ | 81 | /* ABI-specific register sets. lr is an implicit scratch register. */ |
| @@ -91,6 +94,15 @@ enum { | |||
| 91 | #define REGARG_FIRSTGPR RID_R0 | 94 | #define REGARG_FIRSTGPR RID_R0 |
| 92 | #define REGARG_LASTGPR RID_R3 | 95 | #define REGARG_LASTGPR RID_R3 |
| 93 | #define REGARG_NUMGPR 4 | 96 | #define REGARG_NUMGPR 4 |
| 97 | #if LJ_ABI_SOFTFP | ||
| 98 | #define REGARG_FIRSTFPR 0 | ||
| 99 | #define REGARG_LASTFPR 0 | ||
| 100 | #define REGARG_NUMFPR 0 | ||
| 101 | #else | ||
| 102 | #define REGARG_FIRSTFPR RID_D0 | ||
| 103 | #define REGARG_LASTFPR RID_D7 | ||
| 104 | #define REGARG_NUMFPR 8 | ||
| 105 | #endif | ||
| 94 | 106 | ||
| 95 | /* -- Spill slots --------------------------------------------------------- */ | 107 | /* -- Spill slots --------------------------------------------------------- */ |
| 96 | 108 | ||
| @@ -199,6 +211,53 @@ typedef enum ARMIns { | |||
| 199 | /* ARMv6T2 */ | 211 | /* ARMv6T2 */ |
| 200 | ARMI_MOVW = 0xe3000000, | 212 | ARMI_MOVW = 0xe3000000, |
| 201 | ARMI_MOVT = 0xe3400000, | 213 | ARMI_MOVT = 0xe3400000, |
| 214 | |||
| 215 | /* VFP */ | ||
| 216 | ARMI_VMOV_D = 0xeeb00b40, | ||
| 217 | ARMI_VMOV_S = 0xeeb00a40, | ||
| 218 | ARMI_VMOVI_D = 0xeeb00b00, | ||
| 219 | |||
| 220 | ARMI_VMOV_R_S = 0xee100a10, | ||
| 221 | ARMI_VMOV_S_R = 0xee000a10, | ||
| 222 | ARMI_VMOV_RR_D = 0xec500b10, | ||
| 223 | ARMI_VMOV_D_RR = 0xec400b10, | ||
| 224 | |||
| 225 | ARMI_VADD_D = 0xee300b00, | ||
| 226 | ARMI_VSUB_D = 0xee300b40, | ||
| 227 | ARMI_VMUL_D = 0xee200b00, | ||
| 228 | ARMI_VMLA_D = 0xee000b00, | ||
| 229 | ARMI_VMLS_D = 0xee000b40, | ||
| 230 | ARMI_VNMLS_D = 0xee100b00, | ||
| 231 | ARMI_VDIV_D = 0xee800b00, | ||
| 232 | |||
| 233 | ARMI_VABS_D = 0xeeb00bc0, | ||
| 234 | ARMI_VNEG_D = 0xeeb10b40, | ||
| 235 | ARMI_VSQRT_D = 0xeeb10bc0, | ||
| 236 | |||
| 237 | ARMI_VCMP_D = 0xeeb40b40, | ||
| 238 | ARMI_VCMPZ_D = 0xeeb50b40, | ||
| 239 | |||
| 240 | ARMI_VMRS = 0xeef1fa10, | ||
| 241 | |||
| 242 | ARMI_VCVT_S32_F32 = 0xeebd0ac0, | ||
| 243 | ARMI_VCVT_S32_F64 = 0xeebd0bc0, | ||
| 244 | ARMI_VCVT_U32_F32 = 0xeebc0ac0, | ||
| 245 | ARMI_VCVT_U32_F64 = 0xeebc0bc0, | ||
| 246 | ARMI_VCVTR_S32_F32 = 0xeebd0a40, | ||
| 247 | ARMI_VCVTR_S32_F64 = 0xeebd0b40, | ||
| 248 | ARMI_VCVTR_U32_F32 = 0xeebc0a40, | ||
| 249 | ARMI_VCVTR_U32_F64 = 0xeebc0b40, | ||
| 250 | ARMI_VCVT_F32_S32 = 0xeeb80ac0, | ||
| 251 | ARMI_VCVT_F64_S32 = 0xeeb80bc0, | ||
| 252 | ARMI_VCVT_F32_U32 = 0xeeb80a40, | ||
| 253 | ARMI_VCVT_F64_U32 = 0xeeb80b40, | ||
| 254 | ARMI_VCVT_F32_F64 = 0xeeb70bc0, | ||
| 255 | ARMI_VCVT_F64_F32 = 0xeeb70ac0, | ||
| 256 | |||
| 257 | ARMI_VLDR_S = 0xed100a00, | ||
| 258 | ARMI_VLDR_D = 0xed100b00, | ||
| 259 | ARMI_VSTR_S = 0xed000a00, | ||
| 260 | ARMI_VSTR_D = 0xed000b00, | ||
| 202 | } ARMIns; | 261 | } ARMIns; |
| 203 | 262 | ||
| 204 | typedef enum ARMShift { | 263 | typedef enum ARMShift { |
diff --git a/src/lj_vm.h b/src/lj_vm.h index a13a8ce0..4f9a10b8 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h | |||
| @@ -55,6 +55,10 @@ LJ_ASMF void lj_vm_exit_interp(void); | |||
| 55 | #else | 55 | #else |
| 56 | LJ_ASMF double lj_vm_floor(double); | 56 | LJ_ASMF double lj_vm_floor(double); |
| 57 | LJ_ASMF double lj_vm_ceil(double); | 57 | LJ_ASMF double lj_vm_ceil(double); |
| 58 | #if LJ_TARGET_ARM | ||
| 59 | LJ_ASMF double lj_vm_floor_hf(double); | ||
| 60 | LJ_ASMF double lj_vm_ceil_hf(double); | ||
| 61 | #endif | ||
| 58 | #endif | 62 | #endif |
| 59 | 63 | ||
| 60 | #if LJ_HASJIT | 64 | #if LJ_HASJIT |
| @@ -71,6 +75,9 @@ LJ_ASMF void lj_vm_powi_sse(void); | |||
| 71 | #define lj_vm_trunc trunc | 75 | #define lj_vm_trunc trunc |
| 72 | #else | 76 | #else |
| 73 | LJ_ASMF double lj_vm_trunc(double); | 77 | LJ_ASMF double lj_vm_trunc(double); |
| 78 | #if LJ_TARGET_ARM | ||
| 79 | LJ_ASMF double lj_vm_trunc_hf(double); | ||
| 80 | #endif | ||
| 74 | #endif | 81 | #endif |
| 75 | LJ_ASMF double lj_vm_powi(double, int32_t); | 82 | LJ_ASMF double lj_vm_powi(double, int32_t); |
| 76 | #ifdef LUAJIT_NO_LOG2 | 83 | #ifdef LUAJIT_NO_LOG2 |
