diff options
| author | Mike Pall <mike> | 2011-09-05 18:30:36 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2011-09-05 18:30:36 +0200 |
| commit | 690ff909f58d9a554d6e728933adbcc1bae73e7c (patch) | |
| tree | 94ed6fe21e4ad7d743b184a84d98d8eefcaf29f1 | |
| parent | d9fe562ccc2566291901679425b20dfe11a1ce6c (diff) | |
| download | luajit-690ff909f58d9a554d6e728933adbcc1bae73e7c.tar.gz luajit-690ff909f58d9a554d6e728933adbcc1bae73e7c.tar.bz2 luajit-690ff909f58d9a554d6e728933adbcc1bae73e7c.zip | |
PPC: More interpreter tuning. Use y-bit for branch predictions.
| -rw-r--r-- | src/buildvm_ppc.dasc | 46 |
1 file changed, 23 insertions, 23 deletions
```
diff --git a/src/buildvm_ppc.dasc b/src/buildvm_ppc.dasc
index ad209ce6..cf748f59 100644
--- a/src/buildvm_ppc.dasc
+++ b/src/buildvm_ppc.dasc
```
| @@ -326,7 +326,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 326 | | rlwinm TMP2, PC, 0, 0, 28 | 326 | | rlwinm TMP2, PC, 0, 0, 28 |
| 327 | | li_vmstate C | 327 | | li_vmstate C |
| 328 | | sub TMP2, BASE, TMP2 // TMP2 = previous base. | 328 | | sub TMP2, BASE, TMP2 // TMP2 = previous base. |
| 329 | | bne ->vm_returnp | 329 | | bney ->vm_returnp |
| 330 | | | 330 | | |
| 331 | | addic. TMP1, RD, -8 | 331 | | addic. TMP1, RD, -8 |
| 332 | | stw TMP2, L->base | 332 | | stw TMP2, L->base |
| @@ -341,7 +341,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 341 | | addi RA, RA, 8 | 341 | | addi RA, RA, 8 |
| 342 | | stfd f0, 0(BASE) | 342 | | stfd f0, 0(BASE) |
| 343 | | addi BASE, BASE, 8 | 343 | | addi BASE, BASE, 8 |
| 344 | | bne <1 | 344 | | bney <1 |
| 345 | | | 345 | | |
| 346 | |2: | 346 | |2: |
| 347 | | cmpw TMP2, RD // More/less results wanted? | 347 | | cmpw TMP2, RD // More/less results wanted? |
| @@ -627,7 +627,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 627 | | sub CARG3, CARG2, TMP1 | 627 | | sub CARG3, CARG2, TMP1 |
| 628 | | decode_RA8 RA, INS | 628 | | decode_RA8 RA, INS |
| 629 | | stfd f0, 0(CARG2) | 629 | | stfd f0, 0(CARG2) |
| 630 | | bne ->BC_CAT_Z | 630 | | bney ->BC_CAT_Z |
| 631 | | stfdx f0, BASE, RA | 631 | | stfdx f0, BASE, RA |
| 632 | | b ->cont_nop | 632 | | b ->cont_nop |
| 633 | | | 633 | | |
| @@ -962,7 +962,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 962 | #endif | 962 | #endif |
| 963 | | decode_RD8 RD, SAVE0 | 963 | | decode_RD8 RD, SAVE0 |
| 964 | #if LJ_HASJIT | 964 | #if LJ_HASJIT |
| 965 | | beq =>BC_JFORI | 965 | | beqy =>BC_JFORI |
| 966 | #endif | 966 | #endif |
| 967 | | b =>BC_FORI | 967 | | b =>BC_FORI |
| 968 | | | 968 | | |
| @@ -1040,7 +1040,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1040 | | lfdx f0, BASE, TMP1 | 1040 | | lfdx f0, BASE, TMP1 |
| 1041 | | stfdx f0, RA, TMP1 | 1041 | | stfdx f0, RA, TMP1 |
| 1042 | | addi TMP1, TMP1, 8 | 1042 | | addi TMP1, TMP1, 8 |
| 1043 | | bne <1 | 1043 | | bney <1 |
| 1044 | | b ->fff_res | 1044 | | b ->fff_res |
| 1045 | | | 1045 | | |
| 1046 | |.ffunc type | 1046 | |.ffunc type |
| @@ -1513,7 +1513,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1513 | | // RA = results, RD = (nresults+1)*8, PC = return. | 1513 | | // RA = results, RD = (nresults+1)*8, PC = return. |
| 1514 | | andi. TMP0, PC, FRAME_TYPE | 1514 | | andi. TMP0, PC, FRAME_TYPE |
| 1515 | | mr MULTRES, RD | 1515 | | mr MULTRES, RD |
| 1516 | | bne ->vm_return | 1516 | | bney ->vm_return |
| 1517 | | lwz INS, -4(PC) | 1517 | | lwz INS, -4(PC) |
| 1518 | | decode_RB8 RB, INS | 1518 | | decode_RB8 RB, INS |
| 1519 | |5: | 1519 | |5: |
| @@ -1545,7 +1545,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1545 | | | 1545 | | |
| 1546 | |.macro math_round, func | 1546 | |.macro math_round, func |
| 1547 | | .ffunc_1 math_ .. func | 1547 | | .ffunc_1 math_ .. func |
| 1548 | | checknum CARG3; beq ->fff_restv | 1548 | | checknum CARG3; beqy ->fff_restv |
| 1549 | | rlwinm TMP2, CARG3, 12, 21, 31 | 1549 | | rlwinm TMP2, CARG3, 12, 21, 31 |
| 1550 | | bge ->fff_fallback | 1550 | | bge ->fff_fallback |
| 1551 | | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 | 1551 | | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 |
| @@ -1580,7 +1580,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1580 | | sub CARG1, CARG1, TMP2 | 1580 | | sub CARG1, CARG1, TMP2 |
| 1581 | | bns ->fff_resi | 1581 | | bns ->fff_resi |
| 1582 | | // Potential overflow. | 1582 | | // Potential overflow. |
| 1583 | | mcrxr cr0; ble ->fff_resi // Ignore unrelated overflow. | 1583 | | mcrxr cr0; bley ->fff_resi // Ignore unrelated overflow. |
| 1584 | | lus CARG3, 0x41e0 // 2^31. | 1584 | | lus CARG3, 0x41e0 // 2^31. |
| 1585 | | li CARG1, 0 | 1585 | | li CARG1, 0 |
| 1586 | | b ->fff_restv | 1586 | | b ->fff_restv |
| @@ -1608,7 +1608,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1608 | | orc. TMP1, TMP1, TMP2 | 1608 | | orc. TMP1, TMP1, TMP2 |
| 1609 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | 1609 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq |
| 1610 | | lus CARG1, 0x8000 // -(2^31). | 1610 | | lus CARG1, 0x8000 // -(2^31). |
| 1611 | | beq ->fff_resi | 1611 | | beqy ->fff_resi |
| 1612 | |5: | 1612 | |5: |
| 1613 | | lfd FARG1, 0(BASE) | 1613 | | lfd FARG1, 0(BASE) |
| 1614 | | bl extern func | 1614 | | bl extern func |
| @@ -1792,7 +1792,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1792 | | lwz PC, FRAME_PC(BASE) | 1792 | | lwz PC, FRAME_PC(BASE) |
| 1793 | | cmplwi TMP0, 0 | 1793 | | cmplwi TMP0, 0 |
| 1794 | | la RA, -8(BASE) | 1794 | | la RA, -8(BASE) |
| 1795 | | beq ->fff_res | 1795 | | beqy ->fff_res |
| 1796 | | b ->fff_resi | 1796 | | b ->fff_resi |
| 1797 | } else { | 1797 | } else { |
| 1798 | | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | 1798 | | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). |
| @@ -1965,7 +1965,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1965 | |1: // Reverse string copy. | 1965 | |1: // Reverse string copy. |
| 1966 | | cmpwi TMP3, 0 | 1966 | | cmpwi TMP3, 0 |
| 1967 | | lbzx TMP1, CARG1, TMP2 | 1967 | | lbzx TMP1, CARG1, TMP2 |
| 1968 | | blt ->fff_newstr | 1968 | | blty ->fff_newstr |
| 1969 | | stbx TMP1, CARG2, TMP3 | 1969 | | stbx TMP1, CARG2, TMP3 |
| 1970 | | subi TMP3, TMP3, 1 | 1970 | | subi TMP3, TMP3, 1 |
| 1971 | | addi TMP2, TMP2, 1 | 1971 | | addi TMP2, TMP2, 1 |
| @@ -1990,7 +1990,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 1990 | |1: // ASCII case conversion. | 1990 | |1: // ASCII case conversion. |
| 1991 | | cmplw TMP2, CARG3 | 1991 | | cmplw TMP2, CARG3 |
| 1992 | | lbzx TMP1, CARG1, TMP2 | 1992 | | lbzx TMP1, CARG1, TMP2 |
| 1993 | | bge ->fff_newstr | 1993 | | bgey ->fff_newstr |
| 1994 | | subi TMP0, TMP1, lo | 1994 | | subi TMP0, TMP1, lo |
| 1995 | | xori TMP3, TMP1, 0x20 | 1995 | | xori TMP3, TMP1, 0x20 |
| 1996 | | addic TMP0, TMP0, -26 | 1996 | | addic TMP0, TMP0, -26 |
| @@ -2039,7 +2039,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2039 | ||} else { | 2039 | ||} else { |
| 2040 | | lfd FARG1, 0(TMP1) | 2040 | | lfd FARG1, 0(TMP1) |
| 2041 | ||} | 2041 | ||} |
| 2042 | | bge cr1, ->fff_resi | 2042 | | bgey cr1, ->fff_resi |
| 2043 | | checknum CARG4 | 2043 | | checknum CARG4 |
| 2044 | ||if (LJ_DUALNUM) { | 2044 | ||if (LJ_DUALNUM) { |
| 2045 | | bnel ->fff_bitop_fb | 2045 | | bnel ->fff_bitop_fb |
| @@ -2400,7 +2400,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2400 | | li TMP3, 0 | 2400 | | li TMP3, 0 |
| 2401 | | la TMP1, CCSTATE->stack | 2401 | | la TMP1, CCSTATE->stack |
| 2402 | | slwi CARG2, CARG2, 2 | 2402 | | slwi CARG2, CARG2, 2 |
| 2403 | | blt >2 | 2403 | | blty >2 |
| 2404 | | la TMP2, 8(sp) | 2404 | | la TMP2, 8(sp) |
| 2405 | |1: | 2405 | |1: |
| 2406 | | lwzx TMP0, TMP1, CARG2 | 2406 | | lwzx TMP0, TMP1, CARG2 |
| @@ -2408,7 +2408,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
| 2408 | | addic. CARG2, CARG2, -4 | 2408 | | addic. CARG2, CARG2, -4 |
| 2409 | | bge <1 | 2409 | | bge <1 |
| 2410 | |2: | 2410 | |2: |
| 2411 | | bne cr1, >3 | 2411 | | bney cr1, >3 |
| 2412 | | lfd f1, CCSTATE->fpr[0] | 2412 | | lfd f1, CCSTATE->fpr[0] |
| 2413 | | lfd f2, CCSTATE->fpr[1] | 2413 | | lfd f2, CCSTATE->fpr[1] |
| 2414 | | lfd f3, CCSTATE->fpr[2] | 2414 | | lfd f3, CCSTATE->fpr[2] |
| @@ -2863,7 +2863,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 2863 | |3: | 2863 | |3: |
| 2864 | | ins_next2 | 2864 | | ins_next2 |
| 2865 | |4: // Potential overflow. | 2865 | |4: // Potential overflow. |
| 2866 | | mcrxr cr0; ble <1 // Ignore unrelated overflow. | 2866 | | mcrxr cr0; bley <1 // Ignore unrelated overflow. |
| 2867 | | lus TMP1, 0x41e0 // 2^31. | 2867 | | lus TMP1, 0x41e0 // 2^31. |
| 2868 | | li TMP0, 0 | 2868 | | li TMP0, 0 |
| 2869 | | b >7 | 2869 | | b >7 |
| @@ -3047,7 +3047,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3047 | |2: | 3047 | |2: |
| 3048 | | ins_next2 | 3048 | | ins_next2 |
| 3049 | |4: // Overflow. | 3049 | |4: // Overflow. |
| 3050 | | mcrxr cr0; ble <1 // Ignore unrelated overflow. | 3050 | | mcrxr cr0; bley <1 // Ignore unrelated overflow. |
| 3051 | | ins_arithfallback b | 3051 | | ins_arithfallback b |
| 3052 | |5: // FP variant. | 3052 | |5: // FP variant. |
| 3053 | ||if (vk == 1) { | 3053 | ||if (vk == 1) { |
| @@ -3924,7 +3924,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 3924 | |6: | 3924 | |6: |
| 3925 | | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. | 3925 | | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. |
| 3926 | | slwi TMP3, RC, 5 | 3926 | | slwi TMP3, RC, 5 |
| 3927 | | bgt <3 | 3927 | | bgty <3 |
| 3928 | | slwi RB, RC, 3 | 3928 | | slwi RB, RC, 3 |
| 3929 | | sub TMP3, TMP3, RB | 3929 | | sub TMP3, TMP3, RB |
| 3930 | | lwzx RB, TMP2, TMP3 | 3930 | | lwzx RB, TMP2, TMP3 |
| @@ -4015,7 +4015,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4015 | |5: // Copy all varargs. | 4015 | |5: // Copy all varargs. |
| 4016 | | lwz TMP0, L->maxstack | 4016 | | lwz TMP0, L->maxstack |
| 4017 | | li MULTRES, 8 // MULTRES = (0+1)*8 | 4017 | | li MULTRES, 8 // MULTRES = (0+1)*8 |
| 4018 | | ble <3 // No vararg slots? | 4018 | | bley <3 // No vararg slots? |
| 4019 | | add TMP2, RA, TMP1 | 4019 | | add TMP2, RA, TMP1 |
| 4020 | | cmplw TMP2, TMP0 | 4020 | | cmplw TMP2, TMP0 |
| 4021 | | addi MULTRES, TMP1, 8 | 4021 | | addi MULTRES, TMP1, 8 |
| @@ -4117,7 +4117,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4117 | | mr MULTRES, RD | 4117 | | mr MULTRES, RD |
| 4118 | | andi. TMP0, PC, FRAME_TYPE | 4118 | | andi. TMP0, PC, FRAME_TYPE |
| 4119 | | xori TMP1, PC, FRAME_VARG | 4119 | | xori TMP1, PC, FRAME_VARG |
| 4120 | | bne ->BC_RETV_Z | 4120 | | bney ->BC_RETV_Z |
| 4121 | | | 4121 | | |
| 4122 | | lwz INS, -4(PC) | 4122 | | lwz INS, -4(PC) |
| 4123 | | subi TMP2, BASE, 8 | 4123 | | subi TMP2, BASE, 8 |
| @@ -4217,7 +4217,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4217 | | b <1 | 4217 | | b <1 |
| 4218 | if (vk) { | 4218 | if (vk) { |
| 4219 | |6: // Potential overflow. | 4219 | |6: // Potential overflow. |
| 4220 | | mcrxr cr0; ble <4 // Ignore unrelated overflow. | 4220 | | mcrxr cr0; bley <4 // Ignore unrelated overflow. |
| 4221 | | b <2 | 4221 | | b <2 |
| 4222 | } | 4222 | } |
| 4223 | } | 4223 | } |
| @@ -4268,7 +4268,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4268 | | bgt >3 | 4268 | | bgt >3 |
| 4269 | } else if (op == BC_IFORL) { | 4269 | } else if (op == BC_IFORL) { |
| 4270 | if (LJ_DUALNUM) { | 4270 | if (LJ_DUALNUM) { |
| 4271 | | bgt <2 | 4271 | | bgty <2 |
| 4272 | } else { | 4272 | } else { |
| 4273 | | bgt >2 | 4273 | | bgt >2 |
| 4274 | } | 4274 | } |
| @@ -4289,7 +4289,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
| 4289 | |3: // Used by integer loop, too. | 4289 | |3: // Used by integer loop, too. |
| 4290 | | addis PC, RD, -(BCBIAS_J*4 >> 16) | 4290 | | addis PC, RD, -(BCBIAS_J*4 >> 16) |
| 4291 | } else if (op == BC_IFORL) { | 4291 | } else if (op == BC_IFORL) { |
| 4292 | | bge <1 | 4292 | | bgey <1 |
| 4293 | } else { | 4293 | } else { |
| 4294 | | bge =>BC_JLOOP | 4294 | | bge =>BC_JLOOP |
| 4295 | } | 4295 | } |
