diff options
author | Mike Pall <mike> | 2011-08-10 20:28:14 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2011-08-10 20:28:14 +0200 |
commit | 28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1 (patch) | |
tree | 5973d3310b82364962d5cde12af971ed93dde918 /src | |
parent | f333dfd17d65833d4b9d9aefec07b06bb0683d91 (diff) | |
download | luajit-28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1.tar.gz luajit-28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1.tar.bz2 luajit-28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1.zip |
PPC: Tune and reschedule interpreter for PPC/e300.
Diffstat (limited to 'src')
-rw-r--r-- | src/buildvm_ppc.dasc | 68 |
1 files changed, 27 insertions, 41 deletions
diff --git a/src/buildvm_ppc.dasc b/src/buildvm_ppc.dasc
index 892dccbf..8fb77489 100644
--- a/src/buildvm_ppc.dasc
+++ b/src/buildvm_ppc.dasc
@@ -183,15 +183,15 @@ | |||
183 | | lwz INS, 0(PC) | 183 | | lwz INS, 0(PC) |
184 | | addi PC, PC, 4 | 184 | | addi PC, PC, 4 |
185 | |.endmacro | 185 | |.endmacro |
186 | |// Instruction decode+dispatch. | 186 | |// Instruction decode+dispatch. Note: optimized for e300! |
187 | |.macro ins_NEXT2 | 187 | |.macro ins_NEXT2 |
188 | | decode_OP4 TMP1, INS | 188 | | decode_OP4 TMP1, INS |
189 | | lwzx TMP0, DISPATCH, TMP1 | ||
190 | | mtctr TMP0 | ||
189 | | decode_RB8 RB, INS | 191 | | decode_RB8 RB, INS |
190 | | decode_RD8 RD, INS | 192 | | decode_RD8 RD, INS |
191 | | lwzx TMP0, DISPATCH, TMP1 | ||
192 | | decode_RA8 RA, INS | 193 | | decode_RA8 RA, INS |
193 | | decode_RC8 RC, INS | 194 | | decode_RC8 RC, INS |
194 | | mtctr TMP0 | ||
195 | | bctr | 195 | | bctr |
196 | |.endmacro | 196 | |.endmacro |
197 | |.macro ins_NEXT | 197 | |.macro ins_NEXT |
@@ -255,8 +255,8 @@ | |||
255 | | | 255 | | |
256 | |.macro branch_RD | 256 | |.macro branch_RD |
257 | | srwi TMP0, RD, 1 | 257 | | srwi TMP0, RD, 1 |
258 | | add PC, PC, TMP0 | ||
259 | | addis PC, PC, -(BCBIAS_J*4 >> 16) | 258 | | addis PC, PC, -(BCBIAS_J*4 >> 16) |
259 | | add PC, PC, TMP0 | ||
260 | |.endmacro | 260 | |.endmacro |
261 | | | 261 | | |
262 | |// Assumes DISPATCH is relative to GL. | 262 | |// Assumes DISPATCH is relative to GL. |
@@ -2983,14 +2983,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
2983 | |.endmacro | 2983 | |.endmacro |
2984 | | | 2984 | | |
2985 | |.macro intmod, a, b, c | 2985 | |.macro intmod, a, b, c |
2986 | |->BC_MODVNI_Z: | ||
2987 | | bl ->vm_modi | 2986 | | bl ->vm_modi |
2988 | |.endmacro | 2987 | |.endmacro |
2989 | | | 2988 | | |
2990 | |.macro fpmod, a, b, c | 2989 | |.macro fpmod, a, b, c |
2991 | ||if (!LJ_DUALNUM) { | ||
2992 | |->BC_MODVNI_Z: | ||
2993 | ||} | ||
2994 | |->BC_MODVN_Z: | 2990 | |->BC_MODVN_Z: |
2995 | | fdiv FARG1, b, c | 2991 | | fdiv FARG1, b, c |
2996 | | // NYI: Use internal implementation of floor. | 2992 | | // NYI: Use internal implementation of floor. |
@@ -3038,11 +3034,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3038 | || break; | 3034 | || break; |
3039 | ||} | 3035 | ||} |
3040 | | checknum cr1, TMP2 | 3036 | | checknum cr1, TMP2 |
3041 | | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq | ||
3042 | | bne >5 | 3037 | | bne >5 |
3043 | |.if "intins" == "intmod_" | 3038 | | bne cr1, >5 |
3044 | | b ->BC_MODVNI_Z // Avoid 3 copies. It's slow anyway. | ||
3045 | |.else | ||
3046 | | intins CARG1, CARG1, CARG2 | 3039 | | intins CARG1, CARG1, CARG2 |
3047 | | bso >4 | 3040 | | bso >4 |
3048 | |1: | 3041 | |1: |
@@ -3054,7 +3047,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3054 | |4: // Overflow. | 3047 | |4: // Overflow. |
3055 | | mcrxr cr0; ble <1 // Ignore unrelated overflow. | 3048 | | mcrxr cr0; ble <1 // Ignore unrelated overflow. |
3056 | | ins_arithfallback b | 3049 | | ins_arithfallback b |
3057 | |.endif | ||
3058 | |5: // FP variant. | 3050 | |5: // FP variant. |
3059 | ||if (vk == 1) { | 3051 | ||if (vk == 1) { |
3060 | | lfd f15, 0(RB) | 3052 | | lfd f15, 0(RB) |
@@ -3100,7 +3092,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3100 | | ins_arith intmod, fpmod | 3092 | | ins_arith intmod, fpmod |
3101 | break; | 3093 | break; |
3102 | case BC_MODNV: case BC_MODVV: | 3094 | case BC_MODNV: case BC_MODVV: |
3103 | | ins_arith intmod_, fpmod_ | 3095 | | ins_arith intmod, fpmod_ |
3104 | break; | 3096 | break; |
3105 | case BC_POW: | 3097 | case BC_POW: |
3106 | | // NYI: (partial) integer arithmetic. | 3098 | | // NYI: (partial) integer arithmetic. |
@@ -3113,8 +3105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3113 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | 3105 | | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt |
3114 | | bge ->vmeta_arith_vv | 3106 | | bge ->vmeta_arith_vv |
3115 | | bl extern pow | 3107 | | bl extern pow |
3108 | | ins_next1 | ||
3116 | | stfdx FARG1, BASE, RA | 3109 | | stfdx FARG1, BASE, RA |
3117 | | ins_next | 3110 | | ins_next2 |
3118 | break; | 3111 | break; |
3119 | 3112 | ||
3120 | case BC_CAT: | 3113 | case BC_CAT: |
@@ -3132,9 +3125,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3132 | | cmplwi CRET1, 0 | 3125 | | cmplwi CRET1, 0 |
3133 | | lwz BASE, L->base | 3126 | | lwz BASE, L->base |
3134 | | bne ->vmeta_binop | 3127 | | bne ->vmeta_binop |
3128 | | ins_next1 | ||
3135 | | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. | 3129 | | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. |
3136 | | stfdx f0, BASE, RA | 3130 | | stfdx f0, BASE, RA |
3137 | | ins_next | 3131 | | ins_next2 |
3138 | break; | 3132 | break; |
3139 | 3133 | ||
3140 | /* -- Constant ops ------------------------------------------------------ */ | 3134 | /* -- Constant ops ------------------------------------------------------ */ |
@@ -3143,9 +3137,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3143 | | // RA = dst*8, RD = str_const*8 (~) | 3137 | | // RA = dst*8, RD = str_const*8 (~) |
3144 | | srwi TMP1, RD, 1 | 3138 | | srwi TMP1, RD, 1 |
3145 | | subfic TMP1, TMP1, -4 | 3139 | | subfic TMP1, TMP1, -4 |
3140 | | ins_next1 | ||
3146 | | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 | 3141 | | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 |
3147 | | li TMP2, LJ_TSTR | 3142 | | li TMP2, LJ_TSTR |
3148 | | ins_next1 | ||
3149 | | stwux TMP2, RA, BASE | 3143 | | stwux TMP2, RA, BASE |
3150 | | stw TMP0, 4(RA) | 3144 | | stw TMP0, 4(RA) |
3151 | | ins_next2 | 3145 | | ins_next2 |
@@ -3155,9 +3149,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3155 | | // RA = dst*8, RD = cdata_const*8 (~) | 3149 | | // RA = dst*8, RD = cdata_const*8 (~) |
3156 | | srwi TMP1, RD, 1 | 3150 | | srwi TMP1, RD, 1 |
3157 | | subfic TMP1, TMP1, -4 | 3151 | | subfic TMP1, TMP1, -4 |
3152 | | ins_next1 | ||
3158 | | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 | 3153 | | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 |
3159 | | li TMP2, LJ_TCDATA | 3154 | | li TMP2, LJ_TCDATA |
3160 | | ins_next1 | ||
3161 | | stwux TMP2, RA, BASE | 3155 | | stwux TMP2, RA, BASE |
3162 | | stw TMP0, 4(RA) | 3156 | | stw TMP0, 4(RA) |
3163 | | ins_next2 | 3157 | | ins_next2 |
@@ -3173,21 +3167,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3173 | | stw RD, 4(RA) | 3167 | | stw RD, 4(RA) |
3174 | | ins_next2 | 3168 | | ins_next2 |
3175 | } else { | 3169 | } else { |
3176 | | // NYI: which approach is faster? | 3170 | | // The soft-float approach is faster. |
3177 | |.if 1 | ||
3178 | | slwi RD, RD, 13 | ||
3179 | | srawi RD, RD, 16 | ||
3180 | | tonum_i f0, RD | ||
3181 | | ins_next1 | ||
3182 | | stfdx f0, BASE, RA | ||
3183 | | ins_next2 | ||
3184 | |.else | ||
3185 | | slwi RD, RD, 13 | 3171 | | slwi RD, RD, 13 |
3186 | | srawi TMP1, RD, 31 | 3172 | | srawi TMP1, RD, 31 |
3187 | | xor TMP2, TMP1, RD | 3173 | | xor TMP2, TMP1, RD |
3188 | | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) | 3174 | | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) |
3189 | | cntlzw TMP3, TMP2 | 3175 | | cntlzw TMP3, TMP2 |
3190 | | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 | 3176 | | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 |
3191 | | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa | 3177 | | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa |
3192 | | subfic TMP3, RD, 0 | 3178 | | subfic TMP3, RD, 0 |
3193 | | slwi TMP1, TMP1, 20 | 3179 | | slwi TMP1, TMP1, 20 |
@@ -3199,13 +3185,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3199 | | stwux RD, RA, BASE | 3185 | | stwux RD, RA, BASE |
3200 | | stw ZERO, 4(RA) | 3186 | | stw ZERO, 4(RA) |
3201 | | ins_next2 | 3187 | | ins_next2 |
3202 | |.endif | ||
3203 | } | 3188 | } |
3204 | break; | 3189 | break; |
3205 | case BC_KNUM: | 3190 | case BC_KNUM: |
3206 | | // RA = dst*8, RD = num_const*8 | 3191 | | // RA = dst*8, RD = num_const*8 |
3207 | | lfdx f0, KBASE, RD | ||
3208 | | ins_next1 | 3192 | | ins_next1 |
3193 | | lfdx f0, KBASE, RD | ||
3209 | | stfdx f0, BASE, RA | 3194 | | stfdx f0, BASE, RA |
3210 | | ins_next2 | 3195 | | ins_next2 |
3211 | break; | 3196 | break; |
@@ -3233,11 +3218,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3233 | 3218 | ||
3234 | case BC_UGET: | 3219 | case BC_UGET: |
3235 | | // RA = dst*8, RD = uvnum*8 | 3220 | | // RA = dst*8, RD = uvnum*8 |
3236 | | ins_next1 | ||
3237 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 3221 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3238 | | srwi RD, RD, 1 | 3222 | | srwi RD, RD, 1 |
3239 | | addi RD, RD, offsetof(GCfuncL, uvptr) | 3223 | | addi RD, RD, offsetof(GCfuncL, uvptr) |
3240 | | lwzx UPVAL:RB, LFUNC:RB, RD | 3224 | | lwzx UPVAL:RB, LFUNC:RB, RD |
3225 | | ins_next1 | ||
3241 | | lwz TMP1, UPVAL:RB->v | 3226 | | lwz TMP1, UPVAL:RB->v |
3242 | | lfd f0, 0(TMP1) | 3227 | | lfd f0, 0(TMP1) |
3243 | | stfdx f0, BASE, RA | 3228 | | stfdx f0, BASE, RA |
@@ -3250,6 +3235,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3250 | | addi RA, RA, offsetof(GCfuncL, uvptr) | 3235 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
3251 | | lfdux f0, RD, BASE | 3236 | | lfdux f0, RD, BASE |
3252 | | lwzx UPVAL:RB, LFUNC:RB, RA | 3237 | | lwzx UPVAL:RB, LFUNC:RB, RA |
3238 | | ins_next1 | ||
3253 | | lbz TMP3, UPVAL:RB->marked | 3239 | | lbz TMP3, UPVAL:RB->marked |
3254 | | lwz CARG2, UPVAL:RB->v | 3240 | | lwz CARG2, UPVAL:RB->v |
3255 | | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | 3241 | | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) |
@@ -3262,7 +3248,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3262 | | subi TMP2, TMP2, (LJ_TISNUM+1) | 3248 | | subi TMP2, TMP2, (LJ_TISNUM+1) |
3263 | | bne >2 // Upvalue is closed and black? | 3249 | | bne >2 // Upvalue is closed and black? |
3264 | |1: | 3250 | |1: |
3265 | | ins_next | 3251 | | ins_next2 |
3266 | | | 3252 | | |
3267 | |2: // Check if new value is collectable. | 3253 | |2: // Check if new value is collectable. |
3268 | | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1) | 3254 | | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1) |
@@ -3277,7 +3263,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3277 | break; | 3263 | break; |
3278 | case BC_USETS: | 3264 | case BC_USETS: |
3279 | | // RA = uvnum*8, RD = str_const*8 (~) | 3265 | | // RA = uvnum*8, RD = str_const*8 (~) |
3280 | | ins_next1 | ||
3281 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 3266 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3282 | | srwi TMP1, RD, 1 | 3267 | | srwi TMP1, RD, 1 |
3283 | | srwi RA, RA, 1 | 3268 | | srwi RA, RA, 1 |
@@ -3285,6 +3270,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3285 | | addi RA, RA, offsetof(GCfuncL, uvptr) | 3270 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
3286 | | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 | 3271 | | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 |
3287 | | lwzx UPVAL:RB, LFUNC:RB, RA | 3272 | | lwzx UPVAL:RB, LFUNC:RB, RA |
3273 | | ins_next1 | ||
3288 | | lbz TMP3, UPVAL:RB->marked | 3274 | | lbz TMP3, UPVAL:RB->marked |
3289 | | lwz CARG2, UPVAL:RB->v | 3275 | | lwz CARG2, UPVAL:RB->v |
3290 | | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | 3276 | | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) |
@@ -3309,25 +3295,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3309 | break; | 3295 | break; |
3310 | case BC_USETN: | 3296 | case BC_USETN: |
3311 | | // RA = uvnum*8, RD = num_const*8 | 3297 | | // RA = uvnum*8, RD = num_const*8 |
3312 | | ins_next1 | ||
3313 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 3298 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3314 | | srwi RA, RA, 1 | 3299 | | srwi RA, RA, 1 |
3315 | | addi RA, RA, offsetof(GCfuncL, uvptr) | 3300 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
3316 | | lfdx f0, KBASE, RD | 3301 | | lfdx f0, KBASE, RD |
3317 | | lwzx UPVAL:RB, LFUNC:RB, RA | 3302 | | lwzx UPVAL:RB, LFUNC:RB, RA |
3303 | | ins_next1 | ||
3318 | | lwz TMP1, UPVAL:RB->v | 3304 | | lwz TMP1, UPVAL:RB->v |
3319 | | stfd f0, 0(TMP1) | 3305 | | stfd f0, 0(TMP1) |
3320 | | ins_next2 | 3306 | | ins_next2 |
3321 | break; | 3307 | break; |
3322 | case BC_USETP: | 3308 | case BC_USETP: |
3323 | | // RA = uvnum*8, RD = primitive_type*8 (~) | 3309 | | // RA = uvnum*8, RD = primitive_type*8 (~) |
3324 | | ins_next1 | ||
3325 | | lwz LFUNC:RB, FRAME_FUNC(BASE) | 3310 | | lwz LFUNC:RB, FRAME_FUNC(BASE) |
3326 | | srwi RA, RA, 1 | 3311 | | srwi RA, RA, 1 |
3327 | | addi RA, RA, offsetof(GCfuncL, uvptr) | ||
3328 | | srwi TMP0, RD, 3 | 3312 | | srwi TMP0, RD, 3 |
3329 | | lwzx UPVAL:RB, LFUNC:RB, RA | 3313 | | addi RA, RA, offsetof(GCfuncL, uvptr) |
3330 | | not TMP0, TMP0 | 3314 | | not TMP0, TMP0 |
3315 | | lwzx UPVAL:RB, LFUNC:RB, RA | ||
3316 | | ins_next1 | ||
3331 | | lwz TMP1, UPVAL:RB->v | 3317 | | lwz TMP1, UPVAL:RB->v |
3332 | | stw TMP0, 0(TMP1) | 3318 | | stw TMP0, 0(TMP1) |
3333 | | ins_next2 | 3319 | | ins_next2 |
@@ -3538,8 +3524,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3538 | case BC_TGETB: | 3524 | case BC_TGETB: |
3539 | | // RA = dst*8, RB = table*8, RC = index*8 | 3525 | | // RA = dst*8, RB = table*8, RC = index*8 |
3540 | | lwzux CARG1, RB, BASE | 3526 | | lwzux CARG1, RB, BASE |
3541 | | lwz TAB:RB, 4(RB) | ||
3542 | | srwi TMP0, RC, 3 | 3527 | | srwi TMP0, RC, 3 |
3528 | | lwz TAB:RB, 4(RB) | ||
3543 | | checktab CARG1; bne ->vmeta_tgetb | 3529 | | checktab CARG1; bne ->vmeta_tgetb |
3544 | | lwz TMP1, TAB:RB->asize | 3530 | | lwz TMP1, TAB:RB->asize |
3545 | | lwz TMP2, TAB:RB->array | 3531 | | lwz TMP2, TAB:RB->array |
@@ -3717,8 +3703,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3717 | case BC_TSETB: | 3703 | case BC_TSETB: |
3718 | | // RA = src*8, RB = table*8, RC = index*8 | 3704 | | // RA = src*8, RB = table*8, RC = index*8 |
3719 | | lwzux CARG1, RB, BASE | 3705 | | lwzux CARG1, RB, BASE |
3720 | | lwz TAB:RB, 4(RB) | ||
3721 | | srwi TMP0, RC, 3 | 3706 | | srwi TMP0, RC, 3 |
3707 | | lwz TAB:RB, 4(RB) | ||
3722 | | checktab CARG1; bne ->vmeta_tsetb | 3708 | | checktab CARG1; bne ->vmeta_tsetb |
3723 | | lwz TMP1, TAB:RB->asize | 3709 | | lwz TMP1, TAB:RB->asize |
3724 | | lwz TMP2, TAB:RB->array | 3710 | | lwz TMP2, TAB:RB->array |
@@ -4470,9 +4456,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
4470 | | st_vmstate | 4456 | | st_vmstate |
4471 | | bctrl // (lua_State *L [, lua_CFunction f]) | 4457 | | bctrl // (lua_State *L [, lua_CFunction f]) |
4472 | | // Returns nresults. | 4458 | | // Returns nresults. |
4473 | | lwz TMP1, L->top | ||
4474 | | slwi RD, CRET1, 3 | ||
4475 | | lwz BASE, L->base | 4459 | | lwz BASE, L->base |
4460 | | slwi RD, CRET1, 3 | ||
4461 | | lwz TMP1, L->top | ||
4476 | | li_vmstate INTERP | 4462 | | li_vmstate INTERP |
4477 | | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. | 4463 | | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. |
4478 | | sub RA, TMP1, RD // RA = L->top - nresults*8 | 4464 | | sub RA, TMP1, RD // RA = L->top - nresults*8 |