aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Mike Pall <mike> 2011-08-10 20:28:14 +0200
committer Mike Pall <mike> 2011-08-10 20:28:14 +0200
commit 28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1 (patch)
tree 5973d3310b82364962d5cde12af971ed93dde918 /src
parent f333dfd17d65833d4b9d9aefec07b06bb0683d91 (diff)
download luajit-28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1.tar.gz
luajit-28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1.tar.bz2
luajit-28b98acd757bcf4eaa4a8eb9b4a921e0d0c34bf1.zip
PPC: Tune and reschedule interpreter for PPC/e300.
Diffstat (limited to 'src')
-rw-r--r-- src/buildvm_ppc.dasc 68
1 files changed, 27 insertions, 41 deletions
diff --git a/src/buildvm_ppc.dasc b/src/buildvm_ppc.dasc
index 892dccbf..8fb77489 100644
--- a/src/buildvm_ppc.dasc
+++ b/src/buildvm_ppc.dasc
@@ -183,15 +183,15 @@
183| lwz INS, 0(PC) 183| lwz INS, 0(PC)
184| addi PC, PC, 4 184| addi PC, PC, 4
185|.endmacro 185|.endmacro
186|// Instruction decode+dispatch. 186|// Instruction decode+dispatch. Note: optimized for e300!
187|.macro ins_NEXT2 187|.macro ins_NEXT2
188| decode_OP4 TMP1, INS 188| decode_OP4 TMP1, INS
189| lwzx TMP0, DISPATCH, TMP1
190| mtctr TMP0
189| decode_RB8 RB, INS 191| decode_RB8 RB, INS
190| decode_RD8 RD, INS 192| decode_RD8 RD, INS
191| lwzx TMP0, DISPATCH, TMP1
192| decode_RA8 RA, INS 193| decode_RA8 RA, INS
193| decode_RC8 RC, INS 194| decode_RC8 RC, INS
194| mtctr TMP0
195| bctr 195| bctr
196|.endmacro 196|.endmacro
197|.macro ins_NEXT 197|.macro ins_NEXT
@@ -255,8 +255,8 @@
255| 255|
256|.macro branch_RD 256|.macro branch_RD
257| srwi TMP0, RD, 1 257| srwi TMP0, RD, 1
258| add PC, PC, TMP0
259| addis PC, PC, -(BCBIAS_J*4 >> 16) 258| addis PC, PC, -(BCBIAS_J*4 >> 16)
259| add PC, PC, TMP0
260|.endmacro 260|.endmacro
261| 261|
262|// Assumes DISPATCH is relative to GL. 262|// Assumes DISPATCH is relative to GL.
@@ -2983,14 +2983,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2983 |.endmacro 2983 |.endmacro
2984 | 2984 |
2985 |.macro intmod, a, b, c 2985 |.macro intmod, a, b, c
2986 |->BC_MODVNI_Z:
2987 | bl ->vm_modi 2986 | bl ->vm_modi
2988 |.endmacro 2987 |.endmacro
2989 | 2988 |
2990 |.macro fpmod, a, b, c 2989 |.macro fpmod, a, b, c
2991 ||if (!LJ_DUALNUM) {
2992 |->BC_MODVNI_Z:
2993 ||}
2994 |->BC_MODVN_Z: 2990 |->BC_MODVN_Z:
2995 | fdiv FARG1, b, c 2991 | fdiv FARG1, b, c
2996 | // NYI: Use internal implementation of floor. 2992 | // NYI: Use internal implementation of floor.
@@ -3038,11 +3034,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3038 || break; 3034 || break;
3039 ||} 3035 ||}
3040 | checknum cr1, TMP2 3036 | checknum cr1, TMP2
3041 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
3042 | bne >5 3037 | bne >5
3043 |.if "intins" == "intmod_" 3038 | bne cr1, >5
3044 | b ->BC_MODVNI_Z // Avoid 3 copies. It's slow anyway.
3045 |.else
3046 | intins CARG1, CARG1, CARG2 3039 | intins CARG1, CARG1, CARG2
3047 | bso >4 3040 | bso >4
3048 |1: 3041 |1:
@@ -3054,7 +3047,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3054 |4: // Overflow. 3047 |4: // Overflow.
3055 | mcrxr cr0; ble <1 // Ignore unrelated overflow. 3048 | mcrxr cr0; ble <1 // Ignore unrelated overflow.
3056 | ins_arithfallback b 3049 | ins_arithfallback b
3057 |.endif
3058 |5: // FP variant. 3050 |5: // FP variant.
3059 ||if (vk == 1) { 3051 ||if (vk == 1) {
3060 | lfd f15, 0(RB) 3052 | lfd f15, 0(RB)
@@ -3100,7 +3092,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3100 | ins_arith intmod, fpmod 3092 | ins_arith intmod, fpmod
3101 break; 3093 break;
3102 case BC_MODNV: case BC_MODVV: 3094 case BC_MODNV: case BC_MODVV:
3103 | ins_arith intmod_, fpmod_ 3095 | ins_arith intmod, fpmod_
3104 break; 3096 break;
3105 case BC_POW: 3097 case BC_POW:
3106 | // NYI: (partial) integer arithmetic. 3098 | // NYI: (partial) integer arithmetic.
@@ -3113,8 +3105,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3113 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3105 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3114 | bge ->vmeta_arith_vv 3106 | bge ->vmeta_arith_vv
3115 | bl extern pow 3107 | bl extern pow
3108 | ins_next1
3116 | stfdx FARG1, BASE, RA 3109 | stfdx FARG1, BASE, RA
3117 | ins_next 3110 | ins_next2
3118 break; 3111 break;
3119 3112
3120 case BC_CAT: 3113 case BC_CAT:
@@ -3132,9 +3125,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3132 | cmplwi CRET1, 0 3125 | cmplwi CRET1, 0
3133 | lwz BASE, L->base 3126 | lwz BASE, L->base
3134 | bne ->vmeta_binop 3127 | bne ->vmeta_binop
3128 | ins_next1
3135 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 3129 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
3136 | stfdx f0, BASE, RA 3130 | stfdx f0, BASE, RA
3137 | ins_next 3131 | ins_next2
3138 break; 3132 break;
3139 3133
3140 /* -- Constant ops ------------------------------------------------------ */ 3134 /* -- Constant ops ------------------------------------------------------ */
@@ -3143,9 +3137,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3143 | // RA = dst*8, RD = str_const*8 (~) 3137 | // RA = dst*8, RD = str_const*8 (~)
3144 | srwi TMP1, RD, 1 3138 | srwi TMP1, RD, 1
3145 | subfic TMP1, TMP1, -4 3139 | subfic TMP1, TMP1, -4
3140 | ins_next1
3146 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 3141 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
3147 | li TMP2, LJ_TSTR 3142 | li TMP2, LJ_TSTR
3148 | ins_next1
3149 | stwux TMP2, RA, BASE 3143 | stwux TMP2, RA, BASE
3150 | stw TMP0, 4(RA) 3144 | stw TMP0, 4(RA)
3151 | ins_next2 3145 | ins_next2
@@ -3155,9 +3149,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3155 | // RA = dst*8, RD = cdata_const*8 (~) 3149 | // RA = dst*8, RD = cdata_const*8 (~)
3156 | srwi TMP1, RD, 1 3150 | srwi TMP1, RD, 1
3157 | subfic TMP1, TMP1, -4 3151 | subfic TMP1, TMP1, -4
3152 | ins_next1
3158 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 3153 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
3159 | li TMP2, LJ_TCDATA 3154 | li TMP2, LJ_TCDATA
3160 | ins_next1
3161 | stwux TMP2, RA, BASE 3155 | stwux TMP2, RA, BASE
3162 | stw TMP0, 4(RA) 3156 | stw TMP0, 4(RA)
3163 | ins_next2 3157 | ins_next2
@@ -3173,21 +3167,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3173 | stw RD, 4(RA) 3167 | stw RD, 4(RA)
3174 | ins_next2 3168 | ins_next2
3175 } else { 3169 } else {
3176 | // NYI: which approach is faster? 3170 | // The soft-float approach is faster.
3177 |.if 1
3178 | slwi RD, RD, 13
3179 | srawi RD, RD, 16
3180 | tonum_i f0, RD
3181 | ins_next1
3182 | stfdx f0, BASE, RA
3183 | ins_next2
3184 |.else
3185 | slwi RD, RD, 13 3171 | slwi RD, RD, 13
3186 | srawi TMP1, RD, 31 3172 | srawi TMP1, RD, 31
3187 | xor TMP2, TMP1, RD 3173 | xor TMP2, TMP1, RD
3188 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) 3174 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x)
3189 | cntlzw TMP3, TMP2 3175 | cntlzw TMP3, TMP2
3190 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 3176 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1
3191 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa 3177 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa
3192 | subfic TMP3, RD, 0 3178 | subfic TMP3, RD, 0
3193 | slwi TMP1, TMP1, 20 3179 | slwi TMP1, TMP1, 20
@@ -3199,13 +3185,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3199 | stwux RD, RA, BASE 3185 | stwux RD, RA, BASE
3200 | stw ZERO, 4(RA) 3186 | stw ZERO, 4(RA)
3201 | ins_next2 3187 | ins_next2
3202 |.endif
3203 } 3188 }
3204 break; 3189 break;
3205 case BC_KNUM: 3190 case BC_KNUM:
3206 | // RA = dst*8, RD = num_const*8 3191 | // RA = dst*8, RD = num_const*8
3207 | lfdx f0, KBASE, RD
3208 | ins_next1 3192 | ins_next1
3193 | lfdx f0, KBASE, RD
3209 | stfdx f0, BASE, RA 3194 | stfdx f0, BASE, RA
3210 | ins_next2 3195 | ins_next2
3211 break; 3196 break;
@@ -3233,11 +3218,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3233 3218
3234 case BC_UGET: 3219 case BC_UGET:
3235 | // RA = dst*8, RD = uvnum*8 3220 | // RA = dst*8, RD = uvnum*8
3236 | ins_next1
3237 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3221 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3238 | srwi RD, RD, 1 3222 | srwi RD, RD, 1
3239 | addi RD, RD, offsetof(GCfuncL, uvptr) 3223 | addi RD, RD, offsetof(GCfuncL, uvptr)
3240 | lwzx UPVAL:RB, LFUNC:RB, RD 3224 | lwzx UPVAL:RB, LFUNC:RB, RD
3225 | ins_next1
3241 | lwz TMP1, UPVAL:RB->v 3226 | lwz TMP1, UPVAL:RB->v
3242 | lfd f0, 0(TMP1) 3227 | lfd f0, 0(TMP1)
3243 | stfdx f0, BASE, RA 3228 | stfdx f0, BASE, RA
@@ -3250,6 +3235,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3250 | addi RA, RA, offsetof(GCfuncL, uvptr) 3235 | addi RA, RA, offsetof(GCfuncL, uvptr)
3251 | lfdux f0, RD, BASE 3236 | lfdux f0, RD, BASE
3252 | lwzx UPVAL:RB, LFUNC:RB, RA 3237 | lwzx UPVAL:RB, LFUNC:RB, RA
3238 | ins_next1
3253 | lbz TMP3, UPVAL:RB->marked 3239 | lbz TMP3, UPVAL:RB->marked
3254 | lwz CARG2, UPVAL:RB->v 3240 | lwz CARG2, UPVAL:RB->v
3255 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3241 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
@@ -3262,7 +3248,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3262 | subi TMP2, TMP2, (LJ_TISNUM+1) 3248 | subi TMP2, TMP2, (LJ_TISNUM+1)
3263 | bne >2 // Upvalue is closed and black? 3249 | bne >2 // Upvalue is closed and black?
3264 |1: 3250 |1:
3265 | ins_next 3251 | ins_next2
3266 | 3252 |
3267 |2: // Check if new value is collectable. 3253 |2: // Check if new value is collectable.
3268 | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1) 3254 | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
@@ -3277,7 +3263,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3277 break; 3263 break;
3278 case BC_USETS: 3264 case BC_USETS:
3279 | // RA = uvnum*8, RD = str_const*8 (~) 3265 | // RA = uvnum*8, RD = str_const*8 (~)
3280 | ins_next1
3281 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3266 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3282 | srwi TMP1, RD, 1 3267 | srwi TMP1, RD, 1
3283 | srwi RA, RA, 1 3268 | srwi RA, RA, 1
@@ -3285,6 +3270,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3285 | addi RA, RA, offsetof(GCfuncL, uvptr) 3270 | addi RA, RA, offsetof(GCfuncL, uvptr)
3286 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 3271 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
3287 | lwzx UPVAL:RB, LFUNC:RB, RA 3272 | lwzx UPVAL:RB, LFUNC:RB, RA
3273 | ins_next1
3288 | lbz TMP3, UPVAL:RB->marked 3274 | lbz TMP3, UPVAL:RB->marked
3289 | lwz CARG2, UPVAL:RB->v 3275 | lwz CARG2, UPVAL:RB->v
3290 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3276 | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
@@ -3309,25 +3295,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3309 break; 3295 break;
3310 case BC_USETN: 3296 case BC_USETN:
3311 | // RA = uvnum*8, RD = num_const*8 3297 | // RA = uvnum*8, RD = num_const*8
3312 | ins_next1
3313 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3298 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3314 | srwi RA, RA, 1 3299 | srwi RA, RA, 1
3315 | addi RA, RA, offsetof(GCfuncL, uvptr) 3300 | addi RA, RA, offsetof(GCfuncL, uvptr)
3316 | lfdx f0, KBASE, RD 3301 | lfdx f0, KBASE, RD
3317 | lwzx UPVAL:RB, LFUNC:RB, RA 3302 | lwzx UPVAL:RB, LFUNC:RB, RA
3303 | ins_next1
3318 | lwz TMP1, UPVAL:RB->v 3304 | lwz TMP1, UPVAL:RB->v
3319 | stfd f0, 0(TMP1) 3305 | stfd f0, 0(TMP1)
3320 | ins_next2 3306 | ins_next2
3321 break; 3307 break;
3322 case BC_USETP: 3308 case BC_USETP:
3323 | // RA = uvnum*8, RD = primitive_type*8 (~) 3309 | // RA = uvnum*8, RD = primitive_type*8 (~)
3324 | ins_next1
3325 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3310 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3326 | srwi RA, RA, 1 3311 | srwi RA, RA, 1
3327 | addi RA, RA, offsetof(GCfuncL, uvptr)
3328 | srwi TMP0, RD, 3 3312 | srwi TMP0, RD, 3
3329 | lwzx UPVAL:RB, LFUNC:RB, RA 3313 | addi RA, RA, offsetof(GCfuncL, uvptr)
3330 | not TMP0, TMP0 3314 | not TMP0, TMP0
3315 | lwzx UPVAL:RB, LFUNC:RB, RA
3316 | ins_next1
3331 | lwz TMP1, UPVAL:RB->v 3317 | lwz TMP1, UPVAL:RB->v
3332 | stw TMP0, 0(TMP1) 3318 | stw TMP0, 0(TMP1)
3333 | ins_next2 3319 | ins_next2
@@ -3538,8 +3524,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3538 case BC_TGETB: 3524 case BC_TGETB:
3539 | // RA = dst*8, RB = table*8, RC = index*8 3525 | // RA = dst*8, RB = table*8, RC = index*8
3540 | lwzux CARG1, RB, BASE 3526 | lwzux CARG1, RB, BASE
3541 | lwz TAB:RB, 4(RB)
3542 | srwi TMP0, RC, 3 3527 | srwi TMP0, RC, 3
3528 | lwz TAB:RB, 4(RB)
3543 | checktab CARG1; bne ->vmeta_tgetb 3529 | checktab CARG1; bne ->vmeta_tgetb
3544 | lwz TMP1, TAB:RB->asize 3530 | lwz TMP1, TAB:RB->asize
3545 | lwz TMP2, TAB:RB->array 3531 | lwz TMP2, TAB:RB->array
@@ -3717,8 +3703,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3717 case BC_TSETB: 3703 case BC_TSETB:
3718 | // RA = src*8, RB = table*8, RC = index*8 3704 | // RA = src*8, RB = table*8, RC = index*8
3719 | lwzux CARG1, RB, BASE 3705 | lwzux CARG1, RB, BASE
3720 | lwz TAB:RB, 4(RB)
3721 | srwi TMP0, RC, 3 3706 | srwi TMP0, RC, 3
3707 | lwz TAB:RB, 4(RB)
3722 | checktab CARG1; bne ->vmeta_tsetb 3708 | checktab CARG1; bne ->vmeta_tsetb
3723 | lwz TMP1, TAB:RB->asize 3709 | lwz TMP1, TAB:RB->asize
3724 | lwz TMP2, TAB:RB->array 3710 | lwz TMP2, TAB:RB->array
@@ -4470,9 +4456,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4470 | st_vmstate 4456 | st_vmstate
4471 | bctrl // (lua_State *L [, lua_CFunction f]) 4457 | bctrl // (lua_State *L [, lua_CFunction f])
4472 | // Returns nresults. 4458 | // Returns nresults.
4473 | lwz TMP1, L->top
4474 | slwi RD, CRET1, 3
4475 | lwz BASE, L->base 4459 | lwz BASE, L->base
4460 | slwi RD, CRET1, 3
4461 | lwz TMP1, L->top
4476 | li_vmstate INTERP 4462 | li_vmstate INTERP
4477 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 4463 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
4478 | sub RA, TMP1, RD // RA = L->top - nresults*8 4464 | sub RA, TMP1, RD // RA = L->top - nresults*8