diff options
| author | Mike Pall <mike> | 2009-12-08 20:35:29 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2009-12-08 20:35:29 +0100 |
| commit | 3f1f9e11f4f699ae94182d4cba158092f434a7f6 (patch) | |
| tree | 88fbb674a21a1d554d4b1ee9d4ef2c5fed6a1d88 /src | |
| parent | 5287b9326479ea2b7dddd6f642673e58e5a7f354 (diff) | |
| download | luajit-3f1f9e11f4f699ae94182d4cba158092f434a7f6.tar.gz luajit-3f1f9e11f4f699ae94182d4cba158092f434a7f6.tar.bz2 luajit-3f1f9e11f4f699ae94182d4cba158092f434a7f6.zip | |
Fast forward to sync public repo.
Compile math.sinh(), math.cosh(), math.tanh() and math.random().
Compile various io.*() functions.
Drive the GC forward on string allocations in the parser.
Improve KNUM fuse vs. load heuristics.
Add abstract C call handling to IR.
Diffstat (limited to 'src')
38 files changed, 1153 insertions, 743 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep index 1fb81e27..779ee545 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
| @@ -21,8 +21,9 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | |||
| 21 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h | 21 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h |
| 22 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h | 22 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h |
| 23 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 23 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
| 24 | lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \ | 24 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \ |
| 25 | lj_libdef.h | 25 | lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \ |
| 26 | lj_lib.h lj_libdef.h | ||
| 26 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ | 27 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ |
| 27 | lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ | 28 | lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ |
| 28 | lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ | 29 | lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ |
| @@ -45,9 +46,9 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | |||
| 45 | lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ | 46 | lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ |
| 46 | lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h | 47 | lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h |
| 47 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 48 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
| 48 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ | 49 | lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ |
| 49 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \ | 50 | lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_asm.h \ |
| 50 | lj_target.h lj_target_x86.h | 51 | lj_vm.h lj_target.h lj_target_x86.h |
| 51 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h | 52 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h |
| 52 | lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h | 53 | lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h |
| 53 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 54 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| @@ -67,8 +68,8 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
| 67 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ | 68 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ |
| 68 | lj_ir.h lj_dispatch.h | 69 | lj_ir.h lj_dispatch.h |
| 69 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 70 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
| 70 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ | 71 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ |
| 71 | lj_traceerr.h | 72 | lj_bc.h lj_traceerr.h lj_lib.h |
| 72 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 73 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
| 73 | lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h | 74 | lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h |
| 74 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ | 75 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ |
diff --git a/src/buildvm.c b/src/buildvm.c index b3738db4..4aba39d4 100644 --- a/src/buildvm.c +++ b/src/buildvm.c | |||
| @@ -215,12 +215,19 @@ IRFPMDEF(FPMNAME) | |||
| 215 | }; | 215 | }; |
| 216 | 216 | ||
| 217 | const char *const irfield_names[] = { | 217 | const char *const irfield_names[] = { |
| 218 | #define FLNAME(name, type, field) #name, | 218 | #define FLNAME(name, ofs) #name, |
| 219 | IRFLDEF(FLNAME) | 219 | IRFLDEF(FLNAME) |
| 220 | #undef FLNAME | 220 | #undef FLNAME |
| 221 | NULL | 221 | NULL |
| 222 | }; | 222 | }; |
| 223 | 223 | ||
| 224 | const char *const ircall_names[] = { | ||
| 225 | #define IRCALLNAME(name, nargs, kind, type, flags) #name, | ||
| 226 | IRCALLDEF(IRCALLNAME) | ||
| 227 | #undef IRCALLNAME | ||
| 228 | NULL | ||
| 229 | }; | ||
| 230 | |||
| 224 | static const char *const trace_errors[] = { | 231 | static const char *const trace_errors[] = { |
| 225 | #define TREDEF(name, msg) msg, | 232 | #define TREDEF(name, msg) msg, |
| 226 | #include "lj_traceerr.h" | 233 | #include "lj_traceerr.h" |
| @@ -269,6 +276,11 @@ static void emit_vmdef(BuildCtx *ctx) | |||
| 269 | } | 276 | } |
| 270 | fprintf(ctx->fp, "}\n\n"); | 277 | fprintf(ctx->fp, "}\n\n"); |
| 271 | 278 | ||
| 279 | fprintf(ctx->fp, "ircall = {\n[0]="); | ||
| 280 | for (i = 0; ircall_names[i]; i++) | ||
| 281 | fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); | ||
| 282 | fprintf(ctx->fp, "}\n\n"); | ||
| 283 | |||
| 272 | fprintf(ctx->fp, "traceerr = {\n[0]="); | 284 | fprintf(ctx->fp, "traceerr = {\n[0]="); |
| 273 | for (i = 0; trace_errors[i]; i++) | 285 | for (i = 0; trace_errors[i]; i++) |
| 274 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); | 286 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); |
diff --git a/src/buildvm.h b/src/buildvm.h index e55527fd..53c820ad 100644 --- a/src/buildvm.h +++ b/src/buildvm.h | |||
| @@ -102,5 +102,6 @@ extern const char *const bc_names[]; | |||
| 102 | extern const char *const ir_names[]; | 102 | extern const char *const ir_names[]; |
| 103 | extern const char *const irfpm_names[]; | 103 | extern const char *const irfpm_names[]; |
| 104 | extern const char *const irfield_names[]; | 104 | extern const char *const irfield_names[]; |
| 105 | extern const char *const ircall_names[]; | ||
| 105 | 106 | ||
| 106 | #endif | 107 | #endif |
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c index 5daab13b..31b6f61e 100644 --- a/src/buildvm_asm.c +++ b/src/buildvm_asm.c | |||
| @@ -26,6 +26,14 @@ static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n) | |||
| 26 | static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) | 26 | static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) |
| 27 | { | 27 | { |
| 28 | const char *sym = ctx->extnames[r->sym]; | 28 | const char *sym = ctx->extnames[r->sym]; |
| 29 | const char *p = strchr(sym, '@'); | ||
| 30 | char buf[80]; | ||
| 31 | if (p) { | ||
| 32 | /* Always strip fastcall suffix. Wrong for (unused) COFF on Win32. */ | ||
| 33 | strncpy(buf, sym, p-sym); | ||
| 34 | buf[p-sym] = '\0'; | ||
| 35 | sym = buf; | ||
| 36 | } | ||
| 29 | switch (ctx->mode) { | 37 | switch (ctx->mode) { |
| 30 | case BUILD_elfasm: | 38 | case BUILD_elfasm: |
| 31 | if (r->type) | 39 | if (r->type) |
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c index 271118e0..77af3dc5 100644 --- a/src/buildvm_fold.c +++ b/src/buildvm_fold.c | |||
| @@ -107,6 +107,10 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) | |||
| 107 | for (i = 0; irfield_names[i]; i++) | 107 | for (i = 0; irfield_names[i]; i++) |
| 108 | if (!strcmp(irfield_names[i], p+5)) | 108 | if (!strcmp(irfield_names[i], p+5)) |
| 109 | return i; | 109 | return i; |
| 110 | } else if (allowlit && !strncmp(p, "IRCALL_", 7)) { | ||
| 111 | for (i = 0; ircall_names[i]; i++) | ||
| 112 | if (!strcmp(ircall_names[i], p+7)) | ||
| 113 | return i; | ||
| 110 | } else if (allowany && !strcmp("any", p)) { | 114 | } else if (allowany && !strcmp("any", p)) { |
| 111 | return 0xff; | 115 | return 0xff; |
| 112 | } else { | 116 | } else { |
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c index 1a8661bf..a24ae727 100644 --- a/src/buildvm_peobj.c +++ b/src/buildvm_peobj.c | |||
| @@ -85,6 +85,7 @@ typedef struct PEsymaux { | |||
| 85 | #define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ | 85 | #define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ |
| 86 | #define PEOBJ_RELOC_DIR32 0x06 | 86 | #define PEOBJ_RELOC_DIR32 0x06 |
| 87 | #define PEOBJ_SYM_PREFIX "_" | 87 | #define PEOBJ_SYM_PREFIX "_" |
| 88 | #define PEOBJ_SYMF_PREFIX "@" | ||
| 88 | #elif LJ_TARGET_X64 | 89 | #elif LJ_TARGET_X64 |
| 89 | #define PEOBJ_ARCH_TARGET 0x8664 | 90 | #define PEOBJ_ARCH_TARGET 0x8664 |
| 90 | #define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ | 91 | #define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ |
| @@ -260,7 +261,18 @@ void emit_peobj(BuildCtx *ctx) | |||
| 260 | 261 | ||
| 261 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); | 262 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); |
| 262 | for (i = 0; ctx->extnames[i]; i++) { | 263 | for (i = 0; ctx->extnames[i]; i++) { |
| 263 | sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]); | 264 | const char *sym = ctx->extnames[i]; |
| 265 | const char *p = strchr(sym, '@'); | ||
| 266 | if (p) { | ||
| 267 | #ifdef PEOBJ_SYMF_PREFIX | ||
| 268 | sprintf(name, PEOBJ_SYMF_PREFIX "%s", sym); | ||
| 269 | #else | ||
| 270 | strncpy(name, sym, p-sym); | ||
| 271 | name[p-sym] = '\0'; | ||
| 272 | #endif | ||
| 273 | } else { | ||
| 274 | sprintf(name, PEOBJ_SYM_PREFIX "%s", sym); | ||
| 275 | } | ||
| 264 | emit_peobj_sym(ctx, name, 0, | 276 | emit_peobj_sym(ctx, name, 0, |
| 265 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); | 277 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); |
| 266 | } | 278 | } |
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 09cfa6dc..e857a6be 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc | |||
| @@ -30,6 +30,9 @@ | |||
| 30 | |.define RD, RC | 30 | |.define RD, RC |
| 31 | |.define RDL, RCL | 31 | |.define RDL, RCL |
| 32 | | | 32 | | |
| 33 | |.define FCARG1, ecx // Fastcall arguments. | ||
| 34 | |.define FCARG2, edx | ||
| 35 | | | ||
| 33 | |// Type definitions. Some of these are only used for documentation. | 36 | |// Type definitions. Some of these are only used for documentation. |
| 34 | |.type L, lua_State | 37 | |.type L, lua_State |
| 35 | |.type GL, global_State | 38 | |.type GL, global_State |
| @@ -1066,7 +1069,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
| 1066 | | mov RB, LJ_TNUMX | 1069 | | mov RB, LJ_TNUMX |
| 1067 | |7: | 1070 | |7: |
| 1068 | | not RB | 1071 | | not RB |
| 1069 | | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)] | 1072 | | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] |
| 1070 | | jmp <2 | 1073 | | jmp <2 |
| 1071 | | | 1074 | | |
| 1072 | |.ffunc_2 setmetatable | 1075 | |.ffunc_2 setmetatable |
| @@ -1126,17 +1129,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
| 1126 | | jmp ->fff_res1 | 1129 | | jmp ->fff_res1 |
| 1127 | |3: // Handle numbers inline, unless a number base metatable is present. | 1130 | |3: // Handle numbers inline, unless a number base metatable is present. |
| 1128 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | 1131 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback |
| 1129 | | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0 | 1132 | | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 |
| 1130 | | jne ->fff_fallback | 1133 | | jne ->fff_fallback |
| 1131 | | ffgccheck // Caveat: uses label 1. | 1134 | | ffgccheck // Caveat: uses label 1. |
| 1132 | | mov L:RB, SAVE_L | 1135 | | mov L:RB, SAVE_L |
| 1133 | | mov ARG1, L:RB | ||
| 1134 | | mov ARG2, RA | ||
| 1135 | | mov L:RB->base, RA // Add frame since C call can throw. | 1136 | | mov L:RB->base, RA // Add frame since C call can throw. |
| 1136 | | mov [RA-4], PC | 1137 | | mov [RA-4], PC |
| 1137 | | mov SAVE_PC, PC // Redundant (but a defined value). | 1138 | | mov SAVE_PC, PC // Redundant (but a defined value). |
| 1138 | | mov ARG3, BASE // Save BASE. | 1139 | | mov ARG3, BASE // Save BASE. |
| 1139 | | call extern lj_str_fromnum // (lua_State *L, lua_Number *np) | 1140 | | mov FCARG2, RA // Caveat: FCARG2 == BASE |
| 1141 | | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA | ||
| 1142 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | ||
| 1140 | | // GCstr returned in eax (RC). | 1143 | | // GCstr returned in eax (RC). |
| 1141 | | mov RA, L:RB->base | 1144 | | mov RA, L:RB->base |
| 1142 | | mov BASE, ARG3 | 1145 | | mov BASE, ARG3 |
| @@ -1762,11 +1765,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
| 1762 | | | 1765 | | |
| 1763 | |.ffunc_1 table_getn | 1766 | |.ffunc_1 table_getn |
| 1764 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | 1767 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback |
| 1765 | | mov TAB:RB, [RA] | 1768 | | mov ARG2, BASE // Save RA and BASE. |
| 1766 | | mov ARG1, TAB:RB | 1769 | | mov RB, RA |
| 1767 | | mov RB, RA // Save RA and BASE. | 1770 | | mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA |
| 1768 | | mov ARG2, BASE | 1771 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) |
| 1769 | | call extern lj_tab_len // (GCtab *t) | ||
| 1770 | | // Length of table returned in eax (RC). | 1772 | | // Length of table returned in eax (RC). |
| 1771 | | mov ARG1, RC | 1773 | | mov ARG1, RC |
| 1772 | | mov RA, RB // Restore RA and BASE. | 1774 | | mov RA, RB // Restore RA and BASE. |
| @@ -2512,10 +2514,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
| 2512 | | ins_next | 2514 | | ins_next |
| 2513 | |2: | 2515 | |2: |
| 2514 | | checktab RD, ->vmeta_len | 2516 | | checktab RD, ->vmeta_len |
| 2515 | | mov TAB:RD, [BASE+RD*8] | 2517 | | mov TAB:FCARG1, [BASE+RD*8] |
| 2516 | | mov ARG1, TAB:RD | ||
| 2517 | | mov RB, BASE // Save BASE. | 2518 | | mov RB, BASE // Save BASE. |
| 2518 | | call extern lj_tab_len // (GCtab *t) | 2519 | | call extern lj_tab_len@4 // (GCtab *t) |
| 2519 | | // Length of table returned in eax (RC). | 2520 | | // Length of table returned in eax (RC). |
| 2520 | | mov ARG1, RC | 2521 | | mov ARG1, RC |
| 2521 | | mov BASE, RB // Restore BASE. | 2522 | | mov BASE, RB // Restore BASE. |
| @@ -2665,66 +2666,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
| 2665 | | ins_next | 2666 | | ins_next |
| 2666 | break; | 2667 | break; |
| 2667 | case BC_USETV: | 2668 | case BC_USETV: |
| 2669 | #define TV2MARKOFS \ | ||
| 2670 | ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) | ||
| 2668 | | ins_AD // RA = upvalue #, RD = src | 2671 | | ins_AD // RA = upvalue #, RD = src |
| 2669 | | // Really ugly code due to the lack of a 4th free register. | ||
| 2670 | | mov LFUNC:RB, [BASE-8] | 2672 | | mov LFUNC:RB, [BASE-8] |
| 2671 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 2673 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
| 2672 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | 2674 | | cmp byte UPVAL:RB->closed, 0 |
| 2673 | | jnz >4 | ||
| 2674 | |1: | ||
| 2675 | | mov RA, [BASE+RD*8] | ||
| 2676 | |2: | ||
| 2677 | | mov RB, UPVAL:RB->v | 2675 | | mov RB, UPVAL:RB->v |
| 2676 | | mov RA, [BASE+RD*8] | ||
| 2678 | | mov RD, [BASE+RD*8+4] | 2677 | | mov RD, [BASE+RD*8+4] |
| 2679 | | mov [RB], RA | 2678 | | mov [RB], RA |
| 2680 | | mov [RB+4], RD | 2679 | | mov [RB+4], RD |
| 2681 | |3: | 2680 | | jz >1 |
| 2681 | | // Check barrier for closed upvalue. | ||
| 2682 | | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) | ||
| 2683 | | jnz >2 | ||
| 2684 | |1: | ||
| 2682 | | ins_next | 2685 | | ins_next |
| 2683 | | | 2686 | | |
| 2684 | |4: // Upvalue is black. Check if new value is collectable and white. | 2687 | |2: // Upvalue is black. Check if new value is collectable and white. |
| 2685 | | mov RA, [BASE+RD*8+4] | 2688 | | sub RD, LJ_TISGCV |
| 2686 | | sub RA, LJ_TISGCV | 2689 | | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) |
| 2687 | | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) | ||
| 2688 | | jbe <1 | 2690 | | jbe <1 |
| 2689 | | mov GCOBJ:RA, [BASE+RD*8] | ||
| 2690 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | 2691 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) |
| 2691 | | jz <2 | 2692 | | jz <1 |
| 2692 | | // Crossed a write barrier. So move the barrier forward. | 2693 | | // Crossed a write barrier. Move the barrier forward. |
| 2693 | | mov ARG2, UPVAL:RB | 2694 | | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). |
| 2694 | | mov ARG3, GCOBJ:RA | 2695 | | lea GL:FCARG1, [DISPATCH+GG_DISP2G] |
| 2695 | | mov RB, UPVAL:RB->v | 2696 | | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) |
| 2696 | | mov RD, [BASE+RD*8+4] | 2697 | | mov BASE, RB // Restore BASE. |
| 2697 | | mov [RB], GCOBJ:RA | 2698 | | jmp <1 |
| 2698 | | mov [RB+4], RD | ||
| 2699 | |->BC_USETV_Z: | ||
| 2700 | | mov L:RB, SAVE_L | ||
| 2701 | | lea GL:RA, [DISPATCH+GG_DISP2G] | ||
| 2702 | | mov L:RB->base, BASE | ||
| 2703 | | mov ARG1, GL:RA | ||
| 2704 | | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v) | ||
| 2705 | | mov BASE, L:RB->base | ||
| 2706 | | jmp <3 | ||
| 2707 | break; | 2699 | break; |
| 2700 | #undef TV2MARKOFS | ||
| 2708 | case BC_USETS: | 2701 | case BC_USETS: |
| 2709 | | ins_AND // RA = upvalue #, RD = str const (~) | 2702 | | ins_AND // RA = upvalue #, RD = str const (~) |
| 2710 | | mov LFUNC:RB, [BASE-8] | 2703 | | mov LFUNC:RB, [BASE-8] |
| 2711 | | mov GCOBJ:RD, [KBASE+RD*4] | ||
| 2712 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 2704 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
| 2713 | | mov RA, UPVAL:RB->v | 2705 | | mov GCOBJ:RA, [KBASE+RD*4] |
| 2714 | | mov dword [RA+4], LJ_TSTR | 2706 | | mov RD, UPVAL:RB->v |
| 2715 | | mov [RA], GCOBJ:RD | 2707 | | mov [RD], GCOBJ:RA |
| 2708 | | mov dword [RD+4], LJ_TSTR | ||
| 2716 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | 2709 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) |
| 2717 | | jnz >2 | 2710 | | jnz >2 |
| 2718 | |1: | 2711 | |1: |
| 2719 | | ins_next | 2712 | | ins_next |
| 2720 | | | 2713 | | |
| 2721 | |2: // Upvalue is black. Check if string is white. | 2714 | |2: // Check if string is white and ensure upvalue is closed. |
| 2722 | | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str) | 2715 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) |
| 2723 | | jz <1 | 2716 | | jz <1 |
| 2724 | | // Crossed a write barrier. So move the barrier forward. | 2717 | | cmp byte UPVAL:RB->closed, 0 |
| 2725 | | mov ARG3, GCOBJ:RD | 2718 | | jz <1 |
| 2726 | | mov ARG2, UPVAL:RB | 2719 | | // Crossed a write barrier. Move the barrier forward. |
| 2727 | | jmp ->BC_USETV_Z | 2720 | | mov RB, BASE // Save BASE (FCARG2 == BASE). |
| 2721 | | mov FCARG2, RD | ||
| 2722 | | lea GL:FCARG1, [DISPATCH+GG_DISP2G] | ||
| 2723 | | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) | ||
| 2724 | | mov BASE, RB // Restore BASE. | ||
| 2725 | | jmp <1 | ||
| 2728 | break; | 2726 | break; |
| 2729 | case BC_USETN: | 2727 | case BC_USETN: |
| 2730 | | ins_AD // RA = upvalue #, RD = num const | 2728 | | ins_AD // RA = upvalue #, RD = num const |
| @@ -2808,23 +2806,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
| 2808 | | mov dword [BASE+RA*8+4], LJ_TTAB | 2806 | | mov dword [BASE+RA*8+4], LJ_TTAB |
| 2809 | | ins_next | 2807 | | ins_next |
| 2810 | |2: | 2808 | |2: |
| 2811 | | call extern lj_gc_step_fixtop // (lua_State *L) | 2809 | | mov L:FCARG1, L:RB |
| 2812 | | mov ARG1, L:RB // Args owned by callee. Set it again. | 2810 | | call extern lj_gc_step_fixtop@4 // (lua_State *L) |
| 2813 | | jmp <1 | 2811 | | jmp <1 |
| 2814 | break; | 2812 | break; |
| 2815 | case BC_TDUP: | 2813 | case BC_TDUP: |
| 2816 | | ins_AND // RA = dst, RD = table const (~) (holding template table) | 2814 | | ins_AND // RA = dst, RD = table const (~) (holding template table) |
| 2817 | | mov TAB:RD, [KBASE+RD*4] | ||
| 2818 | | mov L:RB, SAVE_L | 2815 | | mov L:RB, SAVE_L |
| 2819 | | mov ARG2, TAB:RD | ||
| 2820 | | mov ARG1, L:RB | ||
| 2821 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | 2816 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] |
| 2822 | | mov SAVE_PC, PC | 2817 | | mov SAVE_PC, PC |
| 2823 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | 2818 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] |
| 2824 | | mov L:RB->base, BASE | 2819 | | mov L:RB->base, BASE |
| 2825 | | jae >3 | 2820 | | jae >3 |
| 2826 | |2: | 2821 | |2: |
| 2827 | | call extern lj_tab_dup // (lua_State *L, Table *kt) | 2822 | | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE |
| 2823 | | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA | ||
| 2824 | | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) | ||
| 2828 | | // Table * returned in eax (RC). | 2825 | | // Table * returned in eax (RC). |
| 2829 | | mov BASE, L:RB->base | 2826 | | mov BASE, L:RB->base |
| 2830 | | movzx RA, PC_RA | 2827 | | movzx RA, PC_RA |
| @@ -2832,8 +2829,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
| 2832 | | mov dword [BASE+RA*8+4], LJ_TTAB | 2829 | | mov dword [BASE+RA*8+4], LJ_TTAB |
| 2833 | | ins_next | 2830 | | ins_next |
| 2834 | |3: | 2831 | |3: |
| 2835 | | call extern lj_gc_step_fixtop // (lua_State *L) | 2832 | | mov L:FCARG1, L:RB |
| 2836 | | mov ARG1, L:RB // Args owned by callee. Set it again. | 2833 | | call extern lj_gc_step_fixtop@4 // (lua_State *L) |
| 2834 | | movzx RD, PC_RD // Need to reload RD. | ||
| 2835 | | not RD | ||
| 2837 | | jmp <2 | 2836 | | jmp <2 |
| 2838 | break; | 2837 | break; |
| 2839 | 2838 | ||
diff --git a/src/lib_base.c b/src/lib_base.c index 6b9e8eef..821c81b4 100644 --- a/src/lib_base.c +++ b/src/lib_base.c | |||
| @@ -183,7 +183,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
| 183 | int32_t base = lj_lib_optint(L, 2, 10); | 183 | int32_t base = lj_lib_optint(L, 2, 10); |
| 184 | if (base == 10) { | 184 | if (base == 10) { |
| 185 | TValue *o = lj_lib_checkany(L, 1); | 185 | TValue *o = lj_lib_checkany(L, 1); |
| 186 | if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) { | 186 | if (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))) { |
| 187 | setnumV(L->base-1, numV(o)); | 187 | setnumV(L->base-1, numV(o)); |
| 188 | return FFH_RES(1); | 188 | return FFH_RES(1); |
| 189 | } | 189 | } |
| @@ -206,6 +206,9 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
| 206 | return FFH_RES(1); | 206 | return FFH_RES(1); |
| 207 | } | 207 | } |
| 208 | 208 | ||
| 209 | LJLIB_PUSH("nil") | ||
| 210 | LJLIB_PUSH("false") | ||
| 211 | LJLIB_PUSH("true") | ||
| 209 | LJLIB_ASM(tostring) LJLIB_REC(.) | 212 | LJLIB_ASM(tostring) LJLIB_REC(.) |
| 210 | { | 213 | { |
| 211 | TValue *o = lj_lib_checkany(L, 1); | 214 | TValue *o = lj_lib_checkany(L, 1); |
| @@ -218,12 +221,8 @@ LJLIB_ASM(tostring) LJLIB_REC(.) | |||
| 218 | GCstr *s; | 221 | GCstr *s; |
| 219 | if (tvisnum(o)) { | 222 | if (tvisnum(o)) { |
| 220 | s = lj_str_fromnum(L, &o->n); | 223 | s = lj_str_fromnum(L, &o->n); |
| 221 | } else if (tvisnil(o)) { | 224 | } else if (tvispri(o)) { |
| 222 | s = lj_str_newlit(L, "nil"); | 225 | s = strV(lj_lib_upvalue(L, -itype(o))); |
| 223 | } else if (tvisfalse(o)) { | ||
| 224 | s = lj_str_newlit(L, "false"); | ||
| 225 | } else if (tvistrue(o)) { | ||
| 226 | s = lj_str_newlit(L, "true"); | ||
| 227 | } else { | 226 | } else { |
| 228 | if (tvisfunc(o) && isffunc(funcV(o))) | 227 | if (tvisfunc(o) && isffunc(funcV(o))) |
| 229 | lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); | 228 | lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); |
diff --git a/src/lib_io.c b/src/lib_io.c index aefe4213..d69b99a4 100644 --- a/src/lib_io.c +++ b/src/lib_io.c | |||
| @@ -17,14 +17,28 @@ | |||
| 17 | #include "lualib.h" | 17 | #include "lualib.h" |
| 18 | 18 | ||
| 19 | #include "lj_obj.h" | 19 | #include "lj_obj.h" |
| 20 | #include "lj_err.h" | ||
| 21 | #include "lj_gc.h" | 20 | #include "lj_gc.h" |
| 21 | #include "lj_err.h" | ||
| 22 | #include "lj_str.h" | ||
| 22 | #include "lj_ff.h" | 23 | #include "lj_ff.h" |
| 24 | #include "lj_trace.h" | ||
| 23 | #include "lj_lib.h" | 25 | #include "lj_lib.h" |
| 24 | 26 | ||
| 25 | /* Index of standard handles in function environment. */ | 27 | /* Userdata payload for I/O file. */ |
| 26 | #define IO_INPUT 1 | 28 | typedef struct IOFileUD { |
| 27 | #define IO_OUTPUT 2 | 29 | FILE *fp; /* File handle. */ |
| 30 | uint32_t type; /* File type. */ | ||
| 31 | } IOFileUD; | ||
| 32 | |||
| 33 | #define IOFILE_TYPE_FILE 0 /* Regular file. */ | ||
| 34 | #define IOFILE_TYPE_PIPE 1 /* Pipe. */ | ||
| 35 | #define IOFILE_TYPE_STDF 2 /* Standard file handle. */ | ||
| 36 | #define IOFILE_TYPE_MASK 3 | ||
| 37 | |||
| 38 | #define IOFILE_FLAG_CLOSE 4 /* Close after io.lines() iterator. */ | ||
| 39 | |||
| 40 | #define IOSTDF_UD(L, id) (&gcref(G(L)->gcroot[(id)])->ud) | ||
| 41 | #define IOSTDF_IOF(L, id) ((IOFileUD *)uddata(IOSTDF_UD(L, (id)))) | ||
| 28 | 42 | ||
| 29 | /* -- Error handling ------------------------------------------------------ */ | 43 | /* -- Error handling ------------------------------------------------------ */ |
| 30 | 44 | ||
| @@ -35,95 +49,102 @@ static int io_pushresult(lua_State *L, int ok, const char *fname) | |||
| 35 | return 1; | 49 | return 1; |
| 36 | } else { | 50 | } else { |
| 37 | int en = errno; /* Lua API calls may change this value. */ | 51 | int en = errno; /* Lua API calls may change this value. */ |
| 38 | lua_pushnil(L); | 52 | setnilV(L->top++); |
| 39 | if (fname) | 53 | if (fname) |
| 40 | lua_pushfstring(L, "%s: %s", fname, strerror(en)); | 54 | lua_pushfstring(L, "%s: %s", fname, strerror(en)); |
| 41 | else | 55 | else |
| 42 | lua_pushfstring(L, "%s", strerror(en)); | 56 | lua_pushfstring(L, "%s", strerror(en)); |
| 43 | lua_pushinteger(L, en); | 57 | setintV(L->top++, en); |
| 58 | lj_trace_abort(G(L)); | ||
| 44 | return 3; | 59 | return 3; |
| 45 | } | 60 | } |
| 46 | } | 61 | } |
| 47 | 62 | ||
| 48 | static void io_file_error(lua_State *L, int arg, const char *fname) | 63 | /* -- Open/close helpers -------------------------------------------------- */ |
| 64 | |||
| 65 | static IOFileUD *io_tofilep(lua_State *L) | ||
| 49 | { | 66 | { |
| 50 | lua_pushfstring(L, "%s: %s", fname, strerror(errno)); | 67 | if (!(L->base < L->top && tvisudata(L->base) && |
| 51 | luaL_argerror(L, arg, lua_tostring(L, -1)); | 68 | udataV(L->base)->udtype == UDTYPE_IO_FILE)) |
| 69 | lj_err_argtype(L, 1, "FILE*"); | ||
| 70 | return (IOFileUD *)uddata(udataV(L->base)); | ||
| 52 | } | 71 | } |
| 53 | 72 | ||
| 54 | /* -- Open helpers -------------------------------------------------------- */ | 73 | static IOFileUD *io_tofile(lua_State *L) |
| 55 | |||
| 56 | #define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE)) | ||
| 57 | |||
| 58 | static FILE *io_tofile(lua_State *L) | ||
| 59 | { | 74 | { |
| 60 | FILE **f = io_tofilep(L); | 75 | IOFileUD *iof = io_tofilep(L); |
| 61 | if (*f == NULL) | 76 | if (iof->fp == NULL) |
| 62 | lj_err_caller(L, LJ_ERR_IOCLFL); | 77 | lj_err_caller(L, LJ_ERR_IOCLFL); |
| 63 | return *f; | 78 | return iof; |
| 64 | } | 79 | } |
| 65 | 80 | ||
| 66 | static FILE **io_file_new(lua_State *L) | 81 | static FILE *io_stdfile(lua_State *L, ptrdiff_t id) |
| 67 | { | 82 | { |
| 68 | FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *)); | 83 | IOFileUD *iof = IOSTDF_IOF(L, id); |
| 69 | *pf = NULL; | 84 | if (iof->fp == NULL) |
| 70 | luaL_getmetatable(L, LUA_FILEHANDLE); | 85 | lj_err_caller(L, LJ_ERR_IOSTDCL); |
| 71 | lua_setmetatable(L, -2); | 86 | return iof->fp; |
| 72 | return pf; | ||
| 73 | } | 87 | } |
| 74 | 88 | ||
| 75 | /* -- Close helpers ------------------------------------------------------- */ | 89 | static IOFileUD *io_file_new(lua_State *L) |
| 90 | { | ||
| 91 | IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD)); | ||
| 92 | GCudata *ud = udataV(L->top-1); | ||
| 93 | ud->udtype = UDTYPE_IO_FILE; | ||
| 94 | /* NOBARRIER: The GCudata is new (marked white). */ | ||
| 95 | setgcrefr(ud->metatable, curr_func(L)->c.env); | ||
| 96 | iof->fp = NULL; | ||
| 97 | iof->type = IOFILE_TYPE_FILE; | ||
| 98 | return iof; | ||
| 99 | } | ||
| 76 | 100 | ||
| 77 | static int lj_cf_io_std_close(lua_State *L) | 101 | static IOFileUD *io_file_open(lua_State *L, const char *mode) |
| 78 | { | 102 | { |
| 79 | lua_pushnil(L); | 103 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
| 80 | lua_pushliteral(L, "cannot close standard file"); | 104 | IOFileUD *iof = io_file_new(L); |
| 81 | return 2; | 105 | iof->fp = fopen(fname, mode); |
| 106 | if (iof->fp == NULL) | ||
| 107 | luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); | ||
| 108 | return iof; | ||
| 82 | } | 109 | } |
| 83 | 110 | ||
| 84 | static int lj_cf_io_pipe_close(lua_State *L) | 111 | static int io_file_close(lua_State *L, IOFileUD *iof) |
| 85 | { | 112 | { |
| 86 | FILE **p = io_tofilep(L); | 113 | int ok; |
| 114 | if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) { | ||
| 115 | ok = (fclose(iof->fp) == 0); | ||
| 116 | } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) { | ||
| 87 | #if defined(LUA_USE_POSIX) | 117 | #if defined(LUA_USE_POSIX) |
| 88 | int ok = (pclose(*p) != -1); | 118 | ok = (pclose(iof->fp) != -1); |
| 89 | #elif defined(LUA_USE_WIN) | 119 | #elif defined(LUA_USE_WIN) |
| 90 | int ok = (_pclose(*p) != -1); | 120 | ok = (_pclose(iof->fp) != -1); |
| 91 | #else | 121 | #else |
| 92 | int ok = 0; | 122 | ok = 0; |
| 93 | #endif | 123 | #endif |
| 94 | *p = NULL; | 124 | } else { |
| 95 | return io_pushresult(L, ok, NULL); | 125 | lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); |
| 96 | } | 126 | setnilV(L->top++); |
| 97 | 127 | lua_pushliteral(L, "cannot close standard file"); | |
| 98 | static int lj_cf_io_file_close(lua_State *L) | 128 | return 2; |
| 99 | { | 129 | } |
| 100 | FILE **p = io_tofilep(L); | 130 | iof->fp = NULL; |
| 101 | int ok = (fclose(*p) == 0); | ||
| 102 | *p = NULL; | ||
| 103 | return io_pushresult(L, ok, NULL); | 131 | return io_pushresult(L, ok, NULL); |
| 104 | } | 132 | } |
| 105 | 133 | ||
| 106 | static int io_file_close(lua_State *L) | ||
| 107 | { | ||
| 108 | lua_getfenv(L, 1); | ||
| 109 | lua_getfield(L, -1, "__close"); | ||
| 110 | return (lua_tocfunction(L, -1))(L); | ||
| 111 | } | ||
| 112 | |||
| 113 | /* -- Read/write helpers -------------------------------------------------- */ | 134 | /* -- Read/write helpers -------------------------------------------------- */ |
| 114 | 135 | ||
| 115 | static int io_file_readnum(lua_State *L, FILE *fp) | 136 | static int io_file_readnum(lua_State *L, FILE *fp) |
| 116 | { | 137 | { |
| 117 | lua_Number d; | 138 | lua_Number d; |
| 118 | if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { | 139 | if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { |
| 119 | lua_pushnumber(L, d); | 140 | setnumV(L->top++, d); |
| 120 | return 1; | 141 | return 1; |
| 121 | } else { | 142 | } else { |
| 122 | return 0; /* read fails */ | 143 | return 0; |
| 123 | } | 144 | } |
| 124 | } | 145 | } |
| 125 | 146 | ||
| 126 | static int test_eof(lua_State *L, FILE *fp) | 147 | static int io_file_testeof(lua_State *L, FILE *fp) |
| 127 | { | 148 | { |
| 128 | int c = getc(fp); | 149 | int c = getc(fp); |
| 129 | ungetc(c, fp); | 150 | ungetc(c, fp); |
| @@ -168,7 +189,7 @@ static int io_file_readchars(lua_State *L, FILE *fp, size_t n) | |||
| 168 | n -= nr; /* still have to read `n' chars */ | 189 | n -= nr; /* still have to read `n' chars */ |
| 169 | } while (n > 0 && nr == rlen); /* until end of count or eof */ | 190 | } while (n > 0 && nr == rlen); /* until end of count or eof */ |
| 170 | luaL_pushresult(&b); /* close buffer */ | 191 | luaL_pushresult(&b); /* close buffer */ |
| 171 | return (n == 0 || lua_objlen(L, -1) > 0); | 192 | return (n == 0 || strV(L->top-1)->len > 0); |
| 172 | } | 193 | } |
| 173 | 194 | ||
| 174 | static int io_file_read(lua_State *L, FILE *fp, int start) | 195 | static int io_file_read(lua_State *L, FILE *fp, int start) |
| @@ -197,7 +218,7 @@ static int io_file_read(lua_State *L, FILE *fp, int start) | |||
| 197 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); | 218 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); |
| 198 | } else if (tvisnum(L->base+n)) { | 219 | } else if (tvisnum(L->base+n)) { |
| 199 | size_t len = (size_t)lj_lib_checkint(L, n+1); | 220 | size_t len = (size_t)lj_lib_checkint(L, n+1); |
| 200 | ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp); | 221 | ok = len ? io_file_readchars(L, fp, len) : io_file_testeof(L, fp); |
| 201 | } else { | 222 | } else { |
| 202 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); | 223 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); |
| 203 | } | 224 | } |
| @@ -233,30 +254,29 @@ static int io_file_write(lua_State *L, FILE *fp, int start) | |||
| 233 | 254 | ||
| 234 | LJLIB_CF(io_method_close) | 255 | LJLIB_CF(io_method_close) |
| 235 | { | 256 | { |
| 236 | if (lua_isnone(L, 1)) | 257 | IOFileUD *iof = L->base < L->top ? io_tofile(L) : |
| 237 | lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT); | 258 | IOSTDF_IOF(L, GCROOT_IO_OUTPUT); |
| 238 | io_tofile(L); | 259 | return io_file_close(L, iof); |
| 239 | return io_file_close(L); | ||
| 240 | } | 260 | } |
| 241 | 261 | ||
| 242 | LJLIB_CF(io_method_read) | 262 | LJLIB_CF(io_method_read) |
| 243 | { | 263 | { |
| 244 | return io_file_read(L, io_tofile(L), 1); | 264 | return io_file_read(L, io_tofile(L)->fp, 1); |
| 245 | } | 265 | } |
| 246 | 266 | ||
| 247 | LJLIB_CF(io_method_write) | 267 | LJLIB_CF(io_method_write) LJLIB_REC(io_write 0) |
| 248 | { | 268 | { |
| 249 | return io_file_write(L, io_tofile(L), 1); | 269 | return io_file_write(L, io_tofile(L)->fp, 1); |
| 250 | } | 270 | } |
| 251 | 271 | ||
| 252 | LJLIB_CF(io_method_flush) | 272 | LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) |
| 253 | { | 273 | { |
| 254 | return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL); | 274 | return io_pushresult(L, fflush(io_tofile(L)->fp) == 0, NULL); |
| 255 | } | 275 | } |
| 256 | 276 | ||
| 257 | LJLIB_CF(io_method_seek) | 277 | LJLIB_CF(io_method_seek) |
| 258 | { | 278 | { |
| 259 | FILE *fp = io_tofile(L); | 279 | FILE *fp = io_tofile(L)->fp; |
| 260 | int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); | 280 | int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); |
| 261 | lua_Number ofs; | 281 | lua_Number ofs; |
| 262 | int res; | 282 | int res; |
| @@ -294,39 +314,40 @@ LJLIB_CF(io_method_seek) | |||
| 294 | 314 | ||
| 295 | LJLIB_CF(io_method_setvbuf) | 315 | LJLIB_CF(io_method_setvbuf) |
| 296 | { | 316 | { |
| 297 | FILE *fp = io_tofile(L); | 317 | FILE *fp = io_tofile(L)->fp; |
| 298 | int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); | 318 | int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); |
| 299 | size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); | 319 | size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); |
| 300 | if (opt == 0) opt = _IOFBF; | 320 | if (opt == 0) opt = _IOFBF; |
| 301 | else if (opt == 1) opt = _IOLBF; | 321 | else if (opt == 1) opt = _IOLBF; |
| 302 | else if (opt == 2) opt = _IONBF; | 322 | else if (opt == 2) opt = _IONBF; |
| 303 | return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL); | 323 | return io_pushresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL); |
| 304 | } | 324 | } |
| 305 | 325 | ||
| 306 | /* Forward declaration. */ | 326 | LJLIB_PUSH(top-2) /* io_lines_iter */ |
| 307 | static void io_file_lines(lua_State *L, int idx, int toclose); | ||
| 308 | |||
| 309 | LJLIB_CF(io_method_lines) | 327 | LJLIB_CF(io_method_lines) |
| 310 | { | 328 | { |
| 311 | io_tofile(L); | 329 | io_tofile(L); |
| 312 | io_file_lines(L, 1, 0); | 330 | setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1))); |
| 313 | return 1; | 331 | setudataV(L, L->top+1, udataV(L->base)); |
| 332 | L->top += 2; | ||
| 333 | return 2; | ||
| 314 | } | 334 | } |
| 315 | 335 | ||
| 316 | LJLIB_CF(io_method___gc) | 336 | LJLIB_CF(io_method___gc) |
| 317 | { | 337 | { |
| 318 | FILE *fp = *io_tofilep(L); | 338 | IOFileUD *iof = io_tofilep(L); |
| 319 | if (fp != NULL) io_file_close(L); | 339 | if (iof->fp != NULL) |
| 340 | io_file_close(L, iof); | ||
| 320 | return 0; | 341 | return 0; |
| 321 | } | 342 | } |
| 322 | 343 | ||
| 323 | LJLIB_CF(io_method___tostring) | 344 | LJLIB_CF(io_method___tostring) |
| 324 | { | 345 | { |
| 325 | FILE *fp = *io_tofilep(L); | 346 | IOFileUD *iof = io_tofilep(L); |
| 326 | if (fp == NULL) | 347 | if (iof->fp != NULL) |
| 327 | lua_pushliteral(L, "file (closed)"); | 348 | lua_pushfstring(L, "file (%p)", iof->fp); |
| 328 | else | 349 | else |
| 329 | lua_pushfstring(L, "file (%p)", fp); | 350 | lua_pushliteral(L, "file (closed)"); |
| 330 | return 1; | 351 | return 1; |
| 331 | } | 352 | } |
| 332 | 353 | ||
| @@ -340,30 +361,41 @@ LJLIB_PUSH(top-1) LJLIB_SET(__index) | |||
| 340 | 361 | ||
| 341 | LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ | 362 | LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ |
| 342 | 363 | ||
| 343 | static FILE *io_file_get(lua_State *L, int findex) | 364 | LJLIB_CF(io_open) |
| 344 | { | 365 | { |
| 345 | GCtab *fenv = tabref(curr_func(L)->c.env); | 366 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
| 346 | GCudata *ud = udataV(&tvref(fenv->array)[findex]); | 367 | GCstr *s = lj_lib_optstr(L, 2); |
| 347 | FILE *fp = *(FILE **)uddata(ud); | 368 | const char *mode = s ? strdata(s) : "r"; |
| 348 | if (fp == NULL) | 369 | IOFileUD *iof = io_file_new(L); |
| 349 | lj_err_caller(L, LJ_ERR_IOSTDCL); | 370 | iof->fp = fopen(fname, mode); |
| 350 | return fp; | 371 | return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname); |
| 351 | } | 372 | } |
| 352 | 373 | ||
| 353 | LJLIB_CF(io_open) | 374 | LJLIB_CF(io_popen) |
| 354 | { | 375 | { |
| 355 | const char *fname = luaL_checkstring(L, 1); | 376 | #if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) |
| 356 | const char *mode = luaL_optstring(L, 2, "r"); | 377 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
| 357 | FILE **pf = io_file_new(L); | 378 | GCstr *s = lj_lib_optstr(L, 2); |
| 358 | *pf = fopen(fname, mode); | 379 | const char *mode = s ? strdata(s) : "r"; |
| 359 | return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; | 380 | IOFileUD *iof = io_file_new(L); |
| 381 | iof->type = IOFILE_TYPE_PIPE; | ||
| 382 | #ifdef LUA_USE_POSIX | ||
| 383 | fflush(NULL); | ||
| 384 | iof->fp = popen(fname, mode); | ||
| 385 | #else | ||
| 386 | iof->fp = _popen(fname, mode); | ||
| 387 | #endif | ||
| 388 | return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname); | ||
| 389 | #else | ||
| 390 | luaL_error(L, LUA_QL("popen") " not supported"); | ||
| 391 | #endif | ||
| 360 | } | 392 | } |
| 361 | 393 | ||
| 362 | LJLIB_CF(io_tmpfile) | 394 | LJLIB_CF(io_tmpfile) |
| 363 | { | 395 | { |
| 364 | FILE **pf = io_file_new(L); | 396 | IOFileUD *iof = io_file_new(L); |
| 365 | *pf = tmpfile(); | 397 | iof->fp = tmpfile(); |
| 366 | return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1; | 398 | return iof->fp != NULL ? 1 : io_pushresult(L, 0, NULL); |
| 367 | } | 399 | } |
| 368 | 400 | ||
| 369 | LJLIB_CF(io_close) | 401 | LJLIB_CF(io_close) |
| @@ -373,169 +405,112 @@ LJLIB_CF(io_close) | |||
| 373 | 405 | ||
| 374 | LJLIB_CF(io_read) | 406 | LJLIB_CF(io_read) |
| 375 | { | 407 | { |
| 376 | return io_file_read(L, io_file_get(L, IO_INPUT), 0); | 408 | return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0); |
| 377 | } | ||
| 378 | |||
| 379 | LJLIB_CF(io_write) | ||
| 380 | { | ||
| 381 | return io_file_write(L, io_file_get(L, IO_OUTPUT), 0); | ||
| 382 | } | ||
| 383 | |||
| 384 | LJLIB_CF(io_flush) | ||
| 385 | { | ||
| 386 | return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL); | ||
| 387 | } | 409 | } |
| 388 | 410 | ||
| 389 | LJLIB_NOREG LJLIB_CF(io_lines_iter) | 411 | LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT) |
| 390 | { | ||
| 391 | FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1))); | ||
| 392 | int ok; | ||
| 393 | if (fp == NULL) | ||
| 394 | lj_err_caller(L, LJ_ERR_IOCLFL); | ||
| 395 | ok = io_file_readline(L, fp); | ||
| 396 | if (ferror(fp)) | ||
| 397 | return luaL_error(L, "%s", strerror(errno)); | ||
| 398 | if (ok) | ||
| 399 | return 1; | ||
| 400 | if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */ | ||
| 401 | L->top = L->base+1; | ||
| 402 | setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1))); | ||
| 403 | io_file_close(L); | ||
| 404 | } | ||
| 405 | return 0; | ||
| 406 | } | ||
| 407 | |||
| 408 | static void io_file_lines(lua_State *L, int idx, int toclose) | ||
| 409 | { | 412 | { |
| 410 | lua_pushvalue(L, idx); | 413 | return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0); |
| 411 | lua_pushboolean(L, toclose); | ||
| 412 | lua_pushcclosure(L, lj_cf_io_lines_iter, 2); | ||
| 413 | funcV(L->top-1)->c.ffid = FF_io_lines_iter; | ||
| 414 | } | 414 | } |
| 415 | 415 | ||
| 416 | LJLIB_CF(io_lines) | 416 | LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT) |
| 417 | { | 417 | { |
| 418 | if (lua_isnoneornil(L, 1)) { /* no arguments? */ | 418 | return io_pushresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL); |
| 419 | /* will iterate over default input */ | ||
| 420 | lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT); | ||
| 421 | return lj_cf_io_method_lines(L); | ||
| 422 | } else { | ||
| 423 | const char *fname = luaL_checkstring(L, 1); | ||
| 424 | FILE **pf = io_file_new(L); | ||
| 425 | *pf = fopen(fname, "r"); | ||
| 426 | if (*pf == NULL) | ||
| 427 | io_file_error(L, 1, fname); | ||
| 428 | io_file_lines(L, lua_gettop(L), 1); | ||
| 429 | return 1; | ||
| 430 | } | ||
| 431 | } | 419 | } |
| 432 | 420 | ||
| 433 | static int io_std_get(lua_State *L, int fp, const char *mode) | 421 | static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode) |
| 434 | { | 422 | { |
| 435 | if (!lua_isnoneornil(L, 1)) { | 423 | if (L->base < L->top && !tvisnil(L->base)) { |
| 436 | const char *fname = lua_tostring(L, 1); | 424 | if (tvisudata(L->base)) { |
| 437 | if (fname) { | 425 | io_tofile(L); |
| 438 | FILE **pf = io_file_new(L); | 426 | L->top = L->base+1; |
| 439 | *pf = fopen(fname, mode); | ||
| 440 | if (*pf == NULL) | ||
| 441 | io_file_error(L, 1, fname); | ||
| 442 | } else { | 427 | } else { |
| 443 | io_tofile(L); /* check that it's a valid file handle */ | 428 | io_file_open(L, mode); |
| 444 | lua_pushvalue(L, 1); | ||
| 445 | } | 429 | } |
| 446 | lua_rawseti(L, LUA_ENVIRONINDEX, fp); | 430 | /* NOBARRIER: The standard I/O handles are GC roots. */ |
| 431 | setgcref(G(L)->gcroot[id], gcV(L->top-1)); | ||
| 432 | } else { | ||
| 433 | setudataV(L, L->top++, IOSTDF_UD(L, id)); | ||
| 447 | } | 434 | } |
| 448 | /* return current value */ | ||
| 449 | lua_rawgeti(L, LUA_ENVIRONINDEX, fp); | ||
| 450 | return 1; | 435 | return 1; |
| 451 | } | 436 | } |
| 452 | 437 | ||
| 453 | LJLIB_CF(io_input) | 438 | LJLIB_CF(io_input) |
| 454 | { | 439 | { |
| 455 | return io_std_get(L, IO_INPUT, "r"); | 440 | return io_std_getset(L, GCROOT_IO_INPUT, "r"); |
| 456 | } | 441 | } |
| 457 | 442 | ||
| 458 | LJLIB_CF(io_output) | 443 | LJLIB_CF(io_output) |
| 459 | { | 444 | { |
| 460 | return io_std_get(L, IO_OUTPUT, "w"); | 445 | return io_std_getset(L, GCROOT_IO_OUTPUT, "w"); |
| 461 | } | 446 | } |
| 462 | 447 | ||
| 463 | LJLIB_CF(io_type) | 448 | LJLIB_NOREG LJLIB_CF(io_lines_iter) |
| 464 | { | 449 | { |
| 465 | void *ud; | 450 | IOFileUD *iof = io_tofile(L); |
| 466 | luaL_checkany(L, 1); | 451 | int ok = io_file_readline(L, iof->fp); |
| 467 | ud = lua_touserdata(L, 1); | 452 | if (ferror(iof->fp)) |
| 468 | lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | 453 | lj_err_callermsg(L, strerror(errno)); |
| 469 | if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1)) | 454 | if (!ok && (iof->type & IOFILE_FLAG_CLOSE)) |
| 470 | lua_pushnil(L); /* not a file */ | 455 | io_file_close(L, iof); /* Return values are ignored (ok is 0). */ |
| 471 | else if (*((FILE **)ud) == NULL) | 456 | return ok; |
| 472 | lua_pushliteral(L, "closed file"); | ||
| 473 | else | ||
| 474 | lua_pushliteral(L, "file"); | ||
| 475 | return 1; | ||
| 476 | } | 457 | } |
| 477 | 458 | ||
| 478 | LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */ | 459 | LJLIB_PUSH(top-3) /* io_lines_iter */ |
| 460 | LJLIB_CF(io_lines) | ||
| 461 | { | ||
| 462 | if (L->base < L->top && !tvisnil(L->base)) { /* io.lines(fname) */ | ||
| 463 | IOFileUD *iof = io_file_open(L, "r"); | ||
| 464 | iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE; | ||
| 465 | setfuncV(L, L->top-2, funcV(lj_lib_upvalue(L, 1))); | ||
| 466 | } else { /* io.lines() iterates over stdin. */ | ||
| 467 | setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1))); | ||
| 468 | setudataV(L, L->top+1, IOSTDF_UD(L, GCROOT_IO_INPUT)); | ||
| 469 | L->top += 2; | ||
| 470 | } | ||
| 471 | return 2; | ||
| 472 | } | ||
| 479 | 473 | ||
| 480 | LJLIB_CF(io_popen) | 474 | LJLIB_CF(io_type) |
| 481 | { | 475 | { |
| 482 | #if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) | 476 | cTValue *o = lj_lib_checkany(L, 1); |
| 483 | const char *fname = luaL_checkstring(L, 1); | 477 | if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE)) |
| 484 | const char *mode = luaL_optstring(L, 2, "r"); | 478 | setnilV(L->top++); |
| 485 | FILE **pf = io_file_new(L); | 479 | else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL) |
| 486 | #ifdef LUA_USE_POSIX | 480 | lua_pushliteral(L, "file"); |
| 487 | fflush(NULL); | 481 | else |
| 488 | *pf = popen(fname, mode); | 482 | lua_pushliteral(L, "closed file"); |
| 489 | #else | 483 | return 1; |
| 490 | *pf = _popen(fname, mode); | ||
| 491 | #endif | ||
| 492 | return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; | ||
| 493 | #else | ||
| 494 | luaL_error(L, LUA_QL("popen") " not supported"); | ||
| 495 | #endif | ||
| 496 | } | 484 | } |
| 497 | 485 | ||
| 498 | #include "lj_libdef.h" | 486 | #include "lj_libdef.h" |
| 499 | 487 | ||
| 500 | /* ------------------------------------------------------------------------ */ | 488 | /* ------------------------------------------------------------------------ */ |
| 501 | 489 | ||
| 502 | static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname) | 490 | static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name) |
| 503 | { | 491 | { |
| 504 | FILE **pf = io_file_new(L); | 492 | IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD)); |
| 505 | GCudata *ud = udataV(L->top-1); | 493 | GCudata *ud = udataV(L->top-1); |
| 506 | GCtab *envt = tabV(L->top-2); | 494 | ud->udtype = UDTYPE_IO_FILE; |
| 507 | *pf = fp; | 495 | /* NOBARRIER: The GCudata is new (marked white). */ |
| 508 | setgcref(ud->env, obj2gco(envt)); | 496 | setgcref(ud->metatable, gcV(L->top-3)); |
| 509 | lj_gc_objbarrier(L, obj2gco(ud), envt); | 497 | iof->fp = fp; |
| 510 | if (k > 0) { | 498 | iof->type = IOFILE_TYPE_STDF; |
| 511 | lua_pushvalue(L, -1); | 499 | lua_setfield(L, -2, name); |
| 512 | lua_rawseti(L, -5, k); | 500 | return obj2gco(ud); |
| 513 | } | ||
| 514 | lua_setfield(L, -3, fname); | ||
| 515 | } | ||
| 516 | |||
| 517 | static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls) | ||
| 518 | { | ||
| 519 | lua_createtable(L, narr, 1); | ||
| 520 | lua_pushcfunction(L, cls); | ||
| 521 | lua_setfield(L, -2, "__close"); | ||
| 522 | } | 501 | } |
| 523 | 502 | ||
| 524 | LUALIB_API int luaopen_io(lua_State *L) | 503 | LUALIB_API int luaopen_io(lua_State *L) |
| 525 | { | 504 | { |
| 526 | lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | 505 | lua_pushcfunction(L, lj_cf_io_lines_iter); |
| 527 | if (tvisnil(L->top-1)) { | 506 | funcV(L->top-1)->c.ffid = FF_io_lines_iter; |
| 528 | LJ_LIB_REG_(L, NULL, io_method); | 507 | LJ_LIB_REG_(L, NULL, io_method); |
| 529 | lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | 508 | copyTV(L, L->top, L->top-1); L->top++; |
| 530 | } | 509 | lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); |
| 531 | io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */ | ||
| 532 | io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */ | ||
| 533 | LJ_LIB_REG(L, io); | 510 | LJ_LIB_REG(L, io); |
| 534 | io_fenv_new(L, 0, lj_cf_io_std_close); | 511 | setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin")); |
| 535 | io_std_new(L, stdin, IO_INPUT, "stdin"); | 512 | setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout")); |
| 536 | io_std_new(L, stdout, IO_OUTPUT, "stdout"); | 513 | io_std_new(L, stderr, "stderr"); |
| 537 | io_std_new(L, stderr, 0, "stderr"); | ||
| 538 | L->top--; | ||
| 539 | return 1; | 514 | return 1; |
| 540 | } | 515 | } |
| 541 | 516 | ||
diff --git a/src/lib_math.c b/src/lib_math.c index adc77c9d..f3803e8f 100644 --- a/src/lib_math.c +++ b/src/lib_math.c | |||
| @@ -36,9 +36,9 @@ LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) | |||
| 36 | LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) | 36 | LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) |
| 37 | LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) | 37 | LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) |
| 38 | LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) | 38 | LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) |
| 39 | LJLIB_ASM_(math_sinh) | 39 | LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) |
| 40 | LJLIB_ASM_(math_cosh) | 40 | LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) |
| 41 | LJLIB_ASM_(math_tanh) | 41 | LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) |
| 42 | LJLIB_ASM_(math_frexp) | 42 | LJLIB_ASM_(math_frexp) |
| 43 | LJLIB_ASM_(math_modf) LJLIB_REC(.) | 43 | LJLIB_ASM_(math_modf) LJLIB_REC(.) |
| 44 | 44 | ||
| @@ -82,35 +82,33 @@ LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); } | |||
| 82 | */ | 82 | */ |
| 83 | 83 | ||
| 84 | /* PRNG state. */ | 84 | /* PRNG state. */ |
| 85 | typedef struct TW223State { | 85 | struct RandomState { |
| 86 | uint64_t gen[4]; /* State of the 4 LFSR generators. */ | 86 | uint64_t gen[4]; /* State of the 4 LFSR generators. */ |
| 87 | int valid; /* State is valid. */ | 87 | int valid; /* State is valid. */ |
| 88 | } TW223State; | 88 | }; |
| 89 | 89 | ||
| 90 | /* Union needed for bit-pattern conversion between uint64_t and double. */ | 90 | /* Union needed for bit-pattern conversion between uint64_t and double. */ |
| 91 | typedef union { uint64_t u64; double d; } U64double; | 91 | typedef union { uint64_t u64; double d; } U64double; |
| 92 | 92 | ||
| 93 | /* Update generator i and compute a running xor of all states. */ | 93 | /* Update generator i and compute a running xor of all states. */ |
| 94 | #define TW223_GEN(i, k, q, s) \ | 94 | #define TW223_GEN(i, k, q, s) \ |
| 95 | z = tw->gen[i]; \ | 95 | z = rs->gen[i]; \ |
| 96 | z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ | 96 | z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ |
| 97 | r ^= z; tw->gen[i] = z; | 97 | r ^= z; rs->gen[i] = z; |
| 98 | 98 | ||
| 99 | /* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ | 99 | /* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ |
| 100 | static LJ_NOINLINE double tw223_step(TW223State *tw) | 100 | LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) |
| 101 | { | 101 | { |
| 102 | uint64_t z, r = 0; | 102 | uint64_t z, r = 0; |
| 103 | U64double u; | ||
| 104 | TW223_GEN(0, 63, 31, 18) | 103 | TW223_GEN(0, 63, 31, 18) |
| 105 | TW223_GEN(1, 58, 19, 28) | 104 | TW223_GEN(1, 58, 19, 28) |
| 106 | TW223_GEN(2, 55, 24, 7) | 105 | TW223_GEN(2, 55, 24, 7) |
| 107 | TW223_GEN(3, 47, 21, 8) | 106 | TW223_GEN(3, 47, 21, 8) |
| 108 | u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52); | 107 | return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); |
| 109 | return u.d; | ||
| 110 | } | 108 | } |
| 111 | 109 | ||
| 112 | /* PRNG initialization function. */ | 110 | /* PRNG initialization function. */ |
| 113 | static void tw223_init(TW223State *tw, double d) | 111 | static void random_init(RandomState *rs, double d) |
| 114 | { | 112 | { |
| 115 | uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ | 113 | uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ |
| 116 | int i; | 114 | int i; |
| @@ -120,22 +118,24 @@ static void tw223_init(TW223State *tw, double d) | |||
| 120 | r >>= 8; | 118 | r >>= 8; |
| 121 | u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; | 119 | u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; |
| 122 | if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ | 120 | if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ |
| 123 | tw->gen[i] = u.u64; | 121 | rs->gen[i] = u.u64; |
| 124 | } | 122 | } |
| 125 | tw->valid = 1; | 123 | rs->valid = 1; |
| 126 | for (i = 0; i < 10; i++) | 124 | for (i = 0; i < 10; i++) |
| 127 | tw223_step(tw); | 125 | lj_math_random_step(rs); |
| 128 | } | 126 | } |
| 129 | 127 | ||
| 130 | /* PRNG extract function. */ | 128 | /* PRNG extract function. */ |
| 131 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ | 129 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ |
| 132 | LJLIB_CF(math_random) | 130 | LJLIB_CF(math_random) LJLIB_REC(.) |
| 133 | { | 131 | { |
| 134 | int n = cast_int(L->top - L->base); | 132 | int n = cast_int(L->top - L->base); |
| 135 | TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | 133 | RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); |
| 134 | U64double u; | ||
| 136 | double d; | 135 | double d; |
| 137 | if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0); | 136 | if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); |
| 138 | d = tw223_step(tw) - 1.0; | 137 | u.u64 = lj_math_random_step(rs); |
| 138 | d = u.d - 1.0; | ||
| 139 | if (n > 0) { | 139 | if (n > 0) { |
| 140 | double r1 = lj_lib_checknum(L, 1); | 140 | double r1 = lj_lib_checknum(L, 1); |
| 141 | if (n == 1) { | 141 | if (n == 1) { |
| @@ -150,11 +150,11 @@ LJLIB_CF(math_random) | |||
| 150 | } | 150 | } |
| 151 | 151 | ||
| 152 | /* PRNG seed function. */ | 152 | /* PRNG seed function. */ |
| 153 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ | 153 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ |
| 154 | LJLIB_CF(math_randomseed) | 154 | LJLIB_CF(math_randomseed) |
| 155 | { | 155 | { |
| 156 | TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | 156 | RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); |
| 157 | tw223_init(tw, lj_lib_checknum(L, 1)); | 157 | random_init(rs, lj_lib_checknum(L, 1)); |
| 158 | return 0; | 158 | return 0; |
| 159 | } | 159 | } |
| 160 | 160 | ||
| @@ -164,9 +164,9 @@ LJLIB_CF(math_randomseed) | |||
| 164 | 164 | ||
| 165 | LUALIB_API int luaopen_math(lua_State *L) | 165 | LUALIB_API int luaopen_math(lua_State *L) |
| 166 | { | 166 | { |
| 167 | TW223State *tw; | 167 | RandomState *rs; |
| 168 | tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State)); | 168 | rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); |
| 169 | tw->valid = 0; /* Use lazy initialization to save some time on startup. */ | 169 | rs->valid = 0; /* Use lazy initialization to save some time on startup. */ |
| 170 | LJ_LIB_REG(L, math); | 170 | LJ_LIB_REG(L, math); |
| 171 | #if defined(LUA_COMPAT_MOD) | 171 | #if defined(LUA_COMPAT_MOD) |
| 172 | lua_getfield(L, -1, "fmod"); | 172 | lua_getfield(L, -1, "fmod"); |
diff --git a/src/lib_string.c b/src/lib_string.c index 6c857328..e7ad12df 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
| @@ -776,16 +776,18 @@ LUALIB_API int luaopen_string(lua_State *L) | |||
| 776 | { | 776 | { |
| 777 | GCtab *mt; | 777 | GCtab *mt; |
| 778 | GCstr *mmstr; | 778 | GCstr *mmstr; |
| 779 | global_State *g; | ||
| 779 | LJ_LIB_REG(L, string); | 780 | LJ_LIB_REG(L, string); |
| 780 | #if defined(LUA_COMPAT_GFIND) | 781 | #if defined(LUA_COMPAT_GFIND) |
| 781 | lua_getfield(L, -1, "gmatch"); | 782 | lua_getfield(L, -1, "gmatch"); |
| 782 | lua_setfield(L, -2, "gfind"); | 783 | lua_setfield(L, -2, "gfind"); |
| 783 | #endif | 784 | #endif |
| 784 | mt = lj_tab_new(L, 0, 1); | 785 | mt = lj_tab_new(L, 0, 1); |
| 785 | /* NOBARRIER: G(L)->mmname[] is a GC root. */ | 786 | /* NOBARRIER: basemt is a GC root. */ |
| 786 | setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt)); | 787 | g = G(L); |
| 787 | mmstr = strref(G(L)->mmname[MM_index]); | 788 | setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); |
| 788 | if (isdead(G(L), obj2gco(mmstr))) flipwhite(obj2gco(mmstr)); | 789 | mmstr = strref(g->mmname[MM_index]); |
| 790 | if (isdead(g, obj2gco(mmstr))) flipwhite(obj2gco(mmstr)); | ||
| 789 | settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1)); | 791 | settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1)); |
| 790 | mt->nomm = cast_byte(~(1u<<MM_index)); | 792 | mt->nomm = cast_byte(~(1u<<MM_index)); |
| 791 | return 1; | 793 | return 1; |
diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 8ad4f8fb..6d8b4ccb 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c | |||
| @@ -1186,10 +1186,10 @@ static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize) | |||
| 1186 | size_t rsize = oldsize - nb; | 1186 | size_t rsize = oldsize - nb; |
| 1187 | newp = oldp; | 1187 | newp = oldp; |
| 1188 | if (rsize >= MIN_CHUNK_SIZE) { | 1188 | if (rsize >= MIN_CHUNK_SIZE) { |
| 1189 | mchunkptr remainder = chunk_plus_offset(newp, nb); | 1189 | mchunkptr rem = chunk_plus_offset(newp, nb); |
| 1190 | set_inuse(m, newp, nb); | 1190 | set_inuse(m, newp, nb); |
| 1191 | set_inuse(m, remainder, rsize); | 1191 | set_inuse(m, rem, rsize); |
| 1192 | lj_alloc_free(m, chunk2mem(remainder)); | 1192 | lj_alloc_free(m, chunk2mem(rem)); |
| 1193 | } | 1193 | } |
| 1194 | } else if (next == m->top && oldsize + m->topsize > nb) { | 1194 | } else if (next == m->top && oldsize + m->topsize > nb) { |
| 1195 | /* Expand into top */ | 1195 | /* Expand into top */ |
diff --git a/src/lj_api.c b/src/lj_api.c index 7a759e5f..4bac5024 100644 --- a/src/lj_api.c +++ b/src/lj_api.c | |||
| @@ -227,7 +227,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx) | |||
| 227 | { | 227 | { |
| 228 | cTValue *o = index2adr(L, idx); | 228 | cTValue *o = index2adr(L, idx); |
| 229 | TValue tmp; | 229 | TValue tmp; |
| 230 | return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))); | 230 | return (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), &tmp))); |
| 231 | } | 231 | } |
| 232 | 232 | ||
| 233 | LUA_API int lua_isstring(lua_State *L, int idx) | 233 | LUA_API int lua_isstring(lua_State *L, int idx) |
| @@ -307,7 +307,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) | |||
| 307 | TValue tmp; | 307 | TValue tmp; |
| 308 | if (LJ_LIKELY(tvisnum(o))) | 308 | if (LJ_LIKELY(tvisnum(o))) |
| 309 | return numV(o); | 309 | return numV(o); |
| 310 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 310 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
| 311 | return numV(&tmp); | 311 | return numV(&tmp); |
| 312 | else | 312 | else |
| 313 | return 0; | 313 | return 0; |
| @@ -319,7 +319,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) | |||
| 319 | TValue tmp; | 319 | TValue tmp; |
| 320 | if (tvisnum(o)) | 320 | if (tvisnum(o)) |
| 321 | return numV(o); | 321 | return numV(o); |
| 322 | else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) | 322 | else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) |
| 323 | lj_err_argt(L, idx, LUA_TNUMBER); | 323 | lj_err_argt(L, idx, LUA_TNUMBER); |
| 324 | return numV(&tmp); | 324 | return numV(&tmp); |
| 325 | } | 325 | } |
| @@ -332,7 +332,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def) | |||
| 332 | return numV(o); | 332 | return numV(o); |
| 333 | else if (tvisnil(o)) | 333 | else if (tvisnil(o)) |
| 334 | return def; | 334 | return def; |
| 335 | else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) | 335 | else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) |
| 336 | lj_err_argt(L, idx, LUA_TNUMBER); | 336 | lj_err_argt(L, idx, LUA_TNUMBER); |
| 337 | return numV(&tmp); | 337 | return numV(&tmp); |
| 338 | } | 338 | } |
| @@ -344,7 +344,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) | |||
| 344 | lua_Number n; | 344 | lua_Number n; |
| 345 | if (LJ_LIKELY(tvisnum(o))) | 345 | if (LJ_LIKELY(tvisnum(o))) |
| 346 | n = numV(o); | 346 | n = numV(o); |
| 347 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 347 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
| 348 | n = numV(&tmp); | 348 | n = numV(&tmp); |
| 349 | else | 349 | else |
| 350 | return 0; | 350 | return 0; |
| @@ -362,7 +362,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) | |||
| 362 | lua_Number n; | 362 | lua_Number n; |
| 363 | if (LJ_LIKELY(tvisnum(o))) | 363 | if (LJ_LIKELY(tvisnum(o))) |
| 364 | n = numV(o); | 364 | n = numV(o); |
| 365 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 365 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
| 366 | n = numV(&tmp); | 366 | n = numV(&tmp); |
| 367 | else | 367 | else |
| 368 | lj_err_argt(L, idx, LUA_TNUMBER); | 368 | lj_err_argt(L, idx, LUA_TNUMBER); |
| @@ -382,7 +382,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) | |||
| 382 | n = numV(o); | 382 | n = numV(o); |
| 383 | else if (tvisnil(o)) | 383 | else if (tvisnil(o)) |
| 384 | return def; | 384 | return def; |
| 385 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 385 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
| 386 | n = numV(&tmp); | 386 | n = numV(&tmp); |
| 387 | else | 387 | else |
| 388 | lj_err_argt(L, idx, LUA_TNUMBER); | 388 | lj_err_argt(L, idx, LUA_TNUMBER); |
| @@ -753,7 +753,7 @@ LUA_API int lua_getmetatable(lua_State *L, int idx) | |||
| 753 | else if (tvisudata(o)) | 753 | else if (tvisudata(o)) |
| 754 | mt = tabref(udataV(o)->metatable); | 754 | mt = tabref(udataV(o)->metatable); |
| 755 | else | 755 | else |
| 756 | mt = tabref(G(L)->basemt[itypemap(o)]); | 756 | mt = tabref(basemt_obj(G(L), o)); |
| 757 | if (mt == NULL) | 757 | if (mt == NULL) |
| 758 | return 0; | 758 | return 0; |
| 759 | settabV(L, L->top, mt); | 759 | settabV(L, L->top, mt); |
| @@ -941,12 +941,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) | |||
| 941 | if (lj_trace_flushall(L)) | 941 | if (lj_trace_flushall(L)) |
| 942 | lj_err_caller(L, LJ_ERR_NOGCMM); | 942 | lj_err_caller(L, LJ_ERR_NOGCMM); |
| 943 | if (tvisbool(o)) { | 943 | if (tvisbool(o)) { |
| 944 | /* NOBARRIER: g->basemt[] is a GC root. */ | 944 | /* NOBARRIER: basemt is a GC root. */ |
| 945 | setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt)); | 945 | setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt)); |
| 946 | setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt)); | 946 | setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt)); |
| 947 | } else { | 947 | } else { |
| 948 | /* NOBARRIER: g->basemt[] is a GC root. */ | 948 | /* NOBARRIER: basemt is a GC root. */ |
| 949 | setgcref(g->basemt[itypemap(o)], obj2gco(mt)); | 949 | setgcref(basemt_obj(g, o), obj2gco(mt)); |
| 950 | } | 950 | } |
| 951 | } | 951 | } |
| 952 | L->top--; | 952 | L->top--; |
diff --git a/src/lj_asm.c b/src/lj_asm.c index a4d0c606..f26a40a5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
| 14 | #include "lj_str.h" | 14 | #include "lj_str.h" |
| 15 | #include "lj_tab.h" | 15 | #include "lj_tab.h" |
| 16 | #include "lj_frame.h" | ||
| 16 | #include "lj_ir.h" | 17 | #include "lj_ir.h" |
| 17 | #include "lj_jit.h" | 18 | #include "lj_jit.h" |
| 18 | #include "lj_iropt.h" | 19 | #include "lj_iropt.h" |
| @@ -81,6 +82,10 @@ typedef struct ASMState { | |||
| 81 | 82 | ||
| 82 | #define IR(ref) (&as->ir[(ref)]) | 83 | #define IR(ref) (&as->ir[(ref)]) |
| 83 | 84 | ||
| 85 | #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ | ||
| 86 | #define ASMREF_TMP2 REF_FALSE /* Temp. register. */ | ||
| 87 | #define ASMREF_L REF_NIL /* Stores register for L. */ | ||
| 88 | |||
| 84 | /* Check for variant to invariant references. */ | 89 | /* Check for variant to invariant references. */ |
| 85 | #define iscrossref(as, ref) ((ref) < as->sectref) | 90 | #define iscrossref(as, ref) ((ref) < as->sectref) |
| 86 | 91 | ||
| @@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as) | |||
| 115 | { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ | 120 | { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ |
| 116 | if (rex != 0x40) *--(p) = rex; } | 121 | if (rex != 0x40) *--(p) = rex; } |
| 117 | #define FORCE_REX 0x200 | 122 | #define FORCE_REX 0x200 |
| 123 | #define REX_64 (FORCE_REX|0x080000) | ||
| 118 | #else | 124 | #else |
| 119 | #define REXRB(p, rr, rb) ((void)0) | 125 | #define REXRB(p, rr, rb) ((void)0) |
| 120 | #define FORCE_REX 0 | 126 | #define FORCE_REX 0 |
| 127 | #define REX_64 0 | ||
| 121 | #endif | 128 | #endif |
| 122 | 129 | ||
| 123 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) | 130 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) |
| @@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, | |||
| 144 | { | 151 | { |
| 145 | uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); | 152 | uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); |
| 146 | if (rex != 0x40) { | 153 | if (rex != 0x40) { |
| 154 | rex |= (rr >> 16); | ||
| 147 | if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); } | 155 | if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); } |
| 148 | *--p = (MCode)rex; | 156 | *--p = (MCode)rex; |
| 149 | } | 157 | } |
| @@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target) | |||
| 451 | 459 | ||
| 452 | #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) | 460 | #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) |
| 453 | 461 | ||
| 454 | /* Argument setup for C calls. Up to 3 args need no stack adjustment. */ | ||
| 455 | #define emit_setargr(as, narg, r) \ | ||
| 456 | emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4); | ||
| 457 | #define emit_setargi(as, narg, imm) \ | ||
| 458 | emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm)) | ||
| 459 | #define emit_setargp(as, narg, ptr) \ | ||
| 460 | emit_setargi(as, (narg), ptr2addr((ptr))) | ||
| 461 | |||
| 462 | /* -- Register allocator debugging ---------------------------------------- */ | 462 | /* -- Register allocator debugging ---------------------------------------- */ |
| 463 | 463 | ||
| 464 | /* #define LUAJIT_DEBUG_RA */ | 464 | /* #define LUAJIT_DEBUG_RA */ |
| @@ -578,10 +578,6 @@ static void ra_setup(ASMState *as) | |||
| 578 | memset(as->phireg, 0, sizeof(as->phireg)); | 578 | memset(as->phireg, 0, sizeof(as->phireg)); |
| 579 | memset(as->cost, 0, sizeof(as->cost)); | 579 | memset(as->cost, 0, sizeof(as->cost)); |
| 580 | as->cost[RID_ESP] = REGCOST(~0u, 0u); | 580 | as->cost[RID_ESP] = REGCOST(~0u, 0u); |
| 581 | |||
| 582 | /* Start slots for spill slot allocation. */ | ||
| 583 | as->evenspill = (SPS_FIRST+1)&~1; | ||
| 584 | as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0; | ||
| 585 | } | 581 | } |
| 586 | 582 | ||
| 587 | /* Rematerialize constants. */ | 583 | /* Rematerialize constants. */ |
| @@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir) | |||
| 598 | } else if (ir->o == IR_BASE) { | 594 | } else if (ir->o == IR_BASE) { |
| 599 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ | 595 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ |
| 600 | emit_getgl(as, r, jit_base); | 596 | emit_getgl(as, r, jit_base); |
| 597 | } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */ | ||
| 598 | lua_assert(irt_isnil(ir->t)); | ||
| 599 | emit_getgl(as, r, jit_L); | ||
| 601 | } else { | 600 | } else { |
| 602 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 601 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || |
| 603 | ir->o == IR_KPTR || ir->o == IR_KNULL); | 602 | ir->o == IR_KPTR || ir->o == IR_KNULL); |
| @@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir) | |||
| 629 | return sps_scale(slot); | 628 | return sps_scale(slot); |
| 630 | } | 629 | } |
| 631 | 630 | ||
| 631 | /* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */ | ||
| 632 | static Reg ra_releasetmp(ASMState *as, IRRef ref) | ||
| 633 | { | ||
| 634 | IRIns *ir = IR(ref); | ||
| 635 | Reg r = ir->r; | ||
| 636 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | ||
| 637 | ra_free(as, r); | ||
| 638 | ra_modified(as, r); | ||
| 639 | ir->r = RID_INIT; | ||
| 640 | return r; | ||
| 641 | } | ||
| 642 | |||
| 632 | /* Restore a register (marked as free). Rematerialize or force a spill. */ | 643 | /* Restore a register (marked as free). Rematerialize or force a spill. */ |
| 633 | static Reg ra_restore(ASMState *as, IRRef ref) | 644 | static Reg ra_restore(ASMState *as, IRRef ref) |
| 634 | { | 645 | { |
| @@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc) | |||
| 1008 | 1019 | ||
| 1009 | /* Arch-specific field offsets. */ | 1020 | /* Arch-specific field offsets. */ |
| 1010 | static const uint8_t field_ofs[IRFL__MAX+1] = { | 1021 | static const uint8_t field_ofs[IRFL__MAX+1] = { |
| 1011 | #define FLOFS(name, type, field) (uint8_t)offsetof(type, field), | 1022 | #define FLOFS(name, ofs) (uint8_t)(ofs), |
| 1012 | IRFLDEF(FLOFS) | 1023 | IRFLDEF(FLOFS) |
| 1013 | #undef FLOFS | 1024 | #undef FLOFS |
| 1014 | 0 | 1025 | 0 |
| @@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) | |||
| 1129 | { | 1140 | { |
| 1130 | IRIns *irr; | 1141 | IRIns *irr; |
| 1131 | lua_assert(ir->o == IR_STRREF); | 1142 | lua_assert(ir->o == IR_STRREF); |
| 1132 | as->mrm.idx = as->mrm.base = RID_NONE; | 1143 | as->mrm.base = as->mrm.idx = RID_NONE; |
| 1133 | as->mrm.scale = XM_SCALE1; | 1144 | as->mrm.scale = XM_SCALE1; |
| 1134 | as->mrm.ofs = sizeof(GCstr); | 1145 | as->mrm.ofs = sizeof(GCstr); |
| 1135 | if (irref_isk(ir->op1)) { | 1146 | if (irref_isk(ir->op1)) { |
| @@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) | |||
| 1158 | } | 1169 | } |
| 1159 | } | 1170 | } |
| 1160 | 1171 | ||
| 1172 | static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow) | ||
| 1173 | { | ||
| 1174 | if (ir->o == IR_KPTR) { | ||
| 1175 | as->mrm.ofs = ir->i; | ||
| 1176 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
| 1177 | } else { | ||
| 1178 | lua_assert(ir->o == IR_STRREF); | ||
| 1179 | asm_fusestrref(as, ir, allow); | ||
| 1180 | } | ||
| 1181 | } | ||
| 1182 | |||
| 1161 | /* Fuse load into memory operand. */ | 1183 | /* Fuse load into memory operand. */ |
| 1162 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | 1184 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) |
| 1163 | { | 1185 | { |
| @@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
| 1172 | return RID_MRM; | 1194 | return RID_MRM; |
| 1173 | } | 1195 | } |
| 1174 | if (ir->o == IR_KNUM) { | 1196 | if (ir->o == IR_KNUM) { |
| 1197 | RegSet avail = as->freeset & ~as->modset & RSET_FPR; | ||
| 1175 | lua_assert(allow != RSET_EMPTY); | 1198 | lua_assert(allow != RSET_EMPTY); |
| 1176 | if (!(as->freeset & ~as->modset & RSET_FPR)) { | 1199 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ |
| 1177 | as->mrm.ofs = ptr2addr(ir_knum(ir)); | 1200 | as->mrm.ofs = ptr2addr(ir_knum(ir)); |
| 1178 | as->mrm.base = as->mrm.idx = RID_NONE; | 1201 | as->mrm.base = as->mrm.idx = RID_NONE; |
| 1179 | return RID_MRM; | 1202 | return RID_MRM; |
| @@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
| 1188 | return RID_MRM; | 1211 | return RID_MRM; |
| 1189 | } | 1212 | } |
| 1190 | } else if (ir->o == IR_FLOAD) { | 1213 | } else if (ir->o == IR_FLOAD) { |
| 1191 | /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */ | 1214 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ |
| 1192 | if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) { | 1215 | if ((irt_isint(ir->t) || irt_isaddr(ir->t)) && |
| 1216 | noconflict(as, ref, IR_FSTORE)) { | ||
| 1193 | asm_fusefref(as, ir, xallow); | 1217 | asm_fusefref(as, ir, xallow); |
| 1194 | return RID_MRM; | 1218 | return RID_MRM; |
| 1195 | } | 1219 | } |
| @@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
| 1199 | return RID_MRM; | 1223 | return RID_MRM; |
| 1200 | } | 1224 | } |
| 1201 | } else if (ir->o == IR_XLOAD) { | 1225 | } else if (ir->o == IR_XLOAD) { |
| 1202 | /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). | 1226 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). |
| 1203 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). | 1227 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). |
| 1204 | */ | 1228 | */ |
| 1205 | if (irt_isint(ir->t)) { | 1229 | if (irt_isint(ir->t) || irt_isaddr(ir->t)) { |
| 1206 | asm_fusestrref(as, IR(ir->op1), xallow); | 1230 | asm_fusexref(as, IR(ir->op1), xallow); |
| 1207 | return RID_MRM; | 1231 | return RID_MRM; |
| 1208 | } | 1232 | } |
| 1209 | } | 1233 | } |
| @@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
| 1214 | return ra_allocref(as, ref, allow); | 1238 | return ra_allocref(as, ref, allow); |
| 1215 | } | 1239 | } |
| 1216 | 1240 | ||
| 1241 | /* -- Calls --------------------------------------------------------------- */ | ||
| 1242 | |||
| 1243 | /* Generate a call to a C function. */ | ||
| 1244 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | ||
| 1245 | { | ||
| 1246 | RegSet allow = RSET_ALL; | ||
| 1247 | uint32_t n, nargs = CCI_NARGS(ci); | ||
| 1248 | int32_t ofs = 0; | ||
| 1249 | lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */ | ||
| 1250 | emit_call(as, ci->func); | ||
| 1251 | for (n = 0; n < nargs; n++) { /* Setup args. */ | ||
| 1252 | #if LJ_64 | ||
| 1253 | #error "NYI: 64 bit mode call argument setup" | ||
| 1254 | #endif | ||
| 1255 | IRIns *ir = IR(args[n]); | ||
| 1256 | if (irt_isnum(ir->t)) { | ||
| 1257 | if ((ofs & 4) && irref_isk(args[n])) { | ||
| 1258 | /* Split stores for unaligned FP consts. */ | ||
| 1259 | emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); | ||
| 1260 | emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi); | ||
| 1261 | } else { | ||
| 1262 | Reg r; | ||
| 1263 | if ((allow & RSET_FPR) == RSET_EMPTY) | ||
| 1264 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | ||
| 1265 | r = ra_alloc1(as, args[n], allow & RSET_FPR); | ||
| 1266 | allow &= ~RID2RSET(r); | ||
| 1267 | emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs); | ||
| 1268 | } | ||
| 1269 | ofs += 8; | ||
| 1270 | } else { | ||
| 1271 | if ((ci->flags & CCI_FASTCALL) && n < 2) { | ||
| 1272 | Reg r = n == 0 ? RID_ECX : RID_EDX; | ||
| 1273 | if (args[n] < ASMREF_TMP1) { | ||
| 1274 | emit_loadi(as, r, ir->i); | ||
| 1275 | } else { | ||
| 1276 | lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ | ||
| 1277 | allow &= ~RID2RSET(r); | ||
| 1278 | if (ra_hasreg(ir->r)) | ||
| 1279 | emit_movrr(as, r, ir->r); | ||
| 1280 | else | ||
| 1281 | ra_allocref(as, args[n], RID2RSET(r)); | ||
| 1282 | } | ||
| 1283 | } else { | ||
| 1284 | if (args[n] < ASMREF_TMP1) { | ||
| 1285 | emit_movmroi(as, RID_ESP, ofs, ir->i); | ||
| 1286 | } else { | ||
| 1287 | Reg r; | ||
| 1288 | if ((allow & RSET_GPR) == RSET_EMPTY) | ||
| 1289 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | ||
| 1290 | r = ra_alloc1(as, args[n], allow & RSET_GPR); | ||
| 1291 | allow &= ~RID2RSET(r); | ||
| 1292 | emit_movtomro(as, r, RID_ESP, ofs); | ||
| 1293 | } | ||
| 1294 | ofs += 4; | ||
| 1295 | } | ||
| 1296 | } | ||
| 1297 | } | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | /* Setup result reg/sp for call. Evict scratch regs. */ | ||
| 1301 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | ||
| 1302 | { | ||
| 1303 | RegSet drop = RSET_SCRATCH; | ||
| 1304 | if ((ci->flags & CCI_NOFPRCLOBBER)) | ||
| 1305 | drop &= ~RSET_FPR; | ||
| 1306 | if (ra_hasreg(ir->r)) | ||
| 1307 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
| 1308 | ra_evictset(as, drop); /* Evictions must be performed first. */ | ||
| 1309 | if (ra_used(ir)) { | ||
| 1310 | if (irt_isnum(ir->t)) { | ||
| 1311 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ | ||
| 1312 | #if LJ_64 | ||
| 1313 | if ((ci->flags & CCI_CASTU64)) { | ||
| 1314 | Reg dest = ir->r; | ||
| 1315 | if (ra_hasreg(dest)) { | ||
| 1316 | ra_free(as, dest); | ||
| 1317 | ra_modified(as, dest); | ||
| 1318 | emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */ | ||
| 1319 | } else { | ||
| 1320 | emit_movrmro(as, RID_RET, RID_ESP, ofs); | ||
| 1321 | } | ||
| 1322 | } else { | ||
| 1323 | ra_destreg(as, ir, RID_FPRET); | ||
| 1324 | } | ||
| 1325 | #else | ||
| 1326 | /* Number result is in x87 st0 for x86 calling convention. */ | ||
| 1327 | Reg dest = ir->r; | ||
| 1328 | if (ra_hasreg(dest)) { | ||
| 1329 | ra_free(as, dest); | ||
| 1330 | ra_modified(as, dest); | ||
| 1331 | emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); | ||
| 1332 | } | ||
| 1333 | if ((ci->flags & CCI_CASTU64)) { | ||
| 1334 | emit_movtomro(as, RID_RET, RID_ESP, ofs); | ||
| 1335 | emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4); | ||
| 1336 | } else { | ||
| 1337 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
| 1338 | } | ||
| 1339 | #endif | ||
| 1340 | } else { | ||
| 1341 | lua_assert(!irt_ispri(ir->t)); | ||
| 1342 | ra_destreg(as, ir, RID_RET); | ||
| 1343 | } | ||
| 1344 | } | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | /* Collect arguments from CALL* and ARG instructions. */ | ||
| 1348 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
| 1349 | const CCallInfo *ci, IRRef *args) | ||
| 1350 | { | ||
| 1351 | uint32_t n = CCI_NARGS(ci); | ||
| 1352 | lua_assert(n <= CCI_NARGS_MAX); | ||
| 1353 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
| 1354 | while (n-- > 1) { | ||
| 1355 | ir = IR(ir->op1); | ||
| 1356 | lua_assert(ir->o == IR_CARG); | ||
| 1357 | args[n] = ir->op2; | ||
| 1358 | } | ||
| 1359 | args[0] = ir->op1; | ||
| 1360 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | static void asm_call(ASMState *as, IRIns *ir) | ||
| 1364 | { | ||
| 1365 | IRRef args[CCI_NARGS_MAX]; | ||
| 1366 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
| 1367 | asm_collectargs(as, ir, ci, args); | ||
| 1368 | asm_setupresult(as, ir, ci); | ||
| 1369 | asm_gencall(as, ci, args); | ||
| 1370 | } | ||
| 1371 | |||
| 1217 | /* -- Type conversions ---------------------------------------------------- */ | 1372 | /* -- Type conversions ---------------------------------------------------- */ |
| 1218 | 1373 | ||
| 1219 | static void asm_tonum(ASMState *as, IRIns *ir) | 1374 | static void asm_tonum(ASMState *as, IRIns *ir) |
| @@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
| 1260 | 1415 | ||
| 1261 | static void asm_strto(ASMState *as, IRIns *ir) | 1416 | static void asm_strto(ASMState *as, IRIns *ir) |
| 1262 | { | 1417 | { |
| 1263 | Reg str; | ||
| 1264 | int32_t ofs; | ||
| 1265 | RegSet drop = RSET_SCRATCH; | ||
| 1266 | /* Force a spill slot for the destination register (if any). */ | 1418 | /* Force a spill slot for the destination register (if any). */ |
| 1419 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum]; | ||
| 1420 | IRRef args[2]; | ||
| 1421 | RegSet drop = RSET_SCRATCH; | ||
| 1267 | if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) | 1422 | if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) |
| 1268 | rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ | 1423 | rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ |
| 1269 | ra_evictset(as, drop); | 1424 | ra_evictset(as, drop); |
| 1270 | asm_guardcc(as, CC_E); | 1425 | asm_guardcc(as, CC_E); |
| 1271 | emit_rr(as, XO_TEST, RID_RET, RID_RET); | 1426 | emit_rr(as, XO_TEST, RID_RET, RID_RET); |
| 1272 | /* int lj_str_numconv(const char *s, TValue *n) */ | 1427 | args[0] = ir->op1; |
| 1273 | emit_call(as, lj_str_numconv); | 1428 | args[1] = ASMREF_TMP1; |
| 1274 | ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ | 1429 | asm_gencall(as, ci, args); |
| 1275 | if (ofs == 0) { | 1430 | /* Store the result to the spill slot or slots SPS_TEMP1/2. */ |
| 1276 | emit_setargr(as, 2, RID_ESP); | 1431 | emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), |
| 1277 | } else { | 1432 | RID_ESP, sps_scale(ir->s)); |
| 1278 | emit_setargr(as, 2, RID_RET); | ||
| 1279 | emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs); | ||
| 1280 | } | ||
| 1281 | emit_setargr(as, 1, RID_RET); | ||
| 1282 | str = ra_alloc1(as, ir->op1, RSET_GPR); | ||
| 1283 | emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr)); | ||
| 1284 | } | 1433 | } |
| 1285 | 1434 | ||
| 1286 | static void asm_tostr(ASMState *as, IRIns *ir) | 1435 | static void asm_tostr(ASMState *as, IRIns *ir) |
| 1287 | { | 1436 | { |
| 1288 | IRIns *irl = IR(ir->op1); | 1437 | IRIns *irl = IR(ir->op1); |
| 1289 | ra_destreg(as, ir, RID_RET); | 1438 | IRRef args[2]; |
| 1290 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1439 | args[0] = ASMREF_L; |
| 1291 | as->gcsteps++; | 1440 | as->gcsteps++; |
| 1292 | if (irt_isnum(irl->t)) { | 1441 | if (irt_isnum(irl->t)) { |
| 1293 | /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */ | 1442 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; |
| 1294 | emit_call(as, lj_str_fromnum); | 1443 | args[1] = ASMREF_TMP1; |
| 1295 | emit_setargr(as, 1, RID_RET); | 1444 | asm_setupresult(as, ir, ci); |
| 1296 | emit_getgl(as, RID_RET, jit_L); | 1445 | asm_gencall(as, ci, args); |
| 1297 | emit_setargr(as, 2, RID_RET); | 1446 | emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), |
| 1298 | emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl)); | 1447 | RID_ESP, ra_spill(as, irl)); |
| 1299 | } else { | 1448 | } else { |
| 1300 | /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */ | 1449 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; |
| 1301 | emit_call(as, lj_str_fromint); | 1450 | args[1] = ir->op1; |
| 1302 | emit_setargr(as, 1, RID_RET); | 1451 | asm_setupresult(as, ir, ci); |
| 1303 | emit_getgl(as, RID_RET, jit_L); | 1452 | asm_gencall(as, ci, args); |
| 1304 | emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR)); | ||
| 1305 | } | 1453 | } |
| 1306 | } | 1454 | } |
| 1307 | 1455 | ||
| @@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir) | |||
| 1330 | lua_assert(!irt_isnil(ir->t)); | 1478 | lua_assert(!irt_isnil(ir->t)); |
| 1331 | return irt_type(ir->t)-IRT_FALSE; | 1479 | return irt_type(ir->t)-IRT_FALSE; |
| 1332 | } else { | 1480 | } else { |
| 1333 | lua_assert(irt_isaddr(ir->t)); | 1481 | lua_assert(irt_isgcv(ir->t)); |
| 1334 | lo = u32ptr(ir_kgc(ir)); | 1482 | lo = u32ptr(ir_kgc(ir)); |
| 1335 | hi = lo - 0x04c11db7; | 1483 | hi = lo - 0x04c11db7; |
| 1336 | } | 1484 | } |
| @@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
| 1517 | 1665 | ||
| 1518 | static void asm_newref(ASMState *as, IRIns *ir) | 1666 | static void asm_newref(ASMState *as, IRIns *ir) |
| 1519 | { | 1667 | { |
| 1520 | IRRef keyref = ir->op2; | 1668 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
| 1521 | IRIns *irkey = IR(keyref); | 1669 | IRRef args[3]; |
| 1522 | RegSet allow = RSET_GPR; | 1670 | IRIns *irkey; |
| 1523 | Reg tab, tmp; | 1671 | Reg tmp; |
| 1524 | ra_destreg(as, ir, RID_RET); | 1672 | args[0] = ASMREF_L; |
| 1525 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1673 | args[1] = ir->op1; |
| 1526 | tab = ra_alloc1(as, ir->op1, allow); | 1674 | args[2] = ASMREF_TMP1; |
| 1527 | tmp = ra_scratch(as, rset_clear(allow, tab)); | 1675 | asm_setupresult(as, ir, ci); |
| 1528 | /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */ | 1676 | asm_gencall(as, ci, args); |
| 1529 | emit_call(as, lj_tab_newkey); | 1677 | tmp = ra_releasetmp(as, ASMREF_TMP1); |
| 1530 | emit_setargr(as, 1, tmp); | 1678 | irkey = IR(ir->op2); |
| 1531 | emit_setargr(as, 2, tab); | ||
| 1532 | emit_getgl(as, tmp, jit_L); | ||
| 1533 | if (irt_isnum(irkey->t)) { | 1679 | if (irt_isnum(irkey->t)) { |
| 1534 | /* For numbers use the constant itself or a spill slot as a TValue. */ | 1680 | /* For numbers use the constant itself or a spill slot as a TValue. */ |
| 1535 | if (irref_isk(keyref)) { | 1681 | if (irref_isk(ir->op2)) |
| 1536 | emit_setargp(as, 3, ir_knum(irkey)); | 1682 | emit_loada(as, tmp, ir_knum(irkey)); |
| 1537 | } else { | 1683 | else |
| 1538 | emit_setargr(as, 3, tmp); | ||
| 1539 | emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); | 1684 | emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); |
| 1540 | } | ||
| 1541 | } else { | 1685 | } else { |
| 1542 | /* Otherwise use g->tmptv to hold the TValue. */ | 1686 | /* Otherwise use g->tmptv to hold the TValue. */ |
| 1543 | lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t)); | 1687 | if (!irref_isk(ir->op2)) { |
| 1544 | emit_setargr(as, 3, tmp); | 1688 | Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp)); |
| 1545 | if (!irref_isk(keyref)) { | ||
| 1546 | Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp)); | ||
| 1547 | emit_movtomro(as, src, tmp, 0); | 1689 | emit_movtomro(as, src, tmp, 0); |
| 1548 | } else if (!irt_ispri(irkey->t)) { | 1690 | } else if (!irt_ispri(irkey->t)) { |
| 1549 | emit_movmroi(as, tmp, 0, irkey->i); | 1691 | emit_movmroi(as, tmp, 0, irkey->i); |
| @@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
| 1600 | 1742 | ||
| 1601 | /* -- Loads and stores ---------------------------------------------------- */ | 1743 | /* -- Loads and stores ---------------------------------------------------- */ |
| 1602 | 1744 | ||
| 1603 | static void asm_fload(ASMState *as, IRIns *ir) | 1745 | static void asm_fxload(ASMState *as, IRIns *ir) |
| 1604 | { | 1746 | { |
| 1605 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1747 | Reg dest = ra_dest(as, ir, RSET_GPR); |
| 1606 | x86Op xo; | 1748 | x86Op xo; |
| 1607 | asm_fusefref(as, ir, RSET_GPR); | 1749 | if (ir->o == IR_FLOAD) |
| 1750 | asm_fusefref(as, ir, RSET_GPR); | ||
| 1751 | else | ||
| 1752 | asm_fusexref(as, IR(ir->op1), RSET_GPR); | ||
| 1753 | /* ir->op2 is ignored -- unaligned loads are ok on x86. */ | ||
| 1608 | switch (irt_type(ir->t)) { | 1754 | switch (irt_type(ir->t)) { |
| 1609 | case IRT_I8: xo = XO_MOVSXb; break; | 1755 | case IRT_I8: xo = XO_MOVSXb; break; |
| 1610 | case IRT_U8: xo = XO_MOVZXb; break; | 1756 | case IRT_U8: xo = XO_MOVZXb; break; |
| @@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
| 1731 | } | 1877 | } |
| 1732 | } | 1878 | } |
| 1733 | 1879 | ||
| 1734 | static void asm_xload(ASMState *as, IRIns *ir) | 1880 | /* -- Allocations --------------------------------------------------------- */ |
| 1735 | { | ||
| 1736 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
| 1737 | x86Op xo; | ||
| 1738 | asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */ | ||
| 1739 | /* ir->op2 is ignored -- unaligned loads are ok on x86. */ | ||
| 1740 | switch (irt_type(ir->t)) { | ||
| 1741 | case IRT_I8: xo = XO_MOVSXb; break; | ||
| 1742 | case IRT_U8: xo = XO_MOVZXb; break; | ||
| 1743 | case IRT_I16: xo = XO_MOVSXw; break; | ||
| 1744 | case IRT_U16: xo = XO_MOVZXw; break; | ||
| 1745 | default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break; | ||
| 1746 | } | ||
| 1747 | emit_mrm(as, xo, dest, RID_MRM); | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | /* -- String ops ---------------------------------------------------------- */ | ||
| 1751 | 1881 | ||
| 1752 | static void asm_snew(ASMState *as, IRIns *ir) | 1882 | static void asm_snew(ASMState *as, IRIns *ir) |
| 1753 | { | 1883 | { |
| 1754 | RegSet allow = RSET_GPR; | 1884 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; |
| 1755 | Reg left, right; | 1885 | IRRef args[3]; |
| 1756 | IRIns *irl; | 1886 | args[0] = ASMREF_L; |
| 1757 | ra_destreg(as, ir, RID_RET); | 1887 | args[1] = ir->op1; |
| 1758 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1888 | args[2] = ir->op2; |
| 1759 | irl = IR(ir->op1); | ||
| 1760 | left = irl->r; | ||
| 1761 | right = IR(ir->op2)->r; | ||
| 1762 | if (ra_noreg(left)) { | ||
| 1763 | lua_assert(irl->o == IR_STRREF); | ||
| 1764 | /* Get register only for non-const STRREF. */ | ||
| 1765 | if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) { | ||
| 1766 | if (ra_hasreg(right)) rset_clear(allow, right); | ||
| 1767 | left = ra_allocref(as, ir->op1, allow); | ||
| 1768 | } | ||
| 1769 | } | ||
| 1770 | if (ra_noreg(right) && !irref_isk(ir->op2)) { | ||
| 1771 | if (ra_hasreg(left)) rset_clear(allow, left); | ||
| 1772 | right = ra_allocref(as, ir->op2, allow); | ||
| 1773 | } | ||
| 1774 | /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */ | ||
| 1775 | emit_call(as, lj_str_new); | ||
| 1776 | emit_setargr(as, 1, RID_RET); | ||
| 1777 | emit_getgl(as, RID_RET, jit_L); | ||
| 1778 | if (ra_noreg(left)) /* Use immediate for const STRREF. */ | ||
| 1779 | emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i + | ||
| 1780 | (int32_t)sizeof(GCstr)); | ||
| 1781 | else | ||
| 1782 | emit_setargr(as, 2, left); | ||
| 1783 | if (ra_noreg(right)) | ||
| 1784 | emit_setargi(as, 3, IR(ir->op2)->i); | ||
| 1785 | else | ||
| 1786 | emit_setargr(as, 3, right); | ||
| 1787 | as->gcsteps++; | 1889 | as->gcsteps++; |
| 1890 | asm_setupresult(as, ir, ci); | ||
| 1891 | asm_gencall(as, ci, args); | ||
| 1788 | } | 1892 | } |
| 1789 | 1893 | ||
| 1790 | /* -- Table ops ----------------------------------------------------------- */ | ||
| 1791 | |||
| 1792 | static void asm_tnew(ASMState *as, IRIns *ir) | 1894 | static void asm_tnew(ASMState *as, IRIns *ir) |
| 1793 | { | 1895 | { |
| 1794 | ra_destreg(as, ir, RID_RET); | 1896 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; |
| 1795 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1897 | IRRef args[2]; |
| 1796 | /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */ | 1898 | args[0] = ASMREF_L; |
| 1797 | emit_call(as, lj_tab_new); | 1899 | args[1] = ASMREF_TMP1; |
| 1798 | emit_setargr(as, 1, RID_RET); | ||
| 1799 | emit_setargi(as, 2, ir->op1); | ||
| 1800 | emit_setargi(as, 3, ir->op2); | ||
| 1801 | emit_getgl(as, RID_RET, jit_L); | ||
| 1802 | as->gcsteps++; | 1900 | as->gcsteps++; |
| 1901 | asm_setupresult(as, ir, ci); | ||
| 1902 | asm_gencall(as, ci, args); | ||
| 1903 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24)); | ||
| 1803 | } | 1904 | } |
| 1804 | 1905 | ||
| 1805 | static void asm_tdup(ASMState *as, IRIns *ir) | 1906 | static void asm_tdup(ASMState *as, IRIns *ir) |
| 1806 | { | 1907 | { |
| 1807 | ra_destreg(as, ir, RID_RET); | 1908 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; |
| 1808 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1909 | IRRef args[2]; |
| 1809 | /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */ | 1910 | args[0] = ASMREF_L; |
| 1810 | emit_call(as, lj_tab_dup); | 1911 | args[1] = ir->op1; |
| 1811 | emit_setargr(as, 1, RID_RET); | ||
| 1812 | emit_setargp(as, 2, ir_kgc(IR(ir->op1))); | ||
| 1813 | emit_getgl(as, RID_RET, jit_L); | ||
| 1814 | as->gcsteps++; | 1912 | as->gcsteps++; |
| 1913 | asm_setupresult(as, ir, ci); | ||
| 1914 | asm_gencall(as, ci, args); | ||
| 1815 | } | 1915 | } |
| 1816 | 1916 | ||
| 1817 | static void asm_tlen(ASMState *as, IRIns *ir) | 1917 | /* -- Write barriers ------------------------------------------------------ */ |
| 1818 | { | ||
| 1819 | ra_destreg(as, ir, RID_RET); | ||
| 1820 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
| 1821 | emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */ | ||
| 1822 | emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR)); | ||
| 1823 | } | ||
| 1824 | 1918 | ||
| 1825 | static void asm_tbar(ASMState *as, IRIns *ir) | 1919 | static void asm_tbar(ASMState *as, IRIns *ir) |
| 1826 | { | 1920 | { |
| @@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
| 1839 | 1933 | ||
| 1840 | static void asm_obar(ASMState *as, IRIns *ir) | 1934 | static void asm_obar(ASMState *as, IRIns *ir) |
| 1841 | { | 1935 | { |
| 1842 | RegSet allow = RSET_GPR; | 1936 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; |
| 1843 | Reg obj, val; | 1937 | IRRef args[2]; |
| 1844 | GCobj *valp; | ||
| 1845 | MCLabel l_end; | 1938 | MCLabel l_end; |
| 1846 | int32_t ofs; | 1939 | Reg obj; |
| 1847 | ra_evictset(as, RSET_SCRATCH); | ||
| 1848 | if (irref_isk(ir->op2)) { | ||
| 1849 | valp = ir_kgc(IR(ir->op2)); | ||
| 1850 | val = RID_NONE; | ||
| 1851 | } else { | ||
| 1852 | valp = NULL; | ||
| 1853 | val = ra_alloc1(as, ir->op2, allow); | ||
| 1854 | rset_clear(allow, val); | ||
| 1855 | } | ||
| 1856 | obj = ra_alloc1(as, ir->op1, allow); | ||
| 1857 | l_end = emit_label(as); | ||
| 1858 | /* No need for other object barriers (yet). */ | 1940 | /* No need for other object barriers (yet). */ |
| 1859 | lua_assert(IR(ir->op1)->o == IR_UREFC); | 1941 | lua_assert(IR(ir->op1)->o == IR_UREFC); |
| 1860 | ofs = -(int32_t)offsetof(GCupval, tv); | 1942 | l_end = emit_label(as); |
| 1861 | /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */ | 1943 | args[0] = ASMREF_TMP1; |
| 1862 | emit_call(as, lj_gc_barrieruv); | 1944 | args[1] = ir->op1; |
| 1863 | if (ofs == 0) { | 1945 | asm_gencall(as, ci, args); |
| 1864 | emit_setargr(as, 2, obj); | 1946 | emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J)); |
| 1865 | } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) { | 1947 | obj = IR(ir->op1)->r; |
| 1866 | emit_setargr(as, 2, obj); | ||
| 1867 | emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs); | ||
| 1868 | } else { | ||
| 1869 | emit_setargr(as, 2, RID_RET); | ||
| 1870 | emit_rmro(as, XO_LEA, RID_RET, obj, ofs); | ||
| 1871 | } | ||
| 1872 | emit_setargp(as, 1, J2G(as->J)); | ||
| 1873 | if (valp) | ||
| 1874 | emit_setargp(as, 3, valp); | ||
| 1875 | else | ||
| 1876 | emit_setargr(as, 3, val); | ||
| 1877 | emit_sjcc(as, CC_Z, l_end); | 1948 | emit_sjcc(as, CC_Z, l_end); |
| 1878 | emit_i8(as, LJ_GC_WHITES); | 1949 | emit_i8(as, LJ_GC_WHITES); |
| 1879 | if (valp) | 1950 | if (irref_isk(ir->op2)) { |
| 1880 | emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked); | 1951 | GCobj *vp = ir_kgc(IR(ir->op2)); |
| 1881 | else | 1952 | emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked); |
| 1953 | } else { | ||
| 1954 | Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj)); | ||
| 1882 | emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); | 1955 | emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); |
| 1956 | } | ||
| 1883 | emit_sjcc(as, CC_Z, l_end); | 1957 | emit_sjcc(as, CC_Z, l_end); |
| 1884 | emit_i8(as, LJ_GC_BLACK); | 1958 | emit_i8(as, LJ_GC_BLACK); |
| 1885 | emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, | 1959 | emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, |
| 1886 | ofs + (int32_t)offsetof(GChead, marked)); | 1960 | (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); |
| 1887 | } | 1961 | } |
| 1888 | 1962 | ||
| 1889 | /* -- FP/int arithmetic and logic operations ------------------------------ */ | 1963 | /* -- FP/int arithmetic and logic operations ------------------------------ */ |
| @@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
| 2260 | } | 2334 | } |
| 2261 | } | 2335 | } |
| 2262 | emit_mrm(as, XO_UCOMISD, left, right); | 2336 | emit_mrm(as, XO_UCOMISD, left, right); |
| 2263 | } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) { | 2337 | } else { |
| 2264 | IRRef lref = ir->op1, rref = ir->op2; | 2338 | IRRef lref = ir->op1, rref = ir->op2; |
| 2265 | IROp leftop = (IROp)(IR(lref)->o); | 2339 | IROp leftop = (IROp)(IR(lref)->o); |
| 2266 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | 2340 | lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E)); |
| 2267 | /* Swap constants (only for ABC) and fusable loads to the right. */ | 2341 | /* Swap constants (only for ABC) and fusable loads to the right. */ |
| 2268 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { | 2342 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { |
| 2269 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ | 2343 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ |
| @@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
| 2294 | } else { | 2368 | } else { |
| 2295 | Reg left; | 2369 | Reg left; |
| 2296 | if (opisfusableload((IROp)irl->o) && | 2370 | if (opisfusableload((IROp)irl->o) && |
| 2297 | ((irt_isi8(irl->t) && checki8(imm)) || | 2371 | ((irt_isu8(irl->t) && checku8(imm)) || |
| 2298 | (irt_isu8(irl->t) && checku8(imm)))) { | 2372 | ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) || |
| 2299 | /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8 | 2373 | (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) { |
| 2300 | ** loads are handled here. The IRT_I16/IRT_U16 loads should never be | 2374 | /* Only the IRT_INT case is fused by asm_fuseload. |
| 2301 | ** fused, since cmp word [mem], imm16 has a length-changing prefix. | 2375 | ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads |
| 2376 | ** are handled here. | ||
| 2377 | ** Note that cmp word [mem], imm16 should not be generated, | ||
| 2378 | ** since it has a length-changing prefix. Compares of a word | ||
| 2379 | ** against a sign-extended imm8 are ok, however. | ||
| 2302 | */ | 2380 | */ |
| 2303 | IRType1 origt = irl->t; /* Temporarily flip types. */ | 2381 | IRType1 origt = irl->t; /* Temporarily flip types. */ |
| 2304 | irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; | 2382 | irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; |
| @@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
| 2307 | if (left == RID_MRM) { /* Fusion succeeded? */ | 2385 | if (left == RID_MRM) { /* Fusion succeeded? */ |
| 2308 | asm_guardcc(as, cc); | 2386 | asm_guardcc(as, cc); |
| 2309 | emit_i8(as, imm); | 2387 | emit_i8(as, imm); |
| 2310 | emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM); | 2388 | emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ? |
| 2389 | XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM); | ||
| 2311 | return; | 2390 | return; |
| 2312 | } /* Otherwise handle register case as usual. */ | 2391 | } /* Otherwise handle register case as usual. */ |
| 2313 | } else { | 2392 | } else { |
| @@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
| 2337 | asm_guardcc(as, cc); | 2416 | asm_guardcc(as, cc); |
| 2338 | emit_mrm(as, XO_CMP, left, right); | 2417 | emit_mrm(as, XO_CMP, left, right); |
| 2339 | } | 2418 | } |
| 2340 | } else { /* Handle ordered string compares. */ | ||
| 2341 | RegSet allow = RSET_GPR; | ||
| 2342 | /* This assumes lj_str_cmp never uses any SSE registers. */ | ||
| 2343 | ra_evictset(as, (RSET_SCRATCH & RSET_GPR)); | ||
| 2344 | asm_guardcc(as, cc); | ||
| 2345 | emit_rr(as, XO_TEST, RID_RET, RID_RET); | ||
| 2346 | emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */ | ||
| 2347 | if (irref_isk(ir->op1)) { | ||
| 2348 | emit_setargi(as, 1, IR(ir->op1)->i); | ||
| 2349 | } else { | ||
| 2350 | Reg left = ra_alloc1(as, ir->op1, allow); | ||
| 2351 | rset_clear(allow, left); | ||
| 2352 | emit_setargr(as, 1, left); | ||
| 2353 | } | ||
| 2354 | if (irref_isk(ir->op2)) { | ||
| 2355 | emit_setargi(as, 2, IR(ir->op2)->i); | ||
| 2356 | } else { | ||
| 2357 | Reg right = ra_alloc1(as, ir->op2, allow); | ||
| 2358 | emit_setargr(as, 2, right); | ||
| 2359 | } | ||
| 2360 | } | 2419 | } |
| 2361 | } | 2420 | } |
| 2362 | 2421 | ||
| @@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
| 2366 | /* -- GC handling --------------------------------------------------------- */ | 2425 | /* -- GC handling --------------------------------------------------------- */ |
| 2367 | 2426 | ||
| 2368 | /* Sync all live GC values to Lua stack slots. */ | 2427 | /* Sync all live GC values to Lua stack slots. */ |
| 2369 | static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) | 2428 | static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) |
| 2370 | { | 2429 | { |
| 2430 | /* Some care must be taken when allocating registers here, since this is | ||
| 2431 | ** not part of the fast path. All scratch registers are evicted in the | ||
| 2432 | ** fast path, so it's easiest to force allocation from scratch registers | ||
| 2433 | ** only. This avoids register allocation state unification. | ||
| 2434 | */ | ||
| 2435 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); | ||
| 2371 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; | 2436 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; |
| 2372 | BCReg s, nslots = snap->nslots; | 2437 | BCReg s, nslots = snap->nslots; |
| 2373 | for (s = 0; s < nslots; s++) { | 2438 | for (s = 0; s < nslots; s++) { |
| @@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) | |||
| 2392 | /* Check GC threshold and do one or more GC steps. */ | 2457 | /* Check GC threshold and do one or more GC steps. */ |
| 2393 | static void asm_gc_check(ASMState *as, SnapShot *snap) | 2458 | static void asm_gc_check(ASMState *as, SnapShot *snap) |
| 2394 | { | 2459 | { |
| 2460 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; | ||
| 2461 | IRRef args[2]; | ||
| 2395 | MCLabel l_end; | 2462 | MCLabel l_end; |
| 2396 | const BCIns *pc; | 2463 | Reg base, lstate, tmp; |
| 2397 | Reg tmp, base; | ||
| 2398 | RegSet drop = RSET_SCRATCH; | 2464 | RegSet drop = RSET_SCRATCH; |
| 2399 | /* Must evict BASE because the stack may be reallocated by the GC. */ | 2465 | if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */ |
| 2400 | if (ra_hasreg(IR(REF_BASE)->r)) | 2466 | drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */ |
| 2401 | drop |= RID2RSET(IR(REF_BASE)->r); | ||
| 2402 | ra_evictset(as, drop); | 2467 | ra_evictset(as, drop); |
| 2403 | base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET)); | ||
| 2404 | l_end = emit_label(as); | 2468 | l_end = emit_label(as); |
| 2405 | /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */ | 2469 | args[0] = ASMREF_L; |
| 2406 | emit_call(as, lj_gc_step_jit); | 2470 | args[1] = ASMREF_TMP1; |
| 2407 | emit_movtomro(as, base, RID_RET, offsetof(lua_State, base)); | 2471 | asm_gencall(as, ci, args); |
| 2408 | emit_setargr(as, 1, RID_RET); | 2472 | tmp = ra_releasetmp(as, ASMREF_TMP1); |
| 2409 | emit_setargi(as, 3, (int32_t)as->gcsteps); | 2473 | emit_loadi(as, tmp, (int32_t)as->gcsteps); |
| 2410 | emit_getgl(as, RID_RET, jit_L); | 2474 | /* We don't know spadj yet, so get the C frame from L->cframe. */ |
| 2411 | pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots]; | 2475 | emit_movmroi(as, tmp, CFRAME_OFS_PC, |
| 2412 | emit_setargp(as, 2, pc); | 2476 | (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); |
| 2413 | asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base)); | 2477 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); |
| 2414 | if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */ | 2478 | lstate = IR(ASMREF_L)->r; |
| 2415 | ra_restore(as, REF_BASE); /* Better do it inside the slow path. */ | 2479 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); |
| 2480 | /* It's ok if lstate is already in a non-scratch reg. But all allocations | ||
| 2481 | ** in the non-fast path must use a scratch reg. See comment above. | ||
| 2482 | */ | ||
| 2483 | base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate)); | ||
| 2484 | emit_movtomro(as, base, lstate, offsetof(lua_State, base)); | ||
| 2485 | asm_gc_sync(as, snap, base); | ||
| 2486 | /* BASE/L get restored anyway, better do it inside the slow path. */ | ||
| 2487 | if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE); | ||
| 2488 | if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r)) | ||
| 2489 | ra_restore(as, ASMREF_L); | ||
| 2416 | /* Jump around GC step if GC total < GC threshold. */ | 2490 | /* Jump around GC step if GC total < GC threshold. */ |
| 2417 | tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); | 2491 | tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); |
| 2418 | emit_sjcc(as, CC_B, l_end); | 2492 | emit_sjcc(as, CC_B, l_end); |
| @@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as) | |||
| 2666 | { | 2740 | { |
| 2667 | int32_t spadj; | 2741 | int32_t spadj; |
| 2668 | emit_setgli(as, vmstate, (int32_t)as->J->curtrace); | 2742 | emit_setgli(as, vmstate, (int32_t)as->J->curtrace); |
| 2669 | spadj = sps_adjust(as); | 2743 | spadj = sps_adjust(as->evenspill); |
| 2670 | as->T->spadjust = (uint16_t)spadj; | 2744 | as->T->spadjust = (uint16_t)spadj; |
| 2671 | emit_addptr(as, RID_ESP, -spadj); | 2745 | emit_addptr(as, RID_ESP, -spadj); |
| 2672 | } | 2746 | } |
| @@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as) | |||
| 2676 | { | 2750 | { |
| 2677 | IRIns *ir = IR(REF_BASE); | 2751 | IRIns *ir = IR(REF_BASE); |
| 2678 | Reg r = ir->r; | 2752 | Reg r = ir->r; |
| 2679 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 2753 | lua_assert(!ra_hasspill(ir->s)); |
| 2680 | ra_free(as, r); | 2754 | if (ra_hasreg(r)) { |
| 2681 | if (r != RID_BASE) { | 2755 | ra_free(as, r); |
| 2682 | ra_scratch(as, RID2RSET(RID_BASE)); | 2756 | if (r != RID_BASE) { |
| 2683 | emit_rr(as, XO_MOV, r, RID_BASE); | 2757 | ra_scratch(as, RID2RSET(RID_BASE)); |
| 2758 | emit_rr(as, XO_MOV, r, RID_BASE); | ||
| 2759 | } | ||
| 2684 | } | 2760 | } |
| 2685 | } | 2761 | } |
| 2686 | 2762 | ||
| @@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as) | |||
| 2749 | } | 2825 | } |
| 2750 | 2826 | ||
| 2751 | /* Calculate stack frame adjustment. */ | 2827 | /* Calculate stack frame adjustment. */ |
| 2752 | spadj = sps_adjust(as); | 2828 | spadj = sps_adjust(as->evenspill); |
| 2753 | spdelta = spadj - (int32_t)as->parent->spadjust; | 2829 | spdelta = spadj - (int32_t)as->parent->spadjust; |
| 2754 | if (spdelta < 0) { /* Don't shrink the stack frame. */ | 2830 | if (spdelta < 0) { /* Don't shrink the stack frame. */ |
| 2755 | spadj = (int32_t)as->parent->spadjust; | 2831 | spadj = (int32_t)as->parent->spadjust; |
| @@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as) | |||
| 2877 | GCfunc *fn = ir_kfunc(IR(ir->op2)); | 2953 | GCfunc *fn = ir_kfunc(IR(ir->op2)); |
| 2878 | if (isluafunc(fn)) { | 2954 | if (isluafunc(fn)) { |
| 2879 | BCReg fs = s + funcproto(fn)->framesize; | 2955 | BCReg fs = s + funcproto(fn)->framesize; |
| 2880 | newbase = s; | ||
| 2881 | if (secondbase == ~(BCReg)0) secondbase = s; | ||
| 2882 | if (fs > topslot) topslot = fs; | 2956 | if (fs > topslot) topslot = fs; |
| 2957 | if (s != 0) { | ||
| 2958 | newbase = s; | ||
| 2959 | if (secondbase == ~(BCReg)0) secondbase = s; | ||
| 2960 | } | ||
| 2883 | } | 2961 | } |
| 2884 | } | 2962 | } |
| 2885 | } | 2963 | } |
| @@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 3063 | 3141 | ||
| 3064 | /* Loads and stores. */ | 3142 | /* Loads and stores. */ |
| 3065 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break; | 3143 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break; |
| 3066 | case IR_FLOAD: asm_fload(as, ir); break; | 3144 | case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; |
| 3067 | case IR_SLOAD: asm_sload(as, ir); break; | 3145 | case IR_SLOAD: asm_sload(as, ir); break; |
| 3068 | case IR_XLOAD: asm_xload(as, ir); break; | ||
| 3069 | 3146 | ||
| 3070 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | 3147 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; |
| 3071 | case IR_FSTORE: asm_fstore(as, ir); break; | 3148 | case IR_FSTORE: asm_fstore(as, ir); break; |
| 3072 | 3149 | ||
| 3073 | /* String ops. */ | 3150 | /* Allocations. */ |
| 3074 | case IR_SNEW: asm_snew(as, ir); break; | 3151 | case IR_SNEW: asm_snew(as, ir); break; |
| 3075 | |||
| 3076 | /* Table ops. */ | ||
| 3077 | case IR_TNEW: asm_tnew(as, ir); break; | 3152 | case IR_TNEW: asm_tnew(as, ir); break; |
| 3078 | case IR_TDUP: asm_tdup(as, ir); break; | 3153 | case IR_TDUP: asm_tdup(as, ir); break; |
| 3079 | case IR_TLEN: asm_tlen(as, ir); break; | 3154 | |
| 3155 | /* Write barriers. */ | ||
| 3080 | case IR_TBAR: asm_tbar(as, ir); break; | 3156 | case IR_TBAR: asm_tbar(as, ir); break; |
| 3081 | case IR_OBAR: asm_obar(as, ir); break; | 3157 | case IR_OBAR: asm_obar(as, ir); break; |
| 3082 | 3158 | ||
| @@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
| 3092 | case IR_TOSTR: asm_tostr(as, ir); break; | 3168 | case IR_TOSTR: asm_tostr(as, ir); break; |
| 3093 | case IR_STRTO: asm_strto(as, ir); break; | 3169 | case IR_STRTO: asm_strto(as, ir); break; |
| 3094 | 3170 | ||
| 3171 | /* Calls. */ | ||
| 3172 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
| 3173 | case IR_CARG: break; | ||
| 3174 | |||
| 3095 | default: | 3175 | default: |
| 3096 | setintV(&as->J->errinfo, ir->o); | 3176 | setintV(&as->J->errinfo, ir->o); |
| 3097 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | 3177 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); |
| @@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
| 3123 | IRRef i, nins; | 3203 | IRRef i, nins; |
| 3124 | int inloop; | 3204 | int inloop; |
| 3125 | 3205 | ||
| 3206 | ra_setup(as); | ||
| 3207 | |||
| 3126 | /* Clear reg/sp for constants. */ | 3208 | /* Clear reg/sp for constants. */ |
| 3127 | for (i = T->nk; i < REF_BIAS; i++) | 3209 | for (i = T->nk; i < REF_BIAS; i++) |
| 3128 | IR(i)->prev = REGSP_INIT; | 3210 | IR(i)->prev = REGSP_INIT; |
| @@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
| 3144 | as->curins = nins; | 3226 | as->curins = nins; |
| 3145 | 3227 | ||
| 3146 | inloop = 0; | 3228 | inloop = 0; |
| 3229 | as->evenspill = SPS_FIRST; | ||
| 3147 | for (i = REF_FIRST; i < nins; i++) { | 3230 | for (i = REF_FIRST; i < nins; i++) { |
| 3148 | IRIns *ir = IR(i); | 3231 | IRIns *ir = IR(i); |
| 3149 | switch (ir->o) { | 3232 | switch (ir->o) { |
| @@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
| 3166 | if (i == as->stopins+1 && ir->op1 == ir->op2) | 3249 | if (i == as->stopins+1 && ir->op1 == ir->op2) |
| 3167 | as->stopins++; | 3250 | as->stopins++; |
| 3168 | break; | 3251 | break; |
| 3252 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { | ||
| 3253 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
| 3254 | /* NYI: not fastcall-aware, but doesn't matter (yet). */ | ||
| 3255 | if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */ | ||
| 3256 | as->evenspill = (int32_t)CCI_NARGS(ci); | ||
| 3257 | #if LJ_64 | ||
| 3258 | ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); | ||
| 3259 | #else | ||
| 3260 | ir->prev = REGSP_HINT(RID_RET); | ||
| 3261 | #endif | ||
| 3262 | if (inloop) | ||
| 3263 | as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ? | ||
| 3264 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; | ||
| 3265 | continue; | ||
| 3266 | } | ||
| 3169 | /* C calls evict all scratch regs and return results in RID_RET. */ | 3267 | /* C calls evict all scratch regs and return results in RID_RET. */ |
| 3170 | case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR: | 3268 | case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR: |
| 3171 | case IR_NEWREF: | 3269 | case IR_NEWREF: |
| 3172 | ir->prev = REGSP_HINT(RID_RET); | 3270 | ir->prev = REGSP_HINT(RID_RET); |
| 3173 | if (inloop) | 3271 | if (inloop) |
| @@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
| 3177 | if (inloop) | 3275 | if (inloop) |
| 3178 | as->modset = RSET_SCRATCH; | 3276 | as->modset = RSET_SCRATCH; |
| 3179 | break; | 3277 | break; |
| 3180 | /* Ordered string compares evict all integer scratch registers. */ | ||
| 3181 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
| 3182 | if (irt_isstr(ir->t) && inloop) | ||
| 3183 | as->modset |= (RSET_SCRATCH & RSET_GPR); | ||
| 3184 | break; | ||
| 3185 | /* Non-constant shift counts need to be in RID_ECX. */ | 3278 | /* Non-constant shift counts need to be in RID_ECX. */ |
| 3186 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 3279 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: |
| 3187 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) | 3280 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) |
| @@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
| 3200 | } | 3293 | } |
| 3201 | ir->prev = REGSP_INIT; | 3294 | ir->prev = REGSP_INIT; |
| 3202 | } | 3295 | } |
| 3296 | if ((as->evenspill & 1)) | ||
| 3297 | as->oddspill = as->evenspill++; | ||
| 3298 | else | ||
| 3299 | as->oddspill = 0; | ||
| 3203 | } | 3300 | } |
| 3204 | 3301 | ||
| 3205 | /* -- Assembler core ------------------------------------------------------ */ | 3302 | /* -- Assembler core ------------------------------------------------------ */ |
| @@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T) | |||
| 3263 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; | 3360 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; |
| 3264 | 3361 | ||
| 3265 | /* Setup register allocation. */ | 3362 | /* Setup register allocation. */ |
| 3266 | ra_setup(as); | ||
| 3267 | asm_setup_regsp(as, T); | 3363 | asm_setup_regsp(as, T); |
| 3268 | 3364 | ||
| 3269 | if (!as->loopref) { | 3365 | if (!as->loopref) { |
diff --git a/src/lj_def.h b/src/lj_def.h index dbfd5bf5..3d6ba417 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
| @@ -88,6 +88,7 @@ typedef unsigned __int32 uintptr_t; | |||
| 88 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) | 88 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) |
| 89 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) | 89 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) |
| 90 | #define checki16(x) ((x) == (int32_t)(int16_t)(x)) | 90 | #define checki16(x) ((x) == (int32_t)(int16_t)(x)) |
| 91 | #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) | ||
| 91 | 92 | ||
| 92 | /* Every half-decent C compiler transforms this into a rotate instruction. */ | 93 | /* Every half-decent C compiler transforms this into a rotate instruction. */ |
| 93 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n)))) | 94 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n)))) |
diff --git a/src/lj_gc.c b/src/lj_gc.c index 0d8a03ec..5c9d2bcb 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c | |||
| @@ -73,13 +73,13 @@ static void gc_mark(global_State *g, GCobj *o) | |||
| 73 | } | 73 | } |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | /* Mark the base metatables. */ | 76 | /* Mark GC roots. */ |
| 77 | static void gc_mark_basemt(global_State *g) | 77 | static void gc_mark_gcroot(global_State *g) |
| 78 | { | 78 | { |
| 79 | int i; | 79 | ptrdiff_t i; |
| 80 | for (i = 0; i < BASEMT_MAX; i++) | 80 | for (i = 0; i < GCROOT__MAX; i++) |
| 81 | if (tabref(g->basemt[i]) != NULL) | 81 | if (gcref(g->gcroot[i]) != NULL) |
| 82 | gc_markobj(g, tabref(g->basemt[i])); | 82 | gc_markobj(g, gcref(g->gcroot[i])); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | /* Start a GC cycle and mark the root set. */ | 85 | /* Start a GC cycle and mark the root set. */ |
| @@ -91,7 +91,7 @@ static void gc_mark_start(global_State *g) | |||
| 91 | gc_markobj(g, mainthread(g)); | 91 | gc_markobj(g, mainthread(g)); |
| 92 | gc_markobj(g, tabref(mainthread(g)->env)); | 92 | gc_markobj(g, tabref(mainthread(g)->env)); |
| 93 | gc_marktv(g, &g->registrytv); | 93 | gc_marktv(g, &g->registrytv); |
| 94 | gc_mark_basemt(g); | 94 | gc_mark_gcroot(g); |
| 95 | g->gc.state = GCSpropagate; | 95 | g->gc.state = GCSpropagate; |
| 96 | } | 96 | } |
| 97 | 97 | ||
| @@ -541,7 +541,7 @@ static void atomic(global_State *g, lua_State *L) | |||
| 541 | lua_assert(!iswhite(obj2gco(mainthread(g)))); | 541 | lua_assert(!iswhite(obj2gco(mainthread(g)))); |
| 542 | gc_markobj(g, L); /* Mark running thread. */ | 542 | gc_markobj(g, L); /* Mark running thread. */ |
| 543 | gc_mark_curtrace(g); /* Mark current trace. */ | 543 | gc_mark_curtrace(g); /* Mark current trace. */ |
| 544 | gc_mark_basemt(g); /* Mark base metatables (again). */ | 544 | gc_mark_gcroot(g); /* Mark GC roots (again). */ |
| 545 | gc_propagate_gray(g); /* Propagate all of the above. */ | 545 | gc_propagate_gray(g); /* Propagate all of the above. */ |
| 546 | 546 | ||
| 547 | setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */ | 547 | setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */ |
| @@ -643,16 +643,15 @@ int lj_gc_step(lua_State *L) | |||
| 643 | } | 643 | } |
| 644 | 644 | ||
| 645 | /* Ditto, but fix the stack top first. */ | 645 | /* Ditto, but fix the stack top first. */ |
| 646 | void lj_gc_step_fixtop(lua_State *L) | 646 | void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) |
| 647 | { | 647 | { |
| 648 | if (curr_funcisL(L)) L->top = curr_topL(L); | 648 | if (curr_funcisL(L)) L->top = curr_topL(L); |
| 649 | lj_gc_step(L); | 649 | lj_gc_step(L); |
| 650 | } | 650 | } |
| 651 | 651 | ||
| 652 | /* Perform multiple GC steps. Called from JIT-compiled code. */ | 652 | /* Perform multiple GC steps. Called from JIT-compiled code. */ |
| 653 | void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) | 653 | void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps) |
| 654 | { | 654 | { |
| 655 | cframe_pc(cframe_raw(L->cframe)) = pc; | ||
| 656 | L->top = curr_topL(L); | 655 | L->top = curr_topL(L); |
| 657 | while (steps-- > 0 && lj_gc_step(L) == 0) | 656 | while (steps-- > 0 && lj_gc_step(L) == 0) |
| 658 | ; | 657 | ; |
| @@ -711,17 +710,16 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) | |||
| 711 | makewhite(g, o); /* Make it white to avoid the following barrier. */ | 710 | makewhite(g, o); /* Make it white to avoid the following barrier. */ |
| 712 | } | 711 | } |
| 713 | 712 | ||
| 714 | /* The reason for duplicating this is that it needs to be visible from ASM. */ | 713 | /* Specialized barrier for closed upvalue. Pass &uv->tv. */ |
| 715 | void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) | 714 | void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv) |
| 716 | { | 715 | { |
| 717 | lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); | 716 | #define TV2MARKED(x) \ |
| 718 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | 717 | (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked))) |
| 719 | lua_assert(o->gch.gct == ~LJ_TUPVAL); | ||
| 720 | /* Preserve invariant during propagation. Otherwise it doesn't matter. */ | ||
| 721 | if (g->gc.state == GCSpropagate) | 718 | if (g->gc.state == GCSpropagate) |
| 722 | gc_mark(g, v); /* Move frontier forward. */ | 719 | gc_mark(g, gcV(tv)); |
| 723 | else | 720 | else |
| 724 | makewhite(g, o); /* Make it white to avoid the following barrier. */ | 721 | TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g); |
| 722 | #undef TV2MARKED | ||
| 725 | } | 723 | } |
| 726 | 724 | ||
| 727 | /* Close upvalue. Also needs a write barrier. */ | 725 | /* Close upvalue. Also needs a write barrier. */ |
diff --git a/src/lj_gc.h b/src/lj_gc.h index 192066d3..0dbb9b82 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h | |||
| @@ -43,8 +43,8 @@ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all); | |||
| 43 | LJ_FUNC void lj_gc_finalizeudata(lua_State *L); | 43 | LJ_FUNC void lj_gc_finalizeudata(lua_State *L); |
| 44 | LJ_FUNC void lj_gc_freeall(global_State *g); | 44 | LJ_FUNC void lj_gc_freeall(global_State *g); |
| 45 | LJ_FUNCA int lj_gc_step(lua_State *L); | 45 | LJ_FUNCA int lj_gc_step(lua_State *L); |
| 46 | LJ_FUNCA void lj_gc_step_fixtop(lua_State *L); | 46 | LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L); |
| 47 | LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps); | 47 | LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps); |
| 48 | LJ_FUNC void lj_gc_fullgc(lua_State *L); | 48 | LJ_FUNC void lj_gc_fullgc(lua_State *L); |
| 49 | 49 | ||
| 50 | /* GC check: drive collector forward if the GC threshold has been reached. */ | 50 | /* GC check: drive collector forward if the GC threshold has been reached. */ |
| @@ -58,7 +58,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L); | |||
| 58 | /* Write barriers. */ | 58 | /* Write barriers. */ |
| 59 | LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); | 59 | LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); |
| 60 | LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); | 60 | LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); |
| 61 | LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v); | 61 | LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv); |
| 62 | LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); | 62 | LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); |
| 63 | LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); | 63 | LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); |
| 64 | 64 | ||
diff --git a/src/lj_ir.c b/src/lj_ir.c index 1efb12f0..cf0b6b55 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
| @@ -6,16 +6,22 @@ | |||
| 6 | #define lj_ir_c | 6 | #define lj_ir_c |
| 7 | #define LUA_CORE | 7 | #define LUA_CORE |
| 8 | 8 | ||
| 9 | /* For pointers to libc/libm functions. */ | ||
| 10 | #include <stdio.h> | ||
| 11 | #include <math.h> | ||
| 12 | |||
| 9 | #include "lj_obj.h" | 13 | #include "lj_obj.h" |
| 10 | 14 | ||
| 11 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
| 12 | 16 | ||
| 13 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
| 14 | #include "lj_str.h" | 18 | #include "lj_str.h" |
| 19 | #include "lj_tab.h" | ||
| 15 | #include "lj_ir.h" | 20 | #include "lj_ir.h" |
| 16 | #include "lj_jit.h" | 21 | #include "lj_jit.h" |
| 17 | #include "lj_iropt.h" | 22 | #include "lj_iropt.h" |
| 18 | #include "lj_trace.h" | 23 | #include "lj_trace.h" |
| 24 | #include "lj_lib.h" | ||
| 19 | 25 | ||
| 20 | /* Some local macros to save typing. Undef'd at the end. */ | 26 | /* Some local macros to save typing. Undef'd at the end. */ |
| 21 | #define IR(ref) (&J->cur.ir[(ref)]) | 27 | #define IR(ref) (&J->cur.ir[(ref)]) |
| @@ -32,6 +38,17 @@ IRDEF(IRMODE) | |||
| 32 | 0 | 38 | 0 |
| 33 | }; | 39 | }; |
| 34 | 40 | ||
| 41 | /* C call info for CALL* instructions. */ | ||
| 42 | LJ_DATADEF const CCallInfo lj_ir_callinfo[] = { | ||
| 43 | #define IRCALLCI(name, nargs, kind, type, flags) \ | ||
| 44 | { (ASMFunction)name, \ | ||
| 45 | (nargs)|(CCI_CALL_##kind)|(IRT_##type<<CCI_OTSHIFT)|(flags) }, | ||
| 46 | IRCALLDEF(IRCALLCI) | ||
| 47 | #undef IRCALLCI | ||
| 48 | { NULL, 0 } | ||
| 49 | }; | ||
| 50 | |||
| 51 | |||
| 35 | /* -- IR emitter ---------------------------------------------------------- */ | 52 | /* -- IR emitter ---------------------------------------------------------- */ |
| 36 | 53 | ||
| 37 | /* Grow IR buffer at the top. */ | 54 | /* Grow IR buffer at the top. */ |
| @@ -92,6 +109,25 @@ TRef LJ_FASTCALL lj_ir_emit(jit_State *J) | |||
| 92 | return TREF(ref, irt_t((ir->t = fins->t))); | 109 | return TREF(ref, irt_t((ir->t = fins->t))); |
| 93 | } | 110 | } |
| 94 | 111 | ||
| 112 | /* Emit call to a C function. */ | ||
| 113 | TRef lj_ir_call(jit_State *J, IRCallID id, ...) | ||
| 114 | { | ||
| 115 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
| 116 | uint32_t n = CCI_NARGS(ci); | ||
| 117 | TRef tr = TREF_NIL; | ||
| 118 | va_list argp; | ||
| 119 | va_start(argp, id); | ||
| 120 | if ((ci->flags & CCI_L)) n--; | ||
| 121 | if (n > 0) | ||
| 122 | tr = va_arg(argp, IRRef); | ||
| 123 | while (n-- > 1) | ||
| 124 | tr = emitir(IRT(IR_CARG, IRT_NIL), tr, va_arg(argp, IRRef)); | ||
| 125 | va_end(argp); | ||
| 126 | if (CCI_OP(ci) == IR_CALLS) | ||
| 127 | J->needsnap = 1; /* Need snapshot after call with side effect. */ | ||
| 128 | return emitir(CCI_OPTYPE(ci), tr, id); | ||
| 129 | } | ||
| 130 | |||
| 95 | /* -- Interning of constants ---------------------------------------------- */ | 131 | /* -- Interning of constants ---------------------------------------------- */ |
| 96 | 132 | ||
| 97 | /* | 133 | /* |
diff --git a/src/lj_ir.h b/src/lj_ir.h index a6973a81..9a7e711d 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
| @@ -8,6 +8,8 @@ | |||
| 8 | 8 | ||
| 9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
| 10 | 10 | ||
| 11 | /* -- IR instructions ----------------------------------------------------- */ | ||
| 12 | |||
| 11 | /* IR instruction definition. Order matters, see below. */ | 13 | /* IR instruction definition. Order matters, see below. */ |
| 12 | #define IRDEF(_) \ | 14 | #define IRDEF(_) \ |
| 13 | /* Miscellaneous ops. */ \ | 15 | /* Miscellaneous ops. */ \ |
| @@ -101,13 +103,12 @@ | |||
| 101 | _(USTORE, S , ref, ref) \ | 103 | _(USTORE, S , ref, ref) \ |
| 102 | _(FSTORE, S , ref, ref) \ | 104 | _(FSTORE, S , ref, ref) \ |
| 103 | \ | 105 | \ |
| 104 | /* String ops. */ \ | 106 | /* Allocations. */ \ |
| 105 | _(SNEW, N , ref, ref) \ | 107 | _(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \ |
| 106 | \ | ||
| 107 | /* Table ops. */ \ | ||
| 108 | _(TNEW, A , lit, lit) \ | 108 | _(TNEW, A , lit, lit) \ |
| 109 | _(TDUP, A , ref, ___) \ | 109 | _(TDUP, A , ref, ___) \ |
| 110 | _(TLEN, L , ref, ___) \ | 110 | \ |
| 111 | /* Write barriers. */ \ | ||
| 111 | _(TBAR, S , ref, ___) \ | 112 | _(TBAR, S , ref, ___) \ |
| 112 | _(OBAR, S , ref, ref) \ | 113 | _(OBAR, S , ref, ref) \ |
| 113 | \ | 114 | \ |
| @@ -118,6 +119,12 @@ | |||
| 118 | _(TOSTR, N , ref, ___) \ | 119 | _(TOSTR, N , ref, ___) \ |
| 119 | _(STRTO, G , ref, ___) \ | 120 | _(STRTO, G , ref, ___) \ |
| 120 | \ | 121 | \ |
| 122 | /* Calls. */ \ | ||
| 123 | _(CALLN, N , ref, lit) \ | ||
| 124 | _(CALLL, L , ref, lit) \ | ||
| 125 | _(CALLS, S , ref, lit) \ | ||
| 126 | _(CARG, N , ref, ref) \ | ||
| 127 | \ | ||
| 121 | /* End of list. */ | 128 | /* End of list. */ |
| 122 | 129 | ||
| 123 | /* IR opcodes (max. 256). */ | 130 | /* IR opcodes (max. 256). */ |
| @@ -144,6 +151,8 @@ LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE); | |||
| 144 | LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); | 151 | LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); |
| 145 | LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); | 152 | LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); |
| 146 | 153 | ||
| 154 | /* -- Named IR literals --------------------------------------------------- */ | ||
| 155 | |||
| 147 | /* FPMATH sub-functions. ORDER FPM. */ | 156 | /* FPMATH sub-functions. ORDER FPM. */ |
| 148 | #define IRFPMDEF(_) \ | 157 | #define IRFPMDEF(_) \ |
| 149 | _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ | 158 | _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ |
| @@ -158,20 +167,22 @@ IRFPMDEF(FPMENUM) | |||
| 158 | IRFPM__MAX | 167 | IRFPM__MAX |
| 159 | } IRFPMathOp; | 168 | } IRFPMathOp; |
| 160 | 169 | ||
| 161 | /* FLOAD field IDs. */ | 170 | /* FLOAD fields. */ |
| 162 | #define IRFLDEF(_) \ | 171 | #define IRFLDEF(_) \ |
| 163 | _(STR_LEN, GCstr, len) \ | 172 | _(STR_LEN, offsetof(GCstr, len)) \ |
| 164 | _(FUNC_ENV, GCfunc, l.env) \ | 173 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ |
| 165 | _(TAB_META, GCtab, metatable) \ | 174 | _(TAB_META, offsetof(GCtab, metatable)) \ |
| 166 | _(TAB_ARRAY, GCtab, array) \ | 175 | _(TAB_ARRAY, offsetof(GCtab, array)) \ |
| 167 | _(TAB_NODE, GCtab, node) \ | 176 | _(TAB_NODE, offsetof(GCtab, node)) \ |
| 168 | _(TAB_ASIZE, GCtab, asize) \ | 177 | _(TAB_ASIZE, offsetof(GCtab, asize)) \ |
| 169 | _(TAB_HMASK, GCtab, hmask) \ | 178 | _(TAB_HMASK, offsetof(GCtab, hmask)) \ |
| 170 | _(TAB_NOMM, GCtab, nomm) \ | 179 | _(TAB_NOMM, offsetof(GCtab, nomm)) \ |
| 171 | _(UDATA_META, GCudata, metatable) | 180 | _(UDATA_META, offsetof(GCudata, metatable)) \ |
| 181 | _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ | ||
| 182 | _(UDATA_FILE, sizeof(GCudata)) | ||
| 172 | 183 | ||
| 173 | typedef enum { | 184 | typedef enum { |
| 174 | #define FLENUM(name, type, field) IRFL_##name, | 185 | #define FLENUM(name, ofs) IRFL_##name, |
| 175 | IRFLDEF(FLENUM) | 186 | IRFLDEF(FLENUM) |
| 176 | #undef FLENUM | 187 | #undef FLENUM |
| 177 | IRFL__MAX | 188 | IRFL__MAX |
| @@ -183,7 +194,8 @@ IRFLDEF(FLENUM) | |||
| 183 | #define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ | 194 | #define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ |
| 184 | 195 | ||
| 185 | /* XLOAD mode, stored in op2. */ | 196 | /* XLOAD mode, stored in op2. */ |
| 186 | #define IRXLOAD_UNALIGNED 1 | 197 | #define IRXLOAD_READONLY 1 /* Load from read-only data. */ |
| 198 | #define IRXLOAD_UNALIGNED 2 /* Unaligned load. */ | ||
| 187 | 199 | ||
| 188 | /* TOINT mode, stored in op2. Ordered by strength of the checks. */ | 200 | /* TOINT mode, stored in op2. Ordered by strength of the checks. */ |
| 189 | #define IRTOINT_CHECK 0 /* Number checked for integerness. */ | 201 | #define IRTOINT_CHECK 0 /* Number checked for integerness. */ |
| @@ -191,6 +203,67 @@ IRFLDEF(FLENUM) | |||
| 191 | #define IRTOINT_ANY 2 /* Any FP number is ok. */ | 203 | #define IRTOINT_ANY 2 /* Any FP number is ok. */ |
| 192 | #define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ | 204 | #define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ |
| 193 | 205 | ||
| 206 | /* C call info for CALL* instructions. */ | ||
| 207 | typedef struct CCallInfo { | ||
| 208 | ASMFunction func; /* Function pointer. */ | ||
| 209 | uint32_t flags; /* Number of arguments and flags. */ | ||
| 210 | } CCallInfo; | ||
| 211 | |||
| 212 | #define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ | ||
| 213 | #define CCI_NARGS_MAX 16 /* Max. # of args. */ | ||
| 214 | |||
| 215 | #define CCI_OTSHIFT 16 | ||
| 216 | #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ | ||
| 217 | #define CCI_OPSHIFT 24 | ||
| 218 | #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ | ||
| 219 | |||
| 220 | #define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) | ||
| 221 | #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) | ||
| 222 | #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) | ||
| 223 | #define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL) | ||
| 224 | #define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL) | ||
| 225 | #define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL) | ||
| 226 | |||
| 227 | /* C call info flags. */ | ||
| 228 | #define CCI_L 0x0100 /* Implicit L arg. */ | ||
| 229 | #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ | ||
| 230 | #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ | ||
| 231 | #define CCI_FASTCALL 0x0800 /* Fastcall convention. */ | ||
| 232 | |||
| 233 | /* Function definitions for CALL* instructions. */ | ||
| 234 | #define IRCALLDEF(_) \ | ||
| 235 | _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ | ||
| 236 | _(lj_str_new, 3, S, STR, CCI_L) \ | ||
| 237 | _(lj_str_tonum, 2, FN, INT, 0) \ | ||
| 238 | _(lj_str_fromint, 2, FN, STR, CCI_L) \ | ||
| 239 | _(lj_str_fromnum, 2, FN, STR, CCI_L) \ | ||
| 240 | _(lj_tab_new1, 2, FS, TAB, CCI_L) \ | ||
| 241 | _(lj_tab_dup, 2, FS, TAB, CCI_L) \ | ||
| 242 | _(lj_tab_newkey, 3, S, PTR, CCI_L) \ | ||
| 243 | _(lj_tab_len, 1, FL, INT, 0) \ | ||
| 244 | _(lj_gc_step_jit, 2, FS, NIL, CCI_L) \ | ||
| 245 | _(lj_gc_barrieruv, 2, FS, NIL, 0) \ | ||
| 246 | _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ | ||
| 247 | _(sinh, 1, N, NUM, 0) \ | ||
| 248 | _(cosh, 1, N, NUM, 0) \ | ||
| 249 | _(tanh, 1, N, NUM, 0) \ | ||
| 250 | _(fputc, 2, S, INT, 0) \ | ||
| 251 | _(fwrite, 4, S, INT, 0) \ | ||
| 252 | _(fflush, 1, S, INT, 0) \ | ||
| 253 | \ | ||
| 254 | /* End of list. */ | ||
| 255 | |||
| 256 | typedef enum { | ||
| 257 | #define IRCALLENUM(name, nargs, kind, type, flags) IRCALL_##name, | ||
| 258 | IRCALLDEF(IRCALLENUM) | ||
| 259 | #undef IRCALLENUM | ||
| 260 | IRCALL__MAX | ||
| 261 | } IRCallID; | ||
| 262 | |||
| 263 | LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; | ||
| 264 | |||
| 265 | /* -- IR operands --------------------------------------------------------- */ | ||
| 266 | |||
| 194 | /* IR operand mode (2 bit). */ | 267 | /* IR operand mode (2 bit). */ |
| 195 | typedef enum { | 268 | typedef enum { |
| 196 | IRMref, /* IR reference. */ | 269 | IRMref, /* IR reference. */ |
| @@ -227,6 +300,8 @@ typedef enum { | |||
| 227 | 300 | ||
| 228 | LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; | 301 | LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; |
| 229 | 302 | ||
| 303 | /* -- IR instruction types ------------------------------------------------ */ | ||
| 304 | |||
| 230 | /* IR result type and flags (8 bit). */ | 305 | /* IR result type and flags (8 bit). */ |
| 231 | typedef enum { | 306 | typedef enum { |
| 232 | /* Map of itypes to non-negative numbers. ORDER LJ_T */ | 307 | /* Map of itypes to non-negative numbers. ORDER LJ_T */ |
| @@ -314,6 +389,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
| 314 | /* Stored combined IR opcode and type. */ | 389 | /* Stored combined IR opcode and type. */ |
| 315 | typedef uint16_t IROpT; | 390 | typedef uint16_t IROpT; |
| 316 | 391 | ||
| 392 | /* -- IR references ------------------------------------------------------- */ | ||
| 393 | |||
| 317 | /* IR references. */ | 394 | /* IR references. */ |
| 318 | typedef uint16_t IRRef1; /* One stored reference. */ | 395 | typedef uint16_t IRRef1; /* One stored reference. */ |
| 319 | typedef uint32_t IRRef2; /* Two stored references. */ | 396 | typedef uint32_t IRRef2; /* Two stored references. */ |
| @@ -382,6 +459,8 @@ typedef uint32_t TRef; | |||
| 382 | #define TREF_FALSE (TREF_PRI(IRT_FALSE)) | 459 | #define TREF_FALSE (TREF_PRI(IRT_FALSE)) |
| 383 | #define TREF_TRUE (TREF_PRI(IRT_TRUE)) | 460 | #define TREF_TRUE (TREF_PRI(IRT_TRUE)) |
| 384 | 461 | ||
| 462 | /* -- IR format ----------------------------------------------------------- */ | ||
| 463 | |||
| 385 | /* IR instruction format (64 bit). | 464 | /* IR instruction format (64 bit). |
| 386 | ** | 465 | ** |
| 387 | ** 16 16 8 8 8 8 | 466 | ** 16 16 8 8 8 8 |
| @@ -425,5 +504,6 @@ typedef union IRIns { | |||
| 425 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) | 504 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) |
| 426 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) | 505 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) |
| 427 | #define ir_knum(ir) (mref((ir)->ptr, cTValue)) | 506 | #define ir_knum(ir) (mref((ir)->ptr, cTValue)) |
| 507 | #define ir_kptr(ir) (mref((ir)->ptr, void)) | ||
| 428 | 508 | ||
| 429 | #endif | 509 | #endif |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 69b0a955..52077ad5 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
| @@ -6,6 +6,8 @@ | |||
| 6 | #ifndef _LJ_IROPT_H | 6 | #ifndef _LJ_IROPT_H |
| 7 | #define _LJ_IROPT_H | 7 | #define _LJ_IROPT_H |
| 8 | 8 | ||
| 9 | #include <stdarg.h> | ||
| 10 | |||
| 9 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
| 10 | #include "lj_jit.h" | 12 | #include "lj_jit.h" |
| 11 | 13 | ||
| @@ -13,6 +15,7 @@ | |||
| 13 | /* IR emitter. */ | 15 | /* IR emitter. */ |
| 14 | LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); | 16 | LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); |
| 15 | LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); | 17 | LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); |
| 18 | LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...); | ||
| 16 | 19 | ||
| 17 | /* Save current IR in J->fold.ins, but do not emit it (yet). */ | 20 | /* Save current IR in J->fold.ins, but do not emit it (yet). */ |
| 18 | static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) | 21 | static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) |
| @@ -83,6 +86,7 @@ LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref); | |||
| 83 | /* Emit IR instructions with on-the-fly optimizations. */ | 86 | /* Emit IR instructions with on-the-fly optimizations. */ |
| 84 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); | 87 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); |
| 85 | LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); | 88 | LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); |
| 89 | LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim); | ||
| 86 | 90 | ||
| 87 | /* Special return values for the fold functions. */ | 91 | /* Special return values for the fold functions. */ |
| 88 | enum { | 92 | enum { |
| @@ -106,7 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J); | |||
| 106 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); | 110 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); |
| 107 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); | 111 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); |
| 108 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); | 112 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); |
| 109 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J); | 113 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); |
| 110 | LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); | 114 | LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); |
| 111 | 115 | ||
| 112 | /* Dead-store elimination. */ | 116 | /* Dead-store elimination. */ |
diff --git a/src/lj_lib.c b/src/lj_lib.c index 683c66d6..d8254093 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c | |||
| @@ -152,7 +152,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg) | |||
| 152 | { | 152 | { |
| 153 | TValue *o = L->base + narg-1; | 153 | TValue *o = L->base + narg-1; |
| 154 | if (!(o < L->top && | 154 | if (!(o < L->top && |
| 155 | (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))))) | 155 | (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))))) |
| 156 | lj_err_argt(L, narg, LUA_TNUMBER); | 156 | lj_err_argt(L, narg, LUA_TNUMBER); |
| 157 | return numV(o); | 157 | return numV(o); |
| 158 | } | 158 | } |
diff --git a/src/lj_lib.h b/src/lj_lib.h index 59a0f2be..a7a6317e 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h | |||
| @@ -90,4 +90,9 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | |||
| 90 | #define LIBINIT_FFID 0xfe | 90 | #define LIBINIT_FFID 0xfe |
| 91 | #define LIBINIT_END 0xff | 91 | #define LIBINIT_END 0xff |
| 92 | 92 | ||
| 93 | /* Exported library functions. */ | ||
| 94 | |||
| 95 | typedef struct RandomState RandomState; | ||
| 96 | LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); | ||
| 97 | |||
| 93 | #endif | 98 | #endif |
diff --git a/src/lj_meta.c b/src/lj_meta.c index dff01f85..1182d908 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
| @@ -60,7 +60,7 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm) | |||
| 60 | else if (tvisudata(o)) | 60 | else if (tvisudata(o)) |
| 61 | mt = tabref(udataV(o)->metatable); | 61 | mt = tabref(udataV(o)->metatable); |
| 62 | else | 62 | else |
| 63 | mt = tabref(G(L)->basemt[itypemap(o)]); | 63 | mt = tabref(basemt_obj(G(L), o)); |
| 64 | if (mt) { | 64 | if (mt) { |
| 65 | cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); | 65 | cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); |
| 66 | if (mo) | 66 | if (mo) |
| @@ -157,7 +157,7 @@ static cTValue *str2num(cTValue *o, TValue *n) | |||
| 157 | { | 157 | { |
| 158 | if (tvisnum(o)) | 158 | if (tvisnum(o)) |
| 159 | return o; | 159 | return o; |
| 160 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), n)) | 160 | else if (tvisstr(o) && lj_str_tonum(strV(o), n)) |
| 161 | return n; | 161 | return n; |
| 162 | else | 162 | else |
| 163 | return NULL; | 163 | return NULL; |
| @@ -295,7 +295,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) | |||
| 295 | top = curr_top(L); | 295 | top = curr_top(L); |
| 296 | setcont(top, ne ? lj_cont_condf : lj_cont_condt); | 296 | setcont(top, ne ? lj_cont_condf : lj_cont_condt); |
| 297 | copyTV(L, top+1, mo); | 297 | copyTV(L, top+1, mo); |
| 298 | it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA; | 298 | it = ~o1->gch.gct; |
| 299 | setgcV(L, top+2, &o1->gch, it); | 299 | setgcV(L, top+2, &o1->gch, it); |
| 300 | setgcV(L, top+3, &o2->gch, it); | 300 | setgcV(L, top+3, &o2->gch, it); |
| 301 | return top+2; /* Trigger metamethod call. */ | 301 | return top+2; /* Trigger metamethod call. */ |
diff --git a/src/lj_obj.h b/src/lj_obj.h index 9101f053..cebeda9b 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
| @@ -315,7 +315,7 @@ typedef struct GCstr { | |||
| 315 | /* Userdata object. Payload follows. */ | 315 | /* Userdata object. Payload follows. */ |
| 316 | typedef struct GCudata { | 316 | typedef struct GCudata { |
| 317 | GCHeader; | 317 | GCHeader; |
| 318 | uint8_t unused1; | 318 | uint8_t udtype; /* Userdata type. */ |
| 319 | uint8_t unused2; | 319 | uint8_t unused2; |
| 320 | GCRef env; /* Should be at same offset in GCfunc. */ | 320 | GCRef env; /* Should be at same offset in GCfunc. */ |
| 321 | MSize len; /* Size of payload. */ | 321 | MSize len; /* Size of payload. */ |
| @@ -323,6 +323,13 @@ typedef struct GCudata { | |||
| 323 | uint32_t align1; /* To force 8 byte alignment of the payload. */ | 323 | uint32_t align1; /* To force 8 byte alignment of the payload. */ |
| 324 | } GCudata; | 324 | } GCudata; |
| 325 | 325 | ||
| 326 | /* Userdata types. */ | ||
| 327 | enum { | ||
| 328 | UDTYPE_USERDATA, /* Regular userdata. */ | ||
| 329 | UDTYPE_IO_FILE, /* I/O library FILE. */ | ||
| 330 | UDTYPE__MAX | ||
| 331 | }; | ||
| 332 | |||
| 326 | #define uddata(u) ((void *)((u)+1)) | 333 | #define uddata(u) ((void *)((u)+1)) |
| 327 | #define sizeudata(u) (sizeof(struct GCudata)+(u)->len) | 334 | #define sizeudata(u) (sizeof(struct GCudata)+(u)->len) |
| 328 | 335 | ||
| @@ -496,7 +503,17 @@ MMDEF(MMENUM) | |||
| 496 | MM_FAST = MM_eq | 503 | MM_FAST = MM_eq |
| 497 | } MMS; | 504 | } MMS; |
| 498 | 505 | ||
| 499 | #define BASEMT_MAX ((~LJ_TNUMX)+1) | 506 | /* GC root IDs. */ |
| 507 | typedef enum { | ||
| 508 | GCROOT_BASEMT, /* Metatables for base types. */ | ||
| 509 | GCROOT_BASEMT_NUM = ~LJ_TNUMX, /* Last base metatable. */ | ||
| 510 | GCROOT_IO_INPUT, /* Userdata for default I/O input file. */ | ||
| 511 | GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */ | ||
| 512 | GCROOT__MAX | ||
| 513 | } GCRootID; | ||
| 514 | |||
| 515 | #define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)]) | ||
| 516 | #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) | ||
| 500 | 517 | ||
| 501 | typedef struct GCState { | 518 | typedef struct GCState { |
| 502 | MSize total; /* Memory currently allocated. */ | 519 | MSize total; /* Memory currently allocated. */ |
| @@ -544,7 +561,7 @@ typedef struct global_State { | |||
| 544 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ | 561 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ |
| 545 | GCRef jit_L; /* Current JIT code lua_State or NULL. */ | 562 | GCRef jit_L; /* Current JIT code lua_State or NULL. */ |
| 546 | MRef jit_base; /* Current JIT code L->base. */ | 563 | MRef jit_base; /* Current JIT code L->base. */ |
| 547 | GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */ | 564 | GCRef gcroot[GCROOT__MAX]; /* GC roots. */ |
| 548 | GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ | 565 | GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ |
| 549 | } global_State; | 566 | } global_State; |
| 550 | 567 | ||
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 2102561d..98266d21 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
| @@ -282,21 +282,50 @@ LJFOLD(STRTO KGC) | |||
| 282 | LJFOLDF(kfold_strto) | 282 | LJFOLDF(kfold_strto) |
| 283 | { | 283 | { |
| 284 | TValue n; | 284 | TValue n; |
| 285 | if (lj_str_numconv(strdata(ir_kstr(fleft)), &n)) | 285 | if (lj_str_tonum(ir_kstr(fleft), &n)) |
| 286 | return lj_ir_knum(J, numV(&n)); | 286 | return lj_ir_knum(J, numV(&n)); |
| 287 | return FAILFOLD; | 287 | return FAILFOLD; |
| 288 | } | 288 | } |
| 289 | 289 | ||
| 290 | LJFOLD(SNEW STRREF KINT) | 290 | LJFOLD(SNEW KPTR KINT) |
| 291 | LJFOLDF(kfold_snew) | 291 | LJFOLDF(kfold_snew_kptr) |
| 292 | { | ||
| 293 | GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i); | ||
| 294 | return lj_ir_kstr(J, s); | ||
| 295 | } | ||
| 296 | |||
| 297 | LJFOLD(SNEW any KINT) | ||
| 298 | LJFOLDF(kfold_snew_empty) | ||
| 292 | { | 299 | { |
| 293 | if (fright->i == 0) | 300 | if (fright->i == 0) |
| 294 | return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); | 301 | return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); |
| 302 | return NEXTFOLD; | ||
| 303 | } | ||
| 304 | |||
| 305 | LJFOLD(STRREF KGC KINT) | ||
| 306 | LJFOLDF(kfold_strref) | ||
| 307 | { | ||
| 308 | GCstr *str = ir_kstr(fleft); | ||
| 309 | lua_assert((MSize)fright->i < str->len); | ||
| 310 | return lj_ir_kptr(J, (char *)strdata(str) + fright->i); | ||
| 311 | } | ||
| 312 | |||
| 313 | LJFOLD(STRREF SNEW any) | ||
| 314 | LJFOLDF(kfold_strref_snew) | ||
| 315 | { | ||
| 295 | PHIBARRIER(fleft); | 316 | PHIBARRIER(fleft); |
| 296 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | 317 | if (irref_isk(fins->op2) && fright->i == 0) { |
| 297 | const char *s = strdata(ir_kstr(IR(fleft->op1))); | 318 | return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */ |
| 298 | int32_t ofs = IR(fleft->op2)->i; | 319 | } else { |
| 299 | return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i)); | 320 | /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */ |
| 321 | IRIns *ir = IR(fleft->op1); | ||
| 322 | IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */ | ||
| 323 | lua_assert(ir->o == IR_STRREF); | ||
| 324 | PHIBARRIER(ir); | ||
| 325 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ | ||
| 326 | fins->op1 = str; | ||
| 327 | fins->ot = IRT(IR_STRREF, IRT_PTR); | ||
| 328 | return RETRYFOLD; | ||
| 300 | } | 329 | } |
| 301 | return NEXTFOLD; | 330 | return NEXTFOLD; |
| 302 | } | 331 | } |
| @@ -343,16 +372,13 @@ LJFOLDF(kfold_intcomp) | |||
| 343 | } | 372 | } |
| 344 | } | 373 | } |
| 345 | 374 | ||
| 346 | LJFOLD(LT KGC KGC) | 375 | LJFOLD(CALLN CARG IRCALL_lj_str_cmp) |
| 347 | LJFOLD(GE KGC KGC) | 376 | LJFOLDF(kfold_strcmp) |
| 348 | LJFOLD(LE KGC KGC) | ||
| 349 | LJFOLD(GT KGC KGC) | ||
| 350 | LJFOLDF(kfold_strcomp) | ||
| 351 | { | 377 | { |
| 352 | if (irt_isstr(fins->t)) { | 378 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { |
| 353 | GCstr *a = ir_kstr(fleft); | 379 | GCstr *a = ir_kstr(IR(fleft->op1)); |
| 354 | GCstr *b = ir_kstr(fright); | 380 | GCstr *b = ir_kstr(IR(fleft->op2)); |
| 355 | return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o)); | 381 | return INTFOLD(lj_str_cmp(a, b)); |
| 356 | } | 382 | } |
| 357 | return NEXTFOLD; | 383 | return NEXTFOLD; |
| 358 | } | 384 | } |
| @@ -1070,7 +1096,8 @@ LJFOLDF(merge_eqne_snew_kgc) | |||
| 1070 | uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : | 1096 | uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : |
| 1071 | len == 2 ? IRT(IR_XLOAD, IRT_U16) : | 1097 | len == 2 ? IRT(IR_XLOAD, IRT_U16) : |
| 1072 | IRTI(IR_XLOAD)); | 1098 | IRTI(IR_XLOAD)); |
| 1073 | TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0); | 1099 | TRef tmp = emitir(ot, strref, |
| 1100 | IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0)); | ||
| 1074 | TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); | 1101 | TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); |
| 1075 | if (len == 3) | 1102 | if (len == 3) |
| 1076 | tmp = emitir(IRTI(IR_BAND), tmp, | 1103 | tmp = emitir(IRTI(IR_BAND), tmp, |
| @@ -1103,8 +1130,8 @@ LJFOLDX(lj_opt_fwd_hload) | |||
| 1103 | LJFOLD(ULOAD any) | 1130 | LJFOLD(ULOAD any) |
| 1104 | LJFOLDX(lj_opt_fwd_uload) | 1131 | LJFOLDX(lj_opt_fwd_uload) |
| 1105 | 1132 | ||
| 1106 | LJFOLD(TLEN any) | 1133 | LJFOLD(CALLL any IRCALL_lj_tab_len) |
| 1107 | LJFOLDX(lj_opt_fwd_tlen) | 1134 | LJFOLDX(lj_opt_fwd_tab_len) |
| 1108 | 1135 | ||
| 1109 | /* Upvalue refs are really loads, but there are no corresponding stores. | 1136 | /* Upvalue refs are really loads, but there are no corresponding stores. |
| 1110 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | 1137 | ** So CSE is ok for them, except for UREFO across a GC step (see below). |
| @@ -1194,13 +1221,23 @@ LJFOLDF(fload_tab_ah) | |||
| 1194 | 1221 | ||
| 1195 | /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ | 1222 | /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ |
| 1196 | LJFOLD(FLOAD KGC IRFL_STR_LEN) | 1223 | LJFOLD(FLOAD KGC IRFL_STR_LEN) |
| 1197 | LJFOLDF(fload_str_len) | 1224 | LJFOLDF(fload_str_len_kgc) |
| 1198 | { | 1225 | { |
| 1199 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | 1226 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) |
| 1200 | return INTFOLD((int32_t)ir_kstr(fleft)->len); | 1227 | return INTFOLD((int32_t)ir_kstr(fleft)->len); |
| 1201 | return NEXTFOLD; | 1228 | return NEXTFOLD; |
| 1202 | } | 1229 | } |
| 1203 | 1230 | ||
| 1231 | LJFOLD(FLOAD SNEW IRFL_STR_LEN) | ||
| 1232 | LJFOLDF(fload_str_len_snew) | ||
| 1233 | { | ||
| 1234 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | ||
| 1235 | PHIBARRIER(fleft); | ||
| 1236 | return fleft->op2; | ||
| 1237 | } | ||
| 1238 | return NEXTFOLD; | ||
| 1239 | } | ||
| 1240 | |||
| 1204 | LJFOLD(FLOAD any IRFL_STR_LEN) | 1241 | LJFOLD(FLOAD any IRFL_STR_LEN) |
| 1205 | LJFOLDX(lj_opt_cse) | 1242 | LJFOLDX(lj_opt_cse) |
| 1206 | 1243 | ||
| @@ -1216,20 +1253,28 @@ LJFOLDF(fwd_sload) | |||
| 1216 | return J->slot[fins->op1]; | 1253 | return J->slot[fins->op1]; |
| 1217 | } | 1254 | } |
| 1218 | 1255 | ||
| 1219 | /* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */ | 1256 | LJFOLD(XLOAD KPTR any) |
| 1220 | LJFOLD(XLOAD STRREF any) | 1257 | LJFOLDF(xload_kptr) |
| 1221 | LJFOLDF(xload_str) | ||
| 1222 | { | 1258 | { |
| 1223 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | 1259 | /* Only fold read-only integer loads for now. */ |
| 1224 | GCstr *str = ir_kstr(IR(fleft->op1)); | 1260 | if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t)) |
| 1225 | int32_t ofs = IR(fleft->op2)->i; | 1261 | return INTFOLD(kfold_xload(fins, ir_kptr(fleft))); |
| 1226 | lua_assert((MSize)ofs < str->len); | 1262 | return NEXTFOLD; |
| 1227 | lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len); | 1263 | } |
| 1228 | return INTFOLD(kfold_xload(fins, strdata(str)+ofs)); | 1264 | |
| 1265 | /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ | ||
| 1266 | LJFOLD(XLOAD any any) | ||
| 1267 | LJFOLDF(fwd_xload) | ||
| 1268 | { | ||
| 1269 | IRRef ref = J->chain[IR_XLOAD]; | ||
| 1270 | IRRef op1 = fins->op1; | ||
| 1271 | while (ref > op1) { | ||
| 1272 | if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t)) | ||
| 1273 | return ref; | ||
| 1274 | ref = IR(ref)->prev; | ||
| 1229 | } | 1275 | } |
| 1230 | return CSEFOLD; | 1276 | return EMITFOLD; |
| 1231 | } | 1277 | } |
| 1232 | /* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */ | ||
| 1233 | 1278 | ||
| 1234 | /* -- Write barriers ------------------------------------------------------ */ | 1279 | /* -- Write barriers ------------------------------------------------------ */ |
| 1235 | 1280 | ||
| @@ -1279,12 +1324,11 @@ LJFOLD(FSTORE any any) | |||
| 1279 | LJFOLDX(lj_opt_dse_fstore) | 1324 | LJFOLDX(lj_opt_dse_fstore) |
| 1280 | 1325 | ||
| 1281 | LJFOLD(NEWREF any any) /* Treated like a store. */ | 1326 | LJFOLD(NEWREF any any) /* Treated like a store. */ |
| 1327 | LJFOLD(CALLS any any) | ||
| 1328 | LJFOLD(CALLL any any) /* Safeguard fallback. */ | ||
| 1282 | LJFOLD(TNEW any any) | 1329 | LJFOLD(TNEW any any) |
| 1283 | LJFOLD(TDUP any) | 1330 | LJFOLD(TDUP any) |
| 1284 | LJFOLDF(store_raw) | 1331 | LJFOLDX(lj_ir_emit) |
| 1285 | { | ||
| 1286 | return EMITFOLD; | ||
| 1287 | } | ||
| 1288 | 1332 | ||
| 1289 | /* ------------------------------------------------------------------------ */ | 1333 | /* ------------------------------------------------------------------------ */ |
| 1290 | 1334 | ||
| @@ -1402,6 +1446,19 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J) | |||
| 1402 | } | 1446 | } |
| 1403 | } | 1447 | } |
| 1404 | 1448 | ||
| 1449 | /* CSE with explicit search limit. */ | ||
| 1450 | TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim) | ||
| 1451 | { | ||
| 1452 | IRRef ref = J->chain[fins->o]; | ||
| 1453 | IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); | ||
| 1454 | while (ref > lim) { | ||
| 1455 | if (IR(ref)->op12 == op12) | ||
| 1456 | return ref; | ||
| 1457 | ref = IR(ref)->prev; | ||
| 1458 | } | ||
| 1459 | return lj_ir_emit(J); | ||
| 1460 | } | ||
| 1461 | |||
| 1405 | /* ------------------------------------------------------------------------ */ | 1462 | /* ------------------------------------------------------------------------ */ |
| 1406 | 1463 | ||
| 1407 | #undef IR | 1464 | #undef IR |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index f9a2a808..90ab1b6f 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
| @@ -310,7 +310,13 @@ static void loop_unroll(jit_State *J) | |||
| 310 | /* Undo any partial changes made by the loop optimization. */ | 310 | /* Undo any partial changes made by the loop optimization. */ |
| 311 | static void loop_undo(jit_State *J, IRRef ins) | 311 | static void loop_undo(jit_State *J, IRRef ins) |
| 312 | { | 312 | { |
| 313 | ptrdiff_t i; | ||
| 313 | lj_ir_rollback(J, ins); | 314 | lj_ir_rollback(J, ins); |
| 315 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ | ||
| 316 | BPropEntry *bp = &J->bpropcache[i]; | ||
| 317 | if (bp->val >= ins) | ||
| 318 | bp->key = 0; | ||
| 319 | } | ||
| 314 | for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */ | 320 | for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */ |
| 315 | IRIns *ir = IR(ins); | 321 | IRIns *ir = IR(ins); |
| 316 | irt_clearphi(ir->t); | 322 | irt_clearphi(ir->t); |
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 94fc4ad8..882ba6c5 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
| @@ -307,14 +307,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) | |||
| 307 | 307 | ||
| 308 | conflict: | 308 | conflict: |
| 309 | /* Try to find a matching load. Below the conflicting store, if any. */ | 309 | /* Try to find a matching load. Below the conflicting store, if any. */ |
| 310 | ref = J->chain[IR_ULOAD]; | 310 | return lj_opt_cselim(J, lim); |
| 311 | while (ref > lim) { | ||
| 312 | IRIns *load = IR(ref); | ||
| 313 | if (load->op1 == uref) | ||
| 314 | return ref; /* Load forwarding. */ | ||
| 315 | ref = load->prev; | ||
| 316 | } | ||
| 317 | return EMITFOLD; /* Conflict or no match. */ | ||
| 318 | } | 311 | } |
| 319 | 312 | ||
| 320 | /* USTORE elimination. */ | 313 | /* USTORE elimination. */ |
| @@ -405,14 +398,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J) | |||
| 405 | 398 | ||
| 406 | conflict: | 399 | conflict: |
| 407 | /* Try to find a matching load. Below the conflicting store, if any. */ | 400 | /* Try to find a matching load. Below the conflicting store, if any. */ |
| 408 | ref = J->chain[IR_FLOAD]; | 401 | return lj_opt_cselim(J, lim); |
| 409 | while (ref > lim) { | ||
| 410 | IRIns *load = IR(ref); | ||
| 411 | if (load->op1 == oref && load->op2 == fid) | ||
| 412 | return ref; /* Load forwarding. */ | ||
| 413 | ref = load->prev; | ||
| 414 | } | ||
| 415 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
| 416 | } | 402 | } |
| 417 | 403 | ||
| 418 | /* FSTORE elimination. */ | 404 | /* FSTORE elimination. */ |
| @@ -458,10 +444,10 @@ doemit: | |||
| 458 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | 444 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ |
| 459 | } | 445 | } |
| 460 | 446 | ||
| 461 | /* -- TLEN forwarding ----------------------------------------------------- */ | 447 | /* -- Forwarding of lj_tab_len -------------------------------------------- */ |
| 462 | 448 | ||
| 463 | /* This is rather simplistic right now, but better than nothing. */ | 449 | /* This is rather simplistic right now, but better than nothing. */ |
| 464 | TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) | 450 | TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) |
| 465 | { | 451 | { |
| 466 | IRRef tab = fins->op1; /* Table reference. */ | 452 | IRRef tab = fins->op1; /* Table reference. */ |
| 467 | IRRef lim = tab; /* Search limit. */ | 453 | IRRef lim = tab; /* Search limit. */ |
| @@ -484,14 +470,7 @@ TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) | |||
| 484 | } | 470 | } |
| 485 | 471 | ||
| 486 | /* Try to find a matching load. Below the conflicting store, if any. */ | 472 | /* Try to find a matching load. Below the conflicting store, if any. */ |
| 487 | ref = J->chain[IR_TLEN]; | 473 | return lj_opt_cselim(J, lim); |
| 488 | while (ref > lim) { | ||
| 489 | IRIns *tlen = IR(ref); | ||
| 490 | if (tlen->op1 == tab) | ||
| 491 | return ref; /* Load forwarding. */ | ||
| 492 | ref = tlen->prev; | ||
| 493 | } | ||
| 494 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
| 495 | } | 474 | } |
| 496 | 475 | ||
| 497 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ | 476 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 60a6afb8..b9107c5e 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
| @@ -370,7 +370,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) | |||
| 370 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) | 370 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) |
| 371 | { | 371 | { |
| 372 | lua_Number n; | 372 | lua_Number n; |
| 373 | if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc)) | 373 | if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) |
| 374 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 374 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 375 | n = numV(vc); | 375 | n = numV(vc); |
| 376 | /* Limit narrowing for pow to small exponents (or for two constants). */ | 376 | /* Limit narrowing for pow to small exponents (or for two constants). */ |
diff --git a/src/lj_parse.c b/src/lj_parse.c index 000772fe..1de07e92 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c | |||
| @@ -317,6 +317,7 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len) | |||
| 317 | GCstr *s = lj_str_new(L, str, len); | 317 | GCstr *s = lj_str_new(L, str, len); |
| 318 | TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); | 318 | TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); |
| 319 | if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ | 319 | if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ |
| 320 | lj_gc_check(L); | ||
| 320 | return s; | 321 | return s; |
| 321 | } | 322 | } |
| 322 | 323 | ||
diff --git a/src/lj_record.c b/src/lj_record.c index 68a233b9..9b223ff6 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -441,7 +441,7 @@ static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
| 441 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); | 441 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); |
| 442 | } else { | 442 | } else { |
| 443 | /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ | 443 | /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ |
| 444 | mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]); | 444 | mt = tabref(basemt_obj(J2G(J), &ix->tabv)); |
| 445 | if (mt == NULL) | 445 | if (mt == NULL) |
| 446 | return 0; /* No metamethod. */ | 446 | return 0; /* No metamethod. */ |
| 447 | mix.tab = lj_ir_ktab(J, mt); | 447 | mix.tab = lj_ir_ktab(J, mt); |
| @@ -855,7 +855,7 @@ typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd); | |||
| 855 | /* Get runtime value of int argument. */ | 855 | /* Get runtime value of int argument. */ |
| 856 | static int32_t argv2int(jit_State *J, TValue *o) | 856 | static int32_t argv2int(jit_State *J, TValue *o) |
| 857 | { | 857 | { |
| 858 | if (tvisstr(o) && !lj_str_numconv(strVdata(o), o)) | 858 | if (tvisstr(o) && !lj_str_tonum(strV(o), o)) |
| 859 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 859 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 860 | return lj_num2bit(numV(o)); | 860 | return lj_num2bit(numV(o)); |
| 861 | } | 861 | } |
| @@ -1017,6 +1017,8 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1017 | /* Otherwise res[0] already contains the result. */ | 1017 | /* Otherwise res[0] already contains the result. */ |
| 1018 | } else if (tref_isnumber(tr)) { | 1018 | } else if (tref_isnumber(tr)) { |
| 1019 | res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | 1019 | res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); |
| 1020 | } else if (tref_ispri(tr)) { | ||
| 1021 | res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[tref_type(tr)])); | ||
| 1020 | } else { | 1022 | } else { |
| 1021 | recff_err_nyi(J, rd); | 1023 | recff_err_nyi(J, rd); |
| 1022 | } | 1024 | } |
| @@ -1165,10 +1167,16 @@ static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1165 | res[0] = emitir(IRTN(IR_ATAN2), y, x); | 1167 | res[0] = emitir(IRTN(IR_ATAN2), y, x); |
| 1166 | } | 1168 | } |
| 1167 | 1169 | ||
| 1170 | static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd) | ||
| 1171 | { | ||
| 1172 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
| 1173 | res[0] = lj_ir_call(J, rd->data, tr); | ||
| 1174 | } | ||
| 1175 | |||
| 1168 | static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) | 1176 | static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) |
| 1169 | { | 1177 | { |
| 1170 | TRef tr = arg[0]; | 1178 | TRef tr = arg[0]; |
| 1171 | if (tref_isinteger(arg[0])) { | 1179 | if (tref_isinteger(tr)) { |
| 1172 | res[0] = tr; | 1180 | res[0] = tr; |
| 1173 | res[1] = lj_ir_kint(J, 0); | 1181 | res[1] = lj_ir_kint(J, 0); |
| 1174 | } else { | 1182 | } else { |
| @@ -1187,9 +1195,10 @@ static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1187 | 1195 | ||
| 1188 | static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) | 1196 | static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) |
| 1189 | { | 1197 | { |
| 1198 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
| 1190 | if (!tref_isnumber_str(arg[1])) | 1199 | if (!tref_isnumber_str(arg[1])) |
| 1191 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 1200 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
| 1192 | res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]); | 1201 | res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]); |
| 1193 | UNUSED(rd); | 1202 | UNUSED(rd); |
| 1194 | } | 1203 | } |
| 1195 | 1204 | ||
| @@ -1203,6 +1212,32 @@ static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1203 | res[0] = tr; | 1212 | res[0] = tr; |
| 1204 | } | 1213 | } |
| 1205 | 1214 | ||
| 1215 | static void recff_math_random(jit_State *J, TRef *res, RecordFFData *rd) | ||
| 1216 | { | ||
| 1217 | GCudata *ud = udataV(&rd->fn->c.upvalue[0]); | ||
| 1218 | TRef tr, one; | ||
| 1219 | lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ | ||
| 1220 | tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); | ||
| 1221 | one = lj_ir_knum_one(J); | ||
| 1222 | tr = emitir(IRTN(IR_SUB), tr, one); | ||
| 1223 | if (arg[0]) { | ||
| 1224 | TRef tr1 = lj_ir_tonum(J, arg[0]); | ||
| 1225 | if (arg[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */ | ||
| 1226 | TRef tr2 = lj_ir_tonum(J, arg[1]); | ||
| 1227 | tr2 = emitir(IRTN(IR_SUB), tr2, tr1); | ||
| 1228 | tr2 = emitir(IRTN(IR_ADD), tr2, one); | ||
| 1229 | tr = emitir(IRTN(IR_MUL), tr, tr2); | ||
| 1230 | tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR); | ||
| 1231 | tr = emitir(IRTN(IR_ADD), tr, tr1); | ||
| 1232 | } else { /* d = floor(d*r1) + 1.0 */ | ||
| 1233 | tr = emitir(IRTN(IR_MUL), tr, tr1); | ||
| 1234 | tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR); | ||
| 1235 | tr = emitir(IRTN(IR_ADD), tr, one); | ||
| 1236 | } | ||
| 1237 | } | ||
| 1238 | res[0] = tr; | ||
| 1239 | } | ||
| 1240 | |||
| 1206 | /* -- Bit library fast functions ------------------------------------------ */ | 1241 | /* -- Bit library fast functions ------------------------------------------ */ |
| 1207 | 1242 | ||
| 1208 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ | 1243 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ |
| @@ -1321,7 +1356,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1321 | for (i = 0; i < len; i++) { | 1356 | for (i = 0; i < len; i++) { |
| 1322 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); | 1357 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); |
| 1323 | tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); | 1358 | tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); |
| 1324 | res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0); | 1359 | res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); |
| 1325 | } | 1360 | } |
| 1326 | } else { /* Empty range or range underflow: return no results. */ | 1361 | } else { /* Empty range or range underflow: return no results. */ |
| 1327 | emitir(IRTGI(IR_LE), trend, trstart); | 1362 | emitir(IRTGI(IR_LE), trend, trstart); |
| @@ -1335,7 +1370,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1335 | static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) | 1370 | static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) |
| 1336 | { | 1371 | { |
| 1337 | if (tref_istab(arg[0])) { | 1372 | if (tref_istab(arg[0])) { |
| 1338 | res[0] = emitir(IRTI(IR_TLEN), arg[0], 0); | 1373 | res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); |
| 1339 | } /* else: Interpreter will throw. */ | 1374 | } /* else: Interpreter will throw. */ |
| 1340 | UNUSED(rd); | 1375 | UNUSED(rd); |
| 1341 | } | 1376 | } |
| @@ -1344,7 +1379,7 @@ static void recff_table_remove(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1344 | { | 1379 | { |
| 1345 | if (tref_istab(arg[0])) { | 1380 | if (tref_istab(arg[0])) { |
| 1346 | if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */ | 1381 | if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */ |
| 1347 | TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); | 1382 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); |
| 1348 | GCtab *t = tabV(&rd->argv[0]); | 1383 | GCtab *t = tabV(&rd->argv[0]); |
| 1349 | MSize len = lj_tab_len(t); | 1384 | MSize len = lj_tab_len(t); |
| 1350 | emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); | 1385 | emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); |
| @@ -1376,7 +1411,7 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1376 | rd->nres = 0; | 1411 | rd->nres = 0; |
| 1377 | if (tref_istab(arg[0]) && arg[1]) { | 1412 | if (tref_istab(arg[0]) && arg[1]) { |
| 1378 | if (!arg[2]) { /* Simple push: t[#t+1] = v */ | 1413 | if (!arg[2]) { /* Simple push: t[#t+1] = v */ |
| 1379 | TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); | 1414 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); |
| 1380 | GCtab *t = tabV(&rd->argv[0]); | 1415 | GCtab *t = tabV(&rd->argv[0]); |
| 1381 | RecordIndex ix; | 1416 | RecordIndex ix; |
| 1382 | ix.tab = arg[0]; | 1417 | ix.tab = arg[0]; |
| @@ -1392,6 +1427,62 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd) | |||
| 1392 | } /* else: Interpreter will throw. */ | 1427 | } /* else: Interpreter will throw. */ |
| 1393 | } | 1428 | } |
| 1394 | 1429 | ||
| 1430 | /* -- I/O library fast functions ------------------------------------------ */ | ||
| 1431 | |||
| 1432 | /* Get FILE* for I/O function. Any I/O error aborts recording, so there's | ||
| 1433 | ** no need to encode the alternate cases for any of the guards. | ||
| 1434 | */ | ||
| 1435 | static TRef recff_io_fp(jit_State *J, TRef *res, uint32_t id) | ||
| 1436 | { | ||
| 1437 | TRef tr, ud, fp; | ||
| 1438 | if (id) { /* io.func() */ | ||
| 1439 | tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); | ||
| 1440 | ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); | ||
| 1441 | } else { /* fp:method() */ | ||
| 1442 | ud = arg[0]; | ||
| 1443 | if (!tref_isudata(ud)) | ||
| 1444 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
| 1445 | tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); | ||
| 1446 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); | ||
| 1447 | } | ||
| 1448 | fp = emitir(IRT(IR_FLOAD, IRT_LIGHTUD), ud, IRFL_UDATA_FILE); | ||
| 1449 | emitir(IRTG(IR_NE, IRT_LIGHTUD), fp, lj_ir_knull(J, IRT_LIGHTUD)); | ||
| 1450 | return fp; | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | static void recff_io_write(jit_State *J, TRef *res, RecordFFData *rd) | ||
| 1454 | { | ||
| 1455 | TRef fp = recff_io_fp(J, res, rd->data); | ||
| 1456 | TRef zero = lj_ir_kint(J, 0); | ||
| 1457 | TRef one = lj_ir_kint(J, 1); | ||
| 1458 | ptrdiff_t i = rd->data == 0 ? 1 : 0; | ||
| 1459 | for (; arg[i]; i++) { | ||
| 1460 | TRef str = lj_ir_tostr(J, arg[i]); | ||
| 1461 | TRef buf = emitir(IRT(IR_STRREF, IRT_PTR), str, zero); | ||
| 1462 | TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); | ||
| 1463 | if (tref_isk(len) && IR(tref_ref(len))->i == 1) { | ||
| 1464 | TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); | ||
| 1465 | tr = lj_ir_call(J, IRCALL_fputc, tr, fp); | ||
| 1466 | if (rd->cres != 0) /* Check result only if requested. */ | ||
| 1467 | emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); | ||
| 1468 | } else { | ||
| 1469 | TRef tr = lj_ir_call(J, IRCALL_fwrite, buf, one, len, fp); | ||
| 1470 | if (rd->cres != 0) /* Check result only if requested. */ | ||
| 1471 | emitir(IRTGI(IR_EQ), tr, len); | ||
| 1472 | } | ||
| 1473 | } | ||
| 1474 | res[0] = TREF_TRUE; | ||
| 1475 | } | ||
| 1476 | |||
| 1477 | static void recff_io_flush(jit_State *J, TRef *res, RecordFFData *rd) | ||
| 1478 | { | ||
| 1479 | TRef fp = recff_io_fp(J, res, rd->data); | ||
| 1480 | TRef tr = lj_ir_call(J, IRCALL_fflush, fp); | ||
| 1481 | if (rd->cres != 0) /* Check result only if requested. */ | ||
| 1482 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0)); | ||
| 1483 | res[0] = TREF_TRUE; | ||
| 1484 | } | ||
| 1485 | |||
| 1395 | /* -- Record calls and returns -------------------------------------------- */ | 1486 | /* -- Record calls and returns -------------------------------------------- */ |
| 1396 | 1487 | ||
| 1397 | #undef arg | 1488 | #undef arg |
| @@ -1696,6 +1787,9 @@ void lj_record_ins(jit_State *J) | |||
| 1696 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; | 1787 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; |
| 1697 | } else if (ta == IRT_STR) { | 1788 | } else if (ta == IRT_STR) { |
| 1698 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; | 1789 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; |
| 1790 | ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); | ||
| 1791 | rc = lj_ir_kint(J, 0); | ||
| 1792 | ta = IRT_INT; | ||
| 1699 | } else { | 1793 | } else { |
| 1700 | rec_mm_comp(J, &ix, (int)op); | 1794 | rec_mm_comp(J, &ix, (int)op); |
| 1701 | break; | 1795 | break; |
| @@ -1745,7 +1839,7 @@ void lj_record_ins(jit_State *J) | |||
| 1745 | if (tref_isstr(rc)) { | 1839 | if (tref_isstr(rc)) { |
| 1746 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); | 1840 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); |
| 1747 | } else if (tref_istab(rc)) { | 1841 | } else if (tref_istab(rc)) { |
| 1748 | rc = emitir(IRTI(IR_TLEN), rc, 0); | 1842 | rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); |
| 1749 | } else { | 1843 | } else { |
| 1750 | ix.tab = rc; | 1844 | ix.tab = rc; |
| 1751 | copyTV(J->L, &ix.tabv, &ix.keyv); | 1845 | copyTV(J->L, &ix.tabv, &ix.keyv); |
| @@ -1879,8 +1973,6 @@ void lj_record_ins(jit_State *J) | |||
| 1879 | /* fallthrough */ | 1973 | /* fallthrough */ |
| 1880 | case BC_CALL: | 1974 | case BC_CALL: |
| 1881 | callop: | 1975 | callop: |
| 1882 | if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */ | ||
| 1883 | } | ||
| 1884 | rec_call(J, ra, (int)(rb-1), (int)(rc-1)); | 1976 | rec_call(J, ra, (int)(rb-1), (int)(rc-1)); |
| 1885 | break; | 1977 | break; |
| 1886 | 1978 | ||
| @@ -2064,8 +2156,11 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
| 2064 | BCReg j; | 2156 | BCReg j; |
| 2065 | for (j = 0; j < s; j++) | 2157 | for (j = 0; j < s; j++) |
| 2066 | if (snap_ref(map[j]) == ref) { | 2158 | if (snap_ref(map[j]) == ref) { |
| 2067 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) | 2159 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
| 2160 | lua_assert(s != 0); | ||
| 2068 | J->baseslot = s+1; | 2161 | J->baseslot = s+1; |
| 2162 | J->framedepth++; | ||
| 2163 | } | ||
| 2069 | tr = J->slot[j]; | 2164 | tr = J->slot[j]; |
| 2070 | goto dupslot; | 2165 | goto dupslot; |
| 2071 | } | 2166 | } |
| @@ -2078,8 +2173,10 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
| 2078 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; | 2173 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; |
| 2079 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ | 2174 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ |
| 2080 | if (irt_isfunc(ir->t)) { | 2175 | if (irt_isfunc(ir->t)) { |
| 2081 | J->baseslot = s+1; | 2176 | if (s != 0) { |
| 2082 | J->framedepth++; | 2177 | J->baseslot = s+1; |
| 2178 | J->framedepth++; | ||
| 2179 | } | ||
| 2083 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | 2180 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); |
| 2084 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | 2181 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); |
| 2085 | } else { | 2182 | } else { |
diff --git a/src/lj_snap.c b/src/lj_snap.c index 09cd095c..d27404f2 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
| @@ -251,9 +251,9 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
| 251 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); | 251 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); |
| 252 | if (isluafunc(fn)) { | 252 | if (isluafunc(fn)) { |
| 253 | TValue *fs; | 253 | TValue *fs; |
| 254 | newbase = o+1; | 254 | fs = o+1 + funcproto(fn)->framesize; |
| 255 | fs = newbase + funcproto(fn)->framesize; | ||
| 256 | if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ | 255 | if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ |
| 256 | if (s != 0) newbase = o+1; | ||
| 257 | } | 257 | } |
| 258 | } | 258 | } |
| 259 | } | 259 | } |
| @@ -262,21 +262,17 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
| 262 | setnilV(o); /* Clear unreferenced slots of newly added frames. */ | 262 | setnilV(o); /* Clear unreferenced slots of newly added frames. */ |
| 263 | } | 263 | } |
| 264 | } | 264 | } |
| 265 | if (newbase) { /* Clear remainder of newly added frames. */ | 265 | if (newbase) L->base = newbase; |
| 266 | L->base = newbase; | 266 | if (ntop >= L->maxstack) { /* Need to grow the stack again. */ |
| 267 | if (ntop >= L->maxstack) { /* Need to grow the stack again. */ | 267 | MSize need = (MSize)(ntop - o); |
| 268 | MSize need = (MSize)(ntop - o); | 268 | L->top = o; |
| 269 | L->top = o; | 269 | lj_state_growstack(L, need); |
| 270 | lj_state_growstack(L, need); | 270 | o = L->top; |
| 271 | o = L->top; | 271 | ntop = o + need; |
| 272 | ntop = o + need; | ||
| 273 | } | ||
| 274 | L->top = curr_topL(L); | ||
| 275 | for (; o < ntop; o++) | ||
| 276 | setnilV(o); | ||
| 277 | } else { /* Must not clear slots of existing frame. */ | ||
| 278 | L->top = curr_topL(L); | ||
| 279 | } | 272 | } |
| 273 | L->top = curr_topL(L); | ||
| 274 | for (; o < ntop; o++) /* Clear remainder of newly added frames. */ | ||
| 275 | setnilV(o); | ||
| 280 | lua_assert(map + nslots == flinks-1); | 276 | lua_assert(map + nslots == flinks-1); |
| 281 | J->pc = (const BCIns *)(uintptr_t)(*--flinks); | 277 | J->pc = (const BCIns *)(uintptr_t)(*--flinks); |
| 282 | } | 278 | } |
diff --git a/src/lj_state.h b/src/lj_state.h index 54e85405..4e4185c0 100644 --- a/src/lj_state.h +++ b/src/lj_state.h | |||
| @@ -17,7 +17,7 @@ | |||
| 17 | LJ_FUNC void lj_state_relimitstack(lua_State *L); | 17 | LJ_FUNC void lj_state_relimitstack(lua_State *L); |
| 18 | LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); | 18 | LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); |
| 19 | LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); | 19 | LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); |
| 20 | LJ_FUNCA void lj_state_growstack1(lua_State *L); | 20 | LJ_FUNC void lj_state_growstack1(lua_State *L); |
| 21 | 21 | ||
| 22 | static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) | 22 | static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) |
| 23 | { | 23 | { |
diff --git a/src/lj_str.c b/src/lj_str.c index 26f91cba..62322b59 100644 --- a/src/lj_str.c +++ b/src/lj_str.c | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | /* -- String interning ---------------------------------------------------- */ | 21 | /* -- String interning ---------------------------------------------------- */ |
| 22 | 22 | ||
| 23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | 23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
| 24 | int32_t lj_str_cmp(GCstr *a, GCstr *b) | 24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
| 25 | { | 25 | { |
| 26 | MSize i, n = a->len > b->len ? b->len : a->len; | 26 | MSize i, n = a->len > b->len ? b->len : a->len; |
| 27 | for (i = 0; i < n; i += 4) { | 27 | for (i = 0; i < n; i += 4) { |
| @@ -119,8 +119,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | |||
| 119 | 119 | ||
| 120 | /* -- Type conversions ---------------------------------------------------- */ | 120 | /* -- Type conversions ---------------------------------------------------- */ |
| 121 | 121 | ||
| 122 | /* Convert string object to number. */ | ||
| 123 | int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n) | ||
| 124 | { | ||
| 125 | return lj_str_numconv(strdata(str), n); | ||
| 126 | } | ||
| 127 | |||
| 122 | /* Convert string to number. */ | 128 | /* Convert string to number. */ |
| 123 | int lj_str_numconv(const char *s, TValue *n) | 129 | int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n) |
| 124 | { | 130 | { |
| 125 | lua_Number sign = 1; | 131 | lua_Number sign = 1; |
| 126 | const uint8_t *p = (const uint8_t *)s; | 132 | const uint8_t *p = (const uint8_t *)s; |
| @@ -167,7 +173,7 @@ parsedbl: | |||
| 167 | } | 173 | } |
| 168 | 174 | ||
| 169 | /* Convert number to string. */ | 175 | /* Convert number to string. */ |
| 170 | GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) | 176 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) |
| 171 | { | 177 | { |
| 172 | char s[LUAI_MAXNUMBER2STR]; | 178 | char s[LUAI_MAXNUMBER2STR]; |
| 173 | lua_Number n = *np; | 179 | lua_Number n = *np; |
| @@ -176,7 +182,7 @@ GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) | |||
| 176 | } | 182 | } |
| 177 | 183 | ||
| 178 | /* Convert integer to string. */ | 184 | /* Convert integer to string. */ |
| 179 | GCstr *lj_str_fromint(lua_State *L, int32_t k) | 185 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) |
| 180 | { | 186 | { |
| 181 | char s[1+10]; | 187 | char s[1+10]; |
| 182 | char *p = s+sizeof(s); | 188 | char *p = s+sizeof(s); |
diff --git a/src/lj_str.h b/src/lj_str.h index f7e56d16..e8b242c0 100644 --- a/src/lj_str.h +++ b/src/lj_str.h | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
| 12 | 12 | ||
| 13 | /* String interning. */ | 13 | /* String interning. */ |
| 14 | LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b); | 14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); |
| 15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); | 15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); |
| 16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); | 16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); |
| 17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | 17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); |
| @@ -20,9 +20,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | |||
| 20 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) | 20 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) |
| 21 | 21 | ||
| 22 | /* Type conversions. */ | 22 | /* Type conversions. */ |
| 23 | LJ_FUNCA int lj_str_numconv(const char *s, TValue *n); | 23 | LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n); |
| 24 | LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np); | 24 | LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n); |
| 25 | LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k); | 25 | LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); |
| 26 | LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k); | ||
| 26 | 27 | ||
| 27 | /* String formatting. */ | 28 | /* String formatting. */ |
| 28 | LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); | 29 | LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); |
diff --git a/src/lj_tab.c b/src/lj_tab.c index 9af51027..ceafb770 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c | |||
| @@ -160,8 +160,16 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) | |||
| 160 | return t; | 160 | return t; |
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) | ||
| 164 | { | ||
| 165 | GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24); | ||
| 166 | clearapart(t); | ||
| 167 | if (t->hmask > 0) clearhpart(t); | ||
| 168 | return t; | ||
| 169 | } | ||
| 170 | |||
| 163 | /* Duplicate a table. */ | 171 | /* Duplicate a table. */ |
| 164 | GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) | 172 | GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) |
| 165 | { | 173 | { |
| 166 | GCtab *t; | 174 | GCtab *t; |
| 167 | uint32_t asize, hmask; | 175 | uint32_t asize, hmask; |
| @@ -334,8 +342,8 @@ static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray) | |||
| 334 | static uint32_t bestasize(uint32_t bins[], uint32_t *narray) | 342 | static uint32_t bestasize(uint32_t bins[], uint32_t *narray) |
| 335 | { | 343 | { |
| 336 | uint32_t b, sum, na = 0, sz = 0, nn = *narray; | 344 | uint32_t b, sum, na = 0, sz = 0, nn = *narray; |
| 337 | for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++) | 345 | for (b = 0, sum = 0; 2*nn > (1u<<b) && sum != nn; b++) |
| 338 | if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) { | 346 | if (bins[b] > 0 && 2*(sum += bins[b]) > (1u<<b)) { |
| 339 | sz = (2u<<b)+1; | 347 | sz = (2u<<b)+1; |
| 340 | na = sum; | 348 | na = sum; |
| 341 | } | 349 | } |
| @@ -599,7 +607,7 @@ static MSize unbound_search(GCtab *t, MSize j) | |||
| 599 | ** Try to find a boundary in table `t'. A `boundary' is an integer index | 607 | ** Try to find a boundary in table `t'. A `boundary' is an integer index |
| 600 | ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). | 608 | ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). |
| 601 | */ | 609 | */ |
| 602 | MSize lj_tab_len(GCtab *t) | 610 | MSize LJ_FASTCALL lj_tab_len(GCtab *t) |
| 603 | { | 611 | { |
| 604 | MSize j = (MSize)t->asize; | 612 | MSize j = (MSize)t->asize; |
| 605 | if (j > 1 && tvisnil(arrayslot(t, j-1))) { | 613 | if (j > 1 && tvisnil(arrayslot(t, j-1))) { |
diff --git a/src/lj_tab.h b/src/lj_tab.h index e9e8bcd1..b2a8c3aa 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h | |||
| @@ -11,7 +11,8 @@ | |||
| 11 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) | 11 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) |
| 12 | 12 | ||
| 13 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); | 13 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); |
| 14 | LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt); | 14 | LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); |
| 15 | LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); | ||
| 15 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); | 16 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); |
| 16 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); | 17 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); |
| 17 | 18 | ||
| @@ -36,6 +37,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | |||
| 36 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) | 37 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) |
| 37 | 38 | ||
| 38 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); | 39 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); |
| 39 | LJ_FUNCA MSize lj_tab_len(GCtab *t); | 40 | LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); |
| 40 | 41 | ||
| 41 | #endif | 42 | #endif |
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 3ee4fa00..2fb3c4b8 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
| @@ -32,6 +32,11 @@ enum { | |||
| 32 | 32 | ||
| 33 | /* Calling conventions. */ | 33 | /* Calling conventions. */ |
| 34 | RID_RET = RID_EAX, | 34 | RID_RET = RID_EAX, |
| 35 | #if LJ_64 | ||
| 36 | RID_FPRET = RID_XMM0, | ||
| 37 | #else | ||
| 38 | RID_RETHI = RID_EDX, | ||
| 39 | #endif | ||
| 35 | 40 | ||
| 36 | /* These definitions must match with the *.dasc file(s): */ | 41 | /* These definitions must match with the *.dasc file(s): */ |
| 37 | RID_BASE = RID_EDX, /* Interpreter BASE. */ | 42 | RID_BASE = RID_EDX, /* Interpreter BASE. */ |
| @@ -98,8 +103,8 @@ enum { | |||
| 98 | }; | 103 | }; |
| 99 | 104 | ||
| 100 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ | 105 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ |
| 101 | #define sps_scale(slot) (4 * (int32_t)(slot)) | 106 | #define sps_scale(slot) (4 * (int32_t)(slot)) |
| 102 | #define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3)) | 107 | #define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3)) |
| 103 | 108 | ||
| 104 | /* -- Exit state ---------------------------------------------------------- */ | 109 | /* -- Exit state ---------------------------------------------------------- */ |
| 105 | 110 | ||
| @@ -185,6 +190,7 @@ typedef enum { | |||
| 185 | XO_ARITHib = XO_(80), | 190 | XO_ARITHib = XO_(80), |
| 186 | XO_ARITHi = XO_(81), | 191 | XO_ARITHi = XO_(81), |
| 187 | XO_ARITHi8 = XO_(83), | 192 | XO_ARITHi8 = XO_(83), |
| 193 | XO_ARITHiw8 = XO_66(83), | ||
| 188 | XO_SHIFTi = XO_(c1), | 194 | XO_SHIFTi = XO_(c1), |
| 189 | XO_SHIFT1 = XO_(d1), | 195 | XO_SHIFT1 = XO_(d1), |
| 190 | XO_SHIFTcl = XO_(d3), | 196 | XO_SHIFTcl = XO_(d3), |
| @@ -216,6 +222,7 @@ typedef enum { | |||
| 216 | XO_CVTSI2SD = XO_f20f(2a), | 222 | XO_CVTSI2SD = XO_f20f(2a), |
| 217 | XO_CVTSD2SI = XO_f20f(2d), | 223 | XO_CVTSD2SI = XO_f20f(2d), |
| 218 | XO_CVTTSD2SI= XO_f20f(2c), | 224 | XO_CVTTSD2SI= XO_f20f(2c), |
| 225 | XO_MOVD = XO_660f(6e), | ||
| 219 | XO_MOVDto = XO_660f(7e), | 226 | XO_MOVDto = XO_660f(7e), |
| 220 | 227 | ||
| 221 | XO_FLDq = XO_(dd), XOg_FLDq = 0, | 228 | XO_FLDq = XO_(dd), XOg_FLDq = 0, |
diff --git a/src/lj_udata.c b/src/lj_udata.c index 863889c9..717d483b 100644 --- a/src/lj_udata.c +++ b/src/lj_udata.c | |||
| @@ -16,6 +16,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) | |||
| 16 | global_State *g = G(L); | 16 | global_State *g = G(L); |
| 17 | newwhite(g, ud); /* Not finalized. */ | 17 | newwhite(g, ud); /* Not finalized. */ |
| 18 | ud->gct = ~LJ_TUDATA; | 18 | ud->gct = ~LJ_TUDATA; |
| 19 | ud->udtype = UDTYPE_USERDATA; | ||
| 19 | ud->len = sz; | 20 | ud->len = sz; |
| 20 | /* NOBARRIER: The GCudata is new (marked white). */ | 21 | /* NOBARRIER: The GCudata is new (marked white). */ |
| 21 | setgcrefnull(ud->metatable); | 22 | setgcrefnull(ud->metatable); |
