diff options
author | Mike Pall <mike> | 2020-05-27 19:20:44 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2020-05-27 19:20:44 +0200 |
commit | 1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e (patch) | |
tree | c0bb622eb6a70b71c58d83d4cceed4e4b3279300 | |
parent | b2307c8ad817e350d65cc909a579ca2f77439682 (diff) | |
download | luajit-1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e.tar.gz luajit-1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e.tar.bz2 luajit-1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e.zip |
Optimize table length computation with hinting.
10x faster on loop with t[#t+1] = x idiom. Also used by table.insert.
-rw-r--r-- | src/lj_asm.c | 7 | ||||
-rw-r--r-- | src/lj_ffrecord.c | 6 | ||||
-rw-r--r-- | src/lj_ir.h | 1 | ||||
-rw-r--r-- | src/lj_ircall.h | 1 | ||||
-rw-r--r-- | src/lj_iropt.h | 2 | ||||
-rw-r--r-- | src/lj_opt_fold.c | 4 | ||||
-rw-r--r-- | src/lj_opt_loop.c | 10 | ||||
-rw-r--r-- | src/lj_opt_mem.c | 97 | ||||
-rw-r--r-- | src/lj_opt_sink.c | 3 | ||||
-rw-r--r-- | src/lj_record.c | 4 | ||||
-rw-r--r-- | src/lj_tab.c | 79 | ||||
-rw-r--r-- | src/lj_tab.h | 3 |
12 files changed, 135 insertions, 82 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c index dd84a4f2..90373f27 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -1634,6 +1634,12 @@ static void asm_fuseequal(ASMState *as, IRIns *ir) | |||
1634 | } | 1634 | } |
1635 | } | 1635 | } |
1636 | 1636 | ||
1637 | static void asm_alen(ASMState *as, IRIns *ir) | ||
1638 | { | ||
1639 | asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len : | ||
1640 | IRCALL_lj_tab_len_hint); | ||
1641 | } | ||
1642 | |||
1637 | /* -- Instruction dispatch ------------------------------------------------ */ | 1643 | /* -- Instruction dispatch ------------------------------------------------ */ |
1638 | 1644 | ||
1639 | /* Assemble a single instruction. */ | 1645 | /* Assemble a single instruction. */ |
@@ -1716,6 +1722,7 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
1716 | case IR_FLOAD: asm_fload(as, ir); break; | 1722 | case IR_FLOAD: asm_fload(as, ir); break; |
1717 | case IR_XLOAD: asm_xload(as, ir); break; | 1723 | case IR_XLOAD: asm_xload(as, ir); break; |
1718 | case IR_SLOAD: asm_sload(as, ir); break; | 1724 | case IR_SLOAD: asm_sload(as, ir); break; |
1725 | case IR_ALEN: asm_alen(as, ir); break; | ||
1719 | 1726 | ||
1720 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | 1727 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; |
1721 | case IR_FSTORE: asm_fstore(as, ir); break; | 1728 | case IR_FSTORE: asm_fstore(as, ir); break; |
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 42049511..2557cadf 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c | |||
@@ -281,7 +281,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd) | |||
281 | if (tref_isstr(tr)) | 281 | if (tref_isstr(tr)) |
282 | J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); | 282 | J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); |
283 | else if (tref_istab(tr)) | 283 | else if (tref_istab(tr)) |
284 | J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); | 284 | J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL); |
285 | /* else: Interpreter will throw. */ | 285 | /* else: Interpreter will throw. */ |
286 | UNUSED(rd); | 286 | UNUSED(rd); |
287 | } | 287 | } |
@@ -1026,7 +1026,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) | |||
1026 | rd->nres = 0; | 1026 | rd->nres = 0; |
1027 | if (tref_istab(ix.tab) && ix.val) { | 1027 | if (tref_istab(ix.tab) && ix.val) { |
1028 | if (!J->base[2]) { /* Simple push: t[#t+1] = v */ | 1028 | if (!J->base[2]) { /* Simple push: t[#t+1] = v */ |
1029 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); | 1029 | TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL); |
1030 | GCtab *t = tabV(&rd->argv[0]); | 1030 | GCtab *t = tabV(&rd->argv[0]); |
1031 | ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); | 1031 | ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); |
1032 | settabV(J->L, &ix.tabv, t); | 1032 | settabV(J->L, &ix.tabv, t); |
@@ -1050,7 +1050,7 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) | |||
1050 | lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); | 1050 | lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); |
1051 | TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? | 1051 | TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? |
1052 | lj_opt_narrow_toint(J, J->base[3]) : | 1052 | lj_opt_narrow_toint(J, J->base[3]) : |
1053 | lj_ir_call(J, IRCALL_lj_tab_len, tab); | 1053 | emitir(IRTI(IR_ALEN), tab, TREF_NIL); |
1054 | TRef hdr = recff_bufhdr(J); | 1054 | TRef hdr = recff_bufhdr(J); |
1055 | TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); | 1055 | TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); |
1056 | emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); | 1056 | emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); |
diff --git a/src/lj_ir.h b/src/lj_ir.h index 1a9a89a3..a801d5d0 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -106,6 +106,7 @@ | |||
106 | _(XLOAD, L , ref, lit) \ | 106 | _(XLOAD, L , ref, lit) \ |
107 | _(SLOAD, L , lit, lit) \ | 107 | _(SLOAD, L , lit, lit) \ |
108 | _(VLOAD, L , ref, ___) \ | 108 | _(VLOAD, L , ref, ___) \ |
109 | _(ALEN, L , ref, ref) \ | ||
109 | \ | 110 | \ |
110 | _(ASTORE, S , ref, ref) \ | 111 | _(ASTORE, S , ref, ref) \ |
111 | _(HSTORE, S , ref, ref) \ | 112 | _(HSTORE, S , ref, ref) \ |
diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 5c72478b..dbc8c0db 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h | |||
@@ -168,6 +168,7 @@ typedef struct CCallInfo { | |||
168 | _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ | 168 | _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ |
169 | _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ | 169 | _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ |
170 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ | 170 | _(ANY, lj_tab_len, 1, FL, INT, 0) \ |
171 | _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ | ||
171 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ | 172 | _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ |
172 | _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ | 173 | _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ |
173 | _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ | 174 | _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 02d6b946..8333483f 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
@@ -120,7 +120,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); | |||
120 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); | 120 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); |
121 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); | 121 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); |
122 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); | 122 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); |
123 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); | 123 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J); |
124 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); | 124 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); |
125 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); | 125 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); |
126 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); | 126 | LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); |
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 7a02c6ff..42c57c9b 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -2132,8 +2132,8 @@ LJFOLDX(lj_opt_fwd_hload) | |||
2132 | LJFOLD(ULOAD any) | 2132 | LJFOLD(ULOAD any) |
2133 | LJFOLDX(lj_opt_fwd_uload) | 2133 | LJFOLDX(lj_opt_fwd_uload) |
2134 | 2134 | ||
2135 | LJFOLD(CALLL any IRCALL_lj_tab_len) | 2135 | LJFOLD(ALEN any any) |
2136 | LJFOLDX(lj_opt_fwd_tab_len) | 2136 | LJFOLDX(lj_opt_fwd_alen) |
2137 | 2137 | ||
2138 | /* Upvalue refs are really loads, but there are no corresponding stores. | 2138 | /* Upvalue refs are really loads, but there are no corresponding stores. |
2139 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | 2139 | ** So CSE is ok for them, except for UREFO across a GC step (see below). |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index c5919ca0..2eacb7d7 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
@@ -352,10 +352,12 @@ static void loop_unroll(LoopState *lps) | |||
352 | irr = IR(ref); | 352 | irr = IR(ref); |
353 | goto phiconv; | 353 | goto phiconv; |
354 | } | 354 | } |
355 | } else if (ref != REF_DROP && irr->o == IR_CONV && | 355 | } else if (ref != REF_DROP && ref > invar && |
356 | ref > invar && irr->op1 < invar) { | 356 | ((irr->o == IR_CONV && irr->op1 < invar) || |
357 | /* May need an extra PHI for a CONV. */ | 357 | (irr->o == IR_ALEN && irr->op2 < invar && |
358 | ref = irr->op1; | 358 | irr->op2 != REF_NIL))) { |
359 | /* May need an extra PHI for a CONV or ALEN hint. */ | ||
360 | ref = irr->o == IR_CONV ? irr->op1 : irr->op2; | ||
359 | irr = IR(ref); | 361 | irr = IR(ref); |
360 | phiconv: | 362 | phiconv: |
361 | if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { | 363 | if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { |
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 079f7cfe..4c2c05fe 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
@@ -363,7 +363,7 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) | |||
363 | IRIns *ir; | 363 | IRIns *ir; |
364 | /* Check for any intervening guards (includes conflicting loads). */ | 364 | /* Check for any intervening guards (includes conflicting loads). */ |
365 | for (ir = IR(J->cur.nins-1); ir > store; ir--) | 365 | for (ir = IR(J->cur.nins-1); ir > store; ir--) |
366 | if (irt_isguard(ir->t) || ir->o == IR_CALLL) | 366 | if (irt_isguard(ir->t) || ir->o == IR_ALEN) |
367 | goto doemit; /* No elimination possible. */ | 367 | goto doemit; /* No elimination possible. */ |
368 | /* Remove redundant store from chain and replace with NOP. */ | 368 | /* Remove redundant store from chain and replace with NOP. */ |
369 | *refp = store->prev; | 369 | *refp = store->prev; |
@@ -381,6 +381,67 @@ doemit: | |||
381 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | 381 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ |
382 | } | 382 | } |
383 | 383 | ||
384 | /* ALEN forwarding. */ | ||
385 | TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J) | ||
386 | { | ||
387 | IRRef tab = fins->op1; /* Table reference. */ | ||
388 | IRRef lim = tab; /* Search limit. */ | ||
389 | IRRef ref; | ||
390 | |||
391 | /* Search for conflicting HSTORE with numeric key. */ | ||
392 | ref = J->chain[IR_HSTORE]; | ||
393 | while (ref > lim) { | ||
394 | IRIns *store = IR(ref); | ||
395 | IRIns *href = IR(store->op1); | ||
396 | IRIns *key = IR(href->op2); | ||
397 | if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { | ||
398 | lim = ref; /* Conflicting store found, limits search for ALEN. */ | ||
399 | break; | ||
400 | } | ||
401 | ref = store->prev; | ||
402 | } | ||
403 | |||
404 | /* Try to find a matching ALEN. */ | ||
405 | ref = J->chain[IR_ALEN]; | ||
406 | while (ref > lim) { | ||
407 | /* CSE for ALEN only depends on the table, not the hint. */ | ||
408 | if (IR(ref)->op1 == tab) { | ||
409 | IRRef sref; | ||
410 | |||
411 | /* Search for aliasing table.clear. */ | ||
412 | if (!fwd_aa_tab_clear(J, ref, tab)) | ||
413 | break; | ||
414 | |||
415 | /* Search for hint-forwarding or conflicting store. */ | ||
416 | sref = J->chain[IR_ASTORE]; | ||
417 | while (sref > ref) { | ||
418 | IRIns *store = IR(sref); | ||
419 | IRIns *aref = IR(store->op1); | ||
420 | IRIns *fref = IR(aref->op1); | ||
421 | if (tab == fref->op1) { /* ASTORE to the same table. */ | ||
422 | /* Detect t[#t+1] = x idiom for push. */ | ||
423 | IRIns *idx = IR(aref->op2); | ||
424 | if (!irt_isnil(store->t) && | ||
425 | idx->o == IR_ADD && idx->op1 == ref && | ||
426 | IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) { | ||
427 | /* Note: this requires an extra PHI check in loop unroll. */ | ||
428 | fins->op2 = aref->op2; /* Set ALEN hint. */ | ||
429 | } | ||
430 | goto doemit; /* Conflicting store, possibly giving a hint. */ | ||
431 | } else if (aa_table(J, tab, fref->op1) == ALIAS_NO) { | ||
432 | goto doemit; /* Conflicting store. */ | ||
433 | } | ||
434 | sref = store->prev; | ||
435 | } | ||
436 | |||
437 | return ref; /* Plain ALEN forwarding. */ | ||
438 | } | ||
439 | ref = IR(ref)->prev; | ||
440 | } | ||
441 | doemit: | ||
442 | return EMITFOLD; | ||
443 | } | ||
444 | |||
384 | /* -- ULOAD forwarding ---------------------------------------------------- */ | 445 | /* -- ULOAD forwarding ---------------------------------------------------- */ |
385 | 446 | ||
386 | /* The current alias analysis for upvalues is very simplistic. It only | 447 | /* The current alias analysis for upvalues is very simplistic. It only |
@@ -430,7 +491,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) | |||
430 | 491 | ||
431 | cselim: | 492 | cselim: |
432 | /* Try to find a matching load. Below the conflicting store, if any. */ | 493 | /* Try to find a matching load. Below the conflicting store, if any. */ |
433 | |||
434 | ref = J->chain[IR_ULOAD]; | 494 | ref = J->chain[IR_ULOAD]; |
435 | while (ref > lim) { | 495 | while (ref > lim) { |
436 | IRIns *ir = IR(ref); | 496 | IRIns *ir = IR(ref); |
@@ -845,39 +905,6 @@ doemit: | |||
845 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | 905 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ |
846 | } | 906 | } |
847 | 907 | ||
848 | /* -- Forwarding of lj_tab_len -------------------------------------------- */ | ||
849 | |||
850 | /* This is rather simplistic right now, but better than nothing. */ | ||
851 | TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) | ||
852 | { | ||
853 | IRRef tab = fins->op1; /* Table reference. */ | ||
854 | IRRef lim = tab; /* Search limit. */ | ||
855 | IRRef ref; | ||
856 | |||
857 | /* Any ASTORE is a conflict and limits the search. */ | ||
858 | if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; | ||
859 | |||
860 | /* Search for conflicting HSTORE with numeric key. */ | ||
861 | ref = J->chain[IR_HSTORE]; | ||
862 | while (ref > lim) { | ||
863 | IRIns *store = IR(ref); | ||
864 | IRIns *href = IR(store->op1); | ||
865 | IRIns *key = IR(href->op2); | ||
866 | if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { | ||
867 | lim = ref; /* Conflicting store found, limits search for TLEN. */ | ||
868 | break; | ||
869 | } | ||
870 | ref = store->prev; | ||
871 | } | ||
872 | |||
873 | /* Search for aliasing table.clear. */ | ||
874 | if (!fwd_aa_tab_clear(J, lim, tab)) | ||
875 | return lj_ir_emit(J); | ||
876 | |||
877 | /* Try to find a matching load. Below the conflicting store, if any. */ | ||
878 | return lj_opt_cselim(J, lim); | ||
879 | } | ||
880 | |||
881 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ | 908 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ |
882 | 909 | ||
883 | /* Check whether the previous value for a table store is non-nil. | 910 | /* Check whether the previous value for a table store is non-nil. |
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index c5323b11..11101702 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c | |||
@@ -78,8 +78,7 @@ static void sink_mark_ins(jit_State *J) | |||
78 | switch (ir->o) { | 78 | switch (ir->o) { |
79 | case IR_BASE: | 79 | case IR_BASE: |
80 | return; /* Finished. */ | 80 | return; /* Finished. */ |
81 | case IR_CALLL: /* IRCALL_lj_tab_len */ | 81 | case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN: |
82 | case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: | ||
83 | irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ | 82 | irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ |
84 | break; | 83 | break; |
85 | case IR_FLOAD: | 84 | case IR_FLOAD: |
diff --git a/src/lj_record.c b/src/lj_record.c index 8eec0071..4fc22742 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -1058,7 +1058,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) | |||
1058 | lj_record_call(J, func, 2); | 1058 | lj_record_call(J, func, 2); |
1059 | } else { | 1059 | } else { |
1060 | if (LJ_52 && tref_istab(tr)) | 1060 | if (LJ_52 && tref_istab(tr)) |
1061 | return lj_ir_call(J, IRCALL_lj_tab_len, tr); | 1061 | return emitir(IRTI(IR_ALEN), tr, TREF_NIL); |
1062 | lj_trace_err(J, LJ_TRERR_NOMM); | 1062 | lj_trace_err(J, LJ_TRERR_NOMM); |
1063 | } | 1063 | } |
1064 | return 0; /* No result yet. */ | 1064 | return 0; /* No result yet. */ |
@@ -2191,7 +2191,7 @@ void lj_record_ins(jit_State *J) | |||
2191 | if (tref_isstr(rc)) | 2191 | if (tref_isstr(rc)) |
2192 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); | 2192 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); |
2193 | else if (!LJ_52 && tref_istab(rc)) | 2193 | else if (!LJ_52 && tref_istab(rc)) |
2194 | rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); | 2194 | rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL); |
2195 | else | 2195 | else |
2196 | rc = rec_mm_len(J, rc, rcv); | 2196 | rc = rec_mm_len(J, rc, rcv); |
2197 | break; | 2197 | break; |
diff --git a/src/lj_tab.c b/src/lj_tab.c index dcd24d31..eb9ef4af 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c | |||
@@ -639,49 +639,62 @@ int lj_tab_next(lua_State *L, GCtab *t, TValue *key) | |||
639 | 639 | ||
640 | /* -- Table length calculation -------------------------------------------- */ | 640 | /* -- Table length calculation -------------------------------------------- */ |
641 | 641 | ||
642 | static MSize unbound_search(GCtab *t, MSize j) | 642 | /* Compute table length. Slow path with mixed array/hash lookups. */ |
643 | LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi) | ||
643 | { | 644 | { |
644 | cTValue *tv; | 645 | cTValue *tv; |
645 | MSize i = j; /* i is zero or a present index */ | 646 | size_t lo = hi; |
646 | j++; | 647 | hi++; |
647 | /* find `i' and `j' such that i is present and j is not */ | 648 | /* Widening search for an upper bound. */ |
648 | while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { | 649 | while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) { |
649 | i = j; | 650 | lo = hi; |
650 | j *= 2; | 651 | hi += hi; |
651 | if (j > (MSize)(INT_MAX-2)) { /* overflow? */ | 652 | if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */ |
652 | /* table was built with bad purposes: resort to linear search */ | 653 | lo = 1; |
653 | i = 1; | 654 | while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++; |
654 | while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; | 655 | return (MSize)(lo - 1); |
655 | return i - 1; | ||
656 | } | 656 | } |
657 | } | 657 | } |
658 | /* now do a binary search between them */ | 658 | /* Binary search to find a non-nil to nil transition. */ |
659 | while (j - i > 1) { | 659 | while (hi - lo > 1) { |
660 | MSize m = (i+j)/2; | 660 | size_t mid = (lo+hi) >> 1; |
661 | cTValue *tvb = lj_tab_getint(t, (int32_t)m); | 661 | cTValue *tvb = lj_tab_getint(t, (int32_t)mid); |
662 | if (tvb && !tvisnil(tvb)) i = m; else j = m; | 662 | if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid; |
663 | } | 663 | } |
664 | return i; | 664 | return (MSize)lo; |
665 | } | 665 | } |
666 | 666 | ||
667 | /* | 667 | /* Compute table length. Fast path. */ |
668 | ** Try to find a boundary in table `t'. A `boundary' is an integer index | ||
669 | ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). | ||
670 | */ | ||
671 | MSize LJ_FASTCALL lj_tab_len(GCtab *t) | 668 | MSize LJ_FASTCALL lj_tab_len(GCtab *t) |
672 | { | 669 | { |
673 | MSize j = (MSize)t->asize; | 670 | size_t hi = (size_t)t->asize; |
674 | if (j > 1 && tvisnil(arrayslot(t, j-1))) { | 671 | if (hi) hi--; |
675 | MSize i = 1; | 672 | /* In a growing array the last array element is very likely nil. */ |
676 | while (j - i > 1) { | 673 | if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) { |
677 | MSize m = (i+j)/2; | 674 | /* Binary search to find a non-nil to nil transition in the array. */ |
678 | if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; | 675 | size_t lo = 0; |
676 | while (hi - lo > 1) { | ||
677 | size_t mid = (lo+hi) >> 1; | ||
678 | if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid; | ||
679 | } | 679 | } |
680 | return i-1; | 680 | return (MSize)lo; |
681 | } | ||
682 | /* Without a hash part, there's an implicit nil after the last element. */ | ||
683 | return t->hmask ? tab_len_slow(t, hi) : (MSize)hi; | ||
684 | } | ||
685 | |||
686 | #if LJ_HASJIT | ||
687 | /* Verify hinted table length or compute it. */ | ||
688 | MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint) | ||
689 | { | ||
690 | size_t asize = (size_t)t->asize; | ||
691 | cTValue *tv = arrayslot(t, hint); | ||
692 | if (LJ_LIKELY(hint+1 < asize)) { | ||
693 | if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint; | ||
694 | } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) { | ||
695 | return (MSize)hint; | ||
681 | } | 696 | } |
682 | if (j) j--; | 697 | return lj_tab_len(t); |
683 | if (t->hmask <= 0) | ||
684 | return j; | ||
685 | return unbound_search(t, j); | ||
686 | } | 698 | } |
699 | #endif | ||
687 | 700 | ||
diff --git a/src/lj_tab.h b/src/lj_tab.h index 597c94b2..f31590cd 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h | |||
@@ -69,5 +69,8 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | |||
69 | 69 | ||
70 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); | 70 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); |
71 | LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); | 71 | LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); |
72 | #if LJ_HASJIT | ||
73 | LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint); | ||
74 | #endif | ||
72 | 75 | ||
73 | #endif | 76 | #endif |