aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pall <mike>2020-05-27 19:20:44 +0200
committerMike Pall <mike>2020-05-27 19:20:44 +0200
commit1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e (patch)
treec0bb622eb6a70b71c58d83d4cceed4e4b3279300
parentb2307c8ad817e350d65cc909a579ca2f77439682 (diff)
downloadluajit-1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e.tar.gz
luajit-1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e.tar.bz2
luajit-1a4ff1311740aa6c85f7a9101b6aa9bfaafa3f8e.zip
Optimize table length computation with hinting.
10x faster on loop with t[#t+1] = x idiom. Also used by table.insert.
-rw-r--r--src/lj_asm.c7
-rw-r--r--src/lj_ffrecord.c6
-rw-r--r--src/lj_ir.h1
-rw-r--r--src/lj_ircall.h1
-rw-r--r--src/lj_iropt.h2
-rw-r--r--src/lj_opt_fold.c4
-rw-r--r--src/lj_opt_loop.c10
-rw-r--r--src/lj_opt_mem.c97
-rw-r--r--src/lj_opt_sink.c3
-rw-r--r--src/lj_record.c4
-rw-r--r--src/lj_tab.c79
-rw-r--r--src/lj_tab.h3
12 files changed, 135 insertions, 82 deletions
diff --git a/src/lj_asm.c b/src/lj_asm.c
index dd84a4f2..90373f27 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -1634,6 +1634,12 @@ static void asm_fuseequal(ASMState *as, IRIns *ir)
1634 } 1634 }
1635} 1635}
1636 1636
1637static void asm_alen(ASMState *as, IRIns *ir)
1638{
1639 asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
1640 IRCALL_lj_tab_len_hint);
1641}
1642
1637/* -- Instruction dispatch ------------------------------------------------ */ 1643/* -- Instruction dispatch ------------------------------------------------ */
1638 1644
1639/* Assemble a single instruction. */ 1645/* Assemble a single instruction. */
@@ -1716,6 +1722,7 @@ static void asm_ir(ASMState *as, IRIns *ir)
1716 case IR_FLOAD: asm_fload(as, ir); break; 1722 case IR_FLOAD: asm_fload(as, ir); break;
1717 case IR_XLOAD: asm_xload(as, ir); break; 1723 case IR_XLOAD: asm_xload(as, ir); break;
1718 case IR_SLOAD: asm_sload(as, ir); break; 1724 case IR_SLOAD: asm_sload(as, ir); break;
1725 case IR_ALEN: asm_alen(as, ir); break;
1719 1726
1720 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; 1727 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1721 case IR_FSTORE: asm_fstore(as, ir); break; 1728 case IR_FSTORE: asm_fstore(as, ir); break;
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 42049511..2557cadf 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -281,7 +281,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd)
281 if (tref_isstr(tr)) 281 if (tref_isstr(tr))
282 J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); 282 J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
283 else if (tref_istab(tr)) 283 else if (tref_istab(tr))
284 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); 284 J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL);
285 /* else: Interpreter will throw. */ 285 /* else: Interpreter will throw. */
286 UNUSED(rd); 286 UNUSED(rd);
287} 287}
@@ -1026,7 +1026,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
1026 rd->nres = 0; 1026 rd->nres = 0;
1027 if (tref_istab(ix.tab) && ix.val) { 1027 if (tref_istab(ix.tab) && ix.val) {
1028 if (!J->base[2]) { /* Simple push: t[#t+1] = v */ 1028 if (!J->base[2]) { /* Simple push: t[#t+1] = v */
1029 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); 1029 TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL);
1030 GCtab *t = tabV(&rd->argv[0]); 1030 GCtab *t = tabV(&rd->argv[0]);
1031 ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); 1031 ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1));
1032 settabV(J->L, &ix.tabv, t); 1032 settabV(J->L, &ix.tabv, t);
@@ -1050,7 +1050,7 @@ static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
1050 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); 1050 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
1051 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? 1051 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
1052 lj_opt_narrow_toint(J, J->base[3]) : 1052 lj_opt_narrow_toint(J, J->base[3]) :
1053 lj_ir_call(J, IRCALL_lj_tab_len, tab); 1053 emitir(IRTI(IR_ALEN), tab, TREF_NIL);
1054 TRef hdr = recff_bufhdr(J); 1054 TRef hdr = recff_bufhdr(J);
1055 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); 1055 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
1056 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); 1056 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 1a9a89a3..a801d5d0 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -106,6 +106,7 @@
106 _(XLOAD, L , ref, lit) \ 106 _(XLOAD, L , ref, lit) \
107 _(SLOAD, L , lit, lit) \ 107 _(SLOAD, L , lit, lit) \
108 _(VLOAD, L , ref, ___) \ 108 _(VLOAD, L , ref, ___) \
109 _(ALEN, L , ref, ref) \
109 \ 110 \
110 _(ASTORE, S , ref, ref) \ 111 _(ASTORE, S , ref, ref) \
111 _(HSTORE, S , ref, ref) \ 112 _(HSTORE, S , ref, ref) \
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 5c72478b..dbc8c0db 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -168,6 +168,7 @@ typedef struct CCallInfo {
168 _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ 168 _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
169 _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \ 169 _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \
170 _(ANY, lj_tab_len, 1, FL, INT, 0) \ 170 _(ANY, lj_tab_len, 1, FL, INT, 0) \
171 _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
171 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ 172 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
172 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ 173 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
173 _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \ 174 _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 02d6b946..8333483f 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -120,7 +120,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
120LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); 120LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
121LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); 121LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
122LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); 122LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
123LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); 123LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
124LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); 124LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
125LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); 125LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
126LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); 126LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 7a02c6ff..42c57c9b 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -2132,8 +2132,8 @@ LJFOLDX(lj_opt_fwd_hload)
2132LJFOLD(ULOAD any) 2132LJFOLD(ULOAD any)
2133LJFOLDX(lj_opt_fwd_uload) 2133LJFOLDX(lj_opt_fwd_uload)
2134 2134
2135LJFOLD(CALLL any IRCALL_lj_tab_len) 2135LJFOLD(ALEN any any)
2136LJFOLDX(lj_opt_fwd_tab_len) 2136LJFOLDX(lj_opt_fwd_alen)
2137 2137
2138/* Upvalue refs are really loads, but there are no corresponding stores. 2138/* Upvalue refs are really loads, but there are no corresponding stores.
2139** So CSE is ok for them, except for UREFO across a GC step (see below). 2139** So CSE is ok for them, except for UREFO across a GC step (see below).
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index c5919ca0..2eacb7d7 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -352,10 +352,12 @@ static void loop_unroll(LoopState *lps)
352 irr = IR(ref); 352 irr = IR(ref);
353 goto phiconv; 353 goto phiconv;
354 } 354 }
355 } else if (ref != REF_DROP && irr->o == IR_CONV && 355 } else if (ref != REF_DROP && ref > invar &&
356 ref > invar && irr->op1 < invar) { 356 ((irr->o == IR_CONV && irr->op1 < invar) ||
357 /* May need an extra PHI for a CONV. */ 357 (irr->o == IR_ALEN && irr->op2 < invar &&
358 ref = irr->op1; 358 irr->op2 != REF_NIL))) {
359 /* May need an extra PHI for a CONV or ALEN hint. */
360 ref = irr->o == IR_CONV ? irr->op1 : irr->op2;
359 irr = IR(ref); 361 irr = IR(ref);
360 phiconv: 362 phiconv:
361 if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { 363 if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 079f7cfe..4c2c05fe 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -363,7 +363,7 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
363 IRIns *ir; 363 IRIns *ir;
364 /* Check for any intervening guards (includes conflicting loads). */ 364 /* Check for any intervening guards (includes conflicting loads). */
365 for (ir = IR(J->cur.nins-1); ir > store; ir--) 365 for (ir = IR(J->cur.nins-1); ir > store; ir--)
366 if (irt_isguard(ir->t) || ir->o == IR_CALLL) 366 if (irt_isguard(ir->t) || ir->o == IR_ALEN)
367 goto doemit; /* No elimination possible. */ 367 goto doemit; /* No elimination possible. */
368 /* Remove redundant store from chain and replace with NOP. */ 368 /* Remove redundant store from chain and replace with NOP. */
369 *refp = store->prev; 369 *refp = store->prev;
@@ -381,6 +381,67 @@ doemit:
381 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 381 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
382} 382}
383 383
384/* ALEN forwarding. */
385TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J)
386{
387 IRRef tab = fins->op1; /* Table reference. */
388 IRRef lim = tab; /* Search limit. */
389 IRRef ref;
390
391 /* Search for conflicting HSTORE with numeric key. */
392 ref = J->chain[IR_HSTORE];
393 while (ref > lim) {
394 IRIns *store = IR(ref);
395 IRIns *href = IR(store->op1);
396 IRIns *key = IR(href->op2);
397 if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
398 lim = ref; /* Conflicting store found, limits search for ALEN. */
399 break;
400 }
401 ref = store->prev;
402 }
403
404 /* Try to find a matching ALEN. */
405 ref = J->chain[IR_ALEN];
406 while (ref > lim) {
407 /* CSE for ALEN only depends on the table, not the hint. */
408 if (IR(ref)->op1 == tab) {
409 IRRef sref;
410
411 /* Search for aliasing table.clear. */
412 if (!fwd_aa_tab_clear(J, ref, tab))
413 break;
414
415 /* Search for hint-forwarding or conflicting store. */
416 sref = J->chain[IR_ASTORE];
417 while (sref > ref) {
418 IRIns *store = IR(sref);
419 IRIns *aref = IR(store->op1);
420 IRIns *fref = IR(aref->op1);
421 if (tab == fref->op1) { /* ASTORE to the same table. */
422 /* Detect t[#t+1] = x idiom for push. */
423 IRIns *idx = IR(aref->op2);
424 if (!irt_isnil(store->t) &&
425 idx->o == IR_ADD && idx->op1 == ref &&
426 IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) {
427 /* Note: this requires an extra PHI check in loop unroll. */
428 fins->op2 = aref->op2; /* Set ALEN hint. */
429 }
430 goto doemit; /* Conflicting store, possibly giving a hint. */
431 } else if (aa_table(J, tab, fref->op1) == ALIAS_NO) {
432 goto doemit; /* Conflicting store. */
433 }
434 sref = store->prev;
435 }
436
437 return ref; /* Plain ALEN forwarding. */
438 }
439 ref = IR(ref)->prev;
440 }
441doemit:
442 return EMITFOLD;
443}
444
384/* -- ULOAD forwarding ---------------------------------------------------- */ 445/* -- ULOAD forwarding ---------------------------------------------------- */
385 446
386/* The current alias analysis for upvalues is very simplistic. It only 447/* The current alias analysis for upvalues is very simplistic. It only
@@ -430,7 +491,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
430 491
431cselim: 492cselim:
432 /* Try to find a matching load. Below the conflicting store, if any. */ 493 /* Try to find a matching load. Below the conflicting store, if any. */
433
434 ref = J->chain[IR_ULOAD]; 494 ref = J->chain[IR_ULOAD];
435 while (ref > lim) { 495 while (ref > lim) {
436 IRIns *ir = IR(ref); 496 IRIns *ir = IR(ref);
@@ -845,39 +905,6 @@ doemit:
845 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 905 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
846} 906}
847 907
848/* -- Forwarding of lj_tab_len -------------------------------------------- */
849
850/* This is rather simplistic right now, but better than nothing. */
851TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
852{
853 IRRef tab = fins->op1; /* Table reference. */
854 IRRef lim = tab; /* Search limit. */
855 IRRef ref;
856
857 /* Any ASTORE is a conflict and limits the search. */
858 if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
859
860 /* Search for conflicting HSTORE with numeric key. */
861 ref = J->chain[IR_HSTORE];
862 while (ref > lim) {
863 IRIns *store = IR(ref);
864 IRIns *href = IR(store->op1);
865 IRIns *key = IR(href->op2);
866 if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
867 lim = ref; /* Conflicting store found, limits search for TLEN. */
868 break;
869 }
870 ref = store->prev;
871 }
872
873 /* Search for aliasing table.clear. */
874 if (!fwd_aa_tab_clear(J, lim, tab))
875 return lj_ir_emit(J);
876
877 /* Try to find a matching load. Below the conflicting store, if any. */
878 return lj_opt_cselim(J, lim);
879}
880
881/* -- ASTORE/HSTORE previous type analysis -------------------------------- */ 908/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
882 909
883/* Check whether the previous value for a table store is non-nil. 910/* Check whether the previous value for a table store is non-nil.
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
index c5323b11..11101702 100644
--- a/src/lj_opt_sink.c
+++ b/src/lj_opt_sink.c
@@ -78,8 +78,7 @@ static void sink_mark_ins(jit_State *J)
78 switch (ir->o) { 78 switch (ir->o) {
79 case IR_BASE: 79 case IR_BASE:
80 return; /* Finished. */ 80 return; /* Finished. */
81 case IR_CALLL: /* IRCALL_lj_tab_len */ 81 case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN:
82 case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:
83 irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ 82 irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
84 break; 83 break;
85 case IR_FLOAD: 84 case IR_FLOAD:
diff --git a/src/lj_record.c b/src/lj_record.c
index 8eec0071..4fc22742 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1058,7 +1058,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
1058 lj_record_call(J, func, 2); 1058 lj_record_call(J, func, 2);
1059 } else { 1059 } else {
1060 if (LJ_52 && tref_istab(tr)) 1060 if (LJ_52 && tref_istab(tr))
1061 return lj_ir_call(J, IRCALL_lj_tab_len, tr); 1061 return emitir(IRTI(IR_ALEN), tr, TREF_NIL);
1062 lj_trace_err(J, LJ_TRERR_NOMM); 1062 lj_trace_err(J, LJ_TRERR_NOMM);
1063 } 1063 }
1064 return 0; /* No result yet. */ 1064 return 0; /* No result yet. */
@@ -2191,7 +2191,7 @@ void lj_record_ins(jit_State *J)
2191 if (tref_isstr(rc)) 2191 if (tref_isstr(rc))
2192 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); 2192 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
2193 else if (!LJ_52 && tref_istab(rc)) 2193 else if (!LJ_52 && tref_istab(rc))
2194 rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); 2194 rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL);
2195 else 2195 else
2196 rc = rec_mm_len(J, rc, rcv); 2196 rc = rec_mm_len(J, rc, rcv);
2197 break; 2197 break;
diff --git a/src/lj_tab.c b/src/lj_tab.c
index dcd24d31..eb9ef4af 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -639,49 +639,62 @@ int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
639 639
640/* -- Table length calculation -------------------------------------------- */ 640/* -- Table length calculation -------------------------------------------- */
641 641
642static MSize unbound_search(GCtab *t, MSize j) 642/* Compute table length. Slow path with mixed array/hash lookups. */
643LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi)
643{ 644{
644 cTValue *tv; 645 cTValue *tv;
645 MSize i = j; /* i is zero or a present index */ 646 size_t lo = hi;
646 j++; 647 hi++;
647 /* find `i' and `j' such that i is present and j is not */ 648 /* Widening search for an upper bound. */
648 while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { 649 while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) {
649 i = j; 650 lo = hi;
650 j *= 2; 651 hi += hi;
651 if (j > (MSize)(INT_MAX-2)) { /* overflow? */ 652 if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */
652 /* table was built with bad purposes: resort to linear search */ 653 lo = 1;
653 i = 1; 654 while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++;
654 while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; 655 return (MSize)(lo - 1);
655 return i - 1;
656 } 656 }
657 } 657 }
658 /* now do a binary search between them */ 658 /* Binary search to find a non-nil to nil transition. */
659 while (j - i > 1) { 659 while (hi - lo > 1) {
660 MSize m = (i+j)/2; 660 size_t mid = (lo+hi) >> 1;
661 cTValue *tvb = lj_tab_getint(t, (int32_t)m); 661 cTValue *tvb = lj_tab_getint(t, (int32_t)mid);
662 if (tvb && !tvisnil(tvb)) i = m; else j = m; 662 if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid;
663 } 663 }
664 return i; 664 return (MSize)lo;
665} 665}
666 666
667/* 667/* Compute table length. Fast path. */
668** Try to find a boundary in table `t'. A `boundary' is an integer index
669** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
670*/
671MSize LJ_FASTCALL lj_tab_len(GCtab *t) 668MSize LJ_FASTCALL lj_tab_len(GCtab *t)
672{ 669{
673 MSize j = (MSize)t->asize; 670 size_t hi = (size_t)t->asize;
674 if (j > 1 && tvisnil(arrayslot(t, j-1))) { 671 if (hi) hi--;
675 MSize i = 1; 672 /* In a growing array the last array element is very likely nil. */
676 while (j - i > 1) { 673 if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) {
677 MSize m = (i+j)/2; 674 /* Binary search to find a non-nil to nil transition in the array. */
678 if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; 675 size_t lo = 0;
676 while (hi - lo > 1) {
677 size_t mid = (lo+hi) >> 1;
678 if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid;
679 } 679 }
680 return i-1; 680 return (MSize)lo;
681 }
682 /* Without a hash part, there's an implicit nil after the last element. */
683 return t->hmask ? tab_len_slow(t, hi) : (MSize)hi;
684}
685
686#if LJ_HASJIT
687/* Verify hinted table length or compute it. */
688MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
689{
690 size_t asize = (size_t)t->asize;
691 cTValue *tv = arrayslot(t, hint);
692 if (LJ_LIKELY(hint+1 < asize)) {
693 if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint;
694 } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) {
695 return (MSize)hint;
681 } 696 }
682 if (j) j--; 697 return lj_tab_len(t);
683 if (t->hmask <= 0)
684 return j;
685 return unbound_search(t, j);
686} 698}
699#endif
687 700
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 597c94b2..f31590cd 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -69,5 +69,8 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
69 69
70LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); 70LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
71LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); 71LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
72#if LJ_HASJIT
73LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
74#endif
72 75
73#endif 76#endif