ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

lj_opt_mem.c (31273B)


      1 /*
      2 ** Memory access optimizations.
      3 ** AA: Alias Analysis using high-level semantic disambiguation.
      4 ** FWD: Load Forwarding (L2L) + Store Forwarding (S2L).
      5 ** DSE: Dead-Store Elimination.
      6 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      7 */
      8 
      9 #define lj_opt_mem_c
     10 #define LUA_CORE
     11 
     12 #include "lj_obj.h"
     13 
     14 #if LJ_HASJIT
     15 
     16 #include "lj_tab.h"
     17 #include "lj_ir.h"
     18 #include "lj_jit.h"
     19 #include "lj_iropt.h"
     20 #include "lj_ircall.h"
     21 
     22 /* Some local macros to save typing. Undef'd at the end. */
     23 #define IR(ref)		(&J->cur.ir[(ref)])
     24 #define fins		(&J->fold.ins)
     25 #define fleft		(J->fold.left)
     26 #define fright		(J->fold.right)
     27 
     28 /*
     29 ** Caveat #1: return value is not always a TRef -- only use with tref_ref().
     30 ** Caveat #2: FWD relies on active CSE for xREF operands -- see lj_opt_fold().
     31 */
     32 
/* Return values from alias analysis, used by the aa_* helpers below.
** ALIAS_NO and ALIAS_MUST are exact answers; ALIAS_MAY is the
** conservative fallback when no conclusion can be drawn.
*/
typedef enum {
  ALIAS_NO,	/* The two refs CANNOT alias (exact). */
  ALIAS_MAY,	/* The two refs MAY alias (inexact). */
  ALIAS_MUST	/* The two refs MUST alias (exact). */
} AliasRet;
     39 
     40 /* -- ALOAD/HLOAD forwarding and ASTORE/HSTORE elimination ---------------- */
     41 
     42 /* Simplified escape analysis: check for intervening stores. */
     43 static AliasRet aa_escape(jit_State *J, IRIns *ir, IRIns *stop)
     44 {
     45   IRRef ref = (IRRef)(ir - J->cur.ir);  /* The ref that might be stored. */
     46   for (ir++; ir < stop; ir++)
     47     if (ir->op2 == ref &&
     48 	(ir->o == IR_ASTORE || ir->o == IR_HSTORE ||
     49 	 ir->o == IR_USTORE || ir->o == IR_FSTORE))
     50       return ALIAS_MAY;  /* Reference was stored and might alias. */
     51   return ALIAS_NO;  /* Reference was not stored. */
     52 }
     53 
     54 /* Alias analysis for two different table references. */
     55 static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
     56 {
     57   IRIns *taba = IR(ta), *tabb = IR(tb);
     58   int newa, newb;
     59   lua_assert(ta != tb);
     60   lua_assert(irt_istab(taba->t) && irt_istab(tabb->t));
     61   /* Disambiguate new allocations. */
     62   newa = (taba->o == IR_TNEW || taba->o == IR_TDUP);
     63   newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP);
     64   if (newa && newb)
     65     return ALIAS_NO;  /* Two different allocations never alias. */
     66   if (newb) {  /* At least one allocation? */
     67     IRIns *tmp = taba; taba = tabb; tabb = tmp;
     68   } else if (!newa) {
     69     return ALIAS_MAY;  /* Anything else: we just don't know. */
     70   }
     71   return aa_escape(J, taba, tabb);
     72 }
     73 
/* Alias analysis for array and hash access using key-based disambiguation.
** refa/refb are xREF instructions (AREF, HREF, HREFK or NEWREF).
** Returns ALIAS_MUST only for provably identical slots, ALIAS_NO for
** provably distinct ones, ALIAS_MAY otherwise.
*/
static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
{
  IRRef ka = refa->op2;
  IRRef kb = refb->op2;
  IRIns *keya, *keyb;
  IRRef ta, tb;
  if (refa == refb)
    return ALIAS_MUST;  /* Shortcut for same refs. */
  /* Strip KSLOT wrappers to compare the underlying keys. */
  keya = IR(ka);
  if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); }
  keyb = IR(kb);
  if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); }
  /* HREFK/AREF index a hash/array part loaded from the table: hop to it. */
  ta = (refa->o==IR_HREFK || refa->o==IR_AREF) ? IR(refa->op1)->op1 : refa->op1;
  tb = (refb->o==IR_HREFK || refb->o==IR_AREF) ? IR(refb->op1)->op1 : refb->op1;
  if (ka == kb) {
    /* Same key. Check for same table with different ref (NEWREF vs. HREF). */
    if (ta == tb)
      return ALIAS_MUST;  /* Same key, same table. */
    else
      return aa_table(J, ta, tb);  /* Same key, possibly different table. */
  }
  if (irref_isk(ka) && irref_isk(kb))
    return ALIAS_NO;  /* Different constant keys. */
  if (refa->o == IR_AREF) {
    /* Disambiguate array references based on index arithmetic. */
    int32_t ofsa = 0, ofsb = 0;
    IRRef basea = ka, baseb = kb;
    lua_assert(refb->o == IR_AREF);
    /* Gather base and offset from t[base] or t[base+-ofs]. */
    if (keya->o == IR_ADD && irref_isk(keya->op2)) {
      basea = keya->op1;
      ofsa = IR(keya->op2)->i;
      if (basea == kb && ofsa != 0)
	return ALIAS_NO;  /* t[base+-ofs] vs. t[base]. */
    }
    if (keyb->o == IR_ADD && irref_isk(keyb->op2)) {
      baseb = keyb->op1;
      ofsb = IR(keyb->op2)->i;
      if (ka == baseb && ofsb != 0)
	return ALIAS_NO;  /* t[base] vs. t[base+-ofs]. */
    }
    if (basea == baseb && ofsa != ofsb)
      return ALIAS_NO;  /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
  } else {
    /* Disambiguate hash references based on the type of their keys. */
    lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
	       (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF));
    if (!irt_sametype(keya->t, keyb->t))
      return ALIAS_NO;  /* Different key types. */
  }
  /* Keys could not be disambiguated: fall back to table disambiguation. */
  if (ta == tb)
    return ALIAS_MAY;  /* Same table, cannot disambiguate keys. */
  else
    return aa_table(J, ta, tb);  /* Try to disambiguate tables. */
}
    130 
/* Array and hash load forwarding.
** Tries, in order: store-to-load forwarding from an aliasing store,
** const-folding loads from fresh TNEW/TDUP allocations, and plain
** load-to-load CSE below any conflicting store. Returns 0 on conflict
** or no match.
*/
static TRef fwd_ahload(jit_State *J, IRRef xref)
{
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef ref;

  /* Search for conflicting stores. */
  ref = J->chain[fins->o+IRDELTA_L2S];  /* Store chain matching this load. */
  while (ref > xref) {
    IRIns *store = IR(ref);
    switch (aa_ahref(J, xr, IR(store->op1))) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }

  /* No conflicting store (yet): const-fold loads from allocations. */
  {
    /* Hop from HREFK/AREF through the part-FLOAD to the table itself. */
    IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
    IRRef tab = ir->op1;
    ir = IR(tab);
    if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
      /* A NEWREF with a number key may end up pointing to the array part.
      ** But it's referenced from HSTORE and not found in the ASTORE chain.
      ** For now simply consider this a conflict without forwarding anything.
      */
      if (xr->o == IR_AREF) {
	IRRef ref2 = J->chain[IR_NEWREF];
	while (ref2 > tab) {
	  IRIns *newref = IR(ref2);
	  if (irt_isnum(IR(newref->op2)->t))
	    goto cselim;  /* Numeric NEWREF key: give up, try CSE only. */
	  ref2 = newref->prev;
	}
      }
      /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
      ** But the above search for conflicting stores was limited by xref.
      ** So continue searching, limited by the TNEW/TDUP. Store forwarding
      ** is ok, too. A conflict does NOT limit the search for a matching load.
      */
      while (ref > tab) {
	IRIns *store = IR(ref);
	switch (aa_ahref(J, xr, IR(store->op1))) {
	case ALIAS_NO:   break;  /* Continue searching. */
	case ALIAS_MAY:  goto cselim;  /* Conflicting store. */
	case ALIAS_MUST: return store->op2;  /* Store forwarding. */
	}
	ref = store->prev;
      }
      /* No store hit this slot: the load result is known statically. */
      lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t));
      if (irt_ispri(fins->t)) {
	return TREF_PRI(irt_type(fins->t));  /* TNEW slots are nil. */
      } else if (irt_isnum(fins->t) || (LJ_DUALNUM && irt_isint(fins->t)) ||
		 irt_isstr(fins->t)) {
	/* TDUP with constant key: look the value up in the template table. */
	TValue keyv;
	cTValue *tv;
	IRIns *key = IR(xr->op2);
	if (key->o == IR_KSLOT) key = IR(key->op1);
	lj_ir_kvalue(J->L, &keyv, key);
	tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
	lua_assert(itype2irt(tv) == irt_type(fins->t));
	if (irt_isnum(fins->t))
	  return lj_ir_knum_u64(J, tv->u64);
	else if (LJ_DUALNUM && irt_isint(fins->t))
	  return lj_ir_kint(J, intV(tv));
	else
	  return lj_ir_kstr(J, strV(tv));
      }
      /* Otherwise: don't intern the load result as a constant. */
    }
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[fins->o];
  while (ref > lim) {
    IRIns *load = IR(ref);
    if (load->op1 == xref)
      return ref;  /* Load forwarding. */
    ref = load->prev;
  }
  return 0;  /* Conflict or no match. */
}
    217 
/* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case.
** Recognizes an index of the shape (base + k) + (-k), which reduces to
** the plain base, and retries forwarding with an existing AREF on that
** base. Returns 0 if no such reassociation applies.
*/
static TRef fwd_aload_reassoc(jit_State *J)
{
  IRIns *irx = IR(fins->op1);  /* The AREF of this load. */
  IRIns *key = IR(irx->op2);
  if (key->o == IR_ADD && irref_isk(key->op2)) {
    IRIns *add2 = IR(key->op1);
    /* Inner add with the exact opposite constant: index == add2->op1. */
    if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	IR(key->op2)->i == -IR(add2->op2)->i) {
      IRRef ref = J->chain[IR_AREF];
      IRRef lim = add2->op1;
      if (irx->op1 > lim) lim = irx->op1;  /* Search above both operands. */
      /* Find an existing AREF for the same array part and the plain base. */
      while (ref > lim) {
	IRIns *ir = IR(ref);
	if (ir->op1 == irx->op1 && ir->op2 == add2->op1)
	  return fwd_ahload(J, ref);  /* Retry forwarding with that AREF. */
	ref = ir->prev;
      }
    }
  }
  return 0;
}
    240 
    241 /* ALOAD forwarding. */
    242 TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J)
    243 {
    244   IRRef ref;
    245   if ((ref = fwd_ahload(J, fins->op1)) ||
    246       (ref = fwd_aload_reassoc(J)))
    247     return ref;
    248   return EMITFOLD;
    249 }
    250 
    251 /* HLOAD forwarding. */
    252 TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J)
    253 {
    254   IRRef ref = fwd_ahload(J, fins->op1);
    255   if (ref)
    256     return ref;
    257   return EMITFOLD;
    258 }
    259 
/* HREFK forwarding.
** Scans the NEWREF chain: a NEWREF on the same table with the same key
** can be forwarded directly; any other potentially-aliasing NEWREF
** forces plain CSE. With no conflicting NEWREF at all, an HREFK into a
** TDUP template table cannot move, so its guard can be dropped.
*/
TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J)
{
  IRRef tab = fleft->op1;  /* Table the hash-part FLOAD reads from. */
  IRRef ref = J->chain[IR_NEWREF];
  while (ref > tab) {
    IRIns *newref = IR(ref);
    if (tab == newref->op1) {
      if (fright->op1 == newref->op2)
	return ref;  /* Forward from NEWREF. */
      else
	goto docse;  /* Same table, different key: only CSE is safe. */
    } else if (aa_table(J, tab, newref->op1) != ALIAS_NO) {
      goto docse;  /* Possibly the same table: only CSE is safe. */
    }
    ref = newref->prev;
  }
  /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */
  if (IR(tab)->o == IR_TDUP)
    fins->t.irt &= ~IRT_GUARD;  /* Drop HREFK guard. */
docse:
  return CSEFOLD;
}
    283 
/* Check whether HREF of TNEW/TDUP can be folded to niltv.
** Returns 1 if no store may have placed a value in the slot, 0 on any
** possible conflict.
*/
int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
{
  IRRef lim = fins->op1;  /* Search limit: the allocation itself. */
  IRRef ref;

  /* The key for an ASTORE may end up in the hash part after a NEWREF. */
  if (irt_isnum(fright->t) && J->chain[IR_NEWREF] > lim) {
    ref = J->chain[IR_ASTORE];
    while (ref > lim) {
      if (ref < J->chain[IR_NEWREF])
	return 0;  /* Conflict: ASTORE below the newest NEWREF. */
      ref = IR(ref)->prev;
    }
  }

  /* Search for conflicting stores. */
  ref = J->chain[IR_HSTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    if (aa_ahref(J, fins, IR(store->op1)) != ALIAS_NO)
      return 0;  /* Conflict: store may hit this slot. */
    ref = store->prev;
  }

  return 1;  /* No conflict. Can fold to niltv. */
}
    311 
    312 /* Check whether there's no aliasing table.clear. */
    313 static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
    314 {
    315   IRRef ref = J->chain[IR_CALLS];
    316   while (ref > lim) {
    317     IRIns *calls = IR(ref);
    318     if (calls->op2 == IRCALL_lj_tab_clear &&
    319 	(ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
    320       return 0;  /* Conflict. */
    321     ref = calls->prev;
    322   }
    323   return 1;  /* No conflict. Can safely FOLD/CSE. */
    324 }
    325 
    326 /* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
    327 int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
    328 {
    329   IRRef ta = fins->op1;
    330   IRRef ref = J->chain[IR_NEWREF];
    331   while (ref > lim) {
    332     IRIns *newref = IR(ref);
    333     if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
    334       return 0;  /* Conflict. */
    335     ref = newref->prev;
    336   }
    337   return fwd_aa_tab_clear(J, lim, ta);
    338 }
    339 
/* ASTORE/HSTORE elimination.
** Drops the new store if an identical one already exists, or eliminates
** an older store to the same slot that is provably overwritten before
** anyone can observe it (no intervening guards or loads).
*/
TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
{
  IRRef xref = fins->op1;  /* xREF reference. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRIns *xr = IR(xref);
  IRRef1 *refp = &J->chain[fins->o];
  IRRef ref = *refp;
  while (ref > xref) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_ahref(J, xr, IR(store->op1))) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:	/* Store to MAYBE the same location. */
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:	/* Store to the same location. */
      if (store->op2 == val)  /* Same value: drop the new store. */
	return DROPFOLD;
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards (includes conflicting loads).
	** CALLL (a load-like call) also blocks elimination.
	*/
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t) || ir->o == IR_CALLL)
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;  /* Unlink from the store chain. */
	store->o = IR_NOP;
	store->t.irt = IRT_NIL;
	store->op1 = store->op2 = 0;
	store->prev = 0;
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
    382 
    383 /* -- ULOAD forwarding ---------------------------------------------------- */
    384 
    385 /* The current alias analysis for upvalues is very simplistic. It only
    386 ** disambiguates between the unique upvalues of the same function.
    387 ** This is good enough for now, since most upvalues are read-only.
    388 **
    389 ** A more precise analysis would be feasible with the help of the parser:
    390 ** generate a unique key for every upvalue, even across all prototypes.
    391 ** Lacking a realistic use-case, it's unclear whether this is beneficial.
    392 */
    393 static AliasRet aa_uref(IRIns *refa, IRIns *refb)
    394 {
    395   if (refa->o != refb->o)
    396     return ALIAS_NO;  /* Different UREFx type. */
    397   if (refa->op1 == refb->op1) {  /* Same function. */
    398     if (refa->op2 == refb->op2)
    399       return ALIAS_MUST;  /* Same function, same upvalue idx. */
    400     else
    401       return ALIAS_NO;  /* Same function, different upvalue idx. */
    402   } else {  /* Different functions, check disambiguation hash values. */
    403     if (((refa->op2 ^ refb->op2) & 0xff))
    404       return ALIAS_NO;  /* Upvalues with different hash values cannot alias. */
    405     else
    406       return ALIAS_MAY;  /* No conclusion can be drawn for same hash value. */
    407   }
    408 }
    409 
/* ULOAD forwarding.
** Forwards the stored value from an aliasing USTORE, or CSEs with an
** earlier ULOAD of the same (or an equal) UREFx below any conflict.
*/
TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
{
  IRRef uref = fins->op1;
  IRRef lim = REF_BASE;  /* Search limit. */
  IRIns *xr = IR(uref);
  IRRef ref;

  /* Search for conflicting stores. */
  ref = J->chain[IR_USTORE];
  while (ref > lim) {
    IRIns *store = IR(ref);
    switch (aa_uref(xr, IR(store->op1))) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */

  ref = J->chain[IR_ULOAD];
  while (ref > lim) {
    IRIns *ir = IR(ref);
    if (ir->op1 == uref ||
	(IR(ir->op1)->op12 == IR(uref)->op12 && IR(ir->op1)->o == IR(uref)->o))
      return ref;  /* Match for identical or equal UREFx (non-CSEable UREFO). */
    ref = ir->prev;
  }
  return lj_ir_emit(J);  /* No match: emit the load. */
}
    443 
/* USTORE elimination.
** Like lj_opt_dse_ahstore, but when an older store is eliminated, an
** immediately following OBAR write barrier on the same upvalue ref is
** removed along with it.
*/
TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J)
{
  IRRef xref = fins->op1;  /* xREF reference. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRIns *xr = IR(xref);
  IRRef1 *refp = &J->chain[IR_USTORE];
  IRRef ref = *refp;
  while (ref > xref) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_uref(xr, IR(store->op1))) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:	/* Store to MAYBE the same location. */
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:	/* Store to the same location. */
      if (store->op2 == val)  /* Same value: drop the new store. */
	return DROPFOLD;
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards (includes conflicting loads). */
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t))
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;  /* Unlink from the store chain. */
	store->o = IR_NOP;
	store->t.irt = IRT_NIL;
	store->op1 = store->op2 = 0;
	store->prev = 0;
	/* The store's write barrier (if any) directly follows it. */
	if (ref+1 < J->cur.nins &&
	    store[1].o == IR_OBAR && store[1].op1 == xref) {
	  IRRef1 *bp = &J->chain[IR_OBAR];
	  IRIns *obar;
	  /* Find the chain link pointing at this OBAR, then unlink it. */
	  for (obar = IR(*bp); *bp > ref+1; obar = IR(*bp))
	    bp = &obar->prev;
	  /* Remove OBAR, too. */
	  *bp = obar->prev;
	  obar->o = IR_NOP;
	  obar->t.irt = IRT_NIL;
	  obar->op1 = obar->op2 = 0;
	  obar->prev = 0;
	}
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
    499 
    500 /* -- FLOAD forwarding and FSTORE elimination ----------------------------- */
    501 
    502 /* Alias analysis for field access.
    503 ** Field loads are cheap and field stores are rare.
    504 ** Simple disambiguation based on field types is good enough.
    505 */
    506 static AliasRet aa_fref(jit_State *J, IRIns *refa, IRIns *refb)
    507 {
    508   if (refa->op2 != refb->op2)
    509     return ALIAS_NO;  /* Different fields. */
    510   if (refa->op1 == refb->op1)
    511     return ALIAS_MUST;  /* Same field, same object. */
    512   else if (refa->op2 >= IRFL_TAB_META && refa->op2 <= IRFL_TAB_NOMM)
    513     return aa_table(J, refa->op1, refb->op1);  /* Disambiguate tables. */
    514   else
    515     return ALIAS_MAY;  /* Same field, possibly different object. */
    516 }
    517 
/* Only the loads for mutable fields end up here (see FOLD).
** Forwards from an aliasing FSTORE, const-folds the metatable field of
** fresh allocations to NULL, or falls back to plain CSE.
*/
TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
{
  IRRef oref = fins->op1;  /* Object reference. */
  IRRef fid = fins->op2;  /* Field ID. */
  IRRef lim = oref;  /* Search limit. */
  IRRef ref;

  /* Search for conflicting stores. */
  ref = J->chain[IR_FSTORE];
  while (ref > oref) {
    IRIns *store = IR(ref);
    switch (aa_fref(J, fins, IR(store->op1))) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST: return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }

  /* No conflicting store: const-fold field loads from allocations. */
  if (fid == IRFL_TAB_META) {
    IRIns *ir = IR(oref);
    if (ir->o == IR_TNEW || ir->o == IR_TDUP)
      return lj_ir_knull(J, IRT_TAB);  /* Fresh tables have no metatable. */
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  return lj_opt_cselim(J, lim);
}
    549 
/* FSTORE elimination.
** Same scheme as lj_opt_dse_ahstore, using field alias analysis; an
** intervening FLOAD of the same field ID also blocks elimination.
*/
TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
{
  IRRef fref = fins->op1;  /* FREF reference. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRIns *xr = IR(fref);
  IRRef1 *refp = &J->chain[IR_FSTORE];
  IRRef ref = *refp;
  while (ref > fref) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_fref(J, xr, IR(store->op1))) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:	/* Store to MAYBE the same location. */
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:	/* Store to the same location. */
      if (store->op2 == val)  /* Same value: drop the new store. */
	return DROPFOLD;
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards or conflicting loads. */
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t) || (ir->o == IR_FLOAD && ir->op2 == xr->op2))
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;  /* Unlink from the store chain. */
	store->o = IR_NOP;
	store->t.irt = IRT_NIL;
	store->op1 = store->op2 = 0;
	store->prev = 0;
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
    592 
    593 /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
    594 
    595 /* Find cdata allocation for a reference (if any). */
    596 static IRIns *aa_findcnew(jit_State *J, IRIns *ir)
    597 {
    598   while (ir->o == IR_ADD) {
    599     if (!irref_isk(ir->op1)) {
    600       IRIns *ir1 = aa_findcnew(J, IR(ir->op1));  /* Left-recursion. */
    601       if (ir1) return ir1;
    602     }
    603     if (irref_isk(ir->op2)) return NULL;
    604     ir = IR(ir->op2);  /* Flatten right-recursion. */
    605   }
    606   return ir->o == IR_CNEW ? ir : NULL;
    607 }
    608 
    609 /* Alias analysis for two cdata allocations. */
    610 static AliasRet aa_cnew(jit_State *J, IRIns *refa, IRIns *refb)
    611 {
    612   IRIns *cnewa = aa_findcnew(J, refa);
    613   IRIns *cnewb = aa_findcnew(J, refb);
    614   if (cnewa == cnewb)
    615     return ALIAS_MAY;  /* Same allocation or neither is an allocation. */
    616   if (cnewa && cnewb)
    617     return ALIAS_NO;  /* Two different allocations never alias. */
    618   if (cnewb) { cnewa = cnewb; refb = refa; }
    619   return aa_escape(J, cnewa, refb);
    620 }
    621 
/* Alias analysis for XLOAD/XSTORE.
** refa is the (possibly reassociated) address of the access under
** analysis, xa the load/store instruction itself, xb the store to
** disambiguate against (its address is xb->op1).
*/
static AliasRet aa_xref(jit_State *J, IRIns *refa, IRIns *xa, IRIns *xb)
{
  ptrdiff_t ofsa = 0, ofsb = 0;
  IRIns *refb = IR(xb->op1);
  IRIns *basea = refa, *baseb = refb;
  if (refa == refb && irt_sametype(xa->t, xb->t))
    return ALIAS_MUST;  /* Shortcut for same refs with identical type. */
  /* Offset-based disambiguation: split base+const addresses. */
  if (refa->o == IR_ADD && irref_isk(refa->op2)) {
    IRIns *irk = IR(refa->op2);
    basea = IR(refa->op1);
    ofsa = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
  }
  if (refb->o == IR_ADD && irref_isk(refb->op2)) {
    IRIns *irk = IR(refb->op2);
    baseb = IR(refb->op1);
    ofsb = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					    (ptrdiff_t)irk->i;
  }
  /* Treat constified pointers like base vs. base+offset. */
  if (basea->o == IR_KPTR && baseb->o == IR_KPTR) {
    ofsb += (char *)ir_kptr(baseb) - (char *)ir_kptr(basea);
    baseb = basea;
  }
  /* This implements (very) strict aliasing rules.
  ** Different types do NOT alias, except for differences in signedness.
  ** Type punning through unions is allowed (but forces a reload).
  */
  if (basea == baseb) {
    /* Same base: disambiguate by access size and offset. */
    ptrdiff_t sza = irt_size(xa->t), szb = irt_size(xb->t);
    if (ofsa == ofsb) {
      if (sza == szb && irt_isfp(xa->t) == irt_isfp(xb->t))
	return ALIAS_MUST;  /* Same-sized, same-kind. May need to convert. */
    } else if (ofsa + sza <= ofsb || ofsb + szb <= ofsa) {
      return ALIAS_NO;  /* Non-overlapping base+-o1 vs. base+-o2. */
    }
    /* NYI: extract, extend or reinterpret bits (int <-> fp). */
    return ALIAS_MAY;  /* Overlapping or type punning: force reload. */
  }
  /* Different bases: distinct types (except signed/unsigned integer
  ** pairs, which are adjacent in the IRT enum) cannot alias.
  */
  if (!irt_sametype(xa->t, xb->t) &&
      !(irt_typerange(xa->t, IRT_I8, IRT_U64) &&
	((xa->t.irt - IRT_I8) ^ (xb->t.irt - IRT_I8)) == 1))
    return ALIAS_NO;
  /* NYI: structural disambiguation. */
  return aa_cnew(J, basea, baseb);  /* Try to disambiguate allocations. */
}
    670 
    671 /* Return CSEd reference or 0. Caveat: swaps lower ref to the right! */
    672 static IRRef reassoc_trycse(jit_State *J, IROp op, IRRef op1, IRRef op2)
    673 {
    674   IRRef ref = J->chain[op];
    675   IRRef lim = op1;
    676   if (op2 > lim) { lim = op2; op2 = op1; op1 = lim; }
    677   while (ref > lim) {
    678     IRIns *ir = IR(ref);
    679     if (ir->op1 == op1 && ir->op2 == op2)
    680       return ref;
    681     ref = ir->prev;
    682   }
    683   return 0;
    684 }
    685 
/* Reassociate index references.
** For an address of the shape base + ((idx + k) << shift) [+ ofs],
** tries to rebuild the equivalent reference base + (idx << shift)
** + (ofs + (k << shift)) purely from already-existing (CSEable)
** instructions. Returns the reassociated reference or 0 on failure.
*/
static IRRef reassoc_xref(jit_State *J, IRIns *ir)
{
  ptrdiff_t ofs = 0;
  if (ir->o == IR_ADD && irref_isk(ir->op2)) {  /* Get constant offset. */
    IRIns *irk = IR(ir->op2);
    ofs = (LJ_64 && irk->o == IR_KINT64) ? (ptrdiff_t)ir_k64(irk)->u64 :
					   (ptrdiff_t)irk->i;
    ir = IR(ir->op1);
  }
  if (ir->o == IR_ADD) {  /* Add of base + index. */
    /* Index ref > base ref for loop-carried dependences. Only check op1. */
    IRIns *ir2, *ir1 = IR(ir->op1);
    int32_t shift = 0;
    IRRef idxref;
    /* Determine index shifts. Don't bother with IR_MUL here. */
    if (ir1->o == IR_BSHL && irref_isk(ir1->op2))
      shift = IR(ir1->op2)->i;
    else if (ir1->o == IR_ADD && ir1->op1 == ir1->op2)
      shift = 1;  /* x+x is a shift by 1. */
    else
      ir1 = ir;  /* No shift: the index is ir->op1 itself. */
    ir2 = IR(ir1->op1);
    /* A non-reassociated add. Must be a loop-carried dependence. */
    if (ir2->o == IR_ADD && irt_isint(ir2->t) && irref_isk(ir2->op2))
      ofs += (ptrdiff_t)IR(ir2->op2)->i << shift;  /* Fold k into offset. */
    else
      return 0;
    idxref = ir2->op1;  /* The bare index without the constant add. */
    /* Try to CSE the reassociated chain. Give up if not found. */
    if (ir1 != ir &&
	!(idxref = reassoc_trycse(J, ir1->o, idxref,
				  ir1->o == IR_BSHL ? ir1->op2 : idxref)))
      return 0;  /* No existing shifted index. */
    if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, ir->op2)))
      return 0;  /* No existing base + shifted index. */
    if (ofs != 0) {
      IRRef refk = tref_ref(lj_ir_kintp(J, ofs));
      if (!(idxref = reassoc_trycse(J, IR_ADD, idxref, refk)))
	return 0;  /* No existing address with the folded offset. */
    }
    return idxref;  /* Success, found a reassociated index reference. Phew. */
  }
  return 0;  /* Failure. */
}
    731 
/* XLOAD forwarding.
** Readonly loads go straight to CSE; volatile loads are always emitted.
** Otherwise: forward from an aliasing XSTORE (emitting a CONV when the
** stored type differs), CSE with an earlier XLOAD, or retry once with a
** reassociated address for the a[i-1] loop-carried case.
*/
TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J)
{
  IRRef xref = fins->op1;
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef ref;

  if ((fins->op2 & IRXLOAD_READONLY))
    goto cselim;  /* No store can conflict with a readonly load. */
  if ((fins->op2 & IRXLOAD_VOLATILE))
    goto doemit;  /* Never forward or CSE a volatile load. */

  /* Search for conflicting stores. */
  ref = J->chain[IR_XSTORE];
retry:
  /* Arbitrary calls and barriers limit how far back we may search. */
  if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
  if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR];
  while (ref > lim) {
    IRIns *store = IR(ref);
    switch (aa_xref(J, xr, fins, store)) {
    case ALIAS_NO:   break;  /* Continue searching. */
    case ALIAS_MAY:  lim = ref; goto cselim;  /* Limit search for load. */
    case ALIAS_MUST:
      /* Emit conversion if the loaded type doesn't match the forwarded type. */
      if (!irt_sametype(fins->t, IR(store->op2)->t)) {
	IRType dt = irt_type(fins->t), st = irt_type(IR(store->op2)->t);
	if (dt == IRT_I8 || dt == IRT_I16) {  /* Trunc + sign-extend. */
	  st = dt | IRCONV_SEXT;
	  dt = IRT_INT;
	} else if (dt == IRT_U8 || dt == IRT_U16) {  /* Trunc + zero-extend. */
	  st = dt;
	  dt = IRT_INT;
	}
	/* Retry the fold with a CONV of the stored value instead. */
	fins->ot = IRT(IR_CONV, dt);
	fins->op1 = store->op2;
	fins->op2 = (dt<<5)|st;
	return RETRYFOLD;
      }
      return store->op2;  /* Store forwarding. */
    }
    ref = store->prev;
  }

cselim:
  /* Try to find a matching load. Below the conflicting store, if any. */
  ref = J->chain[IR_XLOAD];
  while (ref > lim) {
    /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
    if (IR(ref)->op1 == xref && irt_sametype(IR(ref)->t, fins->t))
      return ref;
    ref = IR(ref)->prev;
  }

  /* Reassociate XLOAD across PHIs to handle a[i-1] forwarding case. */
  if (!(fins->op2 & IRXLOAD_READONLY) && J->chain[IR_LOOP] &&
      xref == fins->op1 && (xref = reassoc_xref(J, xr)) != 0) {
    ref = J->chain[IR_XSTORE];
    while (ref > lim)  /* Skip stores that have already been checked. */
      ref = IR(ref)->prev;
    lim = xref;
    xr = IR(xref);
    goto retry;  /* Retry with the reassociated reference. */
  }
doemit:
  return EMITFOLD;
}
    799 
/* XSTORE elimination.
** Dead-store elimination for XSTORE (fins): drop the new store if an
** earlier must-aliasing store already wrote the same value, or NOP out an
** earlier store that this one fully overwrites (when no guard or XLOAD
** lies in between). Returns DROPFOLD or EMITFOLD.
*/
TRef LJ_FASTCALL lj_opt_dse_xstore(jit_State *J)
{
  IRRef xref = fins->op1;
  IRIns *xr = IR(xref);
  IRRef lim = xref;  /* Search limit. */
  IRRef val = fins->op2;  /* Stored value reference. */
  IRRef1 *refp = &J->chain[IR_XSTORE];  /* Link to patch when unlinking. */
  IRRef ref = *refp;
  /* Calls, barriers and string allocs block elimination: raise the limit. */
  if (J->chain[IR_CALLXS] > lim) lim = J->chain[IR_CALLXS];
  if (J->chain[IR_XBAR] > lim) lim = J->chain[IR_XBAR];
  if (J->chain[IR_XSNEW] > lim) lim = J->chain[IR_XSNEW];
  while (ref > lim) {  /* Search for redundant or conflicting stores. */
    IRIns *store = IR(ref);
    switch (aa_xref(J, xr, fins, store)) {
    case ALIAS_NO:
      break;  /* Continue searching. */
    case ALIAS_MAY:
      if (store->op2 != val)  /* Conflict if the value is different. */
	goto doemit;
      break;  /* Otherwise continue searching. */
    case ALIAS_MUST:
      if (store->op2 == val)  /* Same value: drop the new store. */
	return DROPFOLD;
      /* Different value: try to eliminate the redundant store. */
      if (ref > J->chain[IR_LOOP]) {  /* Quick check to avoid crossing LOOP. */
	IRIns *ir;
	/* Check for any intervening guards or any XLOADs (no AA performed). */
	for (ir = IR(J->cur.nins-1); ir > store; ir--)
	  if (irt_isguard(ir->t) || ir->o == IR_XLOAD)
	    goto doemit;  /* No elimination possible. */
	/* Remove redundant store from chain and replace with NOP. */
	*refp = store->prev;
	store->o = IR_NOP;
	store->t.irt = IRT_NIL;
	store->op1 = store->op2 = 0;
	store->prev = 0;
	/* Now emit the new store instead. */
      }
      goto doemit;
    }
    ref = *(refp = &store->prev);  /* Advance, remembering the back-link. */
  }
doemit:
  return EMITFOLD;  /* Otherwise we have a conflict or simply no match. */
}
    846 
    847 /* -- Forwarding of lj_tab_len -------------------------------------------- */
    848 
    849 /* This is rather simplistic right now, but better than nothing. */
    850 TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
    851 {
    852   IRRef tab = fins->op1;  /* Table reference. */
    853   IRRef lim = tab;  /* Search limit. */
    854   IRRef ref;
    855 
    856   /* Any ASTORE is a conflict and limits the search. */
    857   if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
    858 
    859   /* Search for conflicting HSTORE with numeric key. */
    860   ref = J->chain[IR_HSTORE];
    861   while (ref > lim) {
    862     IRIns *store = IR(ref);
    863     IRIns *href = IR(store->op1);
    864     IRIns *key = IR(href->op2);
    865     if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
    866       lim = ref;  /* Conflicting store found, limits search for TLEN. */
    867       break;
    868     }
    869     ref = store->prev;
    870   }
    871 
    872   /* Search for aliasing table.clear. */
    873   if (!fwd_aa_tab_clear(J, lim, tab))
    874     return lj_ir_emit(J);
    875 
    876   /* Try to find a matching load. Below the conflicting store, if any. */
    877   return lj_opt_cselim(J, lim);
    878 }
    879 
    880 /* -- ASTORE/HSTORE previous type analysis -------------------------------- */
    881 
    882 /* Check whether the previous value for a table store is non-nil.
    883 ** This can be derived either from a previous store or from a previous
    884 ** load (because all loads from tables perform a type check).
    885 **
    886 ** The result of the analysis can be used to avoid the metatable check
    887 ** and the guard against HREF returning niltv. Both of these are cheap,
    888 ** so let's not spend too much effort on the analysis.
    889 **
    890 ** A result of 1 is exact: previous value CANNOT be nil.
    891 ** A result of 0 is inexact: previous value MAY be nil.
    892 */
    893 int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref)
    894 {
    895   /* First check stores. */
    896   IRRef ref = J->chain[loadop+IRDELTA_L2S];
    897   while (ref > xref) {
    898     IRIns *store = IR(ref);
    899     if (store->op1 == xref) {  /* Same xREF. */
    900       /* A nil store MAY alias, but a non-nil store MUST alias. */
    901       return !irt_isnil(store->t);
    902     } else if (irt_isnil(store->t)) {  /* Must check any nil store. */
    903       IRRef skref = IR(store->op1)->op2;
    904       IRRef xkref = IR(xref)->op2;
    905       /* Same key type MAY alias. Need ALOAD check due to multiple int types. */
    906       if (loadop == IR_ALOAD || irt_sametype(IR(skref)->t, IR(xkref)->t)) {
    907 	if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref))
    908 	  return 0;  /* A nil store with same const key or var key MAY alias. */
    909 	/* Different const keys CANNOT alias. */
    910       }  /* Different key types CANNOT alias. */
    911     }  /* Other non-nil stores MAY alias. */
    912     ref = store->prev;
    913   }
    914 
    915   /* Check loads since nothing could be derived from stores. */
    916   ref = J->chain[loadop];
    917   while (ref > xref) {
    918     IRIns *load = IR(ref);
    919     if (load->op1 == xref) {  /* Same xREF. */
    920       /* A nil load MAY alias, but a non-nil load MUST alias. */
    921       return !irt_isnil(load->t);
    922     }  /* Other non-nil loads MAY alias. */
    923     ref = load->prev;
    924   }
    925   return 0;  /* Nothing derived at all, previous value MAY be nil. */
    926 }
    927 
    928 /* ------------------------------------------------------------------------ */
    929 
    930 #undef IR
    931 #undef fins
    932 #undef fleft
    933 #undef fright
    934 
    935 #endif