ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git

lj_asm_mips.h (68303B)


/*
** MIPS IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Register allocator extensions --------------------------------------- */

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}

/* Allocate a register or RID_ZERO. */
static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0)
      return RID_ZERO;
    r = ra_allocref(as, ref, allow);
  } else {
    ra_noweak(as, r);
  }
  return r;
}

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_alloc1z(as, ir->op2, allow);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_alloc1z(as, ir->op1, allow);
    right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}

/* -- Guard handling ------------------------------------------------------ */

/* Need some spare long-range jump slots, for out-of-range branches. */
#define MIPS_SPAREJUMP		4
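/* A MIPS conditional branch only reaches +-128 KB, so when exit patching
** must redirect a branch to a target out of that range, it routes the jump
** through one of these spare slots (each an absolute J plus delay-slot NOP).
*/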

/* Setup spare long-range jump slots per mcarea. */
static void asm_sparejump_setup(ASMState *as)
{
  MCode *mxp = as->mcbot;
  /* Assumes sizeof(MCLink) == 8. */
  if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) {
    lua_assert(MIPSI_NOP == 0);
    memset(mxp+2, 0, MIPS_SPAREJUMP*8);
    mxp += MIPS_SPAREJUMP*2;
    lua_assert(mxp < as->mctop);
    lj_mcode_sync(as->mcbot, mxp);
    lj_mcode_commitbot(as->J, mxp);
    as->mcbot = mxp;
    as->mclim = as->mcbot + MCLIM_REDZONE;
  }
}

/* Setup exit stub after the end of each trace. */
static void asm_exitstub_setup(ASMState *as)
{
  MCode *mxp = as->mctop;
  /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
  *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
  *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
  lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0);
  *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
  as->mctop = mxp;
}

/* Keep this in-sync with exitstub_trace_addr(). */
#define asm_exitstub_addr(as)	((as)->mctop)

/* Emit conditional branch to exit for guard. */
static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
{
  MCode *target = asm_exitstub_addr(as);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    as->invmcp = NULL;
    as->loopinv = 1;
    as->mcp = p+1;
    mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u);  /* Invert cond. */
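    /* Opcodes 4-7 (BEQ/BNE/BLEZ/BGTZ) invert via opcode bit 26; REGIMM and
    ** COP1 branches invert via bit 16 (BLTZ<->BGEZ, BC1F<->BC1T). */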
    target = p;  /* Patch target later in asm_loop_fixup. */
  }
  emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
  emit_branch(as, mi, rs, rt, target);
}

/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM	31

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
{
  IRIns *ir = as->ir;
  IRRef i = as->curins;
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  while (--i > ref)
    if (ir[i].o == conflict)
      return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */
}

/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
  return 0;
}

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
	if (irref_isk(ir->op2)) {
	  IRRef tab = IR(ir->op1)->op1;
	  int32_t ofs = asm_fuseabase(as, tab);
	  IRRef refa = ofs ? tab : ir->op1;
	  ofs += 8*IR(ir->op2)->i;
	  if (checki16(ofs)) {
	    *ofsp = ofs;
	    return ra_alloc1(as, refa, allow);
	  }
	}
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
	if (checki16(ofs)) {
	  *ofsp = ofs;
	  return ra_alloc1(as, ir->op1, allow);
	}
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
	GCfunc *fn = ir_kfunc(IR(ir->op1));
	int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
	int32_t jgl = (intptr_t)J2G(as->J);
	if ((uint32_t)(ofs-jgl) < 65536) {
	  *ofsp = ofs-jgl-32768;
	  return RID_JGL;
	} else {
	  *ofsp = (int16_t)ofs;
	  return ra_allock(as, ofs-(int16_t)ofs, allow);
	}
      }
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}

/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
			 RegSet allow, int32_t ofs)
{
  IRIns *ir = IR(ref);
  Reg base;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      int32_t ofs2;
      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
	ref = ir->op1;
	ofs = ofs2;
      }
    } else if (ir->o == IR_STRREF) {
      int32_t ofs2 = 65536;
      lua_assert(ofs == 0);
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
	ofs2 = ofs + IR(ir->op2)->i;
	ref = ir->op1;
      } else if (irref_isk(ir->op1)) {
	ofs2 = ofs + IR(ir->op1)->i;
	ref = ir->op2;
      }
      if (!checki16(ofs2)) {
	/* NYI: Fuse ADD with constant. */
	Reg right, left = ra_alloc2(as, ir, allow);
	right = (left >> 8); left &= 255;
	emit_hsi(as, mi, rt, RID_TMP, ofs);
	emit_dst(as, MIPSI_ADDU, RID_TMP, left, right);
	return;
      }
      ofs = ofs2;
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_hsi(as, mi, rt, base, ofs);
}

/* -- Calls --------------------------------------------------------------- */

/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 16;
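  /* o32 ABI: the 16-byte register-save area for a0-a3 is always reserved,
  ** so on-stack arguments start at offset 16. */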
#if LJ_SOFTFP
  Reg gpr = REGARG_FIRSTGPR;
#else
  Reg gpr, fpr = REGARG_FIRSTFPR;
#endif
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func, 1);
#if !LJ_SOFTFP
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
#endif
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
#if !LJ_SOFTFP
      if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
	  !(ci->flags & CCI_VARARG)) {
	lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
	ra_leftov(as, fpr, ref);
	fpr += 2;
	gpr += irt_isnum(ir->t) ? 2 : 1;
      } else
#endif
      {
#if !LJ_SOFTFP
	fpr = REGARG_LASTFPR+1;
#endif
	if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
	if (gpr <= REGARG_LASTGPR) {
	  lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
#if !LJ_SOFTFP
	  if (irt_isfp(ir->t)) {
	    RegSet of = as->freeset;
	    Reg r;
	    /* Workaround to protect argument GPRs from being used for remat. */
	    as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
	    r = ra_alloc1(as, ref, RSET_FPR);
	    as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
	    if (irt_isnum(ir->t)) {
	      emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
	      emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
	      lua_assert(rset_test(as->freeset, gpr+1));  /* Already evicted. */
	      gpr += 2;
	    } else if (irt_isfloat(ir->t)) {
	      emit_tg(as, MIPSI_MFC1, gpr, r);
	      gpr++;
	    }
	  } else
#endif
	  {
	    ra_leftov(as, gpr, ref);
	    gpr++;
	  }
	} else {
	  Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
	  if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
	  emit_spstore(as, ir, r, ofs);
	  ofs += irt_isnum(ir->t) ? 8 : 4;
	}
      }
    } else {
#if !LJ_SOFTFP
      fpr = REGARG_LASTFPR+1;
#endif
      if (gpr <= REGARG_LASTGPR)
	gpr++;
      else
	ofs += 4;
    }
    checkmclim(as);
  }
}

/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
#if !LJ_SOFTFP
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;
#endif
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
	int32_t ofs = sps_scale(ir->s);
	Reg dest = ir->r;
	if (ra_hasreg(dest)) {
	  ra_free(as, dest);
	  ra_modified(as, dest);
	  emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
	  emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
	}
	if (ofs) {
	  emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
	  emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
	}
      } else {
	ra_destreg(as, ir, RID_FPRET);
      }
    } else if (hiop) {
      ra_destpair(as, ir);
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
}

static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(irf->i);
  } else {  /* Need specific register for indirect calls. */
    Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
    MCode *p = as->mcp;
    if (r == RID_CFUNCADDR)
      *--p = MIPSI_NOP;
    else
      *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r);
    *--p = MIPSI_JALR | MIPSF_S(r);
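    /* Written backwards: at runtime JALR executes first and the MOVE/NOP
    ** fills its delay slot, so r25 (t9) holds the callee address as
    ** position-independent o32 callees expect. */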
    as->mcp = p;
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}

#if !LJ_SOFTFP
static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
{
  /* The modified regs must match with the *.dasc implementation. */
  RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
		RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR);
  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
  ra_evictset(as, drop);
  ra_destreg(as, ir, RID_FPRET);
  emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
  ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
}
#endif

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guard(as, MIPSI_BNE, RID_TMP,
	    ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tsi(as, MIPSI_LW, RID_TMP, base, -8);
}

/* -- Type conversions ---------------------------------------------------- */

#if !LJ_SOFTFP
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
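  /* Emitted bottom-up: convert to int, move to the GPR, convert back and
  ** guard that the round-trip compares equal, i.e. the value was integral. */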
  asm_guard(as, MIPSI_BC1F, 0, 0);
  emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
  emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
  emit_tg(as, MIPSI_MFC1, dest, tmp);
  emit_fg(as, MIPSI_CVT_W_D, tmp, left);
}

static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
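  /* ir->op2 is the 2^52+2^51 bias: adding it pushes the integer result into
  ** the low word of the mantissa, which MFC1 then extracts. */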
  emit_tg(as, MIPSI_MFC1, dest, tmp);
  emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
}
#endif

static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if !LJ_SOFTFP
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
#endif
  IRRef lref = ir->op1;
  lua_assert(!(irt_isint64(ir->t) ||
	       (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
#if LJ_SOFTFP
  /* FP conversions are handled by SPLIT. */
  lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
#else
  lua_assert(irt_type(ir->t) != st);
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S,
	      dest, ra_alloc1(as, lref, RSET_FPR));
    } else if (st == IRT_U32) {  /* U32 to FP conversion. */
      /* y = (x ^ 0x80000000) + 2147483648.0 */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      emit_fgh(as, irt_isfloat(ir->t) ? MIPSI_ADD_S : MIPSI_ADD_D,
	       dest, dest, tmp);
      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
	      dest, dest);
      if (irt_isfloat(ir->t))
	emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
		   (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
      else
	emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
		   (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
      emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
      emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
      emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
	      dest, dest);
      emit_tg(as, MIPSI_MTC1, left, dest);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
	/* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
	emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
	emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
	emit_tg(as, MIPSI_MFC1, dest, tmp);
	emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
		tmp, tmp);
	emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
		 tmp, left, tmp);
	if (st == IRT_FLOAT)
	  emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
		     (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
	else
	  emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
		     (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
      } else {
	emit_tg(as, MIPSI_MFC1, dest, tmp);
	emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
		tmp, left);
      }
    }
  } else
#endif
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
      if ((ir->op2 & IRCONV_SEXT)) {
	if ((as->flags & JIT_F_MIPSXXR2)) {
	  emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
	} else {
	  uint32_t shift = st == IRT_I8 ? 24 : 16;
	  emit_dta(as, MIPSI_SRA, dest, dest, shift);
	  emit_dta(as, MIPSI_SLL, dest, left, shift);
	}
      } else {
	emit_tsi(as, MIPSI_ANDI, dest, left,
		 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
      }
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    }
  }
}

static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  int32_t ofs = 0;
#if LJ_SOFTFP
  ra_evictset(as, RSET_SCRATCH);
  if (ra_used(ir)) {
    if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
	(ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
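      /* The lo/hi spill slots are adjacent and in TValue order, so they can
      ** serve as the result TValue directly. */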
      int i;
      for (i = 0; i < 2; i++) {
	Reg r = (ir+i)->r;
	if (ra_hasreg(r)) {
	  ra_free(as, r);
	  ra_modified(as, r);
	  emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
	}
      }
      ofs = sps_scale(ir->s & ~1);
    } else {
      Reg rhi = ra_dest(as, ir+1, RSET_GPR);
      Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
      emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4));
      emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0));
    }
  }
#else
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  ofs = sps_scale(ir->s);
#endif
  asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n  */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1),
	   RID_SP, ofs);
}

/* -- Memory references --------------------------------------------------- */

/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
    else  /* Otherwise force a spill and use the spill slot. */
      emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg type;
    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768));
    if (!irt_ispri(ir->t)) {
      Reg src = ra_alloc1(as, ref, allow);
      emit_setgl(as, src, tmptv.gcr);
    }
    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
      type = ra_alloc1(as, ref+1, allow);
    else
      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    emit_setgl(as, type, tmptv.it);
  }
}

static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    ofs += 8*IR(ir->op2)->i;
    if (checki16(ofs)) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tsi(as, MIPSI_ADDIU, dest, base, ofs);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base);
  emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
}

/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;

  rset_clear(allow, tab);
#if LJ_SOFTFP
  if (!irref_isk(refkey)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
    if (irkey[1].o == IR_HIOP) {
      if (ra_hasreg((irkey+1)->r)) {
	type = tmpnum = (irkey+1)->r;
	tmp1 = ra_scratch(as, allow);
	rset_clear(allow, tmp1);
	ra_noweak(as, tmpnum);
      } else {
	type = tmpnum = ra_allocref(as, refkey+1, allow);
      }
      rset_clear(allow, tmpnum);
    } else {
      type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
      rset_clear(allow, type);
    }
  }
#else
  if (irt_isnum(kt)) {
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
  } else if (!irt_ispri(kt)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
    type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
    rset_clear(allow, type);
  }
#endif
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));
  /* Follow hash chain until the end. */
  emit_move(as, dest, tmp2);
  l_loop = --as->mcp;
  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ) {  /* Must match asm_guard(). */
    emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
    l_end = asm_exitstub_addr(as);
  }
  if (!LJ_SOFTFP && irt_isnum(kt)) {
    emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
    emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
    *--as->mcp = MIPSI_NOP;  /* Avoid NaN comparison overhead. */
    emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
    emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
    emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
  } else {
    if (irt_ispri(kt)) {
      emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
    } else {
      emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
      emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
      emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
    }
  }
  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
  *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
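  /* khash == 0 only happens for a constant key whose hash is 0, i.e. the
  ** main position is node[0]; the placeholder 1 for non-constant keys
  ** forces the runtime hash computation below. */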
  if (khash == 0) {
    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
    if (irref_isk(refkey))
      tmphash = ra_allock(as, khash, allow);
    emit_dst(as, MIPSI_ADDU, dest, dest, tmp1);
    lua_assert(sizeof(Node) == 24);
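    /* Runtime order (reverse of emission): i = hash & hmask, then
    ** dest = tab->node + i*24, with i*24 computed as (i<<5) - (i<<3). */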
    724     emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
    725     emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
    726     emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
    727     emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
    728     emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
    729     emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    730     if (irref_isk(refkey)) {
    731       /* Nothing to do. */
    732     } else if (irt_isstr(kt)) {
    733       emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
    734     } else {  /* Must match with hash*() in lj_tab.c. */
    735       emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
    736       emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
    737       emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
    738       emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
    739       emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
    740       if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
    741 	emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
    742 	if ((as->flags & JIT_F_MIPSXXR2)) {
    743 	  emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
    744 	} else {
    745 	  emit_dst(as, MIPSI_OR, dest, dest, tmp1);
    746 	  emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1);
    747 	  emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
    748 	}
    749 	emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
    750 #if LJ_SOFTFP
    751 	emit_ds(as, MIPSI_MOVE, tmp1, type);
    752 	emit_ds(as, MIPSI_MOVE, tmp2, key);
    753 #else
    754 	emit_tg(as, MIPSI_MFC1, tmp2, key);
    755 	emit_tg(as, MIPSI_MFC1, tmp1, key+1);
    756 #endif
    757       } else {
    758 	emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
    759 	emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
    760 	emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
    761       }
    762     }
    763   }
    764 }
    765 
    766 static void asm_hrefk(ASMState *as, IRIns *ir)
    767 {
    768   IRIns *kslot = IR(ir->op2);
    769   IRIns *irkey = IR(kslot->op1);
    770   int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
    771   int32_t kofs = ofs + (int32_t)offsetof(Node, key);
    772   Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
    773   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
    774   Reg key = RID_NONE, type = RID_TMP, idx = node;
    775   RegSet allow = rset_exclude(RSET_GPR, node);
    776   int32_t lo, hi;
    777   lua_assert(ofs % sizeof(Node) == 0);
    778   if (ofs > 32736) {
    779     idx = dest;
    780     rset_clear(allow, dest);
    781     kofs = (int32_t)offsetof(Node, key);
    782   } else if (ra_hasreg(dest)) {
    783     emit_tsi(as, MIPSI_ADDIU, dest, node, ofs);
    784   }
    785   if (!irt_ispri(irkey->t)) {
    786     key = ra_scratch(as, allow);
    787     rset_clear(allow, key);
    788   }
    789   if (irt_isnum(irkey->t)) {
    790     lo = (int32_t)ir_knum(irkey)->u32.lo;
    791     hi = (int32_t)ir_knum(irkey)->u32.hi;
    792   } else {
    793     lo = irkey->i;
    794     hi = irt_toitype(irkey->t);
    795     if (!ra_hasreg(key))
    796       goto nolo;
    797   }
    798   asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO);
    799 nolo:
    800   asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
    801   if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
    802   emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
    803   if (ofs > 32736)
    804     emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
    805 }
    806 
    807 static void asm_uref(ASMState *as, IRIns *ir)
    808 {
    809   Reg dest = ra_dest(as, ir, RSET_GPR);
    810   if (irref_isk(ir->op1)) {
    811     GCfunc *fn = ir_kfunc(IR(ir->op1));
    812     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    813     emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR);
    814   } else {
    815     Reg uv = ra_scratch(as, RSET_GPR);
    816     Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    817     if (ir->o == IR_UREFC) {
    818       asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
    819       emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
    820       emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    821     } else {
    822       emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v));
    823     }
    824     emit_tsi(as, MIPSI_LW, uv, func,
    825 	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
    826   }
    827 }
    828 
    829 static void asm_fref(ASMState *as, IRIns *ir)
    830 {
    831   UNUSED(as); UNUSED(ir);
    832   lua_assert(!ra_used(ir));
    833 }
    834 
    835 static void asm_strref(ASMState *as, IRIns *ir)
    836 {
    837   Reg dest = ra_dest(as, ir, RSET_GPR);
    838   IRRef ref = ir->op2, refk = ir->op1;
    839   int32_t ofs = (int32_t)sizeof(GCstr);
    840   Reg r;
    841   if (irref_isk(ref)) {
    842     IRRef tmp = refk; refk = ref; ref = tmp;
    843   } else if (!irref_isk(refk)) {
    844     Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    845     IRIns *irr = IR(ir->op2);
    846     if (ra_hasreg(irr->r)) {
    847       ra_noweak(as, irr->r);
    848       right = irr->r;
    849     } else if (mayfuse(as, irr->op2) &&
    850 	       irr->o == IR_ADD && irref_isk(irr->op2) &&
    851 	       checki16(ofs + IR(irr->op2)->i)) {
    852       ofs += IR(irr->op2)->i;
    853       right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    854     } else {
    855       right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    856     }
    857     emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs);
    858     emit_dst(as, MIPSI_ADDU, dest, left, right);
    859     return;
    860   }
    861   r = ra_alloc1(as, ref, RSET_GPR);
    862   ofs += IR(refk)->i;
    863   if (checki16(ofs))
    864     emit_tsi(as, MIPSI_ADDIU, dest, r, ofs);
    865   else
    866     emit_dst(as, MIPSI_ADDU, dest, r,
    867 	     ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
    868 }
    869 
    870 /* -- Loads and stores ---------------------------------------------------- */
    871 
    872 static MIPSIns asm_fxloadins(IRIns *ir)
    873 {
    874   switch (irt_type(ir->t)) {
    875   case IRT_I8: return MIPSI_LB;
    876   case IRT_U8: return MIPSI_LBU;
    877   case IRT_I16: return MIPSI_LH;
    878   case IRT_U16: return MIPSI_LHU;
    879   case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1;
    880   case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
    881   default: return MIPSI_LW;
    882   }
    883 }
    884 
    885 static MIPSIns asm_fxstoreins(IRIns *ir)
    886 {
    887   switch (irt_type(ir->t)) {
    888   case IRT_I8: case IRT_U8: return MIPSI_SB;
    889   case IRT_I16: case IRT_U16: return MIPSI_SH;
    890   case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1;
    891   case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
    892   default: return MIPSI_SW;
    893   }
    894 }
    895 
    896 static void asm_fload(ASMState *as, IRIns *ir)
    897 {
    898   Reg dest = ra_dest(as, ir, RSET_GPR);
    899   MIPSIns mi = asm_fxloadins(ir);
    900   Reg idx;
    901   int32_t ofs;
    902   if (ir->op1 == REF_NIL) {
    903     idx = RID_JGL;
    904     ofs = ir->op2 - 32768;
    905   } else {
    906     idx = ra_alloc1(as, ir->op1, RSET_GPR);
    907     if (ir->op2 == IRFL_TAB_ARRAY) {
    908       ofs = asm_fuseabase(as, ir->op1);
    909       if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
    910 	emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
    911 	return;
    912       }
    913     }
    914     ofs = field_ofs[ir->op2];
    915   }
    916   lua_assert(!irt_isfp(ir->t));
    917   emit_tsi(as, mi, dest, idx, ofs);
    918 }
    919 
    920 static void asm_fstore(ASMState *as, IRIns *ir)
    921 {
    922   if (ir->r != RID_SINK) {
    923     Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
    924     IRIns *irf = IR(ir->op1);
    925     Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    926     int32_t ofs = field_ofs[irf->op2];
    927     MIPSIns mi = asm_fxstoreins(ir);
    928     lua_assert(!irt_isfp(ir->t));
    929     emit_tsi(as, mi, src, idx, ofs);
    930   }
    931 }
    932 
    933 static void asm_xload(ASMState *as, IRIns *ir)
    934 {
    935   Reg dest = ra_dest(as, ir,
    936     (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
    937   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
    938   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
    939 }
    940 
    941 static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
    942 {
    943   if (ir->r != RID_SINK) {
    944     Reg src = ra_alloc1z(as, ir->op2,
    945       (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
    946     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
    947 		 rset_exclude(RSET_GPR, src), ofs);
    948   }
    949 }
    950 
    951 #define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
    952 
    953 static void asm_ahuvload(ASMState *as, IRIns *ir)
    954 {
    955   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
    956   IRType t = hiop ? IRT_NUM : irt_type(ir->t);
    957   Reg dest = RID_NONE, type = RID_TMP, idx;
    958   RegSet allow = RSET_GPR;
    959   int32_t ofs = 0;
    960   if (hiop && ra_used(ir+1)) {
    961     type = ra_dest(as, ir+1, allow);
    962     rset_clear(allow, type);
    963   }
    964   if (ra_used(ir)) {
    965     lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
    966 	       irt_isint(ir->t) || irt_isaddr(ir->t));
    967     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
    968     rset_clear(allow, dest);
    969   }
    970   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
    971   rset_clear(allow, idx);
    972   if (t == IRT_NUM) {
    973     asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
    974     emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
    975   } else {
    976     asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype_(t), allow));
    977   }
    978   if (ra_hasreg(dest)) {
    979     if (!LJ_SOFTFP && t == IRT_NUM)
    980       emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
    981     else
    982       emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
    983   }
    984   emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
    985 }
    986 
    987 static void asm_ahustore(ASMState *as, IRIns *ir)
    988 {
    989   RegSet allow = RSET_GPR;
    990   Reg idx, src = RID_NONE, type = RID_NONE;
    991   int32_t ofs = 0;
    992   if (ir->r == RID_SINK)
    993     return;
    994   if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    995     src = ra_alloc1(as, ir->op2, RSET_FPR);
    996   } else {
    997     int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
    998     if (!irt_ispri(ir->t)) {
    999       src = ra_alloc1(as, ir->op2, allow);
   1000       rset_clear(allow, src);
   1001     }
   1002     if (hiop)
   1003       type = ra_alloc1(as, (ir+1)->op2, allow);
   1004     else
   1005       type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
   1006     rset_clear(allow, type);
   1007   }
   1008   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
   1009   if (!LJ_SOFTFP && irt_isnum(ir->t)) {
   1010     emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
   1011   } else {
   1012     if (ra_hasreg(src))
   1013       emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
   1014     emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
   1015   }
   1016 }
   1017 
   1018 static void asm_sload(ASMState *as, IRIns *ir)
   1019 {
   1020   int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
   1021   int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
   1022   IRType t = hiop ? IRT_NUM : irt_type(ir->t);
   1023   Reg dest = RID_NONE, type = RID_NONE, base;
   1024   RegSet allow = RSET_GPR;
   1025   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
   1026   lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
   1027 #if LJ_SOFTFP
   1028   lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
   1029   if (hiop && ra_used(ir+1)) {
   1030     type = ra_dest(as, ir+1, allow);
   1031     rset_clear(allow, type);
   1032   }
   1033 #else
   1034   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) {
   1035     dest = ra_scratch(as, RSET_FPR);
   1036     asm_tointg(as, ir, dest);
   1037     t = IRT_NUM;  /* Continue with a regular number type check. */
   1038   } else
   1039 #endif
   1040   if (ra_used(ir)) {
   1041     lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
   1042 	       irt_isint(ir->t) || irt_isaddr(ir->t));
   1043     dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
   1044     rset_clear(allow, dest);
   1045     base = ra_alloc1(as, REF_BASE, allow);
   1046     rset_clear(allow, base);
   1047     if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
   1048       if (t == IRT_INT) {
   1049 	Reg tmp = ra_scratch(as, RSET_FPR);
   1050 	emit_tg(as, MIPSI_MFC1, dest, tmp);
   1051 	emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
   1052 	dest = tmp;
   1053 	t = IRT_NUM;  /* Check for original type. */
   1054       } else {
   1055 	Reg tmp = ra_scratch(as, RSET_GPR);
   1056 	emit_fg(as, MIPSI_CVT_D_W, dest, dest);
   1057 	emit_tg(as, MIPSI_MTC1, tmp, dest);
   1058 	dest = tmp;
   1059 	t = IRT_INT;  /* Check for original type. */
   1060       }
   1061     }
   1062     goto dotypecheck;
   1063   }
   1064   base = ra_alloc1(as, REF_BASE, allow);
   1065   rset_clear(allow, base);
   1066 dotypecheck:
   1067   if ((ir->op2 & IRSLOAD_TYPECHECK)) {
   1068     if (ra_noreg(type)) {
   1069       if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
   1070 	  rset_test((as->freeset & allow), dest+1)) {
   1071 	type = dest+1;
   1072 	ra_modified(as, type);
   1073       } else {
   1074 	type = RID_TMP;
   1075       }
   1076     }
   1077     if (t == IRT_NUM) {
   1078       asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
   1079       emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
   1080     } else {
   1081       Reg ktype = ra_allock(as, irt_toitype_(t), allow);
   1082       asm_guard(as, MIPSI_BNE, type, ktype);
   1083     }
   1084   }
   1085   if (ra_hasreg(dest)) {
   1086     if (!LJ_SOFTFP && t == IRT_NUM)
   1087       emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
   1088     else
   1089       emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
   1090   }
   1091   if (ra_hasreg(type))
   1092     emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
   1093 }
   1094 
   1095 /* -- Allocations --------------------------------------------------------- */
   1096 
   1097 #if LJ_HASFFI
   1098 static void asm_cnew(ASMState *as, IRIns *ir)
   1099 {
   1100   CTState *cts = ctype_ctsG(J2G(as->J));
   1101   CTypeID id = (CTypeID)IR(ir->op1)->i;
   1102   CTSize sz;
   1103   CTInfo info = lj_ctype_info(cts, id, &sz);
   1104   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
   1105   IRRef args[4];
   1106   RegSet drop = RSET_SCRATCH;
   1107   lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
   1108 
   1109   as->gcsteps++;
   1110   if (ra_hasreg(ir->r))
   1111     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   1112   ra_evictset(as, drop);
   1113   if (ra_used(ir))
   1114     ra_destreg(as, ir, RID_RET);  /* GCcdata * */
   1115 
   1116   /* Initialize immutable cdata object. */
   1117   if (ir->o == IR_CNEWI) {
   1118     RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
   1119     int32_t ofs = sizeof(GCcdata);
   1120     lua_assert(sz == 4 || sz == 8);
   1121     if (sz == 8) {
   1122       ofs += 4;
   1123       lua_assert((ir+1)->o == IR_HIOP);
   1124       if (LJ_LE) ir++;
   1125     }
   1126     for (;;) {
   1127       Reg r = ra_alloc1z(as, ir->op2, allow);
   1128       emit_tsi(as, MIPSI_SW, r, RID_RET, ofs);
   1129       rset_clear(allow, r);
   1130       if (ofs == sizeof(GCcdata)) break;
   1131       ofs -= 4; if (LJ_BE) ir++; else ir--;
   1132     }
   1133   } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
   1134     ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
   1135     args[0] = ASMREF_L;     /* lua_State *L */
   1136     args[1] = ir->op1;      /* CTypeID id   */
   1137     args[2] = ir->op2;      /* CTSize sz    */
   1138     args[3] = ASMREF_TMP1;  /* CTSize align */
   1139     asm_gencall(as, ci, args);
   1140     emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
   1141     return;
   1142   }
   1143 
   1144   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
   1145   emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
   1146   emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
   1147   emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
   1148   emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
   1149   args[0] = ASMREF_L;     /* lua_State *L */
   1150   args[1] = ASMREF_TMP1;  /* MSize size   */
   1151   asm_gencall(as, ci, args);
   1152   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
   1153 	       ra_releasetmp(as, ASMREF_TMP1));
   1154 }
   1155 #else
   1156 #define asm_cnew(as, ir)	((void)0)
   1157 #endif
   1158 
   1159 /* -- Write barriers ------------------------------------------------------ */
   1160 
   1161 static void asm_tbar(ASMState *as, IRIns *ir)
   1162 {
   1163   Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
   1164   Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
   1165   Reg link = RID_TMP;
   1166   MCLabel l_end = emit_label(as);
   1167   emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist));
   1168   emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
   1169   emit_setgl(as, tab, gc.grayagain);
   1170   emit_getgl(as, link, gc.grayagain);
   1171   emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP);  /* Clear black bit. */
   1172   emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
   1173   emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
   1174   emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked));
   1175 }
   1176 
   1177 static void asm_obar(ASMState *as, IRIns *ir)
   1178 {
   1179   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
   1180   IRRef args[2];
   1181   MCLabel l_end;
   1182   Reg obj, val, tmp;
   1183   /* No need for other object barriers (yet). */
   1184   lua_assert(IR(ir->op1)->o == IR_UREFC);
   1185   ra_evictset(as, RSET_SCRATCH);
   1186   l_end = emit_label(as);
   1187   args[0] = ASMREF_TMP1;  /* global_State *g */
   1188   args[1] = ir->op1;      /* TValue *tv      */
   1189   asm_gencall(as, ci, args);
   1190   emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
   1191   obj = IR(ir->op1)->r;
   1192   tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
   1193   emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
   1194   emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK);
   1195   emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
   1196   emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
   1197   val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
   1198   emit_tsi(as, MIPSI_LBU, tmp, obj,
   1199 	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
   1200   emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked));
   1201 }
   1202 
   1203 /* -- Arithmetic and logic operations ------------------------------------- */
   1204 
   1205 #if !LJ_SOFTFP
   1206 static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
   1207 {
   1208   Reg dest = ra_dest(as, ir, RSET_FPR);
   1209   Reg right, left = ra_alloc2(as, ir, RSET_FPR);
   1210   right = (left >> 8); left &= 255;
   1211   emit_fgh(as, mi, dest, left, right);
   1212 }
   1213 
   1214 static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
   1215 {
   1216   Reg dest = ra_dest(as, ir, RSET_FPR);
   1217   Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
   1218   emit_fg(as, mi, dest, left);
   1219 }
   1220 
   1221 static void asm_fpmath(ASMState *as, IRIns *ir)
   1222 {
   1223   if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
   1224     return;
   1225   if (ir->op2 <= IRFPM_TRUNC)
   1226     asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
   1227   else if (ir->op2 == IRFPM_SQRT)
   1228     asm_fpunary(as, ir, MIPSI_SQRT_D);
   1229   else
   1230     asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
   1231 }
   1232 #endif
   1233 
   1234 static void asm_add(ASMState *as, IRIns *ir)
   1235 {
   1236 #if !LJ_SOFTFP
   1237   if (irt_isnum(ir->t)) {
   1238     asm_fparith(as, ir, MIPSI_ADD_D);
   1239   } else
   1240 #endif
   1241   {
   1242     Reg dest = ra_dest(as, ir, RSET_GPR);
   1243     Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
   1244     if (irref_isk(ir->op2)) {
   1245       int32_t k = IR(ir->op2)->i;
   1246       if (checki16(k)) {
   1247 	emit_tsi(as, MIPSI_ADDIU, dest, left, k);
   1248 	return;
   1249       }
   1250     }
   1251     right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
   1252     emit_dst(as, MIPSI_ADDU, dest, left, right);
   1253   }
   1254 }
   1255 
   1256 static void asm_sub(ASMState *as, IRIns *ir)
   1257 {
   1258 #if !LJ_SOFTFP
   1259   if (irt_isnum(ir->t)) {
   1260     asm_fparith(as, ir, MIPSI_SUB_D);
   1261   } else
   1262 #endif
   1263   {
   1264     Reg dest = ra_dest(as, ir, RSET_GPR);
   1265     Reg right, left = ra_alloc2(as, ir, RSET_GPR);
   1266     right = (left >> 8); left &= 255;
   1267     emit_dst(as, MIPSI_SUBU, dest, left, right);
   1268   }
   1269 }
   1270 
   1271 static void asm_mul(ASMState *as, IRIns *ir)
   1272 {
   1273 #if !LJ_SOFTFP
   1274   if (irt_isnum(ir->t)) {
   1275     asm_fparith(as, ir, MIPSI_MUL_D);
   1276   } else
   1277 #endif
   1278   {
   1279     Reg dest = ra_dest(as, ir, RSET_GPR);
   1280     Reg right, left = ra_alloc2(as, ir, RSET_GPR);
   1281     right = (left >> 8); left &= 255;
   1282     emit_dst(as, MIPSI_MUL, dest, left, right);
   1283   }
   1284 }
   1285 
   1286 #define asm_div(as, ir)		asm_fparith(as, ir, MIPSI_DIV_D)
   1287 #define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
   1288 #define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)
   1289 
   1290 static void asm_neg(ASMState *as, IRIns *ir)
   1291 {
   1292 #if !LJ_SOFTFP
   1293   if (irt_isnum(ir->t)) {
   1294     asm_fpunary(as, ir, MIPSI_NEG_D);
   1295   } else
   1296 #endif
   1297   {
   1298     Reg dest = ra_dest(as, ir, RSET_GPR);
   1299     Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
   1300     emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
   1301   }
   1302 }
   1303 
   1304 #define asm_abs(as, ir)		asm_fpunary(as, ir, MIPSI_ABS_D)
   1305 #define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
   1306 #define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)
   1307 
   1308 static void asm_arithov(ASMState *as, IRIns *ir)
   1309 {
   1310   Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
   1311   if (irref_isk(ir->op2)) {
   1312     int k = IR(ir->op2)->i;
   1313     if (ir->o == IR_SUBOV) k = -k;
   1314     if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
   1315       left = ra_alloc1(as, ir->op1, RSET_GPR);
   1316       asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
   1317       emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left);
   1318       emit_tsi(as, MIPSI_ADDIU, dest, left, k);
   1319       if (dest == left) emit_move(as, RID_TMP, left);
   1320       return;
   1321     }
   1322   }
   1323   left = ra_alloc2(as, ir, RSET_GPR);
   1324   right = (left >> 8); left &= 255;
   1325   tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
   1326 						 right), dest));
   1327   asm_guard(as, MIPSI_BLTZ, RID_TMP, 0);
   1328   emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp);
   1329   if (ir->o == IR_ADDOV) {  /* ((dest^left) & (dest^right)) < 0 */
   1330     emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
   1331   } else {  /* ((dest^left) & (dest^~right)) < 0 */
   1332     emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest);
   1333     emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO);
   1334   }
   1335   emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left);
   1336   emit_dst(as, ir->o == IR_ADDOV ? MIPSI_ADDU : MIPSI_SUBU, dest, left, right);
   1337   if (dest == left || dest == right)
   1338     emit_move(as, RID_TMP, dest == left ? left : right);
   1339 }
   1340 
   1341 #define asm_addov(as, ir)	asm_arithov(as, ir)
   1342 #define asm_subov(as, ir)	asm_arithov(as, ir)
   1343 
   1344 static void asm_mulov(ASMState *as, IRIns *ir)
   1345 {
   1346   Reg dest = ra_dest(as, ir, RSET_GPR);
   1347   Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
   1348   right = (left >> 8); left &= 255;
   1349   tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
   1350 						 right), dest));
   1351   asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
   1352   emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
   1353   emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
   1354   emit_dst(as, MIPSI_MFLO, dest, 0, 0);
   1355   emit_dst(as, MIPSI_MULT, 0, left, right);
   1356 }
   1357 
   1358 #if LJ_HASFFI
   1359 static void asm_add64(ASMState *as, IRIns *ir)
   1360 {
   1361   Reg dest = ra_dest(as, ir, RSET_GPR);
   1362   Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
   1363   if (irref_isk(ir->op2)) {
   1364     int32_t k = IR(ir->op2)->i;
   1365     if (k == 0) {
   1366       emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP);
   1367       goto loarith;
   1368     } else if (checki16(k)) {
   1369       emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
   1370       emit_tsi(as, MIPSI_ADDIU, dest, left, k);
   1371       goto loarith;
   1372     }
   1373   }
   1374   emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
   1375   right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
   1376   emit_dst(as, MIPSI_ADDU, dest, left, right);
   1377 loarith:
   1378   ir--;
   1379   dest = ra_dest(as, ir, RSET_GPR);
   1380   left = ra_alloc1(as, ir->op1, RSET_GPR);
   1381   if (irref_isk(ir->op2)) {
   1382     int32_t k = IR(ir->op2)->i;
   1383     if (k == 0) {
   1384       if (dest != left)
   1385 	emit_move(as, dest, left);
   1386       return;
   1387     } else if (checki16(k)) {
   1388       if (dest == left) {
   1389 	Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left));
   1390 	emit_move(as, dest, tmp);
   1391 	dest = tmp;
   1392       }
   1393       emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left);
   1394       emit_tsi(as, MIPSI_ADDIU, dest, left, k);
   1395       return;
   1396     }
   1397   }
   1398   right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
   1399   if (dest == left && dest == right) {
   1400     Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
   1401     emit_move(as, dest, tmp);
   1402     dest = tmp;
   1403   }
   1404   emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left);
   1405   emit_dst(as, MIPSI_ADDU, dest, left, right);
   1406 }
   1407 
   1408 static void asm_sub64(ASMState *as, IRIns *ir)
   1409 {
   1410   Reg dest = ra_dest(as, ir, RSET_GPR);
   1411   Reg right, left = ra_alloc2(as, ir, RSET_GPR);
   1412   right = (left >> 8); left &= 255;
   1413   emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
   1414   emit_dst(as, MIPSI_SUBU, dest, left, right);
   1415   ir--;
   1416   dest = ra_dest(as, ir, RSET_GPR);
   1417   left = ra_alloc2(as, ir, RSET_GPR);
   1418   right = (left >> 8); left &= 255;
   1419   if (dest == left) {
   1420     Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
   1421     emit_move(as, dest, tmp);
   1422     dest = tmp;
   1423   }
   1424   emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest);
   1425   emit_dst(as, MIPSI_SUBU, dest, left, right);
   1426 }
   1427 
   1428 static void asm_neg64(ASMState *as, IRIns *ir)
   1429 {
   1430   Reg dest = ra_dest(as, ir, RSET_GPR);
   1431   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   1432   emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
   1433   emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
   1434   ir--;
   1435   dest = ra_dest(as, ir, RSET_GPR);
   1436   left = ra_alloc1(as, ir->op1, RSET_GPR);
   1437   emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest);
   1438   emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
   1439 }
   1440 #endif
   1441 
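        /* NOT is NOR with the zero register. A NOT of a fuseable BOR collapses
        ** into a single NOR: nor dest, left, right == ~(left | right).
        */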
   1442 static void asm_bnot(ASMState *as, IRIns *ir)
   1443 {
   1444   Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
   1445   IRIns *irl = IR(ir->op1);
   1446   if (mayfuse(as, ir->op1) && irl->o == IR_BOR) {
   1447     left = ra_alloc2(as, irl, RSET_GPR);
   1448     right = (left >> 8); left &= 255;
   1449   } else {
   1450     left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
   1451     right = RID_ZERO;
   1452   }
   1453   emit_dst(as, MIPSI_NOR, dest, left, right);
   1454 }
   1455 
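        /* Byte-swap a 32 bit word: WSBH plus a halfword rotate on MIPS32R2.
        ** The pre-R2 fallback assembles it from shifts and masks; roughly:
        **   sll TMP, left, 24; srl tmp, left, 24; or tmp, tmp, TMP
        **   andi TMP, left, 0xff00; srl dest, left, 8; sll TMP, TMP, 8
        **   andi dest, dest, 0xff00; or dest, dest, TMP; or dest, dest, tmp
        */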
   1456 static void asm_bswap(ASMState *as, IRIns *ir)
   1457 {
   1458   Reg dest = ra_dest(as, ir, RSET_GPR);
   1459   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
   1460   if ((as->flags & JIT_F_MIPSXXR2)) {
   1461     emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
   1462     emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
   1463   } else {
   1464     Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest));
   1465     emit_dst(as, MIPSI_OR, dest, dest, tmp);
   1466     emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
   1467     emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00);
   1468     emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8);
   1469     emit_dta(as, MIPSI_SRL, dest, left, 8);
   1470     emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00);
   1471     emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP);
   1472     emit_dta(as, MIPSI_SRL, tmp, left, 24);
   1473     emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
   1474   }
   1475 }
   1476 
   1477 static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   1478 {
   1479   Reg dest = ra_dest(as, ir, RSET_GPR);
   1480   Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
   1481   if (irref_isk(ir->op2)) {
   1482     int32_t k = IR(ir->op2)->i;
   1483     if (checku16(k)) {
   1484       emit_tsi(as, mik, dest, left, k);
   1485       return;
   1486     }
   1487   }
   1488   right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
   1489   emit_dst(as, mi, dest, left, right);
   1490 }
   1491 
   1492 #define asm_band(as, ir)	asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
   1493 #define asm_bor(as, ir)		asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
   1494 #define asm_bxor(as, ir)	asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
   1495 
   1496 static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
   1497 {
   1498   Reg dest = ra_dest(as, ir, RSET_GPR);
   1499   if (irref_isk(ir->op2)) {  /* Constant shifts. */
   1500     uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
   1501     emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift);
   1502   } else {
   1503     Reg right, left = ra_alloc2(as, ir, RSET_GPR);
   1504     right = (left >> 8); left &= 255;
   1505     emit_dst(as, mi, dest, right, left);  /* Shift amount is in rs. */
   1506   }
   1507 }
   1508 
   1509 #define asm_bshl(as, ir)	asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
   1510 #define asm_bshr(as, ir)	asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
   1511 #define asm_bsar(as, ir)	asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
   1512 #define asm_brol(as, ir)	lua_assert(0)	/* Canonicalized to BROR; never reaches the backend. */
   1513 
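        /* Rotate right. Without ROTR/ROTRV (pre-R2) it is synthesized as
        ** (x >> n) | (x << (32-n)), with 32-n computed into RID_TMP first.
        */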
   1514 static void asm_bror(ASMState *as, IRIns *ir)
   1515 {
   1516   if ((as->flags & JIT_F_MIPSXXR2)) {
   1517     asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
   1518   } else {
   1519     Reg dest = ra_dest(as, ir, RSET_GPR);
   1520     if (irref_isk(ir->op2)) {  /* Constant shifts. */
   1521       uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
   1522       Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
   1523       emit_rotr(as, dest, left, RID_TMP, shift);
   1524     } else {
   1525       Reg right, left = ra_alloc2(as, ir, RSET_GPR);
   1526       right = (left >> 8); left &= 255;
   1527       emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
   1528       emit_dst(as, MIPSI_SRLV, dest, right, left);
   1529       emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left);
   1530       emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right);
   1531     }
   1532   }
   1533 }
   1534 
   1535 #if LJ_SOFTFP
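        /* Soft-float min/max via the lj_vm_sfmin/lj_vm_sfmax helpers. Each
        ** number is passed as a lo/hi word pair; the LJ_BE xor swaps the word
        ** order within each pair on big-endian targets.
        */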
   1536 static void asm_sfpmin_max(ASMState *as, IRIns *ir)
   1537 {
   1538   CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
   1539   IRRef args[4];
   1540   args[0^LJ_BE] = ir->op1;
   1541   args[1^LJ_BE] = (ir+1)->op1;
   1542   args[2^LJ_BE] = ir->op2;
   1543   args[3^LJ_BE] = (ir+1)->op2;
   1544   asm_setupresult(as, ir, &ci);
   1545   emit_call(as, (void *)ci.func, 0);
   1546   ci.func = NULL;  /* The call was already emitted above, so asm_gencall only sets up the args. */
   1547   asm_gencall(as, &ci, args);
   1548 }
   1549 #endif
   1550 
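        /* Branchless min/max: c.olt.d plus movf.d/movt.d on the FP condition
        ** bit for numbers, slt plus movn/movz on RID_TMP for integers.
        */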
   1551 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
   1552 {
   1553   if (!LJ_SOFTFP && irt_isnum(ir->t)) {
   1554     Reg dest = ra_dest(as, ir, RSET_FPR);
   1555     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
   1556     right = (left >> 8); left &= 255;
   1557     if (dest == left) {
   1558       emit_fg(as, MIPSI_MOVT_D, dest, right);
   1559     } else {
   1560       emit_fg(as, MIPSI_MOVF_D, dest, left);
   1561       if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
   1562     }
   1563     emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
   1564   } else {
   1565     Reg dest = ra_dest(as, ir, RSET_GPR);
   1566     Reg right, left = ra_alloc2(as, ir, RSET_GPR);
   1567     right = (left >> 8); left &= 255;
   1568     if (dest == left) {
   1569       emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
   1570     } else {
   1571       emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
   1572       if (dest != right) emit_move(as, dest, right);
   1573     }
   1574     emit_dst(as, MIPSI_SLT, RID_TMP,
   1575 	     ismax ? left : right, ismax ? right : left);
   1576   }
   1577 }
   1578 
   1579 #define asm_min(as, ir)		asm_min_max(as, ir, 0)
   1580 #define asm_max(as, ir)		asm_min_max(as, ir, 1)
   1581 
   1582 /* -- Comparisons --------------------------------------------------------- */
   1583 
   1584 #if LJ_SOFTFP
   1585 /* SFP comparisons. */
   1586 static void asm_sfpcomp(ASMState *as, IRIns *ir)
   1587 {
   1588   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
   1589   RegSet drop = RSET_SCRATCH;
   1590   Reg r;
   1591   IRRef args[4];
   1592   args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
   1593   args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
   1594 
   1595   for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
   1596     if (!rset_test(as->freeset, r) &&
   1597 	regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
   1598       rset_clear(drop, r);
   1599   }
   1600   ra_evictset(as, drop);
   1601 
   1602   asm_setupresult(as, ir, ci);
   1603 
   1604   switch ((IROp)ir->o) {
   1605   case IR_LT:
   1606     asm_guard(as, MIPSI_BGEZ, RID_RET, 0);
   1607     break;
   1608   case IR_ULT:
   1609     asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
   1610     emit_loadi(as, RID_TMP, 1);
   1611     asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);
   1612     break;
   1613   case IR_GE:
   1614     asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
   1615     emit_loadi(as, RID_TMP, 2);
   1616     asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
   1617     break;
   1618   case IR_LE:
   1619     asm_guard(as, MIPSI_BGTZ, RID_RET, 0);
   1620     break;
   1621   case IR_GT:
   1622     asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
   1623     emit_loadi(as, RID_TMP, 2);
   1624     asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
   1625     break;
   1626   case IR_UGE:
   1627     asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
   1628     break;
   1629   case IR_ULE:
   1630     asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
   1631     emit_loadi(as, RID_TMP, 1);
   1632     break;
   1633   case IR_UGT: case IR_ABC:
   1634     asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
   1635     break;
   1636   case IR_EQ: case IR_NE:
   1637     asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO);  /* fallthrough */
   1638   default:
   1639     break;
   1640   }
   1641   asm_gencall(as, ci, args);
   1642 }
   1643 #endif
   1644 
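        /* The opcode bits encode the comparison: bit 2 selects unsigned
        ** (SLTU/SLTIU), bit 1 swaps the operands (LE/GT are the swapped
        ** GE/LT), and bit 0 negates the test, flipping the guard branch.
        ** LE/GT against a constant k become LT/GE against k+1 instead.
        */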
   1645 static void asm_comp(ASMState *as, IRIns *ir)
   1646 {
   1647   /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
   1648   IROp op = ir->o;
   1649   if (!LJ_SOFTFP && irt_isnum(ir->t)) {
   1650     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
   1651     right = (left >> 8); left &= 255;
   1652     asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
   1653     emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
   1654   } else {
   1655     Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
   1656     if (op == IR_ABC) op = IR_UGT;
   1657     if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) {
   1658       MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
   1659 			    ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
   1660       asm_guard(as, mi, left, 0);
   1661     } else {
   1662       if (irref_isk(ir->op2)) {
   1663 	int32_t k = IR(ir->op2)->i;
   1664 	if ((op&2)) k++;
   1665 	if (checki16(k)) {
   1666 	  asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
   1667 	  emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI,
   1668 		   RID_TMP, left, k);
   1669 	  return;
   1670 	}
   1671       }
   1672       right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
   1673       asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
   1674       emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT,
   1675 	       RID_TMP, (op&2) ? right : left, (op&2) ? left : right);
   1676     }
   1677   }
   1678 }
   1679 
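        /* EQ/NE: c.eq.d plus a BC1T/BC1F guard for numbers; integers and
        ** addresses guard directly with BEQ/BNE on the two registers.
        */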
   1680 static void asm_equal(ASMState *as, IRIns *ir)
   1681 {
   1682   Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR);
   1683   right = (left >> 8); left &= 255;
   1684   if (!LJ_SOFTFP && irt_isnum(ir->t)) {
   1685     asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
   1686     emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
   1687   } else {
   1688     asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
   1689   }
   1690 }
   1691 
   1692 #if LJ_HASFFI
   1693 /* 64 bit integer comparisons. */
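        /* The unsigned loword SLTU lands in the delay slot of the hiword
        ** equality branch, so it always executes; if the hiwords differ, the
        ** hiword SLT(U) overwrites its result before the guard tests RID_TMP.
        */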
   1694 static void asm_comp64(ASMState *as, IRIns *ir)
   1695 {
   1696   /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
   1697   IROp op = (ir-1)->o;
   1698   MCLabel l_end;
   1699   Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
   1700   righthi = (lefthi >> 8); lefthi &= 255;
   1701   leftlo = ra_alloc2(as, ir-1,
   1702 		     rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi));
   1703   rightlo = (leftlo >> 8); leftlo &= 255;
   1704   asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
   1705   l_end = emit_label(as);
   1706   if (lefthi != righthi)
   1707     emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP,
   1708 	     (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi);
   1709   emit_dst(as, MIPSI_SLTU, RID_TMP,
   1710 	   (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo);
   1711   if (lefthi != righthi)
   1712     emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end);
   1713 }
   1714 
   1715 static void asm_comp64eq(ASMState *as, IRIns *ir)
   1716 {
   1717   Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
   1718   right = (left >> 8); left &= 255;
   1719   asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
   1720   tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
   1721   emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
   1722   emit_dst(as, MIPSI_XOR, tmp, left, right);
   1723   left = ra_alloc2(as, ir-1, RSET_GPR);
   1724   right = (left >> 8); left &= 255;
   1725   emit_dst(as, MIPSI_XOR, RID_TMP, left, right);
   1726 }
   1727 #endif
   1728 
   1729 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
   1730 
   1731 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
   1732 static void asm_hiop(ASMState *as, IRIns *ir)
   1733 {
   1734 #if LJ_HASFFI || LJ_SOFTFP
   1735   /* HIOP is marked as a store because it needs its own DCE logic. */
   1736   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   1737   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   1738   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
   1739     as->curins--;  /* Always skip the CONV. */
   1740 #if LJ_HASFFI && !LJ_SOFTFP
   1741     if (usehi || uselo)
   1742       asm_conv64(as, ir);
   1743     return;
   1744 #endif
   1745   } else if ((ir-1)->o < IR_EQ) {  /* 64 bit integer comparisons. ORDER IR. */
   1746     as->curins--;  /* Always skip the loword comparison. */
   1747 #if LJ_SOFTFP
   1748     if (!irt_isint(ir->t)) {
   1749       asm_sfpcomp(as, ir-1);
   1750       return;
   1751     }
   1752 #endif
   1753 #if LJ_HASFFI
   1754     asm_comp64(as, ir);
   1755 #endif
   1756     return;
   1757   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer EQ/NE comparison. ORDER IR. */
   1758     as->curins--;  /* Always skip the loword comparison. */
   1759 #if LJ_SOFTFP
   1760     if (!irt_isint(ir->t)) {
   1761       asm_sfpcomp(as, ir-1);
   1762       return;
   1763     }
   1764 #endif
   1765 #if LJ_HASFFI
   1766     asm_comp64eq(as, ir);
   1767 #endif
   1768     return;
   1769 #if LJ_SOFTFP
   1770   } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
   1771     as->curins--;  /* Always skip the loword min/max. */
   1772     if (uselo || usehi)
   1773       asm_sfpmin_max(as, ir-1);
   1774     return;
   1775 #endif
   1776   } else if ((ir-1)->o == IR_XSTORE) {
   1777     as->curins--;  /* Handle both stores here. */
   1778     if ((ir-1)->r != RID_SINK) {
   1779       asm_xstore_(as, ir, LJ_LE ? 4 : 0);
   1780       asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
   1781     }
   1782     return;
   1783   }
   1784   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   1785   switch ((ir-1)->o) {
   1786 #if LJ_HASFFI
   1787   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   1788   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   1789   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
   1790 #endif
   1791 #if LJ_SOFTFP
   1792   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
   1793   case IR_STRTO:
   1794     if (!uselo)
   1795       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
   1796     break;
   1797 #endif
   1798   case IR_CALLN:
   1799   case IR_CALLS:
   1800   case IR_CALLXS:
   1801     if (!uselo)
   1802       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
   1803     break;
   1804 #if LJ_SOFTFP
   1805   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
   1806 #endif
   1807   case IR_CNEWI:
   1808     /* Nothing to do here. Handled by lo op itself. */
   1809     break;
   1810   default: lua_assert(0); break;
   1811   }
   1812 #else
   1813   UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
   1814 #endif
   1815 }
   1816 
   1817 /* -- Profiling ----------------------------------------------------------- */
   1818 
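        /* Exit the trace when the profiler hook is armed: load g->hookmask,
        ** mask with HOOK_PROFILE and guard on a non-zero result.
        */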
   1819 static void asm_prof(ASMState *as, IRIns *ir)
   1820 {
   1821   UNUSED(ir);
   1822   asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
   1823   emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
   1824   emit_lsglptr(as, MIPSI_LBU, RID_TMP,
   1825 	       (int32_t)offsetof(global_State, hookmask));
   1826 }
   1827 
   1828 /* -- Stack handling ------------------------------------------------------ */
   1829 
   1830 /* Check Lua stack size for overflow. Use exit handler as fallback. */
   1831 static void asm_stack_check(ASMState *as, BCReg topslot,
   1832 			    IRIns *irp, RegSet allow, ExitNo exitno)
   1833 {
   1834   /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
   1835   Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
   1836   ExitNo oldsnap = as->snapno;
   1837   rset_clear(allow, pbase);
   1838   tmp = allow ? rset_pickbot(allow) :
   1839 		(pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
   1840   as->snapno = exitno;
   1841   asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
   1842   as->snapno = oldsnap;
   1843   if (allow == RSET_EMPTY)  /* Restore temp. register. */
   1844     emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0);
   1845   else
   1846     ra_modified(as, tmp);
   1847   emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
   1848   emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase);
   1849   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
   1850   if (pbase == RID_TMP)
   1851     emit_getgl(as, RID_TMP, jit_base);
   1852   emit_getgl(as, tmp, cur_L);
   1853   if (allow == RSET_EMPTY)  /* Spill temp. register. */
   1854     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
   1855 }
   1856 
   1857 /* Restore Lua stack from on-trace state. */
   1858 static void asm_stack_restore(ASMState *as, SnapShot *snap)
   1859 {
   1860   SnapEntry *map = &as->T->snapmap[snap->mapofs];
   1861   SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
   1862   MSize n, nent = snap->nent;
   1863   /* Store the value of all modified slots to the Lua stack. */
   1864   for (n = 0; n < nent; n++) {
   1865     SnapEntry sn = map[n];
   1866     BCReg s = snap_slot(sn);
   1867     int32_t ofs = 8*((int32_t)s-1);
   1868     IRRef ref = snap_ref(sn);
   1869     IRIns *ir = IR(ref);
   1870     if ((sn & SNAP_NORESTORE))
   1871       continue;
   1872     if (irt_isnum(ir->t)) {
   1873 #if LJ_SOFTFP
   1874       Reg tmp;
   1875       RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
   1876       lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
   1877       tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
   1878       emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
   1879       if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
   1880       tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
   1881       emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
   1882 #else
   1883       Reg src = ra_alloc1(as, ref, RSET_FPR);
   1884       emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
   1885 #endif
   1886     } else {
   1887       Reg type;
   1888       RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
   1889       lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
   1890       if (!irt_ispri(ir->t)) {
   1891 	Reg src = ra_alloc1(as, ref, allow);
   1892 	rset_clear(allow, src);
   1893 	emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0));
   1894       }
   1895       if ((sn & (SNAP_CONT|SNAP_FRAME))) {
   1896 	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
   1897 	type = ra_allock(as, (int32_t)(*flinks--), allow);
   1898 #if LJ_SOFTFP
   1899       } else if ((sn & SNAP_SOFTFPNUM)) {
   1900 	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
   1901 #endif
   1902       } else {
   1903 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
   1904       }
   1905       emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
   1906     }
   1907     checkmclim(as);
   1908   }
   1909   lua_assert(map + nent == flinks);
   1910 }
   1911 
   1912 /* -- GC handling --------------------------------------------------------- */
   1913 
   1914 /* Check GC threshold and do one or more GC steps. */
   1915 static void asm_gc_check(ASMState *as)
   1916 {
   1917   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
   1918   IRRef args[2];
   1919   MCLabel l_end;
   1920   Reg tmp;
   1921   ra_evictset(as, RSET_SCRATCH);
   1922   l_end = emit_label(as);
   1923   /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
   1924   /* Assumes asm_snap_prep() already done. */
   1925   asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO);
   1926   args[0] = ASMREF_TMP1;  /* global_State *g */
   1927   args[1] = ASMREF_TMP2;  /* MSize steps     */
   1928   asm_gencall(as, ci, args);
   1929   emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
   1930   tmp = ra_releasetmp(as, ASMREF_TMP2);
   1931   emit_loadi(as, tmp, as->gcsteps);
   1932   /* Jump around GC step if GC total < GC threshold. */
   1933   emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end);
   1934   emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp);
   1935   emit_getgl(as, tmp, gc.threshold);
   1936   emit_getgl(as, RID_TMP, gc.total);
   1937   as->gcsteps = 0;
   1938   checkmclim(as);
   1939 }
   1940 
   1941 /* -- Loop handling ------------------------------------------------------- */
   1942 
   1943 /* Fixup the loop branch. */
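        /* An inverted loop ends in a conditional branch whose 16 bit offset is
        ** in words relative to the delay slot, hence target-p+2 for the branch
        ** at p[-3]. Otherwise a J within the same 256 MB region is patched in.
        */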
   1944 static void asm_loop_fixup(ASMState *as)
   1945 {
   1946   MCode *p = as->mctop;
   1947   MCode *target = as->mcp;
   1948   p[-1] = MIPSI_NOP;
   1949   if (as->loopinv) {  /* Inverted loop branch? */
   1950     /* asm_guard already inverted the cond branch. Only patch the target. */
   1951     p[-3] |= ((target-p+2) & 0x0000ffffu);
   1952   } else {
   1953     p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
   1954   }
   1955 }
   1956 
   1957 /* -- Head of trace ------------------------------------------------------- */
   1958 
   1959 /* Coalesce BASE register for a root trace. */
   1960 static void asm_head_root_base(ASMState *as)
   1961 {
   1962   IRIns *ir = IR(REF_BASE);
   1963   Reg r = ir->r;
   1964   if (as->loopinv) as->mctop--;
   1965   if (ra_hasreg(r)) {
   1966     ra_free(as, r);
   1967     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
   1968       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
   1969     if (r != RID_BASE)
   1970       emit_move(as, r, RID_BASE);
   1971   }
   1972 }
   1973 
   1974 /* Coalesce BASE register for a side trace. */
   1975 static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
   1976 {
   1977   IRIns *ir = IR(REF_BASE);
   1978   Reg r = ir->r;
   1979   if (as->loopinv) as->mctop--;
   1980   if (ra_hasreg(r)) {
   1981     ra_free(as, r);
   1982     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
   1983       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
   1984     if (irp->r == r) {
   1985       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
   1986     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
   1987       rset_clear(allow, irp->r);
   1988       emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
   1989     } else {
   1990       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
   1991     }
   1992   }
   1993   return allow;
   1994 }
   1995 
   1996 /* -- Tail of trace ------------------------------------------------------- */
   1997 
   1998 /* Fixup the tail code. */
   1999 static void asm_tail_fixup(ASMState *as, TraceNo lnk)
   2000 {
   2001   MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
   2002   int32_t spadj = as->T->spadjust;
   2003   MCode *p = as->mctop-1;
   2004   *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
   2005   p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
   2006 }
   2007 
   2008 /* Prepare tail of code. */
   2009 static void asm_tail_prep(ASMState *as)
   2010 {
   2011   as->mcp = as->mctop-2;  /* Leave room for branch plus nop or stack adj. */
   2012   as->invmcp = as->loopref ? as->mcp : NULL;
   2013 }
   2014 
   2015 /* -- Trace setup --------------------------------------------------------- */
   2016 
   2017 /* Ensure there are enough stack slots for call arguments. */
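        /* MIPS o32: the first 4 argument words travel in registers (with
        ** leading FP args in FP argument registers), the rest in stack slots;
        ** doubles need an even-aligned pair, hence the rounding below.
        */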
   2018 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
   2019 {
   2020   IRRef args[CCI_NARGS_MAX*2];
   2021   uint32_t i, nargs = CCI_XNARGS(ci);
   2022   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   2023   asm_collectargs(as, ir, ci, args);
   2024   for (i = 0; i < nargs; i++) {
   2025     if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) &&
   2026 	nfpr > 0 && !(ci->flags & CCI_VARARG)) {
   2027       nfpr--;
   2028       ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
   2029     } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) {
   2030       nfpr = 0;
   2031       ngpr = ngpr & ~1;
   2032       if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
   2033     } else {
   2034       nfpr = 0;
   2035       if (ngpr > 0) ngpr--; else nslots++;
   2036     }
   2037   }
   2038   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
   2039     as->evenspill = nslots;
   2040   return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
   2041 }
   2042 
   2043 static void asm_setup_target(ASMState *as)
   2044 {
   2045   asm_sparejump_setup(as);
   2046   asm_exitstub_setup(as);
   2047 }
   2048 
   2049 /* -- Trace patching ------------------------------------------------------ */
   2050 
   2051 /* Patch exit jumps of existing machine code to a new target. */
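        /* Exit branches are located by scanning the trace for the trailing
        ** 'li TMP, exitno' load. Targets beyond the 16 bit branch range are
        ** reached through a MIPS_SPAREJUMP slot at the bottom of the mcarea.
        */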
   2052 void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
   2053 {
   2054   MCode *p = T->mcode;
   2055   MCode *pe = (MCode *)((char *)p + T->szmcode);
   2056   MCode *px = exitstub_trace_addr(T, exitno);
   2057   MCode *cstart = NULL, *cstop = NULL;
   2058   MCode *mcarea = lj_mcode_patch(J, p, 0);
   2059   MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno;
   2060   MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
   2061   for (p++; p < pe; p++) {
   2062     if (*p == exitload) {  /* Look for load of exit number. */
   2063       if (((p[-1] ^ (px-p)) & 0xffffu) == 0) {  /* Look for exitstub branch. */
   2064 	ptrdiff_t delta = target - p;
   2065 	if (((delta + 0x8000) >> 16) == 0) {  /* Patch in-range branch. */
   2066 	patchbranch:
   2067 	  p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
   2068 	  *p = MIPSI_NOP;  /* Replace the load of the exit number. */
   2069 	  cstop = p;
   2070 	  if (!cstart) cstart = p-1;
   2071 	} else {  /* Branch out of range. Use spare jump slot in mcarea. */
   2072 	  int i;
   2073 	  for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) {
   2074 	    if (mcarea[i] == tjump) {
   2075 	      delta = mcarea+i - p;
   2076 	      goto patchbranch;
   2077 	    } else if (mcarea[i] == MIPSI_NOP) {
   2078 	      mcarea[i] = tjump;
   2079 	      cstart = mcarea+i;
   2080 	      delta = mcarea+i - p;
   2081 	      goto patchbranch;
   2082 	    }
   2083 	  }
   2084 	  /* Ignore jump slot overflow. Child trace is simply not attached. */
   2085 	}
   2086       } else if (p+1 == pe) {
   2087 	/* Patch NOP after code for inverted loop branch. Use of J is ok. */
   2088 	lua_assert(p[1] == MIPSI_NOP);
   2089 	p[1] = tjump;
   2090 	*p = MIPSI_NOP;  /* Replace the load of the exit number. */
   2091 	cstop = p+2;
   2092 	if (!cstart) cstart = p+1;
   2093       }
   2094     }
   2095   }
   2096   if (cstart) lj_mcode_sync(cstart, cstop);
   2097   lj_mcode_patch(J, mcarea, 1);
   2098 }
   2099