/*
** MIPS IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Register allocator extensions --------------------------------------- */

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}

/* Allocate a register or RID_ZERO. */
static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0)
      return RID_ZERO;
    r = ra_allocref(as, ref, allow);
  } else {
    ra_noweak(as, r);
  }
  return r;
}

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_alloc1z(as, ir->op2, allow);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_alloc1z(as, ir->op1, allow);
    right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}

/* -- Guard handling ------------------------------------------------------ */

/* Need some spare long-range jump slots, for out-of-range branches. */
#define MIPS_SPAREJUMP 4

/* Setup spare long-range jump slots per mcarea. */
static void asm_sparejump_setup(ASMState *as)
{
  MCode *mxp = as->mcbot;
  /* Assumes sizeof(MCLink) == 8. */
  if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) {
    lua_assert(MIPSI_NOP == 0);
    memset(mxp+2, 0, MIPS_SPAREJUMP*8);
    mxp += MIPS_SPAREJUMP*2;
    lua_assert(mxp < as->mctop);
    lj_mcode_sync(as->mcbot, mxp);
    lj_mcode_commitbot(as->J, mxp);
    as->mcbot = mxp;
    as->mclim = as->mcbot + MCLIM_REDZONE;
  }
}

/* Setup exit stub after the end of each trace. */
static void asm_exitstub_setup(ASMState *as)
{
  MCode *mxp = as->mctop;
  /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
  *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
  *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
  lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0);
  *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
  as->mctop = mxp;
}

/* Keep this in-sync with exitstub_trace_addr(). */
#define asm_exitstub_addr(as) ((as)->mctop)

/* Emit conditional branch to exit for guard. */
static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
{
  MCode *target = asm_exitstub_addr(as);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    as->invmcp = NULL;
    as->loopinv = 1;
    as->mcp = p+1;
    mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u);  /* Invert cond. */
    target = p;  /* Patch target later in asm_loop_fixup. */
  }
  emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
  emit_branch(as, mi, rs, rt, target);
}
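/* The single XOR above suffices to invert any of the guard branches used
** here: BEQ/BNE and BLEZ/BGTZ (the opcodes with (mi>>28) == 1) pair up
** via opcode bit 26, e.g. MIPSI_BEQ (0x10000000) ^ 0x04000000 ==
** MIPSI_BNE (0x14000000), while the REGIMM and COP1 branches (BLTZ/BGEZ,
** BC1F/BC1T) pair up via bit 16 of their rt/condition field instead.
*/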

/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM 31

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
{
  IRIns *ir = as->ir;
  IRRef i = as->curins;
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  while (--i > ref)
    if (ir[i].o == conflict)
      return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */
}

/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
  return 0;
}

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
        if (irref_isk(ir->op2)) {
          IRRef tab = IR(ir->op1)->op1;
          int32_t ofs = asm_fuseabase(as, tab);
          IRRef refa = ofs ? tab : ir->op1;
          ofs += 8*IR(ir->op2)->i;
          if (checki16(ofs)) {
            *ofsp = ofs;
            return ra_alloc1(as, refa, allow);
          }
        }
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
        int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
        if (checki16(ofs)) {
          *ofsp = ofs;
          return ra_alloc1(as, ir->op1, allow);
        }
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
        GCfunc *fn = ir_kfunc(IR(ir->op1));
        int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
        int32_t jgl = (intptr_t)J2G(as->J);
        if ((uint32_t)(ofs-jgl) < 65536) {
          *ofsp = ofs-jgl-32768;
          return RID_JGL;
        } else {
          *ofsp = (int16_t)ofs;
          return ra_allock(as, ofs-(int16_t)ofs, allow);
        }
      }
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}

/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
                         RegSet allow, int32_t ofs)
{
  IRIns *ir = IR(ref);
  Reg base;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      int32_t ofs2;
      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
        ref = ir->op1;
        ofs = ofs2;
      }
    } else if (ir->o == IR_STRREF) {
      int32_t ofs2 = 65536;
      lua_assert(ofs == 0);
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
        ofs2 = ofs + IR(ir->op2)->i;
        ref = ir->op1;
      } else if (irref_isk(ir->op1)) {
        ofs2 = ofs + IR(ir->op1)->i;
        ref = ir->op2;
      }
      if (!checki16(ofs2)) {
        /* NYI: Fuse ADD with constant. */
        Reg right, left = ra_alloc2(as, ir, allow);
        right = (left >> 8); left &= 255;
        emit_hsi(as, mi, rt, RID_TMP, ofs);
        emit_dst(as, MIPSI_ADDU, RID_TMP, left, right);
        return;
      }
      ofs = ofs2;
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_hsi(as, mi, rt, base, ofs);
}

/* -- Calls --------------------------------------------------------------- */
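/* Sketch of the o32 argument convention implemented by the register walk
** in asm_gencall() below (hypothetical signatures, for illustration only):
** up to four argument words go in $a0-$a3, doubles are aligned to even
** GPR pairs, FP registers $f12/$f14 are used only until the first integer
** argument is seen, and the rest spills to the stack at offset 16 and up
** (the caller-reserved area):
**   f(double x, int y)  ->  x in $f12, y in $a2
**   g(int a, double b)  ->  a in $a0,  b in $a2/$a3 (moved over via mfc1)
*/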
/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 16;
#if LJ_SOFTFP
  Reg gpr = REGARG_FIRSTGPR;
#else
  Reg gpr, fpr = REGARG_FIRSTFPR;
#endif
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func, 1);
#if !LJ_SOFTFP
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
#endif
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
#if !LJ_SOFTFP
      if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
          !(ci->flags & CCI_VARARG)) {
        lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
        ra_leftov(as, fpr, ref);
        fpr += 2;
        gpr += irt_isnum(ir->t) ? 2 : 1;
      } else
#endif
      {
#if !LJ_SOFTFP
        fpr = REGARG_LASTFPR+1;
#endif
        if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
        if (gpr <= REGARG_LASTGPR) {
          lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
#if !LJ_SOFTFP
          if (irt_isfp(ir->t)) {
            RegSet of = as->freeset;
            Reg r;
            /* Workaround to protect argument GPRs from being used for remat. */
            as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
            r = ra_alloc1(as, ref, RSET_FPR);
            as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
            if (irt_isnum(ir->t)) {
              emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
              emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
              lua_assert(rset_test(as->freeset, gpr+1));  /* Already evicted. */
              gpr += 2;
            } else if (irt_isfloat(ir->t)) {
              emit_tg(as, MIPSI_MFC1, gpr, r);
              gpr++;
            }
          } else
#endif
          {
            ra_leftov(as, gpr, ref);
            gpr++;
          }
        } else {
          Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
          if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
          emit_spstore(as, ir, r, ofs);
          ofs += irt_isnum(ir->t) ? 8 : 4;
        }
      }
    } else {
#if !LJ_SOFTFP
      fpr = REGARG_LASTFPR+1;
#endif
      if (gpr <= REGARG_LASTGPR)
        gpr++;
      else
        ofs += 4;
    }
    checkmclim(as);
  }
}

/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
#if !LJ_SOFTFP
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;
#endif
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
        int32_t ofs = sps_scale(ir->s);
        Reg dest = ir->r;
        if (ra_hasreg(dest)) {
          ra_free(as, dest);
          ra_modified(as, dest);
          emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
          emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
        }
        if (ofs) {
          emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
          emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
        }
      } else {
        ra_destreg(as, ir, RID_FPRET);
      }
    } else if (hiop) {
      ra_destpair(as, ir);
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
}

static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(irf->i);
  } else {  /* Need specific register for indirect calls. */
    Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
    MCode *p = as->mcp;
    if (r == RID_CFUNCADDR)
      *--p = MIPSI_NOP;
    else
      *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r);
    *--p = MIPSI_JALR | MIPSF_S(r);
    as->mcp = p;
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}

#if !LJ_SOFTFP
static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
{
  /* The modified regs must match with the *.dasc implementation. */
  RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
                RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR);
  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
  ra_evictset(as, drop);
  ra_destreg(as, ir, RID_FPRET);
  emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
  ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
}
#endif

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guard(as, MIPSI_BNE, RID_TMP,
            ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tsi(as, MIPSI_LW, RID_TMP, base, -8);
}

/* -- Type conversions ---------------------------------------------------- */

#if !LJ_SOFTFP
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  asm_guard(as, MIPSI_BC1F, 0, 0);
  emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
  emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
  emit_tg(as, MIPSI_MFC1, dest, tmp);
  emit_fg(as, MIPSI_CVT_W_D, tmp, left);
}

static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  emit_tg(as, MIPSI_MFC1, dest, tmp);
  emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
}
#endif

static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if !LJ_SOFTFP
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
#endif
  IRRef lref = ir->op1;
  lua_assert(!(irt_isint64(ir->t) ||
               (st == IRT_I64 || st == IRT_U64)));  /* Handled by SPLIT. */
#if LJ_SOFTFP
  /* FP conversions are handled by SPLIT. */
  lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
#else
  lua_assert(irt_type(ir->t) != st);
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S,
              dest, ra_alloc1(as, lref, RSET_FPR));
    } else if (st == IRT_U32) {  /* U32 to FP conversion. */
      /* y = (x ^ 0x80000000) + 2147483648.0 */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      emit_fgh(as, irt_isfloat(ir->t) ? MIPSI_ADD_S : MIPSI_ADD_D,
               dest, dest, tmp);
      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
              dest, dest);
      if (irt_isfloat(ir->t))
        emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
                   (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
      else
        emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
                   (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
      emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
      emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
      emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
              dest, dest);
      emit_tg(as, MIPSI_MTC1, left, dest);
    }
  } else if (stfp) {  /* FP to integer conversion. */
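    /* Worked example of the 2^31 bias trick used by both unsigned paths
    ** (all intermediate values are exactly representable as doubles):
    **   u32 -> num, x = 0xffffffff:
    **     (int32_t)(x ^ 0x80000000) = 0x7fffffff = 2147483647
    **     2147483647 + 2147483648.0 = 4294967295.0
    **   num -> u32, y = 4294967295.0:
    **     (int32_t)floor(y - 2147483648.0) = 2147483647
    **     2147483647 ^ 0x80000000 = 0xffffffff
    */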
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
        /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
        emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
        emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
                tmp, tmp);
        emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
                 tmp, left, tmp);
        if (st == IRT_FLOAT)
          emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
                     (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
        else
          emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
                     (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
      } else {
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
                tmp, left);
      }
    }
  } else
#endif
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
      if ((ir->op2 & IRCONV_SEXT)) {
        if ((as->flags & JIT_F_MIPSXXR2)) {
          emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
        } else {
          uint32_t shift = st == IRT_I8 ? 24 : 16;
          emit_dta(as, MIPSI_SRA, dest, dest, shift);
          emit_dta(as, MIPSI_SLL, dest, left, shift);
        }
      } else {
        emit_tsi(as, MIPSI_ANDI, dest, left,
                 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
      }
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    }
  }
}

static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  int32_t ofs = 0;
#if LJ_SOFTFP
  ra_evictset(as, RSET_SCRATCH);
  if (ra_used(ir)) {
    if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
        (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
      int i;
      for (i = 0; i < 2; i++) {
        Reg r = (ir+i)->r;
        if (ra_hasreg(r)) {
          ra_free(as, r);
          ra_modified(as, r);
          emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
        }
      }
      ofs = sps_scale(ir->s & ~1);
    } else {
      Reg rhi = ra_dest(as, ir+1, RSET_GPR);
      Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
      emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4));
      emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0));
    }
  }
#else
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  ofs = sps_scale(ir->s);
#endif
  asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1),
           RID_SP, ofs);
}

/* -- Memory references --------------------------------------------------- */
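/* A reminder on the layout assumed throughout this section: on this
** 32 bit target a TValue is 8 bytes, either a number (double) or a type
** tag in the 'it' word plus a payload in the other word, with word order
** depending on endianness -- hence the recurring ofs+(LJ_BE?0:4) and
** ofs+(LJ_BE?4:0) offset pairs for type and payload below.
*/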
/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
    else  /* Otherwise force a spill and use the spill slot. */
      emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg type;
    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL,
             (int32_t)(offsetof(global_State, tmptv)-32768));
    if (!irt_ispri(ir->t)) {
      Reg src = ra_alloc1(as, ref, allow);
      emit_setgl(as, src, tmptv.gcr);
    }
    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
      type = ra_alloc1(as, ref+1, allow);
    else
      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    emit_setgl(as, type, tmptv.it);
  }
}

static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    ofs += 8*IR(ir->op2)->i;
    if (checki16(ofs)) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tsi(as, MIPSI_ADDIU, dest, base, ofs);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base);
  emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
}

/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;

  rset_clear(allow, tab);
#if LJ_SOFTFP
  if (!irref_isk(refkey)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
    if (irkey[1].o == IR_HIOP) {
      if (ra_hasreg((irkey+1)->r)) {
        type = tmpnum = (irkey+1)->r;
        tmp1 = ra_scratch(as, allow);
        rset_clear(allow, tmp1);
        ra_noweak(as, tmpnum);
      } else {
        type = tmpnum = ra_allocref(as, refkey+1, allow);
      }
      rset_clear(allow, tmpnum);
    } else {
      type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
      rset_clear(allow, type);
    }
  }
#else
  if (irt_isnum(kt)) {
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
  } else if (!irt_ispri(kt)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
    type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
    rset_clear(allow, type);
  }
#endif
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
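  /* Since machine code is emitted backwards, this not-found epilogue is
  ** assembled first even though it executes last; l_loop below starts out
  ** as a placeholder and is patched into a BNE once the loop body and its
  ** branch distance are known.
  */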
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));
  /* Follow hash chain until the end. */
  emit_move(as, dest, tmp2);
  l_loop = --as->mcp;
  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ) {  /* Must match asm_guard(). */
    emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
    l_end = asm_exitstub_addr(as);
  }
  if (!LJ_SOFTFP && irt_isnum(kt)) {
    emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
    emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
    *--as->mcp = MIPSI_NOP;  /* Avoid NaN comparison overhead. */
    emit_branch(as, MIPSI_BEQ, tmp2, RID_ZERO, l_next);
    emit_tsi(as, MIPSI_SLTIU, tmp2, tmp2, (int32_t)LJ_TISNUM);
    emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
  } else {
    if (irt_ispri(kt)) {
      emit_branch(as, MIPSI_BEQ, tmp2, type, l_end);
    } else {
      emit_branch(as, MIPSI_BEQ, tmp1, key, l_end);
      emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.gcr));
      emit_branch(as, MIPSI_BNE, tmp2, type, l_next);
    }
  }
  emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.it));
  *l_loop = MIPSI_BNE | MIPSF_S(tmp2) | ((as->mcp-l_loop-1) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
    if (irref_isk(refkey))
      tmphash = ra_allock(as, khash, allow);
    emit_dst(as, MIPSI_ADDU, dest, dest, tmp1);
    lua_assert(sizeof(Node) == 24);
    emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
    emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
    emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
    emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    if (irref_isk(refkey)) {
      /* Nothing to do. */
    } else if (irt_isstr(kt)) {
      emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
      emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
      emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
      emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
      emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
      if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
        emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
        if ((as->flags & JIT_F_MIPSXXR2)) {
          emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
        } else {
          emit_dst(as, MIPSI_OR, dest, dest, tmp1);
          emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1);
          emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
        }
        emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
#if LJ_SOFTFP
        emit_ds(as, MIPSI_MOVE, tmp1, type);
        emit_ds(as, MIPSI_MOVE, tmp2, key);
#else
        emit_tg(as, MIPSI_MFC1, tmp2, key);
        emit_tg(as, MIPSI_MFC1, tmp1, key+1);
#endif
      } else {
        emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
        emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
        emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
      }
    }
  }
}

static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  int32_t lo, hi;
  lua_assert(ofs % sizeof(Node) == 0);
  if (ofs > 32736) {
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tsi(as, MIPSI_ADDIU, dest, node, ofs);
  }
  if (!irt_ispri(irkey->t)) {
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  }
  if (irt_isnum(irkey->t)) {
    lo = (int32_t)ir_knum(irkey)->u32.lo;
    hi = (int32_t)ir_knum(irkey)->u32.hi;
  } else {
    lo = irkey->i;
    hi = irt_toitype(irkey->t);
    if (!ra_hasreg(key))
      goto nolo;
  }
  asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO);
nolo:
  asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
  if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
  emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
  if (ofs > 32736)
    emit_dst(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
}

static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
      emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    emit_tsi(as, MIPSI_LW, uv, func,
             (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  }
}

static void asm_fref(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir);
  lua_assert(!ra_used(ir));
}

static void asm_strref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);
  Reg r;
  if (irref_isk(ref)) {
    IRRef tmp = refk; refk = ref; ref = tmp;
  } else if (!irref_isk(refk)) {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);
      right = irr->r;
    } else if (mayfuse(as, irr->op2) &&
               irr->o == IR_ADD && irref_isk(irr->op2) &&
               checki16(ofs + IR(irr->op2)->i)) {
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    } else {
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    }
    emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs);
    emit_dst(as, MIPSI_ADDU, dest, left, right);
    return;
  }
  r = ra_alloc1(as, ref, RSET_GPR);
  ofs += IR(refk)->i;
  if (checki16(ofs))
    emit_tsi(as, MIPSI_ADDIU, dest, r, ofs);
  else
    emit_dst(as, MIPSI_ADDU, dest, r,
             ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
}

/* -- Loads and stores ---------------------------------------------------- */

static MIPSIns asm_fxloadins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: return MIPSI_LB;
  case IRT_U8: return MIPSI_LBU;
  case IRT_I16: return MIPSI_LH;
  case IRT_U16: return MIPSI_LHU;
  case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_LDC1;
  case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
  default: return MIPSI_LW;
  }
}

static MIPSIns asm_fxstoreins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return MIPSI_SB;
  case IRT_I16: case IRT_U16: return MIPSI_SH;
  case IRT_NUM: lua_assert(!LJ_SOFTFP); return MIPSI_SDC1;
  case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
  default: return MIPSI_SW;
  }
}
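/* Several of the following loads address the global state via RID_JGL,
** which holds J2G(as->J) + 32768 on this target, so any field in the
** first 64KB of global_State is reachable with a signed 16 bit
** displacement -- that is what the recurring -32768 bias corrects for.
*/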
static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  MIPSIns mi = asm_fxloadins(ir);
  Reg idx;
  int32_t ofs;
  if (ir->op1 == REF_NIL) {
    idx = RID_JGL;
    ofs = ir->op2 - 32768;
  } else {
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
        emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
        return;
      }
    }
    ofs = field_ofs[ir->op2];
  }
  lua_assert(!irt_isfp(ir->t));
  emit_tsi(as, mi, dest, idx, ofs);
}

static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    MIPSIns mi = asm_fxstoreins(ir);
    lua_assert(!irt_isfp(ir->t));
    emit_tsi(as, mi, src, idx, ofs);
  }
}

static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir,
    (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}

static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1z(as, ir->op2,
      (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);
  }
}

#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)

static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
  IRType t = hiop ? IRT_NUM : irt_type(ir->t);
  Reg dest = RID_NONE, type = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = 0;
  if (hiop && ra_used(ir+1)) {
    type = ra_dest(as, ir+1, allow);
    rset_clear(allow, type);
  }
  if (ra_used(ir)) {
    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
               irt_isint(ir->t) || irt_isaddr(ir->t));
    dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  rset_clear(allow, idx);
  if (t == IRT_NUM) {
    asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
    emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
  } else {
    asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype_(t), allow));
  }
  if (ra_hasreg(dest)) {
    if (!LJ_SOFTFP && t == IRT_NUM)
      emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
    else
      emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
  }
  emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
}

static void asm_ahustore(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = 0;
  if (ir->r == RID_SINK)
    return;
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
    if (!irt_ispri(ir->t)) {
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
    }
    if (hiop)
      type = ra_alloc1(as, (ir+1)->op2, allow);
    else
      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
  } else {
    if (ra_hasreg(src))
      emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
    emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
  }
}
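/* Stack slot s of a Lua frame lives at BASE + 8*(s-1), matching the
** 8 byte TValue slots; IRSLOAD_FRAME shifts the access by 4 bytes so the
** frame-typed half of the slot is read instead of the payload.
*/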
static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
  IRType t = hiop ? IRT_NUM : irt_type(ir->t);
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
#if LJ_SOFTFP
  lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
  if (hiop && ra_used(ir+1)) {
    type = ra_dest(as, ir+1, allow);
    rset_clear(allow, type);
  }
#else
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t = IRT_NUM;  /* Continue with a regular number type check. */
  } else
#endif
  if (ra_used(ir)) {
    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
               irt_isint(ir->t) || irt_isaddr(ir->t));
    dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
      if (t == IRT_INT) {
        Reg tmp = ra_scratch(as, RSET_FPR);
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
        dest = tmp;
        t = IRT_NUM;  /* Check for original type. */
      } else {
        Reg tmp = ra_scratch(as, RSET_GPR);
        emit_fg(as, MIPSI_CVT_D_W, dest, dest);
        emit_tg(as, MIPSI_MTC1, tmp, dest);
        dest = tmp;
        t = IRT_INT;  /* Check for original type. */
      }
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
  rset_clear(allow, base);
dotypecheck:
  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
    if (ra_noreg(type)) {
      if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
          rset_test((as->freeset & allow), dest+1)) {
        type = dest+1;
        ra_modified(as, type);
      } else {
        type = RID_TMP;
      }
    }
    if (t == IRT_NUM) {
      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
    } else {
      Reg ktype = ra_allock(as, irt_toitype_(t), allow);
      asm_guard(as, MIPSI_BNE, type, ktype);
    }
  }
  if (ra_hasreg(dest)) {
    if (!LJ_SOFTFP && t == IRT_NUM)
      emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
    else
      emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
  }
  if (ra_hasreg(type))
    emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
}

/* -- Allocations --------------------------------------------------------- */

#if LJ_HASFFI
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet drop = RSET_SCRATCH;
  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  as->gcsteps++;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
  if (ra_used(ir))
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
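  /* For CNEWI the payload is initialized inline instead of via a call:
  ** the loop below (again emitted backwards) stores one 32 bit word for
  ** sz == 4, or both halves of the 64 bit value for sz == 8, stepping
  ** 'ir' across the companion HIOP so each half lands at the right
  ** endian-dependent offset past the GCcdata header.
  */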
  if (ir->o == IR_CNEWI) {
    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
    int32_t ofs = sizeof(GCcdata);
    lua_assert(sz == 4 || sz == 8);
    if (sz == 8) {
      ofs += 4;
      lua_assert((ir+1)->o == IR_HIOP);
      if (LJ_LE) ir++;
    }
    for (;;) {
      Reg r = ra_alloc1z(as, ir->op2, allow);
      emit_tsi(as, MIPSI_SW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; if (LJ_BE) ir++; else ir--;
    }
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
  emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, MIPSI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir) ((void)0)
#endif

/* -- Write barriers ------------------------------------------------------ */

static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg link = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  emit_getgl(as, link, gc.grayagain);
  emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP);  /* Clear black bit. */
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
  emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
  emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked));
}

static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
  emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK);
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  emit_tsi(as, MIPSI_LBU, tmp, obj,
           (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}

/* -- Arithmetic and logic operations ------------------------------------- */

#if !LJ_SOFTFP
static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  emit_fgh(as, mi, dest, left, right);
}

static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_fg(as, mi, dest, left);
}

static void asm_fpmath(ASMState *as, IRIns *ir)
{
  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
    return;
  if (ir->op2 <= IRFPM_TRUNC)
    asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
  else if (ir->op2 == IRFPM_SQRT)
    asm_fpunary(as, ir, MIPSI_SQRT_D);
  else
    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
}
#endif

static void asm_add(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_ADD_D);
  } else
#endif
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
        emit_tsi(as, MIPSI_ADDIU, dest, left, k);
        return;
      }
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_dst(as, MIPSI_ADDU, dest, left, right);
  }
}

static void asm_sub(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_SUB_D);
  } else
#endif
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    emit_dst(as, MIPSI_SUBU, dest, left, right);
  }
}

static void asm_mul(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_MUL_D);
  } else
#endif
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    emit_dst(as, MIPSI_MUL, dest, left, right);
  }
}

#define asm_div(as, ir) asm_fparith(as, ir, MIPSI_DIV_D)
#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
static void asm_neg(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, MIPSI_NEG_D);
  } else
#endif
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
  }
}

#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)

static void asm_arithov(ASMState *as, IRIns *ir)
{
  Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int k = IR(ir->op2)->i;
    if (ir->o == IR_SUBOV) k = -k;
    if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
      left = ra_alloc1(as, ir->op1, RSET_GPR);
      asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      if (dest == left) emit_move(as, RID_TMP, left);
      return;
    }
  }
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
                                                 right), dest));
  asm_guard(as, MIPSI_BLTZ, RID_TMP, 0);
  emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp);
  if (ir->o == IR_ADDOV) {  /* ((dest^left) & (dest^right)) < 0 */
    emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
  } else {  /* ((dest^left) & (dest^~right)) < 0 */
    emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest);
    emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO);
  }
  emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left);
  emit_dst(as, ir->o == IR_ADDOV ? MIPSI_ADDU : MIPSI_SUBU, dest, left, right);
  if (dest == left || dest == right)
    emit_move(as, RID_TMP, dest == left ? left : right);
}
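/* Rationale for the sign tests in asm_arithov(): for d = x + y, signed
** overflow occurred iff both operands have the same sign and the result's
** sign differs, i.e. ((d^x) & (d^y)) < 0. Example: x = y = 0x40000000
** gives d = 0x80000000 and (d^x) & (d^y) = 0xc0000000, which is negative,
** so the guard fires. For SUBOV the second operand is conjugated via the
** NOR, giving the ((d^x) & (d^~y)) < 0 form.
*/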

#define asm_addov(as, ir) asm_arithov(as, ir)
#define asm_subov(as, ir) asm_arithov(as, ir)

static void asm_mulov(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
                                                 right), dest));
  asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
  emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
  emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
  emit_dst(as, MIPSI_MFLO, dest, 0, 0);
  emit_dst(as, MIPSI_MULT, 0, left, right);
}

#if LJ_HASFFI
static void asm_add64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0) {
      emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP);
      goto loarith;
    } else if (checki16(k)) {
      emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      goto loarith;
    }
  }
  emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dst(as, MIPSI_ADDU, dest, left, right);
loarith:
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0) {
      if (dest != left)
        emit_move(as, dest, left);
      return;
    } else if (checki16(k)) {
      if (dest == left) {
        Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left));
        emit_move(as, dest, tmp);
        dest = tmp;
      }
      emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  if (dest == left && dest == right) {
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
    emit_move(as, dest, tmp);
    dest = tmp;
  }
  emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left);
  emit_dst(as, MIPSI_ADDU, dest, left, right);
}

static void asm_sub64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
  emit_dst(as, MIPSI_SUBU, dest, left, right);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (dest == left) {
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
    emit_move(as, dest, tmp);
    dest = tmp;
  }
  emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest);
  emit_dst(as, MIPSI_SUBU, dest, left, right);
}

static void asm_neg64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);
  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest);
  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
}
#endif

static void asm_bnot(ASMState *as, IRIns *ir)
{
  Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
  IRIns *irl = IR(ir->op1);
  if (mayfuse(as, ir->op1) && irl->o == IR_BOR) {
    left = ra_alloc2(as, irl, RSET_GPR);
    right = (left >> 8); left &= 255;
  } else {
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = RID_ZERO;
  }
  emit_dst(as, MIPSI_NOR, dest, left, right);
}

static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  if ((as->flags & JIT_F_MIPSXXR2)) {
    emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
    emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
  } else {
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest));
    emit_dst(as, MIPSI_OR, dest, dest, tmp);
    emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
    emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00);
    emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8);
    emit_dta(as, MIPSI_SRL, dest, left, 8);
    emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00);
    emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP);
    emit_dta(as, MIPSI_SRL, tmp, left, 24);
    emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
  }
}

static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (checku16(k)) {
      emit_tsi(as, mik, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dst(as, mi, dest, left, right);
}

#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)

static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
    emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift);
  } else {
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    emit_dst(as, mi, dest, right, left);  /* Shift amount is in rs. */
  }
}

#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
#define asm_brol(as, ir) lua_assert(0)
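/* Pre-R2 MIPS lacks rotate instructions, so the fallback path below
** composes ror(x,n) = (x >> n) | (x << (32-n)), e.g.
** ror(0x12345678, 8) == 0x78123456; for variable shifts the 32-n
** counter-shift amount is computed into RID_TMP at runtime.
*/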
static void asm_bror(ASMState *as, IRIns *ir)
{
  if ((as->flags & JIT_F_MIPSXXR2)) {
    asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (irref_isk(ir->op2)) {  /* Constant shifts. */
      uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
      Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
      emit_rotr(as, dest, left, RID_TMP, shift);
    } else {
      Reg right, left = ra_alloc2(as, ir, RSET_GPR);
      right = (left >> 8); left &= 255;
      emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
      emit_dst(as, MIPSI_SRLV, dest, right, left);
      emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left);
      emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right);
    }
  }
}

#if LJ_SOFTFP
static void asm_sfpmin_max(ASMState *as, IRIns *ir)
{
  CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
  IRRef args[4];
  args[0^LJ_BE] = ir->op1;
  args[1^LJ_BE] = (ir+1)->op1;
  args[2^LJ_BE] = ir->op2;
  args[3^LJ_BE] = (ir+1)->op2;
  asm_setupresult(as, ir, &ci);
  emit_call(as, (void *)ci.func, 0);
  ci.func = NULL;
  asm_gencall(as, &ci, args);
}
#endif

static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    if (dest == left) {
      emit_fg(as, MIPSI_MOVT_D, dest, right);
    } else {
      emit_fg(as, MIPSI_MOVF_D, dest, left);
      if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
    }
    emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    if (dest == left) {
      emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
    } else {
      emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
      if (dest != right) emit_move(as, dest, right);
    }
    emit_dst(as, MIPSI_SLT, RID_TMP,
             ismax ? left : right, ismax ? right : left);
  }
}

#define asm_min(as, ir) asm_min_max(as, ir, 0)
#define asm_max(as, ir) asm_min_max(as, ir, 1)

/* -- Comparisons --------------------------------------------------------- */

#if LJ_SOFTFP
/* SFP comparisons. */
static void asm_sfpcomp(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
  RegSet drop = RSET_SCRATCH;
  Reg r;
  IRRef args[4];
  args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
  args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;

  for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
    if (!rset_test(as->freeset, r) &&
        regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
      rset_clear(drop, r);
  }
  ra_evictset(as, drop);

  asm_setupresult(as, ir, ci);

  switch ((IROp)ir->o) {
  case IR_LT:
    asm_guard(as, MIPSI_BGEZ, RID_RET, 0);
    break;
  case IR_ULT:
    asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
    emit_loadi(as, RID_TMP, 1);
    asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);
    break;
  case IR_GE:
    asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
    emit_loadi(as, RID_TMP, 2);
    asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
    break;
  case IR_LE:
    asm_guard(as, MIPSI_BGTZ, RID_RET, 0);
    break;
  case IR_GT:
    asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
    emit_loadi(as, RID_TMP, 2);
    asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
    break;
  case IR_UGE:
    asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
    break;
  case IR_ULE:
    asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
    emit_loadi(as, RID_TMP, 1);
    break;
  case IR_UGT: case IR_ABC:
    asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
    break;
  case IR_EQ: case IR_NE:
    asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO);
  default:
    break;
  }
  asm_gencall(as, ci, args);
}
#endif
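/* The IR comparison opcodes are laid out so their low bits encode the
** variant (see the ORDER IR comments): bit 2 selects the unsigned
** compares, bit 1 the operand-swapped pair (LE/GT versus LT/GE) and
** bit 0 the negated sense -- which is why asm_comp() below can derive
** SLT vs. SLTU, the operand order and the BNE/BEQ guard purely from
** bit tests on 'op'.
*/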
static void asm_comp(ASMState *as, IRIns *ir)
{
  /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
  IROp op = ir->o;
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
    emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
  } else {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    if (op == IR_ABC) op = IR_UGT;
    if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) {
      MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
                            ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
      asm_guard(as, mi, left, 0);
    } else {
      if (irref_isk(ir->op2)) {
        int32_t k = IR(ir->op2)->i;
        if ((op&2)) k++;
        if (checki16(k)) {
          asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
          emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI,
                   RID_TMP, left, k);
          return;
        }
      }
      right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
      asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT,
               RID_TMP, (op&2) ? right : left, (op&2) ? left : right);
    }
  }
}

static void asm_equal(ASMState *as, IRIns *ir)
{
  Reg right, left = ra_alloc2(as, ir,
    (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR);
  right = (left >> 8); left &= 255;
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
    emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
  } else {
    asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
  }
}
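/* The 64 bit compares below split the work in two: the hiword SLT decides
** the result unless both hiwords are equal, in which case the BEQ skips
** it and the unsigned loword compare -- sitting in the branch delay slot,
** so it always executes -- provides the verdict at l_end.
*/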
1644 
1645 static void asm_comp(ASMState *as, IRIns *ir)
1646 {
1647   /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
1648   IROp op = ir->o;
1649   if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1650     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1651     right = (left >> 8); left &= 255;
1652     asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1653     emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
1654   } else {
1655     Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
1656     if (op == IR_ABC) op = IR_UGT;
1657     if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) {
1658       MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
1659                            ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
1660       asm_guard(as, mi, left, 0);
1661     } else {
1662       if (irref_isk(ir->op2)) {
1663         int32_t k = IR(ir->op2)->i;
1664         if ((op&2)) k++;
1665         if (checki16(k)) {
1666           asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
1667           emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI,
1668                    RID_TMP, left, k);
1669           return;
1670         }
1671       }
1672       right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1673       asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
1674       emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT,
1675                RID_TMP, (op&2) ? right : left, (op&2) ? left : right);
1676     }
1677   }
1678 }
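/*
** Annotation: asm_comp() leans on the IR opcode order LT GE LE GT ULT
** UGE ULE UGT noted above. Reading the bits: bit 2 selects an unsigned
** compare (SLTU/SLTIU), bit 1 marks the swapped-operand pair LE/GT (the
** SLT then computes right < left), and the branch polarity is chosen so
** the guard exits exactly when the comparison fails. A worked example:
** IR_LT against constant 0 emits BGEZ to the exit stub, i.e. leave the
** trace when left >= 0, the negation of left < 0. On the SLTI path,
** (op&2) turns "left <= k" into "left < k+1" by bumping the constant.
*/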
1679 
1680 static void asm_equal(ASMState *as, IRIns *ir)
1681 {
1682   Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR);
1683   right = (left >> 8); left &= 255;
1684   if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1685     asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1686     emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
1687   } else {
1688     asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
1689   }
1690 }
1691 
1692 #if LJ_HASFFI
1693 /* 64 bit integer comparisons. */
1694 static void asm_comp64(ASMState *as, IRIns *ir)
1695 {
1696   /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
1697   IROp op = (ir-1)->o;
1698   MCLabel l_end;
1699   Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
1700   righthi = (lefthi >> 8); lefthi &= 255;
1701   leftlo = ra_alloc2(as, ir-1,
1702                      rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi));
1703   rightlo = (leftlo >> 8); leftlo &= 255;
1704   asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
1705   l_end = emit_label(as);
1706   if (lefthi != righthi)
1707     emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP,
1708              (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi);
1709   emit_dst(as, MIPSI_SLTU, RID_TMP,
1710            (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo);
1711   if (lefthi != righthi)
1712     emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end);
1713 }
1714 
1715 static void asm_comp64eq(ASMState *as, IRIns *ir)
1716 {
1717   Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
1718   right = (left >> 8); left &= 255;
1719   asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
1720   tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
1721   emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
1722   emit_dst(as, MIPSI_XOR, tmp, left, right);
1723   left = ra_alloc2(as, ir-1, RSET_GPR);
1724   right = (left >> 8); left &= 255;
1725   emit_dst(as, MIPSI_XOR, RID_TMP, left, right);
1726 }
1727 #endif
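/*
** Annotation: asm_comp64() compares a 64 bit value held in two 32 bit
** registers. In execution order: a BEQ on the hiwords branches forward
** to the guard, with the unsigned loword compare sitting in the branch
** delay slot, so RID_TMP already holds the loword result when the
** hiwords are equal; otherwise the fall-through signed or unsigned
** hiword compare overwrites RID_TMP before the guard tests it.
** asm_comp64eq() instead ORs together the XORs of both halves, so
** RID_TMP is zero exactly when the two 64 bit values are equal.
*/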
1728 
1729 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
1730 
1731 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1732 static void asm_hiop(ASMState *as, IRIns *ir)
1733 {
1734 #if LJ_HASFFI || LJ_SOFTFP
1735   /* HIOP is marked as a store because it needs its own DCE logic. */
1736   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
1737   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1738   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
1739     as->curins--;  /* Always skip the CONV. */
1740 #if LJ_HASFFI && !LJ_SOFTFP
1741     if (usehi || uselo)
1742       asm_conv64(as, ir);
1743     return;
1744 #endif
1745   } else if ((ir-1)->o < IR_EQ) {  /* 64 bit integer comparisons. ORDER IR. */
1746     as->curins--;  /* Always skip the loword comparison. */
1747 #if LJ_SOFTFP
1748     if (!irt_isint(ir->t)) {
1749       asm_sfpcomp(as, ir-1);
1750       return;
1751     }
1752 #endif
1753 #if LJ_HASFFI
1754     asm_comp64(as, ir);
1755 #endif
1756     return;
1757   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
1758     as->curins--;  /* Always skip the loword comparison. */
1759 #if LJ_SOFTFP
1760     if (!irt_isint(ir->t)) {
1761       asm_sfpcomp(as, ir-1);
1762       return;
1763     }
1764 #endif
1765 #if LJ_HASFFI
1766     asm_comp64eq(as, ir);
1767 #endif
1768     return;
1769 #if LJ_SOFTFP
1770   } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1771     as->curins--;  /* Always skip the loword min/max. */
1772     if (uselo || usehi)
1773       asm_sfpmin_max(as, ir-1);
1774     return;
1775 #endif
1776   } else if ((ir-1)->o == IR_XSTORE) {
1777     as->curins--;  /* Handle both stores here. */
1778     if ((ir-1)->r != RID_SINK) {
1779       asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1780       asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1781     }
1782     return;
1783   }
1784   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
1785   switch ((ir-1)->o) {
1786 #if LJ_HASFFI
1787   case IR_ADD: as->curins--; asm_add64(as, ir); break;
1788   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1789   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
1790 #endif
1791 #if LJ_SOFTFP
1792   case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1793   case IR_STRTO:
1794     if (!uselo)
1795       ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
1796     break;
1797 #endif
1798   case IR_CALLN:
1799   case IR_CALLS:
1800   case IR_CALLXS:
1801     if (!uselo)
1802       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
1803     break;
1804 #if LJ_SOFTFP
1805   case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
1806 #endif
1807   case IR_CNEWI:
1808     /* Nothing to do here. Handled by lo op itself. */
1809     break;
1810   default: lua_assert(0); break;
1811   }
1812 #else
1813   UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
1814 #endif
1815 }
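/*
** Annotation: on this 32 bit target every 64 bit IR value is split into
** a loword op directly followed by its IR_HIOP. Since the assembler
** walks the IR backwards, asm_hiop() sees the pair first: for
** conversions, comparisons, XSTORE and soft-float min/max it decrements
** as->curins and emits code for both halves at once; for calls and loads
** it merely forces the loword result to stay allocated when only the
** hiword is actually used.
*/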
1816 
1817 /* -- Profiling ----------------------------------------------------------- */
1818 
1819 static void asm_prof(ASMState *as, IRIns *ir)
1820 {
1821   UNUSED(ir);
1822   asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1823   emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
1824   emit_lsglptr(as, MIPSI_LBU, RID_TMP,
1825                (int32_t)offsetof(global_State, hookmask));
1826 }
1827 
1828 /* -- Stack handling ------------------------------------------------------ */
1829 
1830 /* Check Lua stack size for overflow. Use exit handler as fallback. */
1831 static void asm_stack_check(ASMState *as, BCReg topslot,
1832                             IRIns *irp, RegSet allow, ExitNo exitno)
1833 {
1834   /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
1835   Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
1836   ExitNo oldsnap = as->snapno;
1837   rset_clear(allow, pbase);
1838   tmp = allow ? rset_pickbot(allow) :
1839                 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
1840   as->snapno = exitno;
1841   asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1842   as->snapno = oldsnap;
1843   if (allow == RSET_EMPTY)  /* Restore temp. register. */
1844     emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0);
1845   else
1846     ra_modified(as, tmp);
1847   emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
1848   emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase);
1849   emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
1850   if (pbase == RID_TMP)
1851     emit_getgl(as, RID_TMP, jit_base);
1852   emit_getgl(as, tmp, cur_L);
1853   if (allow == RSET_EMPTY)  /* Spill temp. register. */
1854     emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
1855 }
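/*
** Annotation: read in execution order, the stack check loads the current
** lua_State from G->cur_L, fetches L->maxstack, computes maxstack - BASE
** in RID_TMP and exits through the guard when fewer than 8*topslot bytes
** (topslot 8-byte stack slots) remain. The snapno save/restore redirects
** the guard's snapshot number to the given exitno. When no free register
** is available, the helper register is spilled to sp[0] beforehand and
** reloaded afterwards.
*/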
1856 
1857 /* Restore Lua stack from on-trace state. */
1858 static void asm_stack_restore(ASMState *as, SnapShot *snap)
1859 {
1860   SnapEntry *map = &as->T->snapmap[snap->mapofs];
1861   SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
1862   MSize n, nent = snap->nent;
1863   /* Store the value of all modified slots to the Lua stack. */
1864   for (n = 0; n < nent; n++) {
1865     SnapEntry sn = map[n];
1866     BCReg s = snap_slot(sn);
1867     int32_t ofs = 8*((int32_t)s-1);
1868     IRRef ref = snap_ref(sn);
1869     IRIns *ir = IR(ref);
1870     if ((sn & SNAP_NORESTORE))
1871       continue;
1872     if (irt_isnum(ir->t)) {
1873 #if LJ_SOFTFP
1874       Reg tmp;
1875       RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1876       lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
1877       tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
1878       emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
1879       if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
1880       tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
1881       emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
1882 #else
1883       Reg src = ra_alloc1(as, ref, RSET_FPR);
1884       emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
1885 #endif
1886     } else {
1887       Reg type;
1888       RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
1889       lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
1890       if (!irt_ispri(ir->t)) {
1891         Reg src = ra_alloc1(as, ref, allow);
1892         rset_clear(allow, src);
1893         emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0));
1894       }
1895       if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1896         if (s == 0) continue;  /* Do not overwrite link to previous frame. */
1897         type = ra_allock(as, (int32_t)(*flinks--), allow);
1898 #if LJ_SOFTFP
1899       } else if ((sn & SNAP_SOFTFPNUM)) {
1900         type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
1901 #endif
1902       } else {
1903         type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1904       }
1905       emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
1906     }
1907     checkmclim(as);
1908   }
1909   lua_assert(map + nent == flinks);
1910 }
1911 
1912 /* -- GC handling --------------------------------------------------------- */
1913 
1914 /* Check GC threshold and do one or more GC steps. */
1915 static void asm_gc_check(ASMState *as)
1916 {
1917   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
1918   IRRef args[2];
1919   MCLabel l_end;
1920   Reg tmp;
1921   ra_evictset(as, RSET_SCRATCH);
1922   l_end = emit_label(as);
1923   /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
1924   /* Assumes asm_snap_prep() already done. */
1925   asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO);
1926   args[0] = ASMREF_TMP1;  /* global_State *g */
1927   args[1] = ASMREF_TMP2;  /* MSize steps     */
1928   asm_gencall(as, ci, args);
1929   emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1930   tmp = ra_releasetmp(as, ASMREF_TMP2);
1931   emit_loadi(as, tmp, as->gcsteps);
1932   /* Jump around GC step if GC total < GC threshold. */
1933   emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end);
1934   emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp);
1935   emit_getgl(as, tmp, gc.threshold);
1936   emit_getgl(as, RID_TMP, gc.total);
1937   as->gcsteps = 0;
1938   checkmclim(as);
1939 }
1940 
1941 /* -- Loop handling ------------------------------------------------------- */
1942 
1943 /* Fixup the loop branch. */
1944 static void asm_loop_fixup(ASMState *as)
1945 {
1946   MCode *p = as->mctop;
1947   MCode *target = as->mcp;
1948   p[-1] = MIPSI_NOP;
1949   if (as->loopinv) {  /* Inverted loop branch? */
1950     /* asm_guard already inverted the cond branch. Only patch the target. */
1951     p[-3] |= ((target-p+2) & 0x0000ffffu);
1952   } else {
1953     p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
1954   }
1955 }
1956 
1957 /* -- Head of trace ------------------------------------------------------- */
1958 
1959 /* Coalesce BASE register for a root trace. */
1960 static void asm_head_root_base(ASMState *as)
1961 {
1962   IRIns *ir = IR(REF_BASE);
1963   Reg r = ir->r;
1964   if (as->loopinv) as->mctop--;
1965   if (ra_hasreg(r)) {
1966     ra_free(as, r);
1967     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
1968       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
1969     if (r != RID_BASE)
1970       emit_move(as, r, RID_BASE);
1971   }
1972 }
1973 
1974 /* Coalesce BASE register for a side trace. */
1975 static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
1976 {
1977   IRIns *ir = IR(REF_BASE);
1978   Reg r = ir->r;
1979   if (as->loopinv) as->mctop--;
1980   if (ra_hasreg(r)) {
1981     ra_free(as, r);
1982     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
1983       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
1984     if (irp->r == r) {
1985       rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
1986     } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
1987       rset_clear(allow, irp->r);
1988       emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
1989     } else {
1990       emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
1991     }
1992   }
1993   return allow;
1994 }
1995 
1996 /* -- Tail of trace ------------------------------------------------------- */
1997 
1998 /* Fixup the tail code. */
1999 static void asm_tail_fixup(ASMState *as, TraceNo lnk)
2000 {
2001   MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
2002   int32_t spadj = as->T->spadjust;
2003   MCode *p = as->mctop-1;
2004   *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
2005   p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
2006 }
2007 
2008 /* Prepare tail of code. */
2009 static void asm_tail_prep(ASMState *as)
2010 {
2011   as->mcp = as->mctop-2;  /* Leave room for branch plus nop or stack adj. */
2012   as->invmcp = as->loopref ? as->mcp : NULL;
2013 }
2014 
2015 /* -- Trace setup --------------------------------------------------------- */
2016 
2017 /* Ensure there are enough stack slots for call arguments. */
2018 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2019 {
2020   IRRef args[CCI_NARGS_MAX*2];
2021   uint32_t i, nargs = CCI_XNARGS(ci);
2022   int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2023   asm_collectargs(as, ir, ci, args);
2024   for (i = 0; i < nargs; i++) {
2025     if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) &&
2026         nfpr > 0 && !(ci->flags & CCI_VARARG)) {
2027       nfpr--;
2028       ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
2029     } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) {
2030       nfpr = 0;
2031       ngpr = ngpr & ~1;
2032       if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
2033     } else {
2034       nfpr = 0;
2035       if (ngpr > 0) ngpr--; else nslots++;
2036     }
2037   }
2038   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
2039     as->evenspill = nslots;
2040   return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
2041 }
2042 
2043 static void asm_setup_target(ASMState *as)
2044 {
2045   asm_sparejump_setup(as);
2046   asm_exitstub_setup(as);
2047 }
2048 
2049 /* -- Trace patching ------------------------------------------------------ */
2050 
2051 /* Patch exit jumps of existing machine code to a new target. */
2052 void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2053 {
2054   MCode *p = T->mcode;
2055   MCode *pe = (MCode *)((char *)p + T->szmcode);
2056   MCode *px = exitstub_trace_addr(T, exitno);
2057   MCode *cstart = NULL, *cstop = NULL;
2058   MCode *mcarea = lj_mcode_patch(J, p, 0);
2059   MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno;
2060   MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
2061   for (p++; p < pe; p++) {
2062     if (*p == exitload) {  /* Look for load of exit number. */
2063       if (((p[-1] ^ (px-p)) & 0xffffu) == 0) {  /* Look for exitstub branch. */
2064         ptrdiff_t delta = target - p;
2065         if (((delta + 0x8000) >> 16) == 0) {  /* Patch in-range branch. */
2066         patchbranch:
2067           p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
2068           *p = MIPSI_NOP;  /* Replace the load of the exit number. */
2069           cstop = p;
2070           if (!cstart) cstart = p-1;
2071         } else {  /* Branch out of range. Use spare jump slot in mcarea. */
2072           int i;
2073           for (i = 2; i < 2+MIPS_SPAREJUMP*2; i += 2) {
2074             if (mcarea[i] == tjump) {
2075               delta = mcarea+i - p;
2076               goto patchbranch;
2077             } else if (mcarea[i] == MIPSI_NOP) {
2078               mcarea[i] = tjump;
2079               cstart = mcarea+i;
2080               delta = mcarea+i - p;
2081               goto patchbranch;
2082             }
2083           }
2084           /* Ignore jump slot overflow. Child trace is simply not attached. */
2085         }
2086       } else if (p+1 == pe) {
2087         /* Patch NOP after code for inverted loop branch. Use of J is ok. */
2088         lua_assert(p[1] == MIPSI_NOP);
2089         p[1] = tjump;
2090         *p = MIPSI_NOP;  /* Replace the load of the exit number. */
2091         cstop = p+2;
2092         if (!cstart) cstart = p+1;
2093       }
2094     }
2095   }
2096   if (cstart) lj_mcode_sync(cstart, cstop);
2097   lj_mcode_patch(J, mcarea, 1);
2098 }
2099 