lj_asm.c (71394B)
/*
** IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_asm_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_frame.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#endif
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_mcode.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_asm.h"
#include "lj_dispatch.h"
#include "lj_vm.h"
#include "lj_target.h"

#ifdef LUA_USE_ASSERT
#include <stdio.h>
#endif

/* -- Assembler state and common macros ----------------------------------- */

/* Assembler state. */
typedef struct ASMState {
  RegCost cost[RID_MAX];  /* Reference and blended allocation cost for regs. */

  MCode *mcp;           /* Current MCode pointer (grows down). */
  MCode *mclim;         /* Lower limit for MCode memory + red zone. */
#ifdef LUA_USE_ASSERT
  MCode *mcp_prev;      /* Red zone overflow check. */
#endif

  IRIns *ir;            /* Copy of pointer to IR instructions/constants. */
  jit_State *J;         /* JIT compiler state. */

#if LJ_TARGET_X86ORX64
  x86ModRM mrm;         /* Fused x86 address operand. */
#endif

  RegSet freeset;       /* Set of free registers. */
  RegSet modset;        /* Set of registers modified inside the loop. */
  RegSet weakset;       /* Set of weakly referenced registers. */
  RegSet phiset;        /* Set of PHI registers. */

  uint32_t flags;       /* Copy of JIT compiler flags. */
  int loopinv;          /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */

  int32_t evenspill;    /* Next even spill slot. */
  int32_t oddspill;     /* Next odd spill slot (or 0). */

  IRRef curins;         /* Reference of current instruction. */
  IRRef stopins;        /* Stop assembly before hitting this instruction. */
  IRRef orignins;       /* Original T->nins. */

  IRRef snapref;        /* Current snapshot is active after this reference. */
  IRRef snaprename;     /* Rename highwater mark for snapshot check. */
  SnapNo snapno;        /* Current snapshot number. */
  SnapNo loopsnapno;    /* Loop snapshot number. */

  IRRef fuseref;        /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
  IRRef sectref;        /* Section base reference (loopref or 0). */
  IRRef loopref;        /* Reference of LOOP instruction (or 0). */

  BCReg topslot;        /* Number of slots for stack check (unless 0). */
  int32_t gcsteps;      /* Accumulated number of GC steps (per section). */

  GCtrace *T;           /* Trace to assemble. */
  GCtrace *parent;      /* Parent trace (or NULL). */

  MCode *mcbot;         /* Bottom of reserved MCode. */
  MCode *mctop;         /* Top of generated MCode. */
  MCode *mcloop;        /* Pointer to loop MCode (or NULL). */
  MCode *invmcp;        /* Points to invertible loop branch (or NULL). */
  MCode *flagmcp;       /* Pending opportunity to merge flag setting ins. */
  MCode *realign;       /* Realign loop if not NULL. */

#ifdef RID_NUM_KREF
  int32_t krefk[RID_NUM_KREF];
#endif
  IRRef1 phireg[RID_MAX];  /* PHI register references. */
  uint16_t parentmap[LJ_MAX_JSLOTS];  /* Parent instruction to RegSP map. */
} ASMState;

#define IR(ref)      (&as->ir[(ref)])

#define ASMREF_TMP1  REF_TRUE   /* Temp. register. */
#define ASMREF_TMP2  REF_FALSE  /* Temp. register. */
#define ASMREF_L     REF_NIL    /* Stores register for L. */
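/* Note on code generation order (inferred from the fields above and the
** helpers further down): the assembler processes the IR of a trace backwards,
** from the last instruction down to the first, and as->mcp grows downwards
** while doing so. Every emit_* call therefore prepends machine code, which is
** why helpers such as ra_rename() talk about inverse moves for backwards
** code generation.
*/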

/* Check for variant to invariant references. */
#define iscrossref(as, ref)  ((ref) < as->sectref)

/* Inhibit memory op fusion from variant to invariant references. */
#define FUSE_DISABLED     (~(IRRef)0)
#define mayfuse(as, ref)  ((ref) > as->fuseref)
#define neverfuse(as)     (as->fuseref == FUSE_DISABLED)
#define canfuse(as, ir)   (!neverfuse(as) && !irt_isphi((ir)->t))
#define opisfusableload(o) \
  ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
   (o) == IR_FLOAD || (o) == IR_XLOAD || (o) == IR_SLOAD || (o) == IR_VLOAD)

/* Sparse limit checks using a red zone before the actual limit. */
#define MCLIM_REDZONE  64

static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
{
  lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
}

static LJ_AINLINE void checkmclim(ASMState *as)
{
#ifdef LUA_USE_ASSERT
  if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
    IRIns *ir = IR(as->curins+1);
    fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
            as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
    lua_assert(0);
  }
#endif
  if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
#ifdef LUA_USE_ASSERT
  as->mcp_prev = as->mcp;
#endif
}

#ifdef RID_NUM_KREF
#define ra_iskref(ref)      ((ref) < RID_NUM_KREF)
#define ra_krefreg(ref)     ((Reg)(RID_MIN_KREF + (Reg)(ref)))
#define ra_krefk(as, ref)   (as->krefk[(ref)])

static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
{
  IRRef ref = (IRRef)(r - RID_MIN_KREF);
  as->krefk[ref] = k;
  as->cost[r] = REGCOST(ref, ref);
}

#else
#define ra_iskref(ref)      0
#define ra_krefreg(ref)     RID_MIN_GPR
#define ra_krefk(as, ref)   0
#endif

/* Arch-specific field offsets. */
static const uint8_t field_ofs[IRFL__MAX+1] = {
#define FLOFS(name, ofs)  (uint8_t)(ofs),
IRFLDEF(FLOFS)
#undef FLOFS
  0
};

/* -- Target-specific instruction emitter --------------------------------- */

#if LJ_TARGET_X86ORX64
#include "lj_emit_x86.h"
#elif LJ_TARGET_ARM
#include "lj_emit_arm.h"
#elif LJ_TARGET_PPC
#include "lj_emit_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_emit_mips.h"
#else
#error "Missing instruction emitter for target CPU"
#endif

/* Generic load/store of register from/to stack slot. */
#define emit_spload(as, ir, r, ofs) \
  emit_loadofs(as, ir, (r), RID_SP, (ofs))
#define emit_spstore(as, ir, r, ofs) \
  emit_storeofs(as, ir, (r), RID_SP, (ofs))
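/* Note on the red zone check (inferred from checkmclim() above): machine code
** is emitted top-down into a buffer that shrinks towards as->mclim. The
** emitters may write at most MCLIM_REDZONE bytes between two checkmclim()
** calls, so the limit test only needs to run at a few well-known points
** instead of inside every emit_* call. If the red zone is crossed,
** asm_mclimit() reports the required size via lj_mcode_limiterr(), which
** aborts this attempt so assembly can be restarted with more room
** (cf. the LJ_TRERR_MCODELM handling in asm_setup_regsp below).
*/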

/* -- Register allocator debugging ---------------------------------------- */

/* #define LUAJIT_DEBUG_RA */

#ifdef LUAJIT_DEBUG_RA

#include <stdio.h>
#include <stdarg.h>

#define RIDNAME(name)  #name,
static const char *const ra_regname[] = {
  GPRDEF(RIDNAME)
  FPRDEF(RIDNAME)
  VRIDDEF(RIDNAME)
  NULL
};
#undef RIDNAME

static char ra_dbg_buf[65536];
static char *ra_dbg_p;
static char *ra_dbg_merge;
static MCode *ra_dbg_mcp;

static void ra_dstart(void)
{
  ra_dbg_p = ra_dbg_buf;
  ra_dbg_merge = NULL;
  ra_dbg_mcp = NULL;
}

static void ra_dflush(void)
{
  fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
  ra_dstart();
}

static void ra_dprintf(ASMState *as, const char *fmt, ...)
{
  char *p;
  va_list argp;
  va_start(argp, fmt);
  p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
  ra_dbg_mcp = NULL;
  p += sprintf(p, "%08x \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
  for (;;) {
    const char *e = strchr(fmt, '$');
    if (e == NULL) break;
    memcpy(p, fmt, (size_t)(e-fmt));
    p += e-fmt;
    if (e[1] == 'r') {
      Reg r = va_arg(argp, Reg) & RID_MASK;
      if (r <= RID_MAX) {
        const char *q;
        for (q = ra_regname[r]; *q; q++)
          *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
      } else {
        *p++ = '?';
        lua_assert(0);
      }
    } else if (e[1] == 'f' || e[1] == 'i') {
      IRRef ref;
      if (e[1] == 'f')
        ref = va_arg(argp, IRRef);
      else
        ref = va_arg(argp, IRIns *) - as->ir;
      if (ref >= REF_BIAS)
        p += sprintf(p, "%04d", ref - REF_BIAS);
      else
        p += sprintf(p, "K%03d", REF_BIAS - ref);
    } else if (e[1] == 's') {
      uint32_t slot = va_arg(argp, uint32_t);
      p += sprintf(p, "[sp+0x%x]", sps_scale(slot));
    } else if (e[1] == 'x') {
      p += sprintf(p, "%08x", va_arg(argp, int32_t));
    } else {
      lua_assert(0);
    }
    fmt = e+2;
  }
  va_end(argp);
  while (*fmt)
    *p++ = *fmt++;
  *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
  if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
    fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
    p = ra_dbg_buf;
  }
  ra_dbg_p = p;
}

#define RA_DBG_START()  ra_dstart()
#define RA_DBG_FLUSH()  ra_dflush()
#define RA_DBG_REF() \
  do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
       ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
#define RA_DBGX(x)      ra_dprintf x

#else
#define RA_DBG_START()  ((void)0)
#define RA_DBG_FLUSH()  ((void)0)
#define RA_DBG_REF()    ((void)0)
#define RA_DBGX(x)      ((void)0)
#endif

/* -- Register allocator -------------------------------------------------- */

#define ra_free(as, r)      rset_set(as->freeset, (r))
#define ra_modified(as, r)  rset_set(as->modset, (r))
#define ra_weak(as, r)      rset_set(as->weakset, (r))
#define ra_noweak(as, r)    rset_clear(as->weakset, (r))

#define ra_used(ir)  (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))

/* Setup register allocator. */
static void ra_setup(ASMState *as)
{
  Reg r;
  /* Initially all regs (except the stack pointer) are free for use. */
  as->freeset = RSET_INIT;
  as->modset = RSET_EMPTY;
  as->weakset = RSET_EMPTY;
  as->phiset = RSET_EMPTY;
  memset(as->phireg, 0, sizeof(as->phireg));
  for (r = RID_MIN_GPR; r < RID_MAX; r++)
    as->cost[r] = REGCOST(~0u, 0u);
}
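/* How the cost[] table drives eviction (a summary inferred from its uses
** below): for every allocated register, as->cost[r] records which IR
** reference currently owns the register (recovered with regcost_ref())
** blended with an allocation cost. Free registers keep the REGCOST(~0u, 0u)
** sentinel set up above. When no register in the allowed set is free,
** ra_evict() scans the table via MINCOST and restores or rematerializes the
** owner with the lowest cost.
*/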

/* Rematerialize constants. */
static Reg ra_rematk(ASMState *as, IRRef ref)
{
  IRIns *ir;
  Reg r;
  if (ra_iskref(ref)) {
    r = ra_krefreg(ref);
    lua_assert(!rset_test(as->freeset, r));
    ra_free(as, r);
    ra_modified(as, r);
    emit_loadi(as, r, ra_krefk(as, ref));
    return r;
  }
  ir = IR(ref);
  r = ir->r;
  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
  ra_free(as, r);
  ra_modified(as, r);
  ir->r = RID_INIT;  /* Do not keep any hint. */
  RA_DBGX((as, "remat     $i $r", ir, r));
#if !LJ_SOFTFP
  if (ir->o == IR_KNUM) {
    emit_loadk64(as, r, ir);
  } else
#endif
  if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
    ra_sethint(ir->r, RID_BASE);  /* Restore BASE register hint. */
    emit_getgl(as, r, jit_base);
  } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
    lua_assert(irt_isnil(ir->t));  /* REF_NIL stores ASMREF_L register. */
    emit_getgl(as, r, cur_L);
#if LJ_64
  } else if (ir->o == IR_KINT64) {
    emit_loadu64(as, r, ir_kint64(ir)->u64);
#if LJ_GC64
  } else if (ir->o == IR_KGC) {
    emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
    emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
#endif
#endif
  } else {
    lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
               ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
    emit_loadi(as, r, ir->i);
  }
  return r;
}

/* Force a spill. Allocate a new spill slot if needed. */
static int32_t ra_spill(ASMState *as, IRIns *ir)
{
  int32_t slot = ir->s;
  lua_assert(ir >= as->ir + REF_TRUE);
  if (!ra_hasspill(slot)) {
    if (irt_is64(ir->t)) {
      slot = as->evenspill;
      as->evenspill += 2;
    } else if (as->oddspill) {
      slot = as->oddspill;
      as->oddspill = 0;
    } else {
      slot = as->evenspill;
      as->oddspill = slot+1;
      as->evenspill += 2;
    }
    if (as->evenspill > 256)
      lj_trace_err(as->J, LJ_TRERR_SPILLOV);
    ir->s = (uint8_t)slot;
  }
  return sps_scale(slot);
}

/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
static Reg ra_releasetmp(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  Reg r = ir->r;
  lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
  ra_free(as, r);
  ra_modified(as, r);
  ir->r = RID_INIT;
  return r;
}

/* Restore a register (marked as free). Rematerialize or force a spill. */
static Reg ra_restore(ASMState *as, IRRef ref)
{
  if (emit_canremat(ref)) {
    return ra_rematk(as, ref);
  } else {
    IRIns *ir = IR(ref);
    int32_t ofs = ra_spill(as, ir);  /* Force a spill slot. */
    Reg r = ir->r;
    lua_assert(ra_hasreg(r));
    ra_sethint(ir->r, r);  /* Keep hint. */
    ra_free(as, r);
    if (!rset_test(as->weakset, r)) {  /* Only restore non-weak references. */
      ra_modified(as, r);
      RA_DBGX((as, "restore   $i $r", ir, r));
      emit_spload(as, ir, r, ofs);
    }
    return r;
  }
}

/* Save a register to a spill slot. */
static void ra_save(ASMState *as, IRIns *ir, Reg r)
{
  RA_DBGX((as, "save      $i $r", ir, r));
  emit_spstore(as, ir, r, sps_scale(ir->s));
}

#define MINCOST(name) \
  if (rset_test(RSET_ALL, RID_##name) && \
      LJ_LIKELY(allow&RID2RSET(RID_##name)) && as->cost[RID_##name] < cost) \
    cost = as->cost[RID_##name];

/* Evict the register with the lowest cost, forcing a restore. */
static Reg ra_evict(ASMState *as, RegSet allow)
{
  IRRef ref;
  RegCost cost = ~(RegCost)0;
  lua_assert(allow != RSET_EMPTY);
  if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
    GPRDEF(MINCOST)
  } else {
    FPRDEF(MINCOST)
  }
  ref = regcost_ref(cost);
  lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
  /* Preferably pick any weak ref instead of a non-weak, non-const ref. */
  if (!irref_isk(ref) && (as->weakset & allow)) {
    IRIns *ir = IR(ref);
    if (!rset_test(as->weakset, ir->r))
      ref = regcost_ref(as->cost[rset_pickbot((as->weakset & allow))]);
  }
  return ra_restore(as, ref);
}

/* Pick any register (marked as free). Evict on-demand. */
static Reg ra_pick(ASMState *as, RegSet allow)
{
  RegSet pick = as->freeset & allow;
  if (!pick)
    return ra_evict(as, allow);
  else
    return rset_picktop(pick);
}

/* Get a scratch register (marked as free).
*/ 466 static Reg ra_scratch(ASMState *as, RegSet allow) 467 { 468 Reg r = ra_pick(as, allow); 469 ra_modified(as, r); 470 RA_DBGX((as, "scratch $r", r)); 471 return r; 472 } 473 474 /* Evict all registers from a set (if not free). */ 475 static void ra_evictset(ASMState *as, RegSet drop) 476 { 477 RegSet work; 478 as->modset |= drop; 479 #if !LJ_SOFTFP 480 work = (drop & ~as->freeset) & RSET_FPR; 481 while (work) { 482 Reg r = rset_pickbot(work); 483 ra_restore(as, regcost_ref(as->cost[r])); 484 rset_clear(work, r); 485 checkmclim(as); 486 } 487 #endif 488 work = (drop & ~as->freeset); 489 while (work) { 490 Reg r = rset_pickbot(work); 491 ra_restore(as, regcost_ref(as->cost[r])); 492 rset_clear(work, r); 493 checkmclim(as); 494 } 495 } 496 497 /* Evict (rematerialize) all registers allocated to constants. */ 498 static void ra_evictk(ASMState *as) 499 { 500 RegSet work; 501 #if !LJ_SOFTFP 502 work = ~as->freeset & RSET_FPR; 503 while (work) { 504 Reg r = rset_pickbot(work); 505 IRRef ref = regcost_ref(as->cost[r]); 506 if (emit_canremat(ref) && irref_isk(ref)) { 507 ra_rematk(as, ref); 508 checkmclim(as); 509 } 510 rset_clear(work, r); 511 } 512 #endif 513 work = ~as->freeset & RSET_GPR; 514 while (work) { 515 Reg r = rset_pickbot(work); 516 IRRef ref = regcost_ref(as->cost[r]); 517 if (emit_canremat(ref) && irref_isk(ref)) { 518 ra_rematk(as, ref); 519 checkmclim(as); 520 } 521 rset_clear(work, r); 522 } 523 } 524 525 #ifdef RID_NUM_KREF 526 /* Allocate a register for a constant. */ 527 static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) 528 { 529 /* First try to find a register which already holds the same constant. */ 530 RegSet pick, work = ~as->freeset & RSET_GPR; 531 Reg r; 532 while (work) { 533 IRRef ref; 534 r = rset_pickbot(work); 535 ref = regcost_ref(as->cost[r]); 536 if (ref < ASMREF_L && 537 k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) 538 return r; 539 rset_clear(work, r); 540 } 541 pick = as->freeset & allow; 542 if (pick) { 543 /* Constants should preferably get unmodified registers. */ 544 if ((pick & ~as->modset)) 545 pick &= ~as->modset; 546 r = rset_pickbot(pick); /* Reduce conflicts with inverse allocation. */ 547 } else { 548 r = ra_evict(as, allow); 549 } 550 RA_DBGX((as, "allock $x $r", k, r)); 551 ra_setkref(as, r, k); 552 rset_clear(as->freeset, r); 553 ra_noweak(as, r); 554 return r; 555 } 556 557 /* Allocate a specific register for a constant. */ 558 static void ra_allockreg(ASMState *as, int32_t k, Reg r) 559 { 560 Reg kr = ra_allock(as, k, RID2RSET(r)); 561 if (kr != r) { 562 IRIns irdummy; 563 irdummy.t.irt = IRT_INT; 564 ra_scratch(as, RID2RSET(r)); 565 emit_movrr(as, &irdummy, r, kr); 566 } 567 } 568 #else 569 #define ra_allockreg(as, k, r) emit_loadi(as, (r), (k)) 570 #endif 571 572 /* Allocate a register for ref from the allowed set of registers. 573 ** Note: this function assumes the ref does NOT have a register yet! 574 ** Picks an optimal register, sets the cost and marks the register as non-free. 575 */ 576 static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) 577 { 578 IRIns *ir = IR(ref); 579 RegSet pick = as->freeset & allow; 580 Reg r; 581 lua_assert(ra_noreg(ir->r)); 582 if (pick) { 583 /* First check register hint from propagation or PHI. */ 584 if (ra_hashint(ir->r)) { 585 r = ra_gethint(ir->r); 586 if (rset_test(pick, r)) /* Use hint register if possible. */ 587 goto found; 588 /* Rematerialization is cheaper than missing a hint. 
*/ 589 if (rset_test(allow, r) && emit_canremat(regcost_ref(as->cost[r]))) { 590 ra_rematk(as, regcost_ref(as->cost[r])); 591 goto found; 592 } 593 RA_DBGX((as, "hintmiss $f $r", ref, r)); 594 } 595 /* Invariants should preferably get unmodified registers. */ 596 if (ref < as->loopref && !irt_isphi(ir->t)) { 597 if ((pick & ~as->modset)) 598 pick &= ~as->modset; 599 r = rset_pickbot(pick); /* Reduce conflicts with inverse allocation. */ 600 } else { 601 /* We've got plenty of regs, so get callee-save regs if possible. */ 602 if (RID_NUM_GPR > 8 && (pick & ~RSET_SCRATCH)) 603 pick &= ~RSET_SCRATCH; 604 r = rset_picktop(pick); 605 } 606 } else { 607 r = ra_evict(as, allow); 608 } 609 found: 610 RA_DBGX((as, "alloc $f $r", ref, r)); 611 ir->r = (uint8_t)r; 612 rset_clear(as->freeset, r); 613 ra_noweak(as, r); 614 as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t)); 615 return r; 616 } 617 618 /* Allocate a register on-demand. */ 619 static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) 620 { 621 Reg r = IR(ref)->r; 622 /* Note: allow is ignored if the register is already allocated. */ 623 if (ra_noreg(r)) r = ra_allocref(as, ref, allow); 624 ra_noweak(as, r); 625 return r; 626 } 627 628 /* Add a register rename to the IR. */ 629 static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno) 630 { 631 IRRef ren; 632 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno); 633 ren = tref_ref(lj_ir_emit(as->J)); 634 as->J->cur.ir[ren].r = (uint8_t)down; 635 as->J->cur.ir[ren].s = SPS_NONE; 636 } 637 638 /* Rename register allocation and emit move. */ 639 static void ra_rename(ASMState *as, Reg down, Reg up) 640 { 641 IRRef ref = regcost_ref(as->cost[up] = as->cost[down]); 642 IRIns *ir = IR(ref); 643 ir->r = (uint8_t)up; 644 as->cost[down] = 0; 645 lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); 646 lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); 647 ra_free(as, down); /* 'down' is free ... */ 648 ra_modified(as, down); 649 rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ 650 ra_noweak(as, up); 651 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); 652 emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ 653 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ 654 ra_addrename(as, down, ref, as->snapno); 655 } 656 } 657 658 /* Pick a destination register (marked as free). 659 ** Caveat: allow is ignored if there's already a destination register. 660 ** Use ra_destreg() to get a specific register. 661 */ 662 static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow) 663 { 664 Reg dest = ir->r; 665 if (ra_hasreg(dest)) { 666 ra_free(as, dest); 667 ra_modified(as, dest); 668 } else { 669 if (ra_hashint(dest) && rset_test((as->freeset&allow), ra_gethint(dest))) { 670 dest = ra_gethint(dest); 671 ra_modified(as, dest); 672 RA_DBGX((as, "dest $r", dest)); 673 } else { 674 dest = ra_scratch(as, allow); 675 } 676 ir->r = dest; 677 } 678 if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest); 679 return dest; 680 } 681 682 /* Force a specific destination register (marked as free). */ 683 static void ra_destreg(ASMState *as, IRIns *ir, Reg r) 684 { 685 Reg dest = ra_dest(as, ir, RID2RSET(r)); 686 if (dest != r) { 687 lua_assert(rset_test(as->freeset, r)); 688 ra_modified(as, r); 689 emit_movrr(as, ir, dest, r); 690 } 691 } 692 693 #if LJ_TARGET_X86ORX64 694 /* Propagate dest register to left reference. Emit moves as needed. 
695 ** This is a required fixup step for all 2-operand machine instructions. 696 */ 697 static void ra_left(ASMState *as, Reg dest, IRRef lref) 698 { 699 IRIns *ir = IR(lref); 700 Reg left = ir->r; 701 if (ra_noreg(left)) { 702 if (irref_isk(lref)) { 703 if (ir->o == IR_KNUM) { 704 /* FP remat needs a load except for +0. Still better than eviction. */ 705 if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { 706 emit_loadk64(as, dest, ir); 707 return; 708 } 709 #if LJ_64 710 } else if (ir->o == IR_KINT64) { 711 emit_loadk64(as, dest, ir); 712 return; 713 #if LJ_GC64 714 } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { 715 emit_loadk64(as, dest, ir); 716 return; 717 #endif 718 #endif 719 } else if (ir->o != IR_KPRI) { 720 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 721 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 722 emit_loadi(as, dest, ir->i); 723 return; 724 } 725 } 726 if (!ra_hashint(left) && !iscrossref(as, lref)) 727 ra_sethint(ir->r, dest); /* Propagate register hint. */ 728 left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR); 729 } 730 ra_noweak(as, left); 731 /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */ 732 if (dest != left) { 733 /* Use register renaming if dest is the PHI reg. */ 734 if (irt_isphi(ir->t) && as->phireg[dest] == lref) { 735 ra_modified(as, left); 736 ra_rename(as, left, dest); 737 } else { 738 emit_movrr(as, ir, dest, left); 739 } 740 } 741 } 742 #else 743 /* Similar to ra_left, except we override any hints. */ 744 static void ra_leftov(ASMState *as, Reg dest, IRRef lref) 745 { 746 IRIns *ir = IR(lref); 747 Reg left = ir->r; 748 if (ra_noreg(left)) { 749 ra_sethint(ir->r, dest); /* Propagate register hint. */ 750 left = ra_allocref(as, lref, 751 (LJ_SOFTFP || dest < RID_MAX_GPR) ? RSET_GPR : RSET_FPR); 752 } 753 ra_noweak(as, left); 754 if (dest != left) { 755 /* Use register renaming if dest is the PHI reg. */ 756 if (irt_isphi(ir->t) && as->phireg[dest] == lref) { 757 ra_modified(as, left); 758 ra_rename(as, left, dest); 759 } else { 760 emit_movrr(as, ir, dest, left); 761 } 762 } 763 } 764 #endif 765 766 #if !LJ_64 767 /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ 768 static void ra_destpair(ASMState *as, IRIns *ir) 769 { 770 Reg destlo = ir->r, desthi = (ir+1)->r; 771 /* First spill unrelated refs blocking the destination registers. */ 772 if (!rset_test(as->freeset, RID_RETLO) && 773 destlo != RID_RETLO && desthi != RID_RETLO) 774 ra_restore(as, regcost_ref(as->cost[RID_RETLO])); 775 if (!rset_test(as->freeset, RID_RETHI) && 776 destlo != RID_RETHI && desthi != RID_RETHI) 777 ra_restore(as, regcost_ref(as->cost[RID_RETHI])); 778 /* Next free the destination registers (if any). */ 779 if (ra_hasreg(destlo)) { 780 ra_free(as, destlo); 781 ra_modified(as, destlo); 782 } else { 783 destlo = RID_RETLO; 784 } 785 if (ra_hasreg(desthi)) { 786 ra_free(as, desthi); 787 ra_modified(as, desthi); 788 } else { 789 desthi = RID_RETHI; 790 } 791 /* Check for conflicts and shuffle the registers as needed. 
*/ 792 if (destlo == RID_RETHI) { 793 if (desthi == RID_RETLO) { 794 #if LJ_TARGET_X86 795 *--as->mcp = XI_XCHGa + RID_RETHI; 796 #else 797 emit_movrr(as, ir, RID_RETHI, RID_TMP); 798 emit_movrr(as, ir, RID_RETLO, RID_RETHI); 799 emit_movrr(as, ir, RID_TMP, RID_RETLO); 800 #endif 801 } else { 802 emit_movrr(as, ir, RID_RETHI, RID_RETLO); 803 if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); 804 } 805 } else if (desthi == RID_RETLO) { 806 emit_movrr(as, ir, RID_RETLO, RID_RETHI); 807 if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); 808 } else { 809 if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); 810 if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); 811 } 812 /* Restore spill slots (if any). */ 813 if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); 814 if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); 815 } 816 #endif 817 818 /* -- Snapshot handling --------- ----------------------------------------- */ 819 820 /* Can we rematerialize a KNUM instead of forcing a spill? */ 821 static int asm_snap_canremat(ASMState *as) 822 { 823 Reg r; 824 for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++) 825 if (irref_isk(regcost_ref(as->cost[r]))) 826 return 1; 827 return 0; 828 } 829 830 /* Check whether a sunk store corresponds to an allocation. */ 831 static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs) 832 { 833 if (irs->s == 255) { 834 if (irs->o == IR_ASTORE || irs->o == IR_HSTORE || 835 irs->o == IR_FSTORE || irs->o == IR_XSTORE) { 836 IRIns *irk = IR(irs->op1); 837 if (irk->o == IR_AREF || irk->o == IR_HREFK) 838 irk = IR(irk->op1); 839 return (IR(irk->op1) == ira); 840 } 841 return 0; 842 } else { 843 return (ira + irs->s == irs); /* Quick check. */ 844 } 845 } 846 847 /* Allocate register or spill slot for a ref that escapes to a snapshot. */ 848 static void asm_snap_alloc1(ASMState *as, IRRef ref) 849 { 850 IRIns *ir = IR(ref); 851 if (!irref_isk(ref) && (!(ra_used(ir) || ir->r == RID_SUNK))) { 852 if (ir->r == RID_SINK) { 853 ir->r = RID_SUNK; 854 #if LJ_HASFFI 855 if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */ 856 asm_snap_alloc1(as, ir->op2); 857 if (LJ_32 && (ir+1)->o == IR_HIOP) 858 asm_snap_alloc1(as, (ir+1)->op2); 859 } else 860 #endif 861 { /* Allocate stored values for TNEW, TDUP and CNEW. */ 862 IRIns *irs; 863 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); 864 for (irs = IR(as->snapref-1); irs > ir; irs--) 865 if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { 866 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || 867 irs->o == IR_FSTORE || irs->o == IR_XSTORE); 868 asm_snap_alloc1(as, irs->op2); 869 if (LJ_32 && (irs+1)->o == IR_HIOP) 870 asm_snap_alloc1(as, (irs+1)->op2); 871 } 872 } 873 } else { 874 RegSet allow; 875 if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) { 876 IRIns *irc; 877 for (irc = IR(as->curins); irc > ir; irc--) 878 if ((irc->op1 == ref || irc->op2 == ref) && 879 !(irc->r == RID_SINK || irc->r == RID_SUNK)) 880 goto nosink; /* Don't sink conversion if result is used. */ 881 asm_snap_alloc1(as, ir->op1); 882 return; 883 } 884 nosink: 885 allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR; 886 if ((as->freeset & allow) || 887 (allow == RSET_FPR && asm_snap_canremat(as))) { 888 /* Get a weak register if we have a free one or can rematerialize. */ 889 Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ 890 if (!irt_isphi(ir->t)) 891 ra_weak(as, r); /* But mark it as weakly referenced. 
*/ 892 checkmclim(as); 893 RA_DBGX((as, "snapreg $f $r", ref, ir->r)); 894 } else { 895 ra_spill(as, ir); /* Otherwise force a spill slot. */ 896 RA_DBGX((as, "snapspill $f $s", ref, ir->s)); 897 } 898 } 899 } 900 } 901 902 /* Allocate refs escaping to a snapshot. */ 903 static void asm_snap_alloc(ASMState *as) 904 { 905 SnapShot *snap = &as->T->snap[as->snapno]; 906 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 907 MSize n, nent = snap->nent; 908 for (n = 0; n < nent; n++) { 909 SnapEntry sn = map[n]; 910 IRRef ref = snap_ref(sn); 911 if (!irref_isk(ref)) { 912 asm_snap_alloc1(as, ref); 913 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { 914 lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); 915 asm_snap_alloc1(as, ref+1); 916 } 917 } 918 } 919 } 920 921 /* All guards for a snapshot use the same exitno. This is currently the 922 ** same as the snapshot number. Since the exact origin of the exit cannot 923 ** be determined, all guards for the same snapshot must exit with the same 924 ** RegSP mapping. 925 ** A renamed ref which has been used in a prior guard for the same snapshot 926 ** would cause an inconsistency. The easy way out is to force a spill slot. 927 */ 928 static int asm_snap_checkrename(ASMState *as, IRRef ren) 929 { 930 SnapShot *snap = &as->T->snap[as->snapno]; 931 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 932 MSize n, nent = snap->nent; 933 for (n = 0; n < nent; n++) { 934 SnapEntry sn = map[n]; 935 IRRef ref = snap_ref(sn); 936 if (ref == ren || (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && ++ref == ren)) { 937 IRIns *ir = IR(ref); 938 ra_spill(as, ir); /* Register renamed, so force a spill slot. */ 939 RA_DBGX((as, "snaprensp $f $s", ref, ir->s)); 940 return 1; /* Found. */ 941 } 942 } 943 return 0; /* Not found. */ 944 } 945 946 /* Prepare snapshot for next guard instruction. */ 947 static void asm_snap_prep(ASMState *as) 948 { 949 if (as->curins < as->snapref) { 950 do { 951 if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ 952 as->snapno--; 953 as->snapref = as->T->snap[as->snapno].ref; 954 } while (as->curins < as->snapref); 955 asm_snap_alloc(as); 956 as->snaprename = as->T->nins; 957 } else { 958 /* Process any renames above the highwater mark. */ 959 for (; as->snaprename < as->T->nins; as->snaprename++) { 960 IRIns *ir = &as->T->ir[as->snaprename]; 961 if (asm_snap_checkrename(as, ir->op1)) 962 ir->op2 = REF_BIAS-1; /* Kill rename. */ 963 } 964 } 965 } 966 967 /* -- Miscellaneous helpers ----------------------------------------------- */ 968 969 /* Calculate stack adjustment. */ 970 static int32_t asm_stack_adjust(ASMState *as) 971 { 972 if (as->evenspill <= SPS_FIXED) 973 return 0; 974 return sps_scale(sps_align(as->evenspill)); 975 } 976 977 /* Must match with hash*() in lj_tab.c. 
*/ 978 static uint32_t ir_khash(IRIns *ir) 979 { 980 uint32_t lo, hi; 981 if (irt_isstr(ir->t)) { 982 return ir_kstr(ir)->hash; 983 } else if (irt_isnum(ir->t)) { 984 lo = ir_knum(ir)->u32.lo; 985 hi = ir_knum(ir)->u32.hi << 1; 986 } else if (irt_ispri(ir->t)) { 987 lua_assert(!irt_isnil(ir->t)); 988 return irt_type(ir->t)-IRT_FALSE; 989 } else { 990 lua_assert(irt_isgcv(ir->t)); 991 lo = u32ptr(ir_kgc(ir)); 992 hi = lo + HASH_BIAS; 993 } 994 return hashrot(lo, hi); 995 } 996 997 /* -- Allocations --------------------------------------------------------- */ 998 999 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args); 1000 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci); 1001 1002 static void asm_snew(ASMState *as, IRIns *ir) 1003 { 1004 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; 1005 IRRef args[3]; 1006 args[0] = ASMREF_L; /* lua_State *L */ 1007 args[1] = ir->op1; /* const char *str */ 1008 args[2] = ir->op2; /* size_t len */ 1009 as->gcsteps++; 1010 asm_setupresult(as, ir, ci); /* GCstr * */ 1011 asm_gencall(as, ci, args); 1012 } 1013 1014 static void asm_tnew(ASMState *as, IRIns *ir) 1015 { 1016 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; 1017 IRRef args[2]; 1018 args[0] = ASMREF_L; /* lua_State *L */ 1019 args[1] = ASMREF_TMP1; /* uint32_t ahsize */ 1020 as->gcsteps++; 1021 asm_setupresult(as, ir, ci); /* GCtab * */ 1022 asm_gencall(as, ci, args); 1023 ra_allockreg(as, ir->op1 | (ir->op2 << 24), ra_releasetmp(as, ASMREF_TMP1)); 1024 } 1025 1026 static void asm_tdup(ASMState *as, IRIns *ir) 1027 { 1028 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; 1029 IRRef args[2]; 1030 args[0] = ASMREF_L; /* lua_State *L */ 1031 args[1] = ir->op1; /* const GCtab *kt */ 1032 as->gcsteps++; 1033 asm_setupresult(as, ir, ci); /* GCtab * */ 1034 asm_gencall(as, ci, args); 1035 } 1036 1037 static void asm_gc_check(ASMState *as); 1038 1039 /* Explicit GC step. */ 1040 static void asm_gcstep(ASMState *as, IRIns *ir) 1041 { 1042 IRIns *ira; 1043 for (ira = IR(as->stopins+1); ira < ir; ira++) 1044 if ((ira->o == IR_TNEW || ira->o == IR_TDUP || 1045 (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI))) && 1046 ra_used(ira)) 1047 as->gcsteps++; 1048 if (as->gcsteps) 1049 asm_gc_check(as); 1050 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1051 } 1052 1053 /* -- Buffer operations --------------------------------------------------- */ 1054 1055 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref); 1056 1057 static void asm_bufhdr(ASMState *as, IRIns *ir) 1058 { 1059 Reg sb = ra_dest(as, ir, RSET_GPR); 1060 if ((ir->op2 & IRBUFHDR_APPEND)) { 1061 /* Rematerialize const buffer pointer instead of likely spill. */ 1062 IRIns *irp = IR(ir->op1); 1063 if (!(ra_hasreg(irp->r) || irp == ir-1 || 1064 (irp == ir-2 && !ra_used(ir-1)))) { 1065 while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND))) 1066 irp = IR(irp->op1); 1067 if (irref_isk(irp->op1)) { 1068 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); 1069 ir = irp; 1070 } 1071 } 1072 } else { 1073 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); 1074 /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. 
*/ 1075 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p)); 1076 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b)); 1077 } 1078 #if LJ_TARGET_X86ORX64 1079 ra_left(as, sb, ir->op1); 1080 #else 1081 ra_leftov(as, sb, ir->op1); 1082 #endif 1083 } 1084 1085 static void asm_bufput(ASMState *as, IRIns *ir) 1086 { 1087 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; 1088 IRRef args[3]; 1089 IRIns *irs; 1090 int kchar = -1; 1091 args[0] = ir->op1; /* SBuf * */ 1092 args[1] = ir->op2; /* GCstr * */ 1093 irs = IR(ir->op2); 1094 lua_assert(irt_isstr(irs->t)); 1095 if (irs->o == IR_KGC) { 1096 GCstr *s = ir_kstr(irs); 1097 if (s->len == 1) { /* Optimize put of single-char string constant. */ 1098 kchar = strdata(s)[0]; 1099 args[1] = ASMREF_TMP1; /* int, truncated to char */ 1100 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; 1101 } 1102 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) { 1103 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */ 1104 if (irs->op2 == IRTOSTR_NUM) { 1105 args[1] = ASMREF_TMP1; /* TValue * */ 1106 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; 1107 } else { 1108 lua_assert(irt_isinteger(IR(irs->op1)->t)); 1109 args[1] = irs->op1; /* int */ 1110 if (irs->op2 == IRTOSTR_INT) 1111 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; 1112 else 1113 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; 1114 } 1115 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */ 1116 args[1] = irs->op1; /* const void * */ 1117 args[2] = irs->op2; /* MSize */ 1118 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem]; 1119 } 1120 } 1121 asm_setupresult(as, ir, ci); /* SBuf * */ 1122 asm_gencall(as, ci, args); 1123 if (args[1] == ASMREF_TMP1) { 1124 Reg tmp = ra_releasetmp(as, ASMREF_TMP1); 1125 if (kchar == -1) 1126 asm_tvptr(as, tmp, irs->op1); 1127 else 1128 ra_allockreg(as, kchar, tmp); 1129 } 1130 } 1131 1132 static void asm_bufstr(ASMState *as, IRIns *ir) 1133 { 1134 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; 1135 IRRef args[1]; 1136 args[0] = ir->op1; /* SBuf *sb */ 1137 as->gcsteps++; 1138 asm_setupresult(as, ir, ci); /* GCstr * */ 1139 asm_gencall(as, ci, args); 1140 } 1141 1142 /* -- Type conversions ---------------------------------------------------- */ 1143 1144 static void asm_tostr(ASMState *as, IRIns *ir) 1145 { 1146 const CCallInfo *ci; 1147 IRRef args[2]; 1148 args[0] = ASMREF_L; 1149 as->gcsteps++; 1150 if (ir->op2 == IRTOSTR_NUM) { 1151 args[1] = ASMREF_TMP1; /* cTValue * */ 1152 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num]; 1153 } else { 1154 args[1] = ir->op1; /* int32_t k */ 1155 if (ir->op2 == IRTOSTR_INT) 1156 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int]; 1157 else 1158 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char]; 1159 } 1160 asm_setupresult(as, ir, ci); /* GCstr * */ 1161 asm_gencall(as, ci, args); 1162 if (ir->op2 == IRTOSTR_NUM) 1163 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); 1164 } 1165 1166 #if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 1167 static void asm_conv64(ASMState *as, IRIns *ir) 1168 { 1169 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); 1170 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); 1171 IRCallID id; 1172 IRRef args[2]; 1173 lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP); 1174 args[LJ_BE] = (ir-1)->op1; 1175 args[LJ_LE] = ir->op1; 1176 if (st == IRT_NUM || st == IRT_FLOAT) { 1177 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); 1178 ir--; 1179 } else { 1180 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 
2 : 0) + (st - IRT_I64); 1181 } 1182 { 1183 #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP 1184 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; 1185 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ 1186 #else 1187 const CCallInfo *ci = &lj_ir_callinfo[id]; 1188 #endif 1189 asm_setupresult(as, ir, ci); 1190 asm_gencall(as, ci, args); 1191 } 1192 } 1193 #endif 1194 1195 /* -- Memory references --------------------------------------------------- */ 1196 1197 static void asm_newref(ASMState *as, IRIns *ir) 1198 { 1199 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; 1200 IRRef args[3]; 1201 if (ir->r == RID_SINK) 1202 return; 1203 args[0] = ASMREF_L; /* lua_State *L */ 1204 args[1] = ir->op1; /* GCtab *t */ 1205 args[2] = ASMREF_TMP1; /* cTValue *key */ 1206 asm_setupresult(as, ir, ci); /* TValue * */ 1207 asm_gencall(as, ci, args); 1208 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); 1209 } 1210 1211 static void asm_lref(ASMState *as, IRIns *ir) 1212 { 1213 Reg r = ra_dest(as, ir, RSET_GPR); 1214 #if LJ_TARGET_X86ORX64 1215 ra_left(as, r, ASMREF_L); 1216 #else 1217 ra_leftov(as, r, ASMREF_L); 1218 #endif 1219 } 1220 1221 /* -- Calls --------------------------------------------------------------- */ 1222 1223 /* Collect arguments from CALL* and CARG instructions. */ 1224 static void asm_collectargs(ASMState *as, IRIns *ir, 1225 const CCallInfo *ci, IRRef *args) 1226 { 1227 uint32_t n = CCI_XNARGS(ci); 1228 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ 1229 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } 1230 while (n-- > 1) { 1231 ir = IR(ir->op1); 1232 lua_assert(ir->o == IR_CARG); 1233 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; 1234 } 1235 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; 1236 lua_assert(IR(ir->op1)->o != IR_CARG); 1237 } 1238 1239 /* Reconstruct CCallInfo flags for CALLX*. */ 1240 static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) 1241 { 1242 uint32_t nargs = 0; 1243 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ 1244 IRIns *ira = IR(ir->op1); 1245 nargs++; 1246 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } 1247 } 1248 #if LJ_HASFFI 1249 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ 1250 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; 1251 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); 1252 nargs |= ((ct->info & CTF_VARARG) ? 
CCI_VARARG : 0); 1253 #if LJ_TARGET_X86 1254 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); 1255 #endif 1256 } 1257 #endif 1258 return (nargs | (ir->t.irt << CCI_OTSHIFT)); 1259 } 1260 1261 static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1262 { 1263 const CCallInfo *ci = &lj_ir_callinfo[id]; 1264 IRRef args[2]; 1265 args[0] = ir->op1; 1266 args[1] = ir->op2; 1267 asm_setupresult(as, ir, ci); 1268 asm_gencall(as, ci, args); 1269 } 1270 1271 static void asm_call(ASMState *as, IRIns *ir) 1272 { 1273 IRRef args[CCI_NARGS_MAX]; 1274 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 1275 asm_collectargs(as, ir, ci, args); 1276 asm_setupresult(as, ir, ci); 1277 asm_gencall(as, ci, args); 1278 } 1279 1280 #if !LJ_SOFTFP 1281 static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref) 1282 { 1283 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1284 IRRef args[2]; 1285 args[0] = lref; 1286 args[1] = rref; 1287 asm_setupresult(as, ir, ci); 1288 asm_gencall(as, ci, args); 1289 } 1290 1291 static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1292 { 1293 IRIns *irp = IR(ir->op1); 1294 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1295 IRIns *irpp = IR(irp->op1); 1296 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1297 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1298 asm_fppow(as, ir, irpp->op1, irp->op2); 1299 return 1; 1300 } 1301 } 1302 return 0; 1303 } 1304 #endif 1305 1306 /* -- PHI and loop handling ----------------------------------------------- */ 1307 1308 /* Break a PHI cycle by renaming to a free register (evict if needed). */ 1309 static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby, 1310 RegSet allow) 1311 { 1312 RegSet candidates = blocked & allow; 1313 if (candidates) { /* If this register file has candidates. */ 1314 /* Note: the set for ra_pick cannot be empty, since each register file 1315 ** has some registers never allocated to PHIs. 1316 */ 1317 Reg down, up = ra_pick(as, ~blocked & allow); /* Get a free register. */ 1318 if (candidates & ~blockedby) /* Optimize shifts, else it's a cycle. */ 1319 candidates = candidates & ~blockedby; 1320 down = rset_picktop(candidates); /* Pick candidate PHI register. */ 1321 ra_rename(as, down, up); /* And rename it to the free register. */ 1322 } 1323 } 1324 1325 /* PHI register shuffling. 1326 ** 1327 ** The allocator tries hard to preserve PHI register assignments across 1328 ** the loop body. Most of the time this loop does nothing, since there 1329 ** are no register mismatches. 1330 ** 1331 ** If a register mismatch is detected and ... 1332 ** - the register is currently free: rename it. 1333 ** - the register is blocked by an invariant: restore/remat and rename it. 1334 ** - Otherwise the register is used by another PHI, so mark it as blocked. 1335 ** 1336 ** The renames are order-sensitive, so just retry the loop if a register 1337 ** is marked as blocked, but has been freed in the meantime. A cycle is 1338 ** detected if all of the blocked registers are allocated. To break the 1339 ** cycle rename one of them to a free register and retry. 1340 ** 1341 ** Note that PHI spill slots are kept in sync and don't need to be shuffled. 1342 */ 1343 static void asm_phi_shuffle(ASMState *as) 1344 { 1345 RegSet work; 1346 1347 /* Find and resolve PHI register mismatches. */ 1348 for (;;) { 1349 RegSet blocked = RSET_EMPTY; 1350 RegSet blockedby = RSET_EMPTY; 1351 RegSet phiset = as->phiset; 1352 while (phiset) { /* Check all left PHI operand registers. 
*/ 1353 Reg r = rset_pickbot(phiset); 1354 IRIns *irl = IR(as->phireg[r]); 1355 Reg left = irl->r; 1356 if (r != left) { /* Mismatch? */ 1357 if (!rset_test(as->freeset, r)) { /* PHI register blocked? */ 1358 IRRef ref = regcost_ref(as->cost[r]); 1359 /* Blocked by other PHI (w/reg)? */ 1360 if (!ra_iskref(ref) && irt_ismarked(IR(ref)->t)) { 1361 rset_set(blocked, r); 1362 if (ra_hasreg(left)) 1363 rset_set(blockedby, left); 1364 left = RID_NONE; 1365 } else { /* Otherwise grab register from invariant. */ 1366 ra_restore(as, ref); 1367 checkmclim(as); 1368 } 1369 } 1370 if (ra_hasreg(left)) { 1371 ra_rename(as, left, r); 1372 checkmclim(as); 1373 } 1374 } 1375 rset_clear(phiset, r); 1376 } 1377 if (!blocked) break; /* Finished. */ 1378 if (!(as->freeset & blocked)) { /* Break cycles if none are free. */ 1379 asm_phi_break(as, blocked, blockedby, RSET_GPR); 1380 if (!LJ_SOFTFP) asm_phi_break(as, blocked, blockedby, RSET_FPR); 1381 checkmclim(as); 1382 } /* Else retry some more renames. */ 1383 } 1384 1385 /* Restore/remat invariants whose registers are modified inside the loop. */ 1386 #if !LJ_SOFTFP 1387 work = as->modset & ~(as->freeset | as->phiset) & RSET_FPR; 1388 while (work) { 1389 Reg r = rset_pickbot(work); 1390 ra_restore(as, regcost_ref(as->cost[r])); 1391 rset_clear(work, r); 1392 checkmclim(as); 1393 } 1394 #endif 1395 work = as->modset & ~(as->freeset | as->phiset); 1396 while (work) { 1397 Reg r = rset_pickbot(work); 1398 ra_restore(as, regcost_ref(as->cost[r])); 1399 rset_clear(work, r); 1400 checkmclim(as); 1401 } 1402 1403 /* Allocate and save all unsaved PHI regs and clear marks. */ 1404 work = as->phiset; 1405 while (work) { 1406 Reg r = rset_picktop(work); 1407 IRRef lref = as->phireg[r]; 1408 IRIns *ir = IR(lref); 1409 if (ra_hasspill(ir->s)) { /* Left PHI gained a spill slot? */ 1410 irt_clearmark(ir->t); /* Handled here, so clear marker now. */ 1411 ra_alloc1(as, lref, RID2RSET(r)); 1412 ra_save(as, ir, r); /* Save to spill slot inside the loop. */ 1413 checkmclim(as); 1414 } 1415 rset_clear(work, r); 1416 } 1417 } 1418 1419 /* Copy unsynced left/right PHI spill slots. Rarely needed. */ 1420 static void asm_phi_copyspill(ASMState *as) 1421 { 1422 int need = 0; 1423 IRIns *ir; 1424 for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) 1425 if (ra_hasspill(ir->s) && ra_hasspill(IR(ir->op1)->s)) 1426 need |= irt_isfp(ir->t) ? 2 : 1; /* Unsynced spill slot? */ 1427 if ((need & 1)) { /* Copy integer spill slots. */ 1428 #if !LJ_TARGET_X86ORX64 1429 Reg r = RID_TMP; 1430 #else 1431 Reg r = RID_RET; 1432 if ((as->freeset & RSET_GPR)) 1433 r = rset_pickbot((as->freeset & RSET_GPR)); 1434 else 1435 emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); 1436 #endif 1437 for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) { 1438 if (ra_hasspill(ir->s)) { 1439 IRIns *irl = IR(ir->op1); 1440 if (ra_hasspill(irl->s) && !irt_isfp(ir->t)) { 1441 emit_spstore(as, irl, r, sps_scale(irl->s)); 1442 emit_spload(as, ir, r, sps_scale(ir->s)); 1443 checkmclim(as); 1444 } 1445 } 1446 } 1447 #if LJ_TARGET_X86ORX64 1448 if (!rset_test(as->freeset, r)) 1449 emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); 1450 #endif 1451 } 1452 #if !LJ_SOFTFP 1453 if ((need & 2)) { /* Copy FP spill slots. 
*/ 1454 #if LJ_TARGET_X86 1455 Reg r = RID_XMM0; 1456 #else 1457 Reg r = RID_FPRET; 1458 #endif 1459 if ((as->freeset & RSET_FPR)) 1460 r = rset_pickbot((as->freeset & RSET_FPR)); 1461 if (!rset_test(as->freeset, r)) 1462 emit_spload(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); 1463 for (ir = IR(as->orignins-1); ir->o == IR_PHI; ir--) { 1464 if (ra_hasspill(ir->s)) { 1465 IRIns *irl = IR(ir->op1); 1466 if (ra_hasspill(irl->s) && irt_isfp(ir->t)) { 1467 emit_spstore(as, irl, r, sps_scale(irl->s)); 1468 emit_spload(as, ir, r, sps_scale(ir->s)); 1469 checkmclim(as); 1470 } 1471 } 1472 } 1473 if (!rset_test(as->freeset, r)) 1474 emit_spstore(as, IR(regcost_ref(as->cost[r])), r, SPOFS_TMP); 1475 } 1476 #endif 1477 } 1478 1479 /* Emit renames for left PHIs which are only spilled outside the loop. */ 1480 static void asm_phi_fixup(ASMState *as) 1481 { 1482 RegSet work = as->phiset; 1483 while (work) { 1484 Reg r = rset_picktop(work); 1485 IRRef lref = as->phireg[r]; 1486 IRIns *ir = IR(lref); 1487 if (irt_ismarked(ir->t)) { 1488 irt_clearmark(ir->t); 1489 /* Left PHI gained a spill slot before the loop? */ 1490 if (ra_hasspill(ir->s)) { 1491 ra_addrename(as, r, lref, as->loopsnapno); 1492 } 1493 } 1494 rset_clear(work, r); 1495 } 1496 } 1497 1498 /* Setup right PHI reference. */ 1499 static void asm_phi(ASMState *as, IRIns *ir) 1500 { 1501 RegSet allow = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) & 1502 ~as->phiset; 1503 RegSet afree = (as->freeset & allow); 1504 IRIns *irl = IR(ir->op1); 1505 IRIns *irr = IR(ir->op2); 1506 if (ir->r == RID_SINK) /* Sink PHI. */ 1507 return; 1508 /* Spill slot shuffling is not implemented yet (but rarely needed). */ 1509 if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) 1510 lj_trace_err(as->J, LJ_TRERR_NYIPHI); 1511 /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */ 1512 if ((afree & (afree-1))) { /* Two or more free registers? */ 1513 Reg r; 1514 if (ra_noreg(irr->r)) { /* Get a register for the right PHI. */ 1515 r = ra_allocref(as, ir->op2, allow); 1516 } else { /* Duplicate right PHI, need a copy (rare). */ 1517 r = ra_scratch(as, allow); 1518 emit_movrr(as, irr, r, irr->r); 1519 } 1520 ir->r = (uint8_t)r; 1521 rset_set(as->phiset, r); 1522 as->phireg[r] = (IRRef1)ir->op1; 1523 irt_setmark(irl->t); /* Marks left PHIs _with_ register. */ 1524 if (ra_noreg(irl->r)) 1525 ra_sethint(irl->r, r); /* Set register hint for left PHI. */ 1526 } else { /* Otherwise allocate a spill slot. */ 1527 /* This is overly restrictive, but it triggers only on synthetic code. */ 1528 if (ra_hasreg(irl->r) || ra_hasreg(irr->r)) 1529 lj_trace_err(as->J, LJ_TRERR_NYIPHI); 1530 ra_spill(as, ir); 1531 irr->s = ir->s; /* Set right PHI spill slot. Sync left slot later. */ 1532 } 1533 } 1534 1535 static void asm_loop_fixup(ASMState *as); 1536 1537 /* Middle part of a loop. */ 1538 static void asm_loop(ASMState *as) 1539 { 1540 MCode *mcspill; 1541 /* LOOP is a guard, so the snapno is up to date. */ 1542 as->loopsnapno = as->snapno; 1543 if (as->gcsteps) 1544 asm_gc_check(as); 1545 /* LOOP marks the transition from the variant to the invariant part. 
*/ 1546 as->flagmcp = as->invmcp = NULL; 1547 as->sectref = 0; 1548 if (!neverfuse(as)) as->fuseref = 0; 1549 asm_phi_shuffle(as); 1550 mcspill = as->mcp; 1551 asm_phi_copyspill(as); 1552 asm_loop_fixup(as); 1553 as->mcloop = as->mcp; 1554 RA_DBGX((as, "===== LOOP =====")); 1555 if (!as->realign) RA_DBG_FLUSH(); 1556 if (as->mcp != mcspill) 1557 emit_jmp(as, mcspill); 1558 } 1559 1560 /* -- Target-specific assembler ------------------------------------------- */ 1561 1562 #if LJ_TARGET_X86ORX64 1563 #include "lj_asm_x86.h" 1564 #elif LJ_TARGET_ARM 1565 #include "lj_asm_arm.h" 1566 #elif LJ_TARGET_PPC 1567 #include "lj_asm_ppc.h" 1568 #elif LJ_TARGET_MIPS 1569 #include "lj_asm_mips.h" 1570 #else 1571 #error "Missing assembler for target CPU" 1572 #endif 1573 1574 /* -- Instruction dispatch ------------------------------------------------ */ 1575 1576 /* Assemble a single instruction. */ 1577 static void asm_ir(ASMState *as, IRIns *ir) 1578 { 1579 switch ((IROp)ir->o) { 1580 /* Miscellaneous ops. */ 1581 case IR_LOOP: asm_loop(as); break; 1582 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; 1583 case IR_USE: 1584 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; 1585 case IR_PHI: asm_phi(as, ir); break; 1586 case IR_HIOP: asm_hiop(as, ir); break; 1587 case IR_GCSTEP: asm_gcstep(as, ir); break; 1588 case IR_PROF: asm_prof(as, ir); break; 1589 1590 /* Guarded assertions. */ 1591 case IR_LT: case IR_GE: case IR_LE: case IR_GT: 1592 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: 1593 case IR_ABC: 1594 asm_comp(as, ir); 1595 break; 1596 case IR_EQ: case IR_NE: 1597 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { 1598 as->curins--; 1599 asm_href(as, ir-1, (IROp)ir->o); 1600 } else { 1601 asm_equal(as, ir); 1602 } 1603 break; 1604 1605 case IR_RETF: asm_retf(as, ir); break; 1606 1607 /* Bit ops. */ 1608 case IR_BNOT: asm_bnot(as, ir); break; 1609 case IR_BSWAP: asm_bswap(as, ir); break; 1610 case IR_BAND: asm_band(as, ir); break; 1611 case IR_BOR: asm_bor(as, ir); break; 1612 case IR_BXOR: asm_bxor(as, ir); break; 1613 case IR_BSHL: asm_bshl(as, ir); break; 1614 case IR_BSHR: asm_bshr(as, ir); break; 1615 case IR_BSAR: asm_bsar(as, ir); break; 1616 case IR_BROL: asm_brol(as, ir); break; 1617 case IR_BROR: asm_bror(as, ir); break; 1618 1619 /* Arithmetic ops. */ 1620 case IR_ADD: asm_add(as, ir); break; 1621 case IR_SUB: asm_sub(as, ir); break; 1622 case IR_MUL: asm_mul(as, ir); break; 1623 case IR_MOD: asm_mod(as, ir); break; 1624 case IR_NEG: asm_neg(as, ir); break; 1625 #if LJ_SOFTFP 1626 case IR_DIV: case IR_POW: case IR_ABS: 1627 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: 1628 lua_assert(0); /* Unused for LJ_SOFTFP. */ 1629 break; 1630 #else 1631 case IR_DIV: asm_div(as, ir); break; 1632 case IR_POW: asm_pow(as, ir); break; 1633 case IR_ABS: asm_abs(as, ir); break; 1634 case IR_ATAN2: asm_atan2(as, ir); break; 1635 case IR_LDEXP: asm_ldexp(as, ir); break; 1636 case IR_FPMATH: asm_fpmath(as, ir); break; 1637 case IR_TOBIT: asm_tobit(as, ir); break; 1638 #endif 1639 case IR_MIN: asm_min(as, ir); break; 1640 case IR_MAX: asm_max(as, ir); break; 1641 1642 /* Overflow-checking arithmetic ops. */ 1643 case IR_ADDOV: asm_addov(as, ir); break; 1644 case IR_SUBOV: asm_subov(as, ir); break; 1645 case IR_MULOV: asm_mulov(as, ir); break; 1646 1647 /* Memory references. 
*/ 1648 case IR_AREF: asm_aref(as, ir); break; 1649 case IR_HREF: asm_href(as, ir, 0); break; 1650 case IR_HREFK: asm_hrefk(as, ir); break; 1651 case IR_NEWREF: asm_newref(as, ir); break; 1652 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; 1653 case IR_FREF: asm_fref(as, ir); break; 1654 case IR_STRREF: asm_strref(as, ir); break; 1655 case IR_LREF: asm_lref(as, ir); break; 1656 1657 /* Loads and stores. */ 1658 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: 1659 asm_ahuvload(as, ir); 1660 break; 1661 case IR_FLOAD: asm_fload(as, ir); break; 1662 case IR_XLOAD: asm_xload(as, ir); break; 1663 case IR_SLOAD: asm_sload(as, ir); break; 1664 1665 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; 1666 case IR_FSTORE: asm_fstore(as, ir); break; 1667 case IR_XSTORE: asm_xstore(as, ir); break; 1668 1669 /* Allocations. */ 1670 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; 1671 case IR_TNEW: asm_tnew(as, ir); break; 1672 case IR_TDUP: asm_tdup(as, ir); break; 1673 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; 1674 1675 /* Buffer operations. */ 1676 case IR_BUFHDR: asm_bufhdr(as, ir); break; 1677 case IR_BUFPUT: asm_bufput(as, ir); break; 1678 case IR_BUFSTR: asm_bufstr(as, ir); break; 1679 1680 /* Write barriers. */ 1681 case IR_TBAR: asm_tbar(as, ir); break; 1682 case IR_OBAR: asm_obar(as, ir); break; 1683 1684 /* Type conversions. */ 1685 case IR_CONV: asm_conv(as, ir); break; 1686 case IR_TOSTR: asm_tostr(as, ir); break; 1687 case IR_STRTO: asm_strto(as, ir); break; 1688 1689 /* Calls. */ 1690 case IR_CALLA: 1691 as->gcsteps++; 1692 /* fallthrough */ 1693 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; 1694 case IR_CALLXS: asm_callx(as, ir); break; 1695 case IR_CARG: break; 1696 1697 default: 1698 setintV(&as->J->errinfo, ir->o); 1699 lj_trace_err_info(as->J, LJ_TRERR_NYIIR); 1700 break; 1701 } 1702 } 1703 1704 /* -- Head of trace ------------------------------------------------------- */ 1705 1706 /* Head of a root trace. */ 1707 static void asm_head_root(ASMState *as) 1708 { 1709 int32_t spadj; 1710 asm_head_root_base(as); 1711 emit_setvmstate(as, (int32_t)as->T->traceno); 1712 spadj = asm_stack_adjust(as); 1713 as->T->spadjust = (uint16_t)spadj; 1714 emit_spsub(as, spadj); 1715 /* Root traces assume a checked stack for the starting proto. */ 1716 as->T->topslot = gcref(as->T->startpt)->pt.framesize; 1717 } 1718 1719 /* Head of a side trace. 1720 ** 1721 ** The current simplistic algorithm requires that all slots inherited 1722 ** from the parent are live in a register between pass 2 and pass 3. This 1723 ** avoids the complexity of stack slot shuffling. But of course this may 1724 ** overflow the register set in some cases and cause the dreaded error: 1725 ** "NYI: register coalescing too complex". A refined algorithm is needed. 1726 */ 1727 static void asm_head_side(ASMState *as) 1728 { 1729 IRRef1 sloadins[RID_MAX]; 1730 RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */ 1731 RegSet live = RSET_EMPTY; /* Live parent registers. */ 1732 IRIns *irp = &as->parent->ir[REF_BASE]; /* Parent base. */ 1733 int32_t spadj, spdelta; 1734 int pass2 = 0; 1735 int pass3 = 0; 1736 IRRef i; 1737 1738 if (as->snapno && as->topslot > as->parent->topslot) { 1739 /* Force snap #0 alloc to prevent register overwrite in stack check. */ 1740 as->snapno = 0; 1741 asm_snap_alloc(as); 1742 } 1743 allow = asm_head_side_base(as, irp, allow); 1744 1745 /* Scan all parent SLOADs and collect register dependencies. 
*/ 1746 for (i = as->stopins; i > REF_BASE; i--) { 1747 IRIns *ir = IR(i); 1748 RegSP rs; 1749 lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || 1750 (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); 1751 rs = as->parentmap[i - REF_FIRST]; 1752 if (ra_hasreg(ir->r)) { 1753 rset_clear(allow, ir->r); 1754 if (ra_hasspill(ir->s)) { 1755 ra_save(as, ir, ir->r); 1756 checkmclim(as); 1757 } 1758 } else if (ra_hasspill(ir->s)) { 1759 irt_setmark(ir->t); 1760 pass2 = 1; 1761 } 1762 if (ir->r == rs) { /* Coalesce matching registers right now. */ 1763 ra_free(as, ir->r); 1764 } else if (ra_hasspill(regsp_spill(rs))) { 1765 if (ra_hasreg(ir->r)) 1766 pass3 = 1; 1767 } else if (ra_used(ir)) { 1768 sloadins[rs] = (IRRef1)i; 1769 rset_set(live, rs); /* Block live parent register. */ 1770 } 1771 } 1772 1773 /* Calculate stack frame adjustment. */ 1774 spadj = asm_stack_adjust(as); 1775 spdelta = spadj - (int32_t)as->parent->spadjust; 1776 if (spdelta < 0) { /* Don't shrink the stack frame. */ 1777 spadj = (int32_t)as->parent->spadjust; 1778 spdelta = 0; 1779 } 1780 as->T->spadjust = (uint16_t)spadj; 1781 1782 /* Reload spilled target registers. */ 1783 if (pass2) { 1784 for (i = as->stopins; i > REF_BASE; i--) { 1785 IRIns *ir = IR(i); 1786 if (irt_ismarked(ir->t)) { 1787 RegSet mask; 1788 Reg r; 1789 RegSP rs; 1790 irt_clearmark(ir->t); 1791 rs = as->parentmap[i - REF_FIRST]; 1792 if (!ra_hasspill(regsp_spill(rs))) 1793 ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */ 1794 else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s)) 1795 continue; /* Same spill slot, do nothing. */ 1796 mask = ((!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR) & allow; 1797 if (mask == RSET_EMPTY) 1798 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 1799 r = ra_allocref(as, i, mask); 1800 ra_save(as, ir, r); 1801 rset_clear(allow, r); 1802 if (r == rs) { /* Coalesce matching registers right now. */ 1803 ra_free(as, r); 1804 rset_clear(live, r); 1805 } else if (ra_hasspill(regsp_spill(rs))) { 1806 pass3 = 1; 1807 } 1808 checkmclim(as); 1809 } 1810 } 1811 } 1812 1813 /* Store trace number and adjust stack frame relative to the parent. */ 1814 emit_setvmstate(as, (int32_t)as->T->traceno); 1815 emit_spsub(as, spdelta); 1816 1817 #if !LJ_TARGET_X86ORX64 1818 /* Restore BASE register from parent spill slot. */ 1819 if (ra_hasspill(irp->s)) 1820 emit_spload(as, IR(REF_BASE), IR(REF_BASE)->r, sps_scale(irp->s)); 1821 #endif 1822 1823 /* Restore target registers from parent spill slots. */ 1824 if (pass3) { 1825 RegSet work = ~as->freeset & RSET_ALL; 1826 while (work) { 1827 Reg r = rset_pickbot(work); 1828 IRRef ref = regcost_ref(as->cost[r]); 1829 RegSP rs = as->parentmap[ref - REF_FIRST]; 1830 rset_clear(work, r); 1831 if (ra_hasspill(regsp_spill(rs))) { 1832 int32_t ofs = sps_scale(regsp_spill(rs)); 1833 ra_free(as, r); 1834 emit_spload(as, IR(ref), r, ofs); 1835 checkmclim(as); 1836 } 1837 } 1838 } 1839 1840 /* Shuffle registers to match up target regs with parent regs. */ 1841 for (;;) { 1842 RegSet work; 1843 1844 /* Repeatedly coalesce free live registers by moving to their target. */ 1845 while ((work = as->freeset & live) != RSET_EMPTY) { 1846 Reg rp = rset_pickbot(work); 1847 IRIns *ir = IR(sloadins[rp]); 1848 rset_clear(live, rp); 1849 rset_clear(allow, rp); 1850 ra_free(as, ir->r); 1851 emit_movrr(as, ir, ir->r, rp); 1852 checkmclim(as); 1853 } 1854 1855 /* We're done if no live registers remain. 
*/ 1856 if (live == RSET_EMPTY) 1857 break; 1858 1859 /* Break cycles by renaming one target to a temp. register. */ 1860 if (live & RSET_GPR) { 1861 RegSet tmpset = as->freeset & ~live & allow & RSET_GPR; 1862 if (tmpset == RSET_EMPTY) 1863 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 1864 ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset)); 1865 } 1866 if (!LJ_SOFTFP && (live & RSET_FPR)) { 1867 RegSet tmpset = as->freeset & ~live & allow & RSET_FPR; 1868 if (tmpset == RSET_EMPTY) 1869 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 1870 ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset)); 1871 } 1872 checkmclim(as); 1873 /* Continue with coalescing to fix up the broken cycle(s). */ 1874 } 1875 1876 /* Inherit top stack slot already checked by parent trace. */ 1877 as->T->topslot = as->parent->topslot; 1878 if (as->topslot > as->T->topslot) { /* Need to check for higher slot? */ 1879 #ifdef EXITSTATE_CHECKEXIT 1880 /* Highest exit + 1 indicates stack check. */ 1881 ExitNo exitno = as->T->nsnap; 1882 #else 1883 /* Reuse the parent exit in the context of the parent trace. */ 1884 ExitNo exitno = as->J->exitno; 1885 #endif 1886 as->T->topslot = (uint8_t)as->topslot; /* Remember for child traces. */ 1887 asm_stack_check(as, as->topslot, irp, allow & RSET_GPR, exitno); 1888 } 1889 } 1890 1891 /* -- Tail of trace ------------------------------------------------------- */ 1892 1893 /* Get base slot for a snapshot. */ 1894 static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) 1895 { 1896 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 1897 MSize n; 1898 for (n = snap->nent; n > 0; n--) { 1899 SnapEntry sn = map[n-1]; 1900 if ((sn & SNAP_FRAME)) { 1901 *gotframe = 1; 1902 return snap_slot(sn) - LJ_FR2; 1903 } 1904 } 1905 return 0; 1906 } 1907 1908 /* Link to another trace. */ 1909 static void asm_tail_link(ASMState *as) 1910 { 1911 SnapNo snapno = as->T->nsnap-1; /* Last snapshot. */ 1912 SnapShot *snap = &as->T->snap[snapno]; 1913 int gotframe = 0; 1914 BCReg baseslot = asm_baseslot(as, snap, &gotframe); 1915 1916 as->topslot = snap->topslot; 1917 checkmclim(as); 1918 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); 1919 1920 if (as->T->link == 0) { 1921 /* Setup fixed registers for exit to interpreter. */ 1922 const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); 1923 int32_t mres; 1924 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ 1925 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; 1926 if (bc_isret(bc_op(*retpc))) 1927 pc = retpc; 1928 } 1929 #if LJ_GC64 1930 emit_loadu64(as, RID_LPC, u64ptr(pc)); 1931 #else 1932 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); 1933 ra_allockreg(as, i32ptr(pc), RID_LPC); 1934 #endif 1935 mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); 1936 switch (bc_op(*pc)) { 1937 case BC_CALLM: case BC_CALLMT: 1938 mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; 1939 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; 1940 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; 1941 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; 1942 } 1943 ra_allockreg(as, mres, RID_RET); /* Return MULTRES or 0. */ 1944 } else if (baseslot) { 1945 /* Save modified BASE for linking to trace with higher start frame. */ 1946 emit_setgl(as, RID_BASE, jit_base); 1947 } 1948 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); 1949 1950 if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. 
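** The placeholder constant referenced by as->J->ktrace is only now
** turned into a real IR_KGC: its GC reference is pointed at
** J->curfinal, the copied trace being assembled, presumably because
** that object does not yet exist when the constant is first created.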
*/ 1951 setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); 1952 IR(as->J->ktrace)->o = IR_KGC; 1953 } 1954 1955 /* Sync the interpreter state with the on-trace state. */ 1956 asm_stack_restore(as, snap); 1957 1958 /* Root traces that add frames need to check the stack at the end. */ 1959 if (!as->parent && gotframe) 1960 asm_stack_check(as, as->topslot, NULL, as->freeset & RSET_GPR, snapno); 1961 } 1962 1963 /* -- Trace setup --------------------------------------------------------- */ 1964 1965 /* Clear reg/sp for all instructions and add register hints. */ 1966 static void asm_setup_regsp(ASMState *as) 1967 { 1968 GCtrace *T = as->T; 1969 int sink = T->sinktags; 1970 IRRef nins = T->nins; 1971 IRIns *ir, *lastir; 1972 int inloop; 1973 #if LJ_TARGET_ARM 1974 uint32_t rload = 0xa6402a64; 1975 #endif 1976 1977 ra_setup(as); 1978 1979 /* Clear reg/sp for constants. */ 1980 for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { 1981 ir->prev = REGSP_INIT; 1982 if (irt_is64(ir->t) && ir->o != IR_KNULL) { 1983 #if LJ_GC64 1984 ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ 1985 #else 1986 /* Make life easier for backends by putting address of constant in i. */ 1987 ir->i = (int32_t)(intptr_t)(ir+1); 1988 #endif 1989 ir++; 1990 } 1991 } 1992 1993 /* REF_BASE is used for implicit references to the BASE register. */ 1994 lastir->prev = REGSP_HINT(RID_BASE); 1995 1996 ir = IR(nins-1); 1997 if (ir->o == IR_RENAME) { 1998 /* Remove any renames left over from ASM restart due to LJ_TRERR_MCODELM. */ 1999 do { ir--; nins--; } while (ir->o == IR_RENAME); 2000 T->nins = nins; 2001 } 2002 as->snaprename = nins; 2003 as->snapref = nins; 2004 as->snapno = T->nsnap; 2005 2006 as->stopins = REF_BASE; 2007 as->orignins = nins; 2008 as->curins = nins; 2009 2010 /* Setup register hints for parent link instructions. */ 2011 ir = IR(REF_FIRST); 2012 if (as->parent) { 2013 uint16_t *p; 2014 lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); 2015 if (lastir - ir > LJ_MAX_JSLOTS) 2016 lj_trace_err(as->J, LJ_TRERR_NYICOAL); 2017 as->stopins = (IRRef)((lastir-1) - as->ir); 2018 for (p = as->parentmap; ir < lastir; ir++) { 2019 RegSP rs = ir->prev; 2020 *p++ = (uint16_t)rs; /* Copy original parent RegSP to parentmap. */ 2021 if (!ra_hasspill(regsp_spill(rs))) 2022 ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs)); 2023 else 2024 ir->prev = REGSP_INIT; 2025 } 2026 } 2027 2028 inloop = 0; 2029 as->evenspill = SPS_FIRST; 2030 for (lastir = IR(nins); ir < lastir; ir++) { 2031 if (sink) { 2032 if (ir->r == RID_SINK) 2033 continue; 2034 if (ir->r == RID_SUNK) { /* Revert after ASM restart. 
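** A sunk instruction may have had its RID_SINK mark changed to
** RID_SUNK by a previous, aborted assembly pass; restoring RID_SINK
** here lets the retry start from the same sink state as the first
** attempt.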
*/ 2035 ir->r = RID_SINK; 2036 continue; 2037 } 2038 } 2039 switch (ir->o) { 2040 case IR_LOOP: 2041 inloop = 1; 2042 break; 2043 #if LJ_TARGET_ARM 2044 case IR_SLOAD: 2045 if (!((ir->op2 & IRSLOAD_TYPECHECK) || (ir+1)->o == IR_HIOP)) 2046 break; 2047 /* fallthrough */ 2048 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: 2049 if (!LJ_SOFTFP && irt_isnum(ir->t)) break; 2050 ir->prev = (uint16_t)REGSP_HINT((rload & 15)); 2051 rload = lj_ror(rload, 4); 2052 continue; 2053 #endif 2054 case IR_CALLXS: { 2055 CCallInfo ci; 2056 ci.flags = asm_callx_flags(as, ir); 2057 ir->prev = asm_setup_call_slots(as, ir, &ci); 2058 if (inloop) 2059 as->modset |= RSET_SCRATCH; 2060 continue; 2061 } 2062 case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: { 2063 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 2064 ir->prev = asm_setup_call_slots(as, ir, ci); 2065 if (inloop) 2066 as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ? 2067 (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; 2068 continue; 2069 } 2070 #if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 2071 case IR_HIOP: 2072 switch ((ir-1)->o) { 2073 #if LJ_SOFTFP && LJ_TARGET_ARM 2074 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: 2075 if (ra_hashint((ir-1)->r)) { 2076 ir->prev = (ir-1)->prev + 1; 2077 continue; 2078 } 2079 break; 2080 #endif 2081 #if !LJ_SOFTFP && LJ_NEED_FP64 2082 case IR_CONV: 2083 if (irt_isfp((ir-1)->t)) { 2084 ir->prev = REGSP_HINT(RID_FPRET); 2085 continue; 2086 } 2087 /* fallthrough */ 2088 #endif 2089 case IR_CALLN: case IR_CALLXS: 2090 #if LJ_SOFTFP 2091 case IR_MIN: case IR_MAX: 2092 #endif 2093 (ir-1)->prev = REGSP_HINT(RID_RETLO); 2094 ir->prev = REGSP_HINT(RID_RETHI); 2095 continue; 2096 default: 2097 break; 2098 } 2099 break; 2100 #endif 2101 #if LJ_SOFTFP 2102 case IR_MIN: case IR_MAX: 2103 if ((ir+1)->o != IR_HIOP) break; 2104 /* fallthrough */ 2105 #endif 2106 /* C calls evict all scratch regs and return results in RID_RET. */ 2107 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: 2108 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2109 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2110 #if LJ_TARGET_X86 && LJ_HASFFI 2111 if (0) { 2112 case IR_CNEW: 2113 if (ir->op2 != REF_NIL && as->evenspill < 4) 2114 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ 2115 } 2116 #else 2117 case IR_CNEW: 2118 #endif 2119 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: 2120 case IR_BUFSTR: 2121 ir->prev = REGSP_HINT(RID_RET); 2122 if (inloop) 2123 as->modset = RSET_SCRATCH; 2124 continue; 2125 case IR_STRTO: case IR_OBAR: 2126 if (inloop) 2127 as->modset = RSET_SCRATCH; 2128 break; 2129 #if !LJ_SOFTFP 2130 case IR_ATAN2: 2131 #if LJ_TARGET_X86 2132 if (as->evenspill < 4) /* Leave room to call atan2(). 
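** Like the 3- and 4-slot reservations above, this seems to account for
** the outgoing stack arguments of a helper call, which share the low
** end of the spill area on x86, so as->evenspill must cover them.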
*/ 2133 as->evenspill = 4; 2134 #endif 2135 #if !LJ_TARGET_X86ORX64 2136 case IR_LDEXP: 2137 #endif 2138 #endif 2139 case IR_POW: 2140 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2141 if (inloop) 2142 as->modset |= RSET_SCRATCH; 2143 #if LJ_TARGET_X86 2144 break; 2145 #else 2146 ir->prev = REGSP_HINT(RID_FPRET); 2147 continue; 2148 #endif 2149 } 2150 /* fallthrough for integer POW */ 2151 case IR_DIV: case IR_MOD: 2152 if (!irt_isnum(ir->t)) { 2153 ir->prev = REGSP_HINT(RID_RET); 2154 if (inloop) 2155 as->modset |= (RSET_SCRATCH & RSET_GPR); 2156 continue; 2157 } 2158 break; 2159 case IR_FPMATH: 2160 #if LJ_TARGET_X86ORX64 2161 if (ir->op2 <= IRFPM_TRUNC) { 2162 if (!(as->flags & JIT_F_SSE4_1)) { 2163 ir->prev = REGSP_HINT(RID_XMM0); 2164 if (inloop) 2165 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); 2166 continue; 2167 } 2168 break; 2169 } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) { 2170 if (as->evenspill < 4) /* Leave room to call pow(). */ 2171 as->evenspill = 4; 2172 } 2173 #endif 2174 if (inloop) 2175 as->modset |= RSET_SCRATCH; 2176 #if LJ_TARGET_X86 2177 break; 2178 #else 2179 ir->prev = REGSP_HINT(RID_FPRET); 2180 continue; 2181 #endif 2182 #if LJ_TARGET_X86ORX64 2183 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ 2184 case IR_BSHL: case IR_BSHR: case IR_BSAR: 2185 if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ 2186 break; 2187 case IR_BROL: case IR_BROR: 2188 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { 2189 IR(ir->op2)->r = REGSP_HINT(RID_ECX); 2190 if (inloop) 2191 rset_set(as->modset, RID_ECX); 2192 } 2193 break; 2194 #endif 2195 /* Do not propagate hints across type conversions or loads. */ 2196 case IR_TOBIT: 2197 case IR_XLOAD: 2198 #if !LJ_TARGET_ARM 2199 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: 2200 #endif 2201 break; 2202 case IR_CONV: 2203 if (irt_isfp(ir->t) || (ir->op2 & IRCONV_SRCMASK) == IRT_NUM || 2204 (ir->op2 & IRCONV_SRCMASK) == IRT_FLOAT) 2205 break; 2206 /* fallthrough */ 2207 default: 2208 /* Propagate hints across likely 'op reg, imm' or 'op reg'. */ 2209 if (irref_isk(ir->op2) && !irref_isk(ir->op1) && 2210 ra_hashint(regsp_reg(IR(ir->op1)->prev))) { 2211 ir->prev = IR(ir->op1)->prev; 2212 continue; 2213 } 2214 break; 2215 } 2216 ir->prev = REGSP_INIT; 2217 } 2218 if ((as->evenspill & 1)) 2219 as->oddspill = as->evenspill++; 2220 else 2221 as->oddspill = 0; 2222 } 2223 2224 /* -- Assembler core ------------------------------------------------------ */ 2225 2226 /* Assemble a trace. */ 2227 void lj_asm_trace(jit_State *J, GCtrace *T) 2228 { 2229 ASMState as_; 2230 ASMState *as = &as_; 2231 MCode *origtop; 2232 2233 /* Ensure an initialized instruction beyond the last one for HIOP checks. */ 2234 /* This also allows one RENAME to be added without reallocating curfinal. */ 2235 as->orignins = lj_ir_nextins(J); 2236 J->cur.ir[as->orignins].o = IR_NOP; 2237 2238 /* Setup initial state. Copy some fields to reduce indirections. */ 2239 as->J = J; 2240 as->T = T; 2241 J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ 2242 as->flags = J->flags; 2243 as->loopref = J->loopref; 2244 as->realign = NULL; 2245 as->loopinv = 0; 2246 as->parent = J->parent ? traceref(J, J->parent) : NULL; 2247 2248 /* Reserve MCode memory. 
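** The reserved buffer spans as->mcbot..as->mctop. Since the trace is
** assembled in linear backwards order (see the main loop below), code
** emission starts at mctop and moves down towards mcbot, with mclim
** keeping a red zone of MCLIM_REDZONE bytes above mcbot to catch
** overflow during emission.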
*/ 2249 as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); 2250 as->mcp = as->mctop; 2251 as->mclim = as->mcbot + MCLIM_REDZONE; 2252 asm_setup_target(as); 2253 2254 /* 2255 ** This is a loop, because the MCode may have to be (re-)assembled 2256 ** multiple times: 2257 ** 2258 ** 1. as->realign is set (and the assembly aborted), if the arch-specific 2259 ** backend wants the MCode to be aligned differently. 2260 ** 2261 ** This is currently only the case on x86/x64, where small loops get 2262 ** an aligned loop body plus a short branch. Not much effort is wasted, 2263 ** because the abort happens very quickly and only once. 2264 ** 2265 ** 2. The IR is immovable, since the MCode embeds pointers to various 2266 ** constants inside the IR. But RENAMEs may need to be added to the IR 2267 ** during assembly, which might grow and reallocate the IR. We check 2268 ** at the end if the IR (in J->cur.ir) has actually grown, resize the 2269 ** copy (in J->curfinal.ir) and try again. 2270 ** 2271 ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have 2272 ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to 2273 ** always have one spare slot in the IR (see above), which means we 2274 ** have to redo the assembly for only ~2% of all traces. 2275 ** 2276 ** Very, very rarely, this needs to be done repeatedly, since the 2277 ** location of constants inside the IR (actually, reachability from 2278 ** a global pointer) may affect register allocation and thus the 2279 ** number of RENAMEs. 2280 */ 2281 for (;;) { 2282 as->mcp = as->mctop; 2283 #ifdef LUA_USE_ASSERT 2284 as->mcp_prev = as->mcp; 2285 #endif 2286 as->ir = J->curfinal->ir; /* Use the copied IR. */ 2287 as->curins = J->cur.nins = as->orignins; 2288 2289 RA_DBG_START(); 2290 RA_DBGX((as, "===== STOP =====")); 2291 2292 /* General trace setup. Emit tail of trace. */ 2293 asm_tail_prep(as); 2294 as->mcloop = NULL; 2295 as->flagmcp = NULL; 2296 as->topslot = 0; 2297 as->gcsteps = 0; 2298 as->sectref = as->loopref; 2299 as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; 2300 asm_setup_regsp(as); 2301 if (!as->loopref) 2302 asm_tail_link(as); 2303 2304 /* Assemble a trace in linear backwards order. */ 2305 for (as->curins--; as->curins > as->stopins; as->curins--) { 2306 IRIns *ir = IR(as->curins); 2307 lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ 2308 if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) 2309 continue; /* Dead-code elimination can be soooo easy. */ 2310 if (irt_isguard(ir->t)) 2311 asm_snap_prep(as); 2312 RA_DBG_REF(); 2313 checkmclim(as); 2314 asm_ir(as, ir); 2315 } 2316 2317 if (as->realign && J->curfinal->nins >= T->nins) 2318 continue; /* Retry in case only the MCode needs to be realigned. */ 2319 2320 /* Emit head of trace. */ 2321 RA_DBG_REF(); 2322 checkmclim(as); 2323 if (as->gcsteps > 0) { 2324 as->curins = as->T->snap[0].ref; 2325 asm_snap_prep(as); /* The GC check is a guard. */ 2326 asm_gc_check(as); 2327 as->curins = as->stopins; 2328 } 2329 ra_evictk(as); 2330 if (as->parent) 2331 asm_head_side(as); 2332 else 2333 asm_head_root(as); 2334 asm_phi_fixup(as); 2335 2336 if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ 2337 lua_assert(J->curfinal->nk == T->nk); 2338 memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, 2339 (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ 2340 T->nins = J->curfinal->nins; 2341 break; /* Done. */ 2342 } 2343 2344 /* Otherwise try again with a bigger IR. 
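** I.e. J->curfinal->nins < T->nins: the RENAMEs added during this pass
** did not fit into the copied IR. Free the copy, allocate a fresh one
** from the grown trace and redo the whole assembly loop.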
*/ 2345 lj_trace_free(J2G(J), J->curfinal); 2346 J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ 2347 J->curfinal = lj_trace_alloc(J->L, T); 2348 as->realign = NULL; 2349 } 2350 2351 RA_DBGX((as, "===== START ====")); 2352 RA_DBG_FLUSH(); 2353 if (as->freeset != RSET_ALL) 2354 lj_trace_err(as->J, LJ_TRERR_BADRA); /* Ouch! Should never happen. */ 2355 2356 /* Set trace entry point before fixing up tail to allow link to self. */ 2357 T->mcode = as->mcp; 2358 T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; 2359 if (!as->loopref) 2360 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ 2361 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); 2362 lj_mcode_sync(T->mcode, origtop); 2363 } 2364 2365 #undef IR 2366 2367 #endif