/*
** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Register allocator extensions --------------------------------------- */

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_allocref(as, ir->op2, allow);
    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_allocref(as, ir->op1, allow);
    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}

/* -- Guard handling ------------------------------------------------------ */

/* Setup exit stubs after the end of each trace. */
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
  ExitNo i;
  MCode *mxp = as->mctop;
  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
    asm_mclimit(as);
  /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
  for (i = nexits-1; (int32_t)i >= 0; i--)
    *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
  *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno;  /* Read by exit handler. */
  mxp--;
  *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
  *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);
  as->mctop = mxp;
}

static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
  /* Keep this in-sync with exitstub_trace_addr(). */
  return as->mctop + exitno + 3;
}
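
/* For illustration (sketch, not part of the original file): after
** asm_exitstub_setup() the end of the trace looks like this, which is why
** exit stub #i sits at mctop + 3 + i machine words:
**
**   mctop-> mflr r0
**           bl  ->vm_exit_handler
**           li  r0, traceno    ; read by the exit handler
**           bl  <1             ; exit stub #0 = asm_exitstub_addr(as, 0)
**           bl  <1             ; exit stub #1
**           ...
*/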

/* Emit conditional branch to exit for guard. */
static void asm_guardcc(ASMState *as, PPCCC cc)
{
  MCode *target = asm_exitstub_addr(as, as->snapno);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    as->loopinv = 1;
    *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2);
    emit_condbranch(as, PPCI_BC, cc^4, p);
    return;
  }
  emit_condbranch(as, PPCI_BC, cc, target);
}

/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM	31

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
{
  IRIns *ir = as->ir;
  IRRef i = as->curins;
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  while (--i > ref)
    if (ir[i].o == conflict)
      return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */
}

/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
  return 0;
}

/* Indicates load/store indexed is ok. */
#define AHUREF_LSX	((int32_t)0x80000000)

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
        if (irref_isk(ir->op2)) {
          IRRef tab = IR(ir->op1)->op1;
          int32_t ofs = asm_fuseabase(as, tab);
          IRRef refa = ofs ? tab : ir->op1;
          ofs += 8*IR(ir->op2)->i;
          if (checki16(ofs)) {
            *ofsp = ofs;
            return ra_alloc1(as, refa, allow);
          }
        }
        if (*ofsp == AHUREF_LSX) {
          Reg base = ra_alloc1(as, ir->op1, allow);
          Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
          return base | (idx << 8);
        }
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
        int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
        if (checki16(ofs)) {
          *ofsp = ofs;
          return ra_alloc1(as, ir->op1, allow);
        }
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
        GCfunc *fn = ir_kfunc(IR(ir->op1));
        int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
        int32_t jgl = (intptr_t)J2G(as->J);
        if ((uint32_t)(ofs-jgl) < 65536) {
          *ofsp = ofs-jgl-32768;
          return RID_JGL;
        } else {
          *ofsp = (int16_t)ofs;
          return ra_allock(as, ofs-(int16_t)ofs, allow);
        }
      }
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}

/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
                         RegSet allow, int32_t ofs)
{
  IRIns *ir = IR(ref);
  Reg base;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      int32_t ofs2;
      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
        ofs = ofs2;
        ref = ir->op1;
      } else if (ofs == 0) {
        Reg right, left = ra_alloc2(as, ir, allow);
        right = (left >> 8); left &= 255;
        emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
        return;
      }
    } else if (ir->o == IR_STRREF) {
      lua_assert(ofs == 0);
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
        ofs += IR(ir->op2)->i;
        ref = ir->op1;
      } else if (irref_isk(ir->op1)) {
        ofs += IR(ir->op1)->i;
        ref = ir->op2;
      } else {
        /* NYI: Fuse ADD with constant. */
        Reg tmp, right, left = ra_alloc2(as, ir, allow);
        right = (left >> 8); left &= 255;
        tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right));
        emit_fai(as, pi, rt, tmp, ofs);
        emit_tab(as, PPCI_ADD, tmp, left, right);
        return;
      }
      if (!checki16(ofs)) {
        Reg left = ra_alloc1(as, ref, allow);
        Reg right = ra_allock(as, ofs, rset_exclude(allow, left));
        emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
        return;
      }
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_fai(as, pi, rt, base, ofs);
}
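
/* For illustration (sketch): the addressing modes asm_fusexref above can
** produce for a 32 bit XLOAD:
**
**   *(p + k), checki16(k)   ->  lwz  rt, k(base)
**   *(a + b), ofs == 0      ->  lwzx rt, a, b
**   otherwise               ->  materialize the address, then lwz/lwzx
**
** The `(pi >> 20) & 0x780` bit trick appears to select the matching indexed
** (X-form) variant of the given D-form load/store opcode.
*/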

/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */
static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
                          RegSet allow)
{
  IRIns *ira = IR(ref);
  Reg right, left;
  if (canfuse(as, ira) && ira->o == IR_ADD && ra_noreg(ira->r)) {
    left = ra_alloc2(as, ira, allow);
    right = (left >> 8); left &= 255;
  } else {
    right = ra_alloc1(as, ref, allow);
    left = RID_R0;
  }
  emit_tab(as, pi, rt, left, right);
}

/* Fuse to multiply-add/sub instruction. */
static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
{
  IRRef lref = ir->op1, rref = ir->op2;
  IRIns *irm;
  if (lref != rref &&
      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
        ra_noreg(irm->r)) ||
       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
        (rref = lref, pi = pir, ra_noreg(irm->r))))) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg add = ra_alloc1(as, rref, RSET_FPR);
    Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add));
    right = (left >> 8); left &= 255;
    emit_facb(as, pi, dest, left, right, add);
    return 1;
  }
  return 0;
}
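
/* For illustration (sketch): the IR shapes asm_fusemadd can fuse into one
** FP instruction, provided the MUL result is not live in a register:
**
**   ADD(MUL(a,b), c)  ->  fmadd  d,a,b,c    ; d = a*b + c
**   SUB(MUL(a,b), c)  ->  fmsub  d,a,b,c    ; d = a*b - c
**   SUB(c, MUL(a,b))  ->  fnmsub d,a,b,c    ; d = c - a*b
*/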

/* -- Calls --------------------------------------------------------------- */

/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_XNARGS(ci);
  int32_t ofs = 8;
  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
      if (irt_isfp(ir->t)) {
        if (fpr <= REGARG_LASTFPR) {
          lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
          ra_leftov(as, fpr, ref);
          fpr++;
        } else {
          Reg r = ra_alloc1(as, ref, RSET_FPR);
          if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
          emit_spstore(as, ir, r, ofs);
          ofs += irt_isnum(ir->t) ? 8 : 4;
        }
      } else {
        if (gpr <= REGARG_LASTGPR) {
          lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
          ra_leftov(as, gpr, ref);
          gpr++;
        } else {
          Reg r = ra_alloc1(as, ref, RSET_GPR);
          emit_spstore(as, ir, r, ofs);
          ofs += 4;
        }
      }
    } else {
      if (gpr <= REGARG_LASTGPR)
        gpr++;
      else
        ofs += 4;
    }
    checkmclim(as);
  }
  if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
    emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
}

/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
        /* Use spill slot or temp slots. */
        int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
        Reg dest = ir->r;
        if (ra_hasreg(dest)) {
          ra_free(as, dest);
          ra_modified(as, dest);
          emit_fai(as, PPCI_LFD, dest, RID_SP, ofs);
        }
        emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs);
        emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4);
      } else {
        ra_destreg(as, ir, RID_FPRET);
      }
#if LJ_32
    } else if (hiop) {
      ra_destpair(as, ir);
#endif
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
}

static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
  } else {  /* Need a non-argument register for indirect calls. */
    RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
    Reg freg = ra_alloc1(as, func, allow);
    *--as->mcp = PPCI_BCTRL;
    *--as->mcp = PPCI_MTCTR | PPCF_T(freg);
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guardcc(as, CC_NE);
  emit_ab(as, PPCI_CMPW, RID_TMP,
          ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
}

/* -- Type conversions ---------------------------------------------------- */

static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  RegSet allow = RSET_FPR;
  Reg tmp = ra_scratch(as, rset_clear(allow, left));
  Reg fbias = ra_scratch(as, rset_clear(allow, tmp));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest));
  asm_guardcc(as, CC_NE);
  emit_fab(as, PPCI_FCMPU, 0, tmp, left);
  emit_fab(as, PPCI_FSUB, tmp, tmp, fbias);
  emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP);
  emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO);
  emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
  emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_lsptr(as, PPCI_LFS, (fbias & 31),
             (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
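
/* For illustration (sketch): asm_tointg checks that a double is an exact
** int32 by a truncate-and-reconstruct round trip:
**
**   i = fctiwz(x)              ; truncate to int32
**   if ((double)i != x) exit   ; i rebuilt via the 2^52+2^31 bias trick
**
** The int->double rebuild is the same bias trick used by asm_conv below.
*/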

static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fab(as, PPCI_FADD, tmp, left, right);
}

static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  lua_assert(!(irt_isint64(ir->t) ||
               (st == IRT_I64 || st == IRT_U64)));  /* Handled by SPLIT. */
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      if (st == IRT_NUM)  /* double -> float conversion. */
        emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR));
      else  /* float -> double conversion is a no-op on PPC. */
        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    } else {  /* Integer to FP conversion. */
      /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */
      /* IRT_U32: Bias with 2^52, subtract 2^52. */
      RegSet allow = RSET_GPR;
      Reg left = ra_alloc1(as, lref, allow);
      Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
      Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
      emit_lsptr(as, PPCI_LFS, (fbias & 31),
                 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
                 rset_clear(allow, hibias));
      emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
               RID_SP, SPOFS_TMPLO);
      emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
      if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
        /* Convert both x and x-2^31 to int and merge results. */
        Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest));
        emit_asb(as, PPCI_OR, dest, dest, tmpi);  /* Select with mask idiom. */
        emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
        emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
        emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO);  /* tmp = (int)(x) */
        emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000);  /* dest += 2^31 */
        emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31);  /* mask = -(dest < 0) */
        emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
        emit_tai(as, PPCI_LWZ, dest,
                 RID_SP, SPOFS_TMPLO);  /* dest = (int)(x-2^31) */
        emit_fb(as, PPCI_FCTIWZ, tmp, left);
        emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
        emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
        emit_fab(as, PPCI_FSUB, tmp, left, tmp);
        emit_lsptr(as, PPCI_LFS, (tmp & 31),
                   (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
      } else {
        emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
        emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
        emit_fb(as, PPCI_FCTIWZ, tmp, left);
      }
    }
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
      if ((ir->op2 & IRCONV_SEXT))
        emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
      else
        emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31);
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    }
  }
}
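
/* For illustration, the bias trick above in C (a sketch; assumes IEEE-754
** doubles and this target's big-endian word order):
**
**   union { double d; uint32_t w[2]; } u;
**   u.w[0] = 0x43300000;                ; hi word: exponent bits of 2^52
**   u.w[1] = (uint32_t)i ^ 0x80000000;  ; lo word: int with sign bit flipped
**   d = u.d - (0x1p52 + 0x1p31);        ; LJ_K32_2P52_2P31
**
** For IRT_U32 the XOR is skipped and only 2^52 (LJ_K32_2P52) is subtracted.
*/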

static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  int32_t ofs;
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  asm_guardcc(as, CC_EQ);
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n  */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
}

/* -- Memory references --------------------------------------------------- */

/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
    else  /* Otherwise force a spill and use the spill slot. */
      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg type;
    emit_tai(as, PPCI_ADDI, dest, RID_JGL,
             (int32_t)offsetof(global_State, tmptv)-32768);
    if (!irt_ispri(ir->t)) {
      Reg src = ra_alloc1(as, ref, allow);
      emit_setgl(as, src, tmptv.gcr);
    }
    type = ra_allock(as, irt_toitype(ir->t), allow);
    emit_setgl(as, type, tmptv.it);
  }
}

static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    ofs += 8*IR(ir->op2)->i;
    if (checki16(ofs)) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tai(as, PPCI_ADDI, dest, base, ofs);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_tab(as, PPCI_ADD, dest, RID_TMP, base);
  emit_slwi(as, RID_TMP, idx, 3);
}

/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, tmp1 = RID_TMP, tmp2;
  Reg tisnum = RID_NONE, tmpnum = RID_NONE;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;

  rset_clear(allow, tab);
  if (irt_isnum(kt)) {
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
    tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
    rset_clear(allow, tisnum);
  } else if (!irt_ispri(kt)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
  }
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guardcc(as, CC_EQ);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));

  /* Follow hash chain until the end. */
  l_loop = --as->mcp;
  emit_ai(as, PPCI_CMPWI, dest, 0);
  emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ)
    asm_guardcc(as, CC_EQ);
  else
    emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  if (irt_isnum(kt)) {
    emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
    emit_condbranch(as, PPCI_BC, CC_GE, l_next);
    emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
    emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n));
  } else {
    if (!irt_ispri(kt)) {
      emit_ab(as, PPCI_CMPW, tmp2, key);
      emit_condbranch(as, PPCI_BC, CC_NE, l_next);
    }
    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
    if (!irt_ispri(kt))
      emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
  }
  emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it));
  *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) |
            (((char *)as->mcp-(char *)l_loop) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
    if (irref_isk(refkey))
      tmphash = ra_allock(as, khash, allow);
    emit_tab(as, PPCI_ADD, dest, dest, tmp1);
    emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
    emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    if (irref_isk(refkey)) {
      /* Nothing to do. */
    } else if (irt_isstr(kt)) {
      emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
      emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
      emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
      emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
      emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
      if (irt_isnum(kt)) {
        int32_t ofs = ra_spill(as, irkey);
        emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
        emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
        emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
        emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
      } else {
        emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
        emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS);
        emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16);
      }
    }
  }
}
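
/* For illustration (sketch): the scrambling above corresponds roughly to
** hashrot() in lj_tab.c, which it must match:
**
**   lo ^= hi; hi = lj_rol(hi, HASH_ROT1);
**   lo -= hi; hi = lj_rol(hi, HASH_ROT2);
**   hi ^= lo; hi -= lj_rol(lo, HASH_ROT3);
**   n = &t->node[hi & t->hmask];   ; main position
*/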

static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  lua_assert(ofs % sizeof(Node) == 0);
  if (ofs > 32736) {
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
  asm_guardcc(as, CC_NE);
  if (!irt_ispri(irkey->t)) {
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  }
  rset_clear(allow, type);
  if (irt_isnum(irkey->t)) {
    emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo);
    asm_guardcc(as, CC_NE);
    emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi);
  } else {
    if (ra_hasreg(key)) {
      emit_cmpi(as, key, irkey->i);  /* May use RID_TMP, i.e. type. */
      asm_guardcc(as, CC_NE);
    }
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t));
  }
  if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4);
  emit_tai(as, PPCI_LWZ, type, idx, kofs);
  if (ofs > 32736) {
    emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16);
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
}

static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
      emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
      emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    emit_tai(as, PPCI_LWZ, uv, func,
             (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  }
}

static void asm_fref(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir);
  lua_assert(!ra_used(ir));
}

static void asm_strref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);
  Reg r;
  if (irref_isk(ref)) {
    IRRef tmp = refk; refk = ref; ref = tmp;
  } else if (!irref_isk(refk)) {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);
      right = irr->r;
    } else if (mayfuse(as, irr->op2) &&
               irr->o == IR_ADD && irref_isk(irr->op2) &&
               checki16(ofs + IR(irr->op2)->i)) {
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    } else {
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    }
    emit_tai(as, PPCI_ADDI, dest, dest, ofs);
    emit_tab(as, PPCI_ADD, dest, left, right);
    return;
  }
  r = ra_alloc1(as, ref, RSET_GPR);
  ofs += IR(refk)->i;
  if (checki16(ofs))
    emit_tai(as, PPCI_ADDI, dest, r, ofs);
  else
    emit_tab(as, PPCI_ADD, dest, r,
             ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
}

/* -- Loads and stores ---------------------------------------------------- */

static PPCIns asm_fxloadins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: return PPCI_LBZ;  /* Needs sign-extension. */
  case IRT_U8: return PPCI_LBZ;
  case IRT_I16: return PPCI_LHA;
  case IRT_U16: return PPCI_LHZ;
  case IRT_NUM: return PPCI_LFD;
  case IRT_FLOAT: return PPCI_LFS;
  default: return PPCI_LWZ;
  }
}

static PPCIns asm_fxstoreins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return PPCI_STB;
  case IRT_I16: case IRT_U16: return PPCI_STH;
  case IRT_NUM: return PPCI_STFD;
  case IRT_FLOAT: return PPCI_STFS;
  default: return PPCI_STW;
  }
}

static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  PPCIns pi = asm_fxloadins(ir);
  Reg idx;
  int32_t ofs;
  if (ir->op1 == REF_NIL) {
    idx = RID_JGL;
    ofs = ir->op2 - 32768;
  } else {
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
        emit_tai(as, PPCI_ADDI, dest, idx, ofs);
        return;
      }
    }
    ofs = field_ofs[ir->op2];
  }
  lua_assert(!irt_isi8(ir->t));
  emit_tai(as, pi, dest, idx, ofs);
}

static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    PPCIns pi = asm_fxstoreins(ir);
    emit_tai(as, pi, src, idx, ofs);
  }
}

static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
  if (irt_isi8(ir->t))
    emit_as(as, PPCI_EXTSB, dest, dest);
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}

static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
{
  IRIns *irb;
  if (ir->r == RID_SINK)
    return;
  if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
      ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
    /* Fuse BSWAP with XSTORE to stwbrx. */
    Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
    asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
  } else {
    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);
  }
}

#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)

static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = AHUREF_LSX;
  if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    if (!irt_isnum(t)) ofs = 0;
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(t)) {
    Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
    asm_guardcc(as, CC_GE);
    emit_ab(as, PPCI_CMPLW, type, tisnum);
    if (ra_hasreg(dest)) {
      if (ofs == AHUREF_LSX) {
        tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
                                          (idx&255)), (idx>>8)));
        emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
      } else {
        emit_fai(as, PPCI_LFD, dest, idx, ofs);
      }
    }
  } else {
    asm_guardcc(as, CC_NE);
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(t));
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4);
  }
  if (ofs == AHUREF_LSX) {
    emit_tab(as, PPCI_LWZX, type, (idx&255), tmp);
    emit_slwi(as, tmp, (idx>>8), 3);
  } else {
    emit_tai(as, PPCI_LWZ, type, idx, ofs);
  }
}

static void asm_ahustore(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = AHUREF_LSX;
  if (ir->r == RID_SINK)
    return;
  if (irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    if (!irt_ispri(ir->t)) {
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
      ofs = 0;
    }
    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(ir->t)) {
    if (ofs == AHUREF_LSX) {
      emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_fai(as, PPCI_STFD, src, idx, ofs);
    }
  } else {
    if (ra_hasreg(src))
      emit_tai(as, PPCI_STW, src, idx, ofs+4);
    if (ofs == AHUREF_LSX) {
      emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_tai(as, PPCI_STW, type, idx, ofs);
    }
  }
}

static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 0 : 4);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  lua_assert(LJ_DUALNUM ||
             !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
        emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
        dest = ra_scratch(as, RSET_FPR);
        emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP);
        emit_fb(as, PPCI_FCTIWZ, dest, dest);
        t.irt = IRT_NUM;  /* Check for original type. */
      } else {
        Reg tmp = ra_scratch(as, allow);
        Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp));
        Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
        emit_fab(as, PPCI_FSUB, dest, dest, fbias);
        emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
        emit_lsptr(as, PPCI_LFS, (fbias & 31),
                   (void *)&as->J->k32[LJ_K32_2P52_2P31],
                   rset_clear(allow, hibias));
        emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
        emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
        emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000);
        dest = tmp;
        t.irt = IRT_INT;  /* Check for original type. */
      }
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
  rset_clear(allow, base);
dotypecheck:
  if (irt_isnum(t)) {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
      asm_guardcc(as, CC_GE);
      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
  } else {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
  }
  if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4);
}
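
/* For illustration (sketch): on this 32 bit big-endian target a stack slot
** or table value is an 8 byte TValue:
**
**   ofs+0: type tag (or the hi word of a double)
**   ofs+4: payload: gcr or int (or the lo word of a double)
**
** so the tag load and the payload load above are always 4 bytes apart, and
** number checks compare the tag word unsigned against LJ_TISNUM.
*/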

/* -- Allocations --------------------------------------------------------- */

#if LJ_HASFFI
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet drop = RSET_SCRATCH;
  lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));

  as->gcsteps++;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
  if (ra_used(ir))
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
    int32_t ofs = sizeof(GCcdata);
    lua_assert(sz == 4 || sz == 8);
    if (sz == 8) {
      ofs += 4;
      lua_assert((ir+1)->o == IR_HIOP);
    }
    for (;;) {
      Reg r = ra_alloc1(as, ir->op2, allow);
      emit_tai(as, PPCI_STW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; ir++;
    }
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
  emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif

/* -- Write barriers ------------------------------------------------------ */

static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg link = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  lua_assert(LJ_GC_BLACK == 0x04);
  emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28);  /* Clear black bit. */
  emit_getgl(as, link, gc.grayagain);
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK);
  emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked));
}

static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK);
  emit_condbranch(as, PPCI_BC, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  emit_tai(as, PPCI_LBZ, tmp, obj,
           (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
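
/* For illustration (sketch): asm_tbar above implements roughly the
** following, skipped entirely when the table is not black:
**
**   if (t->marked & LJ_GC_BLACK) {   ; LJ_GC_BLACK == 0x04
**     t->marked &= ~LJ_GC_BLACK;     ; rlwinm mark, mark, 0, 30, 28
**     t->gclist = g->gc.grayagain;
**     g->gc.grayagain = t;
**   }
*/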

/* -- Arithmetic and logic operations ------------------------------------- */

static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_FMUL)
    emit_fac(as, pi, dest, left, right);
  else
    emit_fab(as, pi, dest, left, right);
}

static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_fb(as, pi, dest, left);
}

static void asm_fpmath(ASMState *as, IRIns *ir)
{
  if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
    return;
  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
    asm_fpunary(as, ir, PPCI_FSQRT);
  else
    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
}

static void asm_add(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
      asm_fparith(as, ir, PPCI_FADD);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    PPCIns pi;
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
        pi = PPCI_ADDI;
        /* May fail due to spills/restores above, but simplifies the logic. */
        if (as->flagmcp == as->mcp) {
          as->flagmcp = NULL;
          as->mcp++;
          pi = PPCI_ADDICDOT;
        }
        emit_tai(as, pi, dest, left, k);
        return;
      } else if ((k & 0xffff) == 0) {
        emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16));
        return;
      } else if (!as->sectref) {
        emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16);
        emit_tai(as, PPCI_ADDI, dest, left, k);
        return;
      }
    }
    pi = PPCI_ADD;
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}

static void asm_sub(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
      asm_fparith(as, ir, PPCI_FSUB);
  } else {
    PPCIns pi = PPCI_SUBF;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left, right;
    if (irref_isk(ir->op1)) {
      int32_t k = IR(ir->op1)->i;
      if (checki16(k)) {
        right = ra_alloc1(as, ir->op2, RSET_GPR);
        emit_tai(as, PPCI_SUBFIC, dest, right, k);
        return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  }
}

static void asm_mul(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, PPCI_FMUL);
  } else {
    PPCIns pi = PPCI_MULLW;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
        emit_tai(as, PPCI_MULLI, dest, left, k);
        return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}

#define asm_div(as, ir)		asm_fparith(as, ir, PPCI_FDIV)
#define asm_mod(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_modi)
#define asm_pow(as, ir)		asm_callid(as, ir, IRCALL_lj_vm_powi)

static void asm_neg(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, PPCI_FNEG);
  } else {
    Reg dest, left;
    PPCIns pi = PPCI_NEG;
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    dest = ra_dest(as, ir, RSET_GPR);
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_tab(as, pi, dest, left, 0);
  }
}

#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)
#define asm_atan2(as, ir)	asm_callid(as, ir, IRCALL_atan2)
#define asm_ldexp(as, ir)	asm_callid(as, ir, IRCALL_ldexp)

static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest, left, right;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
  }
  asm_guardcc(as, CC_SO);
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; }
  emit_tab(as, pi|PPCF_DOT, dest, left, right);
}

#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)
#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)
#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)

#if LJ_HASFFI
static void asm_add64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  PPCIns pi = PPCI_ADDE;
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0)
      pi = PPCI_ADDZE;
    else if (k == -1)
      pi = PPCI_ADDME;
    else
      goto needright;
    right = 0;
  } else {
needright:
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  }
  emit_tab(as, pi, dest, left, right);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_ADDIC, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_tab(as, PPCI_ADDC, dest, left, right);
}

static void asm_sub64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR);
  PPCIns pi = PPCI_SUBFE;
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (k == 0)
      pi = PPCI_SUBFZE;
    else if (k == -1)
      pi = PPCI_SUBFME;
    else
      goto needleft;
    left = 0;
  } else {
needleft:
    left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  }
  emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  right = ra_alloc1(as, ir->op2, RSET_GPR);
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_SUBFIC, dest, right, k);
      return;
    }
  }
  left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  emit_tab(as, PPCI_SUBFC, dest, right, left);
}

static void asm_neg64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tab(as, PPCI_SUBFZE, dest, left, 0);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tai(as, PPCI_SUBFIC, dest, left, 0);
}
#endif

static void asm_bnot(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  PPCIns pi = PPCI_NOR;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (mayfuse(as, ir->op1)) {
    IRIns *irl = IR(ir->op1);
    if (irl->o == IR_BAND)
      pi ^= (PPCI_NOR ^ PPCI_NAND);
    else if (irl->o == IR_BXOR)
      pi ^= (PPCI_NOR ^ PPCI_EQV);
    else if (irl->o != IR_BOR)
      goto nofuse;
    left = ra_hintalloc(as, irl->op1, dest, RSET_GPR);
    right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left));
  } else {
nofuse:
    left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  }
  emit_asb(as, pi, dest, left, right);
}

static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRIns *irx;
  if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD &&
      ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) {
    /* Fuse BSWAP with XLOAD to lwbrx. */
    asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR);
  } else {
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    Reg tmp = dest;
    if (tmp == left) {
      tmp = RID_TMP;
      emit_mr(as, dest, RID_TMP);
    }
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23);
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7);
    emit_rotlwi(as, tmp, left, 8);
  }
}

/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
{
  IRIns *ir;
  Reg left;
  if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) &&
      irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) {
    int32_t sh = (IR(ir->op2)->i & 31);
    switch (ir->o) {
    case IR_BSHL:
      if ((mask & ((1u<<sh)-1))) goto nofuse;
      break;
    case IR_BSHR:
      if ((mask & ~((~0u)>>sh))) goto nofuse;
      sh = ((32-sh)&31);
      break;
    case IR_BROL:
      break;
    default:
      goto nofuse;
    }
    left = ra_alloc1(as, ir->op1, RSET_GPR);
    *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh);
    return;
  }
nofuse:
  left = ra_alloc1(as, ref, RSET_GPR);
  *--as->mcp = pi | PPCF_T(left);
}
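
/* For illustration (sketch): asm_band below picks an rlwinm encoding for a
** contiguous bitmask k. E.g. BAND(x, 0x00ffff00) becomes:
**
**   rlwinm dest, x, 0, 8, 23   ; rotate by 0, mask bits MB=8..ME=31-ffs(k)
**
** and asm_fuseandsh above folds a preceding shift/rotate by a constant into
** the rotate amount of the same instruction.
*/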

static void asm_band(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  IRRef lref = ir->op1;
  PPCIns dot = 0;
  IRRef op2;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k) {
      /* First check for a contiguous bitmask as used by rlwinm. */
      uint32_t s1 = lj_ffs((uint32_t)k);
      uint32_t k1 = ((uint32_t)k >> s1);
      if ((k1 & (k1+1)) == 0) {
        asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
                          PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1),
                      k, lref);
        return;
      }
      if (~(uint32_t)k) {
        uint32_t s2 = lj_ffs(~(uint32_t)k);
        uint32_t k2 = (~(uint32_t)k >> s2);
        if ((k2 & (k2+1)) == 0) {
          asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
                            PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)),
                        k, lref);
          return;
        }
      }
    }
    if (checku16(k)) {
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDIDOT, dest, left, k);
      return;
    } else if ((k & 0xffff) == 0) {
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16));
      return;
    }
  }
  op2 = ir->op2;
  if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) {
    dot ^= (PPCI_AND ^ PPCI_ANDC);
    op2 = IR(op2)->op1;
  }
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, PPCI_AND ^ dot, dest, left, right);
}

static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    Reg tmp = left;
    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
      if (!checku16(k)) {
        emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
        if ((k & 0xffff) == 0) return;
      }
      emit_asi(as, pik, dest, left, k);
      return;
    }
  }
  /* May fail due to spills/restores above, but simplifies the logic. */
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, pi, dest, left, right);
}

#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)

static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest, left;
  Reg dot = 0;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    int32_t shift = (IR(ir->op2)->i & 31);
    if (pik == 0)  /* SLWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift);
    else if (pik == 1)  /* SRWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31);
    else
      emit_asb(as, pik|dot, dest, left, shift);
  } else {
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_asb(as, pi|dot, dest, left, right);
  }
}

#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)
#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)
#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
#define asm_brol(as, ir) \
  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
		       PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
#define asm_bror(as, ir)	lua_assert(0)

static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
  if (irt_isnum(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg tmp = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    if (tmp == left || tmp == right)
      tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
                                        dest), left), right));
    emit_facb(as, PPCI_FSEL, dest, tmp,
              ismax ? left : right, ismax ? right : left);
    emit_fab(as, PPCI_FSUB, tmp, left, right);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg tmp1 = RID_TMP, tmp2 = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    if (tmp2 == left || tmp2 == right)
      tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR,
                                         dest), left), right));
    emit_tab(as, PPCI_ADD, dest, tmp2, right);
    emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1);
    emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1);
    emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1);
    emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000);
    emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000);
  }
}

#define asm_min(as, ir)		asm_min_max(as, ir, 0)
#define asm_max(as, ir)		asm_min_max(as, ir, 1)
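
/* For illustration (sketch): the integer min/max above is branchless. With
** both operands sign-flipped by XORIS (so signed order becomes unsigned),
** subfc/subfe compute mask = -(left < right), and then for min:
**
**   dest = right + ((left - right) & mask)
**
** max uses andc, i.e. (left - right) & ~mask, to pick the other operand.
*/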

/* -- Comparisons --------------------------------------------------------- */

#define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
#define CC_TWO		0x80	/* Check two flags for FP comparison. */

/* Map of comparisons to flags. ORDER IR. */
static const uint8_t asm_compmap[IR_ABC+1] = {
  /* op     int cc                 FP cc */
  /* LT  */ CC_GE               + (CC_GE<<4),
  /* GE  */ CC_LT               + (CC_LE<<4) + CC_TWO,
  /* LE  */ CC_GT               + (CC_GE<<4) + CC_TWO,
  /* GT  */ CC_LE               + (CC_LE<<4),
  /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO,
  /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4),
  /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4),
  /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO,
  /* EQ  */ CC_NE               + (CC_NE<<4),
  /* NE  */ CC_EQ               + (CC_EQ<<4),
  /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO  /* Same as UGT. */
};

static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
{
  Reg right, left = ra_alloc1(as, lref, RSET_GPR);
  if (irref_isk(rref)) {
    int32_t k = IR(rref)->i;
    if ((cc & CC_UNSIGNED) == 0) {  /* Signed comparison with constant. */
      if (checki16(k)) {
        emit_tai(as, PPCI_CMPWI, cr, left, k);
        /* Signed comparison with zero and referencing previous ins? */
        if (k == 0 && lref == as->curins-1)
          as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
        return;
      } else if ((cc & 3) == (CC_EQ & 3)) {  /* Use CMPLWI for EQ or NE. */
        if (checku16(k)) {
          emit_tai(as, PPCI_CMPLWI, cr, left, k);
          return;
        } else if (!as->sectref && ra_noreg(IR(rref)->r)) {
          emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k);
          emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16));
          return;
        }
      }
    } else {  /* Unsigned comparison with constant. */
      if (checku16(k)) {
        emit_tai(as, PPCI_CMPLWI, cr, left, k);
        return;
      }
    }
  }
  right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
  emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right);
}

static void asm_comp(ASMState *as, IRIns *ir)
{
  PPCCC cc = asm_compmap[ir->o];
  if (irt_isnum(ir->t)) {
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    asm_guardcc(as, (cc >> 4));
    if ((cc & CC_TWO))
      emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3));
    emit_fab(as, PPCI_FCMPU, 0, left, right);
  } else {
    IRRef lref = ir->op1, rref = ir->op2;
    if (irref_isk(lref) && !irref_isk(rref)) {
      /* Swap constants to the right (only for ABC). */
      IRRef tmp = lref; lref = rref; rref = tmp;
      if ((cc & 2) == 0) cc ^= 1;  /* LT <-> GT, LE <-> GE */
    }
    asm_guardcc(as, cc);
    asm_intcomp_(as, lref, rref, 0, cc);
  }
}

#define asm_equal(as, ir)	asm_comp(as, ir)

#if LJ_HASFFI
/* 64 bit integer comparisons. */
static void asm_comp64(ASMState *as, IRIns *ir)
{
  PPCCC cc = asm_compmap[(ir-1)->o];
  if ((cc&3) == (CC_EQ&3)) {
    asm_guardcc(as, cc);
    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR,
             (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3));
  } else {
    asm_guardcc(as, CC_EQ);
    emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1));
    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC,
             (CC_EQ&3), (CC_EQ&3), 4+(cc&3));
  }
  /* Loword comparison sets cr1 and is unsigned, except for equality. */
  asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4,
               cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED));
  /* Hiword comparison sets cr0. */
  asm_intcomp_(as, ir->op1, ir->op2, 0, cc);
  as->flagmcp = NULL;  /* Doesn't work here. */
}
#endif

/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */

/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
    as->curins--;  /* Always skip the CONV. */
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
    asm_comp64(as, ir);
    return;
  } else if ((ir-1)->o == IR_XSTORE) {
    as->curins--;  /* Handle both stores here. */
    if ((ir-1)->r != RID_SINK) {
      asm_xstore_(as, ir, 0);
      asm_xstore_(as, ir-1, 4);
    }
    return;
  }
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
  case IR_ADD: as->curins--; asm_add64(as, ir); break;
  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
  case IR_CALLN:
  case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  case IR_CNEWI:
    /* Nothing to do here. Handled by lo op itself. */
    break;
  default: lua_assert(0); break;
  }
#else
  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
#endif
}

/* -- Profiling ----------------------------------------------------------- */

static void asm_prof(ASMState *as, IRIns *ir)
{
  UNUSED(ir);
  asm_guardcc(as, CC_NE);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
               (int32_t)offsetof(global_State, hookmask));
}

/* -- Stack handling ------------------------------------------------------ */

/* Check Lua stack size for overflow. Use exit handler as fallback. */
static void asm_stack_check(ASMState *as, BCReg topslot,
                            IRIns *irp, RegSet allow, ExitNo exitno)
{
  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
  rset_clear(allow, pbase);
  tmp = allow ? rset_pickbot(allow) :
                (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
  emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno));
  if (allow == RSET_EMPTY)  /* Restore temp. register. */
    emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW);
  else
    ra_modified(as, tmp);
  emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot));
  emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp);
  emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
  if (pbase == RID_TMP)
    emit_getgl(as, RID_TMP, jit_base);
  emit_getgl(as, tmp, cur_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
}

/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
    } else {
      Reg type;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
      if (!irt_ispri(ir->t)) {
        Reg src = ra_alloc1(as, ref, allow);
        rset_clear(allow, src);
        emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4);
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        if (s == 0) continue;  /* Do not overwrite link to previous frame. */
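        /* For frame/continuation slots the type word holds the saved frame
        ** link instead of a type tag; flinks walks the snapshot map
        ** backwards, hence the post-decrement below.
        */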
        type = ra_allock(as, (int32_t)(*flinks--), allow);
      } else {
        type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      }
      emit_tai(as, PPCI_STW, type, RID_BASE, ofs);
    }
    checkmclim(as);
  }
  lua_assert(map + nent == flinks);
}

/* -- GC handling --------------------------------------------------------- */

/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  tmp = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end);
  emit_ab(as, PPCI_CMPLW, RID_TMP, tmp);
  emit_getgl(as, tmp, gc.threshold);
  emit_getgl(as, RID_TMP, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}

/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    /* asm_guardcc already inverted the cond branch and patched the final b. */
    p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2);
  } else {
    p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
  }
}

/* -- Head of trace ------------------------------------------------------- */

/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (r != RID_BASE)
      emit_mr(as, r, RID_BASE);
  }
}

/* Coalesce BASE register for a side trace. */
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      rset_clear(allow, irp->r);
      emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return allow;
}

/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
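/* Rough layout of the three tail slots reserved by asm_tail_prep() and
** patched below (mnemonics assume RID_TMP = r0 and RID_SP = r1):
**
**   p[-3]  addi r0, r1, CFRAME_SIZE+spadj   ; new back-chain value
**   p[-2]  stwu r0, spadj(r1)               ; store it, bump SP by spadj
**   p[-1]  b    <target>                    ; link to trace or interpreter
**
** If spadj is zero, the two adjustment slots are dropped by lowering
** mctop and only the branch remains.
*/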
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  int32_t spadj = as->T->spadjust;
  if (spadj == 0) {
    *--p = PPCI_NOP;
    *--p = PPCI_NOP;
    as->mctop = p;
  } else {
    /* Patch stack adjustment. */
    lua_assert(checki16(CFRAME_SIZE+spadj));
    p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
    p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
}

/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  if (as->loopref) {
    as->invmcp = as->mcp = p;
  } else {
    as->mcp = p-2;  /* Leave room for stack pointer adjustment. */
    as->invmcp = NULL;
  }
}

/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX*2];
  uint32_t i, nargs = CCI_XNARGS(ci);
  int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++)
    if (args[i] && irt_isfp(IR(args[i])->t)) {
      if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
    } else {
      if (ngpr > 0) ngpr--; else nslots++;
    }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
}

static void asm_setup_target(ASMState *as)
{
  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}

/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *px = exitstub_trace_addr(T, exitno);
  MCode *cstart = NULL;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  int clearso = 0;
  for (; p < pe; p++) {
    /* Look for exitstub branch, try to replace with branch to target. */
    uint32_t ins = *p;
    if ((ins & 0xfc000000u) == 0x40000000u &&
        ((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      if (((ins >> 16) & 3) == (CC_SO&3)) {
        clearso = sizeof(MCode);
        delta -= sizeof(MCode);
      }
      /* Many, but not all short-range branches can be patched directly. */
      if (((delta + 0x8000) >> 16) == 0) {
        *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) |
             ((delta & 0x8000) * (PPCF_Y/0x8000));
        if (!cstart) cstart = p;
      }
    } else if ((ins & 0xfc000000u) == PPCI_B &&
               ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      lua_assert(((delta + 0x02000000) >> 26) == 0);
      *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
      if (!cstart) cstart = p;
    }
  }
  {  /* Always patch long-range branch in exit stub itself. */
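    /* Short-range conditional branches that could not be redirected above
    ** still jump to the stub, so retargeting the stub's 24 bit 'b' below
    ** covers those, too.
    */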
    ptrdiff_t delta = (char *)target - (char *)px - clearso;
    lua_assert(((delta + 0x02000000) >> 26) == 0);
    *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
  }
  if (!cstart) cstart = px;
  lj_mcode_sync(cstart, px+1);
  if (clearso) {  /* Extend the current trace. Ugly workaround. */
    MCode *pp = J->cur.mcode;
    J->cur.szmcode += sizeof(MCode);
    *--pp = PPCI_MCRXR;  /* Clear SO flag. */
    J->cur.mcode = pp;
    lj_mcode_sync(pp, pp+1);
  }
  lj_mcode_patch(J, mcarea, 1);
}
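/* Note on the SO workaround above: mcrxr copies the XER summary-overflow,
** overflow and carry bits into a CR field and clears them in XER, so
** prepending it to the extended trace clears a stale SO bit before a
** patched branch-on-SO exit retests it.
*/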