lj_asm_arm.h (71006B)
1 /* 2 ** ARM IR assembler (SSA IR -> machine code). 3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h 4 */ 5 6 /* -- Register allocator extensions --------------------------------------- */ 7 8 /* Allocate a register with a hint. */ 9 static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) 10 { 11 Reg r = IR(ref)->r; 12 if (ra_noreg(r)) { 13 if (!ra_hashint(r) && !iscrossref(as, ref)) 14 ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ 15 r = ra_allocref(as, ref, allow); 16 } 17 ra_noweak(as, r); 18 return r; 19 } 20 21 /* Allocate a scratch register pair. */ 22 static Reg ra_scratchpair(ASMState *as, RegSet allow) 23 { 24 RegSet pick1 = as->freeset & allow; 25 RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN; 26 Reg r; 27 if (pick2) { 28 r = rset_picktop(pick2); 29 } else { 30 RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN; 31 if (pick) { 32 r = rset_picktop(pick); 33 ra_restore(as, regcost_ref(as->cost[r+1])); 34 } else { 35 pick = pick1 & (allow << 1) & RSET_GPRODD; 36 if (pick) { 37 r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick)-1])); 38 } else { 39 r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN); 40 ra_restore(as, regcost_ref(as->cost[r+1])); 41 } 42 } 43 } 44 lua_assert(rset_test(RSET_GPREVEN, r)); 45 ra_modified(as, r); 46 ra_modified(as, r+1); 47 RA_DBGX((as, "scratchpair $r $r", r, r+1)); 48 return r; 49 } 50 51 #if !LJ_SOFTFP 52 /* Allocate two source registers for three-operand instructions. */ 53 static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) 54 { 55 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); 56 Reg left = irl->r, right = irr->r; 57 if (ra_hasreg(left)) { 58 ra_noweak(as, left); 59 if (ra_noreg(right)) 60 right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); 61 else 62 ra_noweak(as, right); 63 } else if (ra_hasreg(right)) { 64 ra_noweak(as, right); 65 left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); 66 } else if (ra_hashint(right)) { 67 right = ra_allocref(as, ir->op2, allow); 68 left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); 69 } else { 70 left = ra_allocref(as, ir->op1, allow); 71 right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); 72 } 73 return left | (right << 8); 74 } 75 #endif 76 77 /* -- Guard handling ------------------------------------------------------ */ 78 79 /* Generate an exit stub group at the bottom of the reserved MCode memory. */ 80 static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) 81 { 82 MCode *mxp = as->mcbot; 83 int i; 84 if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) 85 asm_mclimit(as); 86 /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ 87 *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); 88 *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); 89 mxp++; 90 *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ 91 *mxp++ = group*EXITSTUBS_PER_GROUP; 92 for (i = 0; i < EXITSTUBS_PER_GROUP; i++) 93 *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); 94 lj_mcode_sync(as->mcbot, mxp); 95 lj_mcode_commitbot(as->J, mxp); 96 as->mcbot = mxp; 97 as->mclim = as->mcbot + MCLIM_REDZONE; 98 return mxp - EXITSTUBS_PER_GROUP; 99 } 100 101 /* Setup all needed exit stubs. 
*/ 102 static void asm_exitstub_setup(ASMState *as, ExitNo nexits) 103 { 104 ExitNo i; 105 if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) 106 lj_trace_err(as->J, LJ_TRERR_SNAPOV); 107 for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) 108 if (as->J->exitstubgroup[i] == NULL) 109 as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); 110 } 111 112 /* Emit conditional branch to exit for guard. */ 113 static void asm_guardcc(ASMState *as, ARMCC cc) 114 { 115 MCode *target = exitstub_addr(as->J, as->snapno); 116 MCode *p = as->mcp; 117 if (LJ_UNLIKELY(p == as->invmcp)) { 118 as->loopinv = 1; 119 *p = ARMI_BL | ((target-p-2) & 0x00ffffffu); 120 emit_branch(as, ARMF_CC(ARMI_B, cc^1), p+1); 121 return; 122 } 123 emit_branch(as, ARMF_CC(ARMI_BL, cc), target); 124 } 125 126 /* -- Operand fusion ------------------------------------------------------ */ 127 128 /* Limit linear search to this distance. Avoids O(n^2) behavior. */ 129 #define CONFLICT_SEARCH_LIM 31 130 131 /* Check if there's no conflicting instruction between curins and ref. */ 132 static int noconflict(ASMState *as, IRRef ref, IROp conflict) 133 { 134 IRIns *ir = as->ir; 135 IRRef i = as->curins; 136 if (i > ref + CONFLICT_SEARCH_LIM) 137 return 0; /* Give up, ref is too far away. */ 138 while (--i > ref) 139 if (ir[i].o == conflict) 140 return 0; /* Conflict found. */ 141 return 1; /* Ok, no conflict. */ 142 } 143 144 /* Fuse the array base of colocated arrays. */ 145 static int32_t asm_fuseabase(ASMState *as, IRRef ref) 146 { 147 IRIns *ir = IR(ref); 148 if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && 149 !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) 150 return (int32_t)sizeof(GCtab); 151 return 0; 152 } 153 154 /* Fuse array/hash/upvalue reference into register+offset operand. */ 155 static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, 156 int lim) 157 { 158 IRIns *ir = IR(ref); 159 if (ra_noreg(ir->r)) { 160 if (ir->o == IR_AREF) { 161 if (mayfuse(as, ref)) { 162 if (irref_isk(ir->op2)) { 163 IRRef tab = IR(ir->op1)->op1; 164 int32_t ofs = asm_fuseabase(as, tab); 165 IRRef refa = ofs ? tab : ir->op1; 166 ofs += 8*IR(ir->op2)->i; 167 if (ofs > -lim && ofs < lim) { 168 *ofsp = ofs; 169 return ra_alloc1(as, refa, allow); 170 } 171 } 172 } 173 } else if (ir->o == IR_HREFK) { 174 if (mayfuse(as, ref)) { 175 int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); 176 if (ofs < lim) { 177 *ofsp = ofs; 178 return ra_alloc1(as, ir->op1, allow); 179 } 180 } 181 } else if (ir->o == IR_UREFC) { 182 if (irref_isk(ir->op1)) { 183 GCfunc *fn = ir_kfunc(IR(ir->op1)); 184 int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); 185 *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ 186 return ra_allock(as, (ofs & ~255), allow); 187 } 188 } 189 } 190 *ofsp = 0; 191 return ra_alloc1(as, ref, allow); 192 } 193 194 /* Fuse m operand into arithmetic/logic instructions. */ 195 static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow) 196 { 197 IRIns *ir = IR(ref); 198 if (ra_hasreg(ir->r)) { 199 ra_noweak(as, ir->r); 200 return ARMF_M(ir->r); 201 } else if (irref_isk(ref)) { 202 uint32_t k = emit_isk12(ai, ir->i); 203 if (k) 204 return k; 205 } else if (mayfuse(as, ref)) { 206 if (ir->o >= IR_BSHL && ir->o <= IR_BROR) { 207 Reg m = ra_alloc1(as, ir->op1, allow); 208 ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL : 209 ir->o == IR_BSHR ? ARMSH_LSR : 210 ir->o == IR_BSAR ? 
ARMSH_ASR : ARMSH_ROR; 211 if (irref_isk(ir->op2)) { 212 return m | ARMF_SH(sh, (IR(ir->op2)->i & 31)); 213 } else { 214 Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m)); 215 return m | ARMF_RSH(sh, s); 216 } 217 } else if (ir->o == IR_ADD && ir->op1 == ir->op2) { 218 Reg m = ra_alloc1(as, ir->op1, allow); 219 return m | ARMF_SH(ARMSH_LSL, 1); 220 } 221 } 222 return ra_allocref(as, ref, allow); 223 } 224 225 /* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */ 226 static IRRef asm_fuselsl2(ASMState *as, IRRef ref) 227 { 228 IRIns *ir = IR(ref); 229 if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && 230 irref_isk(ir->op2) && IR(ir->op2)->i == 2) 231 return ir->op1; 232 return 0; /* No fusion. */ 233 } 234 235 /* Fuse XLOAD/XSTORE reference into load/store operand. */ 236 static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, 237 RegSet allow, int32_t ofs) 238 { 239 IRIns *ir = IR(ref); 240 Reg base; 241 if (ra_noreg(ir->r) && canfuse(as, ir)) { 242 int32_t lim = (!LJ_SOFTFP && (ai & 0x08000000)) ? 1024 : 243 (ai & 0x04000000) ? 4096 : 256; 244 if (ir->o == IR_ADD) { 245 int32_t ofs2; 246 if (irref_isk(ir->op2) && 247 (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim && 248 (!(!LJ_SOFTFP && (ai & 0x08000000)) || !(ofs2 & 3))) { 249 ofs = ofs2; 250 ref = ir->op1; 251 } else if (ofs == 0 && !(!LJ_SOFTFP && (ai & 0x08000000))) { 252 IRRef lref = ir->op1, rref = ir->op2; 253 Reg rn, rm; 254 if ((ai & 0x04000000)) { 255 IRRef sref = asm_fuselsl2(as, rref); 256 if (sref) { 257 rref = sref; 258 ai |= ARMF_SH(ARMSH_LSL, 2); 259 } else if ((sref = asm_fuselsl2(as, lref)) != 0) { 260 lref = rref; 261 rref = sref; 262 ai |= ARMF_SH(ARMSH_LSL, 2); 263 } 264 } 265 rn = ra_alloc1(as, lref, allow); 266 rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); 267 if ((ai & 0x04000000)) ai |= ARMI_LS_R; 268 emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); 269 return; 270 } 271 } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { 272 lua_assert(ofs == 0); 273 ofs = (int32_t)sizeof(GCstr); 274 if (irref_isk(ir->op2)) { 275 ofs += IR(ir->op2)->i; 276 ref = ir->op1; 277 } else if (irref_isk(ir->op1)) { 278 ofs += IR(ir->op1)->i; 279 ref = ir->op2; 280 } else { 281 /* NYI: Fuse ADD with constant. */ 282 Reg rn = ra_alloc1(as, ir->op1, allow); 283 uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn)); 284 if ((ai & 0x04000000)) 285 emit_lso(as, ai, rd, rd, ofs); 286 else 287 emit_lsox(as, ai, rd, rd, ofs); 288 emit_dn(as, ARMI_ADD^m, rd, rn); 289 return; 290 } 291 if (ofs <= -lim || ofs >= lim) { 292 Reg rn = ra_alloc1(as, ref, allow); 293 Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); 294 if ((ai & 0x04000000)) ai |= ARMI_LS_R; 295 emit_dnm(as, ai|ARMI_LS_P|ARMI_LS_U, rd, rn, rm); 296 return; 297 } 298 } 299 } 300 base = ra_alloc1(as, ref, allow); 301 #if !LJ_SOFTFP 302 if ((ai & 0x08000000)) 303 emit_vlso(as, ai, rd, base, ofs); 304 else 305 #endif 306 if ((ai & 0x04000000)) 307 emit_lso(as, ai, rd, base, ofs); 308 else 309 emit_lsox(as, ai, rd, base, ofs); 310 } 311 312 #if !LJ_SOFTFP 313 /* Fuse to multiply-add/sub instruction. 
*/ 314 static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) 315 { 316 IRRef lref = ir->op1, rref = ir->op2; 317 IRIns *irm; 318 if (lref != rref && 319 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && 320 ra_noreg(irm->r)) || 321 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && 322 (rref = lref, ai = air, ra_noreg(irm->r))))) { 323 Reg dest = ra_dest(as, ir, RSET_FPR); 324 Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); 325 Reg right, left = ra_alloc2(as, irm, 326 rset_exclude(rset_exclude(RSET_FPR, dest), add)); 327 right = (left >> 8); left &= 255; 328 emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); 329 if (dest != add) emit_dm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); 330 return 1; 331 } 332 return 0; 333 } 334 #endif 335 336 /* -- Calls --------------------------------------------------------------- */ 337 338 /* Generate a call to a C function. */ 339 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 340 { 341 uint32_t n, nargs = CCI_XNARGS(ci); 342 int32_t ofs = 0; 343 #if LJ_SOFTFP 344 Reg gpr = REGARG_FIRSTGPR; 345 #else 346 Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0; 347 #endif 348 if ((void *)ci->func) 349 emit_call(as, (void *)ci->func); 350 #if !LJ_SOFTFP 351 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) 352 as->cost[gpr] = REGCOST(~0u, ASMREF_L); 353 gpr = REGARG_FIRSTGPR; 354 #endif 355 for (n = 0; n < nargs; n++) { /* Setup args. */ 356 IRRef ref = args[n]; 357 IRIns *ir = IR(ref); 358 #if !LJ_SOFTFP 359 if (ref && irt_isfp(ir->t)) { 360 RegSet of = as->freeset; 361 Reg src; 362 if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { 363 if (irt_isnum(ir->t)) { 364 if (fpr <= REGARG_LASTFPR) { 365 ra_leftov(as, fpr, ref); 366 fpr++; 367 continue; 368 } 369 } else if (fprodd) { /* Ick. */ 370 src = ra_alloc1(as, ref, RSET_FPR); 371 emit_dm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00400000); 372 fprodd = 0; 373 continue; 374 } else if (fpr <= REGARG_LASTFPR) { 375 ra_leftov(as, fpr, ref); 376 fprodd = fpr++; 377 continue; 378 } 379 /* Workaround to protect argument GPRs from being used for remat. */ 380 as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); 381 src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ 382 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 383 fprodd = 0; 384 goto stackfp; 385 } 386 /* Workaround to protect argument GPRs from being used for remat. */ 387 as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); 388 src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ 389 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 390 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; 391 if (gpr <= REGARG_LASTGPR) { 392 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ 393 if (irt_isnum(ir->t)) { 394 lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ 395 emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); 396 gpr += 2; 397 } else { 398 emit_dn(as, ARMI_VMOV_R_S, gpr, (src & 15)); 399 gpr++; 400 } 401 } else { 402 stackfp: 403 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; 404 emit_spstore(as, ir, src, ofs); 405 ofs += irt_isnum(ir->t) ? 8 : 4; 406 } 407 } else 408 #endif 409 { 410 if (gpr <= REGARG_LASTGPR) { 411 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. 
*/ 412 if (ref) ra_leftov(as, gpr, ref); 413 gpr++; 414 } else { 415 if (ref) { 416 Reg r = ra_alloc1(as, ref, RSET_GPR); 417 emit_spstore(as, ir, r, ofs); 418 } 419 ofs += 4; 420 } 421 } 422 } 423 } 424 425 /* Setup result reg/sp for call. Evict scratch regs. */ 426 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) 427 { 428 RegSet drop = RSET_SCRATCH; 429 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 430 if (ra_hasreg(ir->r)) 431 rset_clear(drop, ir->r); /* Dest reg handled below. */ 432 if (hiop && ra_hasreg((ir+1)->r)) 433 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 434 ra_evictset(as, drop); /* Evictions must be performed first. */ 435 if (ra_used(ir)) { 436 lua_assert(!irt_ispri(ir->t)); 437 if (!LJ_SOFTFP && irt_isfp(ir->t)) { 438 if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { 439 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); 440 if (irt_isnum(ir->t)) 441 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest); 442 else 443 emit_dn(as, ARMI_VMOV_S_R, RID_RET, dest); 444 } else { 445 ra_destreg(as, ir, RID_FPRET); 446 } 447 } else if (hiop) { 448 ra_destpair(as, ir); 449 } else { 450 ra_destreg(as, ir, RID_RET); 451 } 452 } 453 UNUSED(ci); 454 } 455 456 static void asm_callx(ASMState *as, IRIns *ir) 457 { 458 IRRef args[CCI_NARGS_MAX*2]; 459 CCallInfo ci; 460 IRRef func; 461 IRIns *irf; 462 ci.flags = asm_callx_flags(as, ir); 463 asm_collectargs(as, ir, &ci, args); 464 asm_setupresult(as, ir, &ci); 465 func = ir->op2; irf = IR(func); 466 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 467 if (irref_isk(func)) { /* Call to constant address. */ 468 ci.func = (ASMFunction)(void *)(irf->i); 469 } else { /* Need a non-argument register for indirect calls. */ 470 Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12+1)); 471 emit_m(as, ARMI_BLXr, freg); 472 ci.func = (ASMFunction)(void *)0; 473 } 474 asm_gencall(as, &ci, args); 475 } 476 477 /* -- Returns ------------------------------------------------------------- */ 478 479 /* Return to lower frame. Guard that it goes to the right spot. */ 480 static void asm_retf(ASMState *as, IRIns *ir) 481 { 482 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 483 void *pc = ir_kptr(IR(ir->op2)); 484 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); 485 as->topslot -= (BCReg)delta; 486 if ((int32_t)as->topslot < 0) as->topslot = 0; 487 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 488 /* Need to force a spill on REF_BASE now to update the stack slot. 
*/ 489 emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE))); 490 emit_setgl(as, base, jit_base); 491 emit_addptr(as, base, -8*delta); 492 asm_guardcc(as, CC_NE); 493 emit_nm(as, ARMI_CMP, RID_TMP, 494 ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); 495 emit_lso(as, ARMI_LDR, RID_TMP, base, -4); 496 } 497 498 /* -- Type conversions ---------------------------------------------------- */ 499 500 #if !LJ_SOFTFP 501 static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 502 { 503 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 504 Reg dest = ra_dest(as, ir, RSET_GPR); 505 asm_guardcc(as, CC_NE); 506 emit_d(as, ARMI_VMRS, 0); 507 emit_dm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); 508 emit_dm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); 509 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 510 emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); 511 } 512 513 static void asm_tobit(ASMState *as, IRIns *ir) 514 { 515 RegSet allow = RSET_FPR; 516 Reg left = ra_alloc1(as, ir->op1, allow); 517 Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); 518 Reg tmp = ra_scratch(as, rset_clear(allow, right)); 519 Reg dest = ra_dest(as, ir, RSET_GPR); 520 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 521 emit_dnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); 522 } 523 #endif 524 525 static void asm_conv(ASMState *as, IRIns *ir) 526 { 527 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 528 #if !LJ_SOFTFP 529 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 530 #endif 531 IRRef lref = ir->op1; 532 /* 64 bit integer conversions are handled by SPLIT. */ 533 lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); 534 #if LJ_SOFTFP 535 /* FP conversions are handled by SPLIT. */ 536 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); 537 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ 538 #else 539 lua_assert(irt_type(ir->t) != st); 540 if (irt_isfp(ir->t)) { 541 Reg dest = ra_dest(as, ir, RSET_FPR); 542 if (stfp) { /* FP to FP conversion. */ 543 emit_dm(as, st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32, 544 (dest & 15), (ra_alloc1(as, lref, RSET_FPR) & 15)); 545 } else { /* Integer to FP conversion. */ 546 Reg left = ra_alloc1(as, lref, RSET_GPR); 547 ARMIns ai = irt_isfloat(ir->t) ? 548 (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) : 549 (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32); 550 emit_dm(as, ai, (dest & 15), (dest & 15)); 551 emit_dn(as, ARMI_VMOV_S_R, left, (dest & 15)); 552 } 553 } else if (stfp) { /* FP to integer conversion. */ 554 if (irt_isguard(ir->t)) { 555 /* Checked conversions are only supported from number to int. */ 556 lua_assert(irt_isint(ir->t) && st == IRT_NUM); 557 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 558 } else { 559 Reg left = ra_alloc1(as, lref, RSET_FPR); 560 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 561 Reg dest = ra_dest(as, ir, RSET_GPR); 562 ARMIns ai; 563 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 564 ai = irt_isint(ir->t) ? 565 (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : 566 (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); 567 emit_dm(as, ai, (tmp & 15), (left & 15)); 568 } 569 } else 570 #endif 571 { 572 Reg dest = ra_dest(as, ir, RSET_GPR); 573 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 574 Reg left = ra_alloc1(as, lref, RSET_GPR); 575 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 576 if ((as->flags & JIT_F_ARMV6)) { 577 ARMIns ai = st == IRT_I8 ? 
ARMI_SXTB : 578 st == IRT_U8 ? ARMI_UXTB : 579 st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; 580 emit_dm(as, ai, dest, left); 581 } else if (st == IRT_U8) { 582 emit_dn(as, ARMI_AND|ARMI_K12|255, dest, left); 583 } else { 584 uint32_t shift = st == IRT_I8 ? 24 : 16; 585 ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; 586 emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, RID_TMP); 587 emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); 588 } 589 } else { /* Handle 32/32 bit no-op (cast). */ 590 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ 591 } 592 } 593 } 594 595 static void asm_strto(ASMState *as, IRIns *ir) 596 { 597 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 598 IRRef args[2]; 599 Reg rlo = 0, rhi = 0, tmp; 600 int destused = ra_used(ir); 601 int32_t ofs = 0; 602 ra_evictset(as, RSET_SCRATCH); 603 #if LJ_SOFTFP 604 if (destused) { 605 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && 606 (ir->s & 1) == 0 && ir->s + 1 == (ir+1)->s) { 607 int i; 608 for (i = 0; i < 2; i++) { 609 Reg r = (ir+i)->r; 610 if (ra_hasreg(r)) { 611 ra_free(as, r); 612 ra_modified(as, r); 613 emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); 614 } 615 } 616 ofs = sps_scale(ir->s); 617 destused = 0; 618 } else { 619 rhi = ra_dest(as, ir+1, RSET_GPR); 620 rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); 621 } 622 } 623 asm_guardcc(as, CC_EQ); 624 if (destused) { 625 emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); 626 emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); 627 } 628 #else 629 UNUSED(rhi); 630 if (destused) { 631 if (ra_hasspill(ir->s)) { 632 ofs = sps_scale(ir->s); 633 destused = 0; 634 if (ra_hasreg(ir->r)) { 635 ra_free(as, ir->r); 636 ra_modified(as, ir->r); 637 emit_spload(as, ir, ir->r, ofs); 638 } 639 } else { 640 rlo = ra_dest(as, ir, RSET_FPR); 641 } 642 } 643 asm_guardcc(as, CC_EQ); 644 if (destused) 645 emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0); 646 #endif 647 emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET); /* Test return status. */ 648 args[0] = ir->op1; /* GCstr *str */ 649 args[1] = ASMREF_TMP1; /* TValue *n */ 650 asm_gencall(as, ci, args); 651 tmp = ra_releasetmp(as, ASMREF_TMP1); 652 if (ofs == 0) 653 emit_dm(as, ARMI_MOV, tmp, RID_SP); 654 else 655 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 656 } 657 658 /* -- Memory references --------------------------------------------------- */ 659 660 /* Get pointer to TValue. */ 661 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 662 { 663 IRIns *ir = IR(ref); 664 if (irt_isnum(ir->t)) { 665 if (irref_isk(ref)) { 666 /* Use the number constant itself as a TValue. */ 667 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 668 } else { 669 #if LJ_SOFTFP 670 lua_assert(0); 671 #else 672 /* Otherwise force a spill and use the spill slot. */ 673 emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); 674 #endif 675 } 676 } else { 677 /* Otherwise use [sp] and [sp+4] to hold the TValue. 
*/ 678 RegSet allow = rset_exclude(RSET_GPR, dest); 679 Reg type; 680 emit_dm(as, ARMI_MOV, dest, RID_SP); 681 if (!irt_ispri(ir->t)) { 682 Reg src = ra_alloc1(as, ref, allow); 683 emit_lso(as, ARMI_STR, src, RID_SP, 0); 684 } 685 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) 686 type = ra_alloc1(as, ref+1, allow); 687 else 688 type = ra_allock(as, irt_toitype(ir->t), allow); 689 emit_lso(as, ARMI_STR, type, RID_SP, 4); 690 } 691 } 692 693 static void asm_aref(ASMState *as, IRIns *ir) 694 { 695 Reg dest = ra_dest(as, ir, RSET_GPR); 696 Reg idx, base; 697 if (irref_isk(ir->op2)) { 698 IRRef tab = IR(ir->op1)->op1; 699 int32_t ofs = asm_fuseabase(as, tab); 700 IRRef refa = ofs ? tab : ir->op1; 701 uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i); 702 if (k) { 703 base = ra_alloc1(as, refa, RSET_GPR); 704 emit_dn(as, ARMI_ADD^k, dest, base); 705 return; 706 } 707 } 708 base = ra_alloc1(as, ir->op1, RSET_GPR); 709 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); 710 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, base, idx); 711 } 712 713 /* Inlined hash lookup. Specialized for key type and for const keys. 714 ** The equivalent C code is: 715 ** Node *n = hashkey(t, key); 716 ** do { 717 ** if (lj_obj_equal(&n->key, key)) return &n->val; 718 ** } while ((n = nextnode(n))); 719 ** return niltv(L); 720 */ 721 static void asm_href(ASMState *as, IRIns *ir, IROp merge) 722 { 723 RegSet allow = RSET_GPR; 724 int destused = ra_used(ir); 725 Reg dest = ra_dest(as, ir, allow); 726 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 727 Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP; 728 IRRef refkey = ir->op2; 729 IRIns *irkey = IR(refkey); 730 IRType1 kt = irkey->t; 731 int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt)); 732 uint32_t khash; 733 MCLabel l_end, l_loop; 734 rset_clear(allow, tab); 735 if (!irref_isk(refkey) || irt_isstr(kt)) { 736 #if LJ_SOFTFP 737 key = ra_alloc1(as, refkey, allow); 738 rset_clear(allow, key); 739 if (irkey[1].o == IR_HIOP) { 740 if (ra_hasreg((irkey+1)->r)) { 741 keynumhi = (irkey+1)->r; 742 keyhi = RID_TMP; 743 ra_noweak(as, keynumhi); 744 } else { 745 keyhi = keynumhi = ra_allocref(as, refkey+1, allow); 746 } 747 rset_clear(allow, keynumhi); 748 khi = 0; 749 } 750 #else 751 if (irt_isnum(kt)) { 752 key = ra_scratch(as, allow); 753 rset_clear(allow, key); 754 keyhi = keynumhi = ra_scratch(as, allow); 755 rset_clear(allow, keyhi); 756 khi = 0; 757 } else { 758 key = ra_alloc1(as, refkey, allow); 759 rset_clear(allow, key); 760 } 761 #endif 762 } else if (irt_isnum(kt)) { 763 int32_t val = (int32_t)ir_knum(irkey)->u32.lo; 764 k = emit_isk12(ARMI_CMP, val); 765 if (!k) { 766 key = ra_allock(as, val, allow); 767 rset_clear(allow, key); 768 } 769 val = (int32_t)ir_knum(irkey)->u32.hi; 770 khi = emit_isk12(ARMI_CMP, val); 771 if (!khi) { 772 keyhi = ra_allock(as, val, allow); 773 rset_clear(allow, keyhi); 774 } 775 } else if (!irt_ispri(kt)) { 776 k = emit_isk12(ARMI_CMP, irkey->i); 777 if (!k) { 778 key = ra_alloc1(as, refkey, allow); 779 rset_clear(allow, key); 780 } 781 } 782 if (!irt_ispri(kt)) 783 tmp = ra_scratchpair(as, allow); 784 785 /* Key not found in chain: jump to exit (if merged) or load niltv. */ 786 l_end = emit_label(as); 787 as->invmcp = NULL; 788 if (merge == IR_NE) 789 asm_guardcc(as, CC_AL); 790 else if (destused) 791 emit_loada(as, dest, niltvg(J2G(as->J))); 792 793 /* Follow hash chain until the end. 
*/ 794 l_loop = --as->mcp; 795 emit_n(as, ARMI_CMP|ARMI_K12|0, dest); 796 emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next)); 797 798 /* Type and value comparison. */ 799 if (merge == IR_EQ) 800 asm_guardcc(as, CC_EQ); 801 else 802 emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); 803 if (!irt_ispri(kt)) { 804 emit_nm(as, ARMF_CC(ARMI_CMP, CC_EQ)^k, tmp, key); 805 emit_nm(as, ARMI_CMP^khi, tmp+1, keyhi); 806 emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key)); 807 } else { 808 emit_n(as, ARMI_CMP^khi, tmp); 809 emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it)); 810 } 811 *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); 812 813 /* Load main position relative to tab->node into dest. */ 814 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 815 if (khash == 0) { 816 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 817 } else { 818 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); 819 emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); 820 if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ 821 emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); 822 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 823 emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); 824 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); 825 } else if (irref_isk(refkey)) { 826 emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, 827 rset_exclude(rset_exclude(RSET_GPR, tab), dest)); 828 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 829 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); 830 } else { /* Must match with hash*() in lj_tab.c. */ 831 if (ra_hasreg(keynumhi)) { /* Canonicalize +-0.0 to 0.0. */ 832 if (keyhi == RID_TMP) 833 emit_dm(as, ARMF_CC(ARMI_MOV, CC_NE), keyhi, keynumhi); 834 emit_d(as, ARMF_CC(ARMI_MOV, CC_EQ)|ARMI_K12|0, keyhi); 835 } 836 emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP); 837 emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT3), tmp, tmp, tmp+1); 838 emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); 839 emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 32-((HASH_ROT2+HASH_ROT1)&31)), 840 tmp, tmp+1, tmp); 841 emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); 842 emit_dnm(as, ARMI_SUB|ARMF_SH(ARMSH_ROR, 32-HASH_ROT1), tmp+1, tmp+1, tmp); 843 if (ra_hasreg(keynumhi)) { 844 emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); 845 emit_dnm(as, ARMI_ORR|ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ 846 emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); 847 #if !LJ_SOFTFP 848 emit_dnm(as, ARMI_VMOV_RR_D, key, keynumhi, 849 (ra_alloc1(as, refkey, RSET_FPR) & 15)); 850 #endif 851 } else { 852 emit_dnm(as, ARMI_EOR, tmp+1, tmp, key); 853 emit_opk(as, ARMI_ADD, tmp, key, (int32_t)HASH_BIAS, 854 rset_exclude(rset_exclude(RSET_GPR, tab), key)); 855 } 856 } 857 } 858 } 859 860 static void asm_hrefk(ASMState *as, IRIns *ir) 861 { 862 IRIns *kslot = IR(ir->op2); 863 IRIns *irkey = IR(kslot->op1); 864 int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); 865 int32_t kofs = ofs + (int32_t)offsetof(Node, key); 866 Reg dest = (ra_used(ir) || ofs > 4095) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; 867 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 868 Reg key = RID_NONE, type = RID_TMP, idx = node; 869 RegSet allow = rset_exclude(RSET_GPR, node); 870 lua_assert(ofs % sizeof(Node) == 0); 871 if (ofs > 4095) { 872 idx = dest; 873 rset_clear(allow, dest); 874 kofs = (int32_t)offsetof(Node, key); 875 } else if (ra_hasreg(dest)) { 876 emit_opk(as, ARMI_ADD, dest, node, ofs, allow); 877 } 878 asm_guardcc(as, CC_NE); 879 if (!irt_ispri(irkey->t)) { 880 RegSet even = (as->freeset & allow); 881 even = even & (even >> 1) & RSET_GPREVEN; 882 if (even) { 883 key = ra_scratch(as, even); 884 if (rset_test(as->freeset, key+1)) { 885 type = key+1; 886 ra_modified(as, type); 887 } 888 } else { 889 key = ra_scratch(as, allow); 890 } 891 rset_clear(allow, key); 892 } 893 rset_clear(allow, type); 894 if (irt_isnum(irkey->t)) { 895 emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, type, 896 (int32_t)ir_knum(irkey)->u32.hi, allow); 897 emit_opk(as, ARMI_CMP, 0, key, 898 (int32_t)ir_knum(irkey)->u32.lo, allow); 899 } else { 900 if (ra_hasreg(key)) 901 emit_opk(as, ARMF_CC(ARMI_CMP, CC_EQ), 0, key, irkey->i, allow); 902 emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype(irkey->t), type); 903 } 904 emit_lso(as, ARMI_LDR, type, idx, kofs+4); 905 if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs); 906 if (ofs > 4095) 907 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 908 } 909 910 static void asm_uref(ASMState *as, IRIns *ir) 911 { 912 Reg dest = ra_dest(as, ir, RSET_GPR); 913 if (irref_isk(ir->op1)) { 914 GCfunc *fn = ir_kfunc(IR(ir->op1)); 915 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 916 emit_lsptr(as, ARMI_LDR, dest, v); 917 } else { 918 Reg uv = ra_scratch(as, RSET_GPR); 919 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 920 if (ir->o == IR_UREFC) { 921 asm_guardcc(as, CC_NE); 922 emit_n(as, ARMI_CMP|ARMI_K12|1, RID_TMP); 923 emit_opk(as, ARMI_ADD, dest, uv, 924 (int32_t)offsetof(GCupval, tv), RSET_GPR); 925 emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); 926 } else { 927 emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); 928 } 929 emit_lso(as, ARMI_LDR, uv, func, 930 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); 931 } 932 } 933 934 static void asm_fref(ASMState *as, IRIns *ir) 935 { 936 UNUSED(as); UNUSED(ir); 937 lua_assert(!ra_used(ir)); 938 } 939 940 static void asm_strref(ASMState *as, IRIns *ir) 941 { 942 Reg dest = ra_dest(as, ir, RSET_GPR); 943 IRRef ref = ir->op2, refk = ir->op1; 944 Reg r; 945 if (irref_isk(ref)) { 946 IRRef tmp = refk; refk = ref; ref = tmp; 947 } else if (!irref_isk(refk)) { 948 uint32_t k, m = ARMI_K12|sizeof(GCstr); 949 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); 950 IRIns *irr = IR(ir->op2); 951 if (ra_hasreg(irr->r)) { 952 ra_noweak(as, irr->r); 953 right = irr->r; 954 } else if (mayfuse(as, irr->op2) && 955 irr->o == IR_ADD && irref_isk(irr->op2) && 956 (k = emit_isk12(ARMI_ADD, 957 (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) { 958 m = k; 959 right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); 960 } else { 961 right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); 962 } 963 emit_dn(as, ARMI_ADD^m, dest, dest); 964 emit_dnm(as, ARMI_ADD, dest, left, right); 965 return; 966 } 967 r = ra_alloc1(as, ref, RSET_GPR); 968 emit_opk(as, ARMI_ADD, dest, r, 969 sizeof(GCstr) + IR(refk)->i, rset_exclude(RSET_GPR, r)); 970 } 971 972 /* -- Loads and stores ---------------------------------------------------- */ 973 974 static ARMIns asm_fxloadins(IRIns *ir) 975 
{ 976 switch (irt_type(ir->t)) { 977 case IRT_I8: return ARMI_LDRSB; 978 case IRT_U8: return ARMI_LDRB; 979 case IRT_I16: return ARMI_LDRSH; 980 case IRT_U16: return ARMI_LDRH; 981 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; 982 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; 983 default: return ARMI_LDR; 984 } 985 } 986 987 static ARMIns asm_fxstoreins(IRIns *ir) 988 { 989 switch (irt_type(ir->t)) { 990 case IRT_I8: case IRT_U8: return ARMI_STRB; 991 case IRT_I16: case IRT_U16: return ARMI_STRH; 992 case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; 993 case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; 994 default: return ARMI_STR; 995 } 996 } 997 998 static void asm_fload(ASMState *as, IRIns *ir) 999 { 1000 if (ir->op1 == REF_NIL) { 1001 lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */ 1002 } else { 1003 Reg dest = ra_dest(as, ir, RSET_GPR); 1004 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1005 ARMIns ai = asm_fxloadins(ir); 1006 int32_t ofs; 1007 if (ir->op2 == IRFL_TAB_ARRAY) { 1008 ofs = asm_fuseabase(as, ir->op1); 1009 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1010 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); 1011 return; 1012 } 1013 } 1014 ofs = field_ofs[ir->op2]; 1015 if ((ai & 0x04000000)) 1016 emit_lso(as, ai, dest, idx, ofs); 1017 else 1018 emit_lsox(as, ai, dest, idx, ofs); 1019 } 1020 } 1021 1022 static void asm_fstore(ASMState *as, IRIns *ir) 1023 { 1024 if (ir->r != RID_SINK) { 1025 Reg src = ra_alloc1(as, ir->op2, RSET_GPR); 1026 IRIns *irf = IR(ir->op1); 1027 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 1028 int32_t ofs = field_ofs[irf->op2]; 1029 ARMIns ai = asm_fxstoreins(ir); 1030 if ((ai & 0x04000000)) 1031 emit_lso(as, ai, src, idx, ofs); 1032 else 1033 emit_lsox(as, ai, src, idx, ofs); 1034 } 1035 } 1036 1037 static void asm_xload(ASMState *as, IRIns *ir) 1038 { 1039 Reg dest = ra_dest(as, ir, 1040 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1041 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1042 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1043 } 1044 1045 static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) 1046 { 1047 if (ir->r != RID_SINK) { 1048 Reg src = ra_alloc1(as, ir->op2, 1049 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); 1050 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 1051 rset_exclude(RSET_GPR, src), ofs); 1052 } 1053 } 1054 1055 #define asm_xstore(as, ir) asm_xstore_(as, ir, 0) 1056 1057 static void asm_ahuvload(ASMState *as, IRIns *ir) 1058 { 1059 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1060 IRType t = hiop ? IRT_NUM : irt_type(ir->t); 1061 Reg dest = RID_NONE, type = RID_NONE, idx; 1062 RegSet allow = RSET_GPR; 1063 int32_t ofs = 0; 1064 if (hiop && ra_used(ir+1)) { 1065 type = ra_dest(as, ir+1, allow); 1066 rset_clear(allow, type); 1067 } 1068 if (ra_used(ir)) { 1069 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1070 irt_isint(ir->t) || irt_isaddr(ir->t)); 1071 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1072 rset_clear(allow, dest); 1073 } 1074 idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1075 (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); 1076 if (!hiop || type == RID_NONE) { 1077 rset_clear(allow, idx); 1078 if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && 1079 rset_test((as->freeset & allow), dest+1)) { 1080 type = dest+1; 1081 ra_modified(as, type); 1082 } else { 1083 type = RID_TMP; 1084 } 1085 } 1086 asm_guardcc(as, t == IRT_NUM ? 
CC_HS : CC_NE); 1087 emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); 1088 if (ra_hasreg(dest)) { 1089 #if !LJ_SOFTFP 1090 if (t == IRT_NUM) 1091 emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs); 1092 else 1093 #endif 1094 emit_lso(as, ARMI_LDR, dest, idx, ofs); 1095 } 1096 emit_lso(as, ARMI_LDR, type, idx, ofs+4); 1097 } 1098 1099 static void asm_ahustore(ASMState *as, IRIns *ir) 1100 { 1101 if (ir->r != RID_SINK) { 1102 RegSet allow = RSET_GPR; 1103 Reg idx, src = RID_NONE, type = RID_NONE; 1104 int32_t ofs = 0; 1105 #if !LJ_SOFTFP 1106 if (irt_isnum(ir->t)) { 1107 src = ra_alloc1(as, ir->op2, RSET_FPR); 1108 idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); 1109 emit_vlso(as, ARMI_VSTR_D, src, idx, ofs); 1110 } else 1111 #endif 1112 { 1113 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1114 if (!irt_ispri(ir->t)) { 1115 src = ra_alloc1(as, ir->op2, allow); 1116 rset_clear(allow, src); 1117 } 1118 if (hiop) 1119 type = ra_alloc1(as, (ir+1)->op2, allow); 1120 else 1121 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1122 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); 1123 if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); 1124 emit_lso(as, ARMI_STR, type, idx, ofs+4); 1125 } 1126 } 1127 } 1128 1129 static void asm_sload(ASMState *as, IRIns *ir) 1130 { 1131 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); 1132 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1133 IRType t = hiop ? IRT_NUM : irt_type(ir->t); 1134 Reg dest = RID_NONE, type = RID_NONE, base; 1135 RegSet allow = RSET_GPR; 1136 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1137 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1138 #if LJ_SOFTFP 1139 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ 1140 if (hiop && ra_used(ir+1)) { 1141 type = ra_dest(as, ir+1, allow); 1142 rset_clear(allow, type); 1143 } 1144 #else 1145 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { 1146 dest = ra_scratch(as, RSET_FPR); 1147 asm_tointg(as, ir, dest); 1148 t = IRT_NUM; /* Continue with a regular number type check. */ 1149 } else 1150 #endif 1151 if (ra_used(ir)) { 1152 Reg tmp = RID_NONE; 1153 if ((ir->op2 & IRSLOAD_CONVERT)) 1154 tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); 1155 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || 1156 irt_isint(ir->t) || irt_isaddr(ir->t)); 1157 dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); 1158 rset_clear(allow, dest); 1159 base = ra_alloc1(as, REF_BASE, allow); 1160 if ((ir->op2 & IRSLOAD_CONVERT)) { 1161 if (t == IRT_INT) { 1162 emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); 1163 emit_dm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15)); 1164 t = IRT_NUM; /* Check for original type. */ 1165 } else { 1166 emit_dm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15)); 1167 emit_dn(as, ARMI_VMOV_S_R, tmp, (dest & 15)); 1168 t = IRT_INT; /* Check for original type. */ 1169 } 1170 dest = tmp; 1171 } 1172 goto dotypecheck; 1173 } 1174 base = ra_alloc1(as, REF_BASE, allow); 1175 dotypecheck: 1176 rset_clear(allow, base); 1177 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1178 if (ra_noreg(type)) { 1179 if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && 1180 rset_test((as->freeset & allow), dest+1)) { 1181 type = dest+1; 1182 ra_modified(as, type); 1183 } else { 1184 type = RID_TMP; 1185 } 1186 } 1187 asm_guardcc(as, t == IRT_NUM ? 
CC_HS : CC_NE); 1188 emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); 1189 } 1190 if (ra_hasreg(dest)) { 1191 #if !LJ_SOFTFP 1192 if (t == IRT_NUM) { 1193 if (ofs < 1024) { 1194 emit_vlso(as, ARMI_VLDR_D, dest, base, ofs); 1195 } else { 1196 if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); 1197 emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0); 1198 emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow); 1199 return; 1200 } 1201 } else 1202 #endif 1203 emit_lso(as, ARMI_LDR, dest, base, ofs); 1204 } 1205 if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs+4); 1206 } 1207 1208 /* -- Allocations --------------------------------------------------------- */ 1209 1210 #if LJ_HASFFI 1211 static void asm_cnew(ASMState *as, IRIns *ir) 1212 { 1213 CTState *cts = ctype_ctsG(J2G(as->J)); 1214 CTypeID id = (CTypeID)IR(ir->op1)->i; 1215 CTSize sz; 1216 CTInfo info = lj_ctype_info(cts, id, &sz); 1217 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1218 IRRef args[4]; 1219 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1220 RegSet drop = RSET_SCRATCH; 1221 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); 1222 1223 as->gcsteps++; 1224 if (ra_hasreg(ir->r)) 1225 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1226 ra_evictset(as, drop); 1227 if (ra_used(ir)) 1228 ra_destreg(as, ir, RID_RET); /* GCcdata * */ 1229 1230 /* Initialize immutable cdata object. */ 1231 if (ir->o == IR_CNEWI) { 1232 int32_t ofs = sizeof(GCcdata); 1233 lua_assert(sz == 4 || sz == 8); 1234 if (sz == 8) { 1235 ofs += 4; ir++; 1236 lua_assert(ir->o == IR_HIOP); 1237 } 1238 for (;;) { 1239 Reg r = ra_alloc1(as, ir->op2, allow); 1240 emit_lso(as, ARMI_STR, r, RID_RET, ofs); 1241 rset_clear(allow, r); 1242 if (ofs == sizeof(GCcdata)) break; 1243 ofs -= 4; ir--; 1244 } 1245 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ 1246 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; 1247 args[0] = ASMREF_L; /* lua_State *L */ 1248 args[1] = ir->op1; /* CTypeID id */ 1249 args[2] = ir->op2; /* CTSize sz */ 1250 args[3] = ASMREF_TMP1; /* CTSize align */ 1251 asm_gencall(as, ci, args); 1252 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); 1253 return; 1254 } 1255 1256 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1257 { 1258 uint32_t k = emit_isk12(ARMI_MOV, id); 1259 Reg r = k ? 
RID_R1 : ra_allock(as, id, allow); 1260 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1261 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1262 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1263 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1264 } 1265 args[0] = ASMREF_L; /* lua_State *L */ 1266 args[1] = ASMREF_TMP1; /* MSize size */ 1267 asm_gencall(as, ci, args); 1268 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1269 ra_releasetmp(as, ASMREF_TMP1)); 1270 } 1271 #else 1272 #define asm_cnew(as, ir) ((void)0) 1273 #endif 1274 1275 /* -- Write barriers ------------------------------------------------------ */ 1276 1277 static void asm_tbar(ASMState *as, IRIns *ir) 1278 { 1279 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); 1280 Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1281 Reg gr = ra_allock(as, i32ptr(J2G(as->J)), 1282 rset_exclude(rset_exclude(RSET_GPR, tab), link)); 1283 Reg mark = RID_TMP; 1284 MCLabel l_end = emit_label(as); 1285 emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist)); 1286 emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1287 emit_lso(as, ARMI_STR, tab, gr, 1288 (int32_t)offsetof(global_State, gc.grayagain)); 1289 emit_dn(as, ARMI_BIC|ARMI_K12|LJ_GC_BLACK, mark, mark); 1290 emit_lso(as, ARMI_LDR, link, gr, 1291 (int32_t)offsetof(global_State, gc.grayagain)); 1292 emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); 1293 emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_BLACK, mark); 1294 emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1295 } 1296 1297 static void asm_obar(ASMState *as, IRIns *ir) 1298 { 1299 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; 1300 IRRef args[2]; 1301 MCLabel l_end; 1302 Reg obj, val, tmp; 1303 /* No need for other object barriers (yet). */ 1304 lua_assert(IR(ir->op1)->o == IR_UREFC); 1305 ra_evictset(as, RSET_SCRATCH); 1306 l_end = emit_label(as); 1307 args[0] = ASMREF_TMP1; /* global_State *g */ 1308 args[1] = ir->op1; /* TValue *tv */ 1309 asm_gencall(as, ci, args); 1310 if ((l_end[-1] >> 28) == CC_AL) 1311 l_end[-1] = ARMF_CC(l_end[-1], CC_NE); 1312 else 1313 emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); 1314 ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1)); 1315 obj = IR(ir->op1)->r; 1316 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); 1317 emit_n(as, ARMF_CC(ARMI_TST, CC_NE)|ARMI_K12|LJ_GC_BLACK, tmp); 1318 emit_n(as, ARMI_TST|ARMI_K12|LJ_GC_WHITES, RID_TMP); 1319 val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); 1320 emit_lso(as, ARMI_LDRB, tmp, obj, 1321 (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); 1322 emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); 1323 } 1324 1325 /* -- Arithmetic and logic operations ------------------------------------- */ 1326 1327 #if !LJ_SOFTFP 1328 static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) 1329 { 1330 Reg dest = ra_dest(as, ir, RSET_FPR); 1331 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1332 right = (left >> 8); left &= 255; 1333 emit_dnm(as, ai, (dest & 15), (left & 15), (right & 15)); 1334 } 1335 1336 static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) 1337 { 1338 Reg dest = ra_dest(as, ir, RSET_FPR); 1339 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); 1340 emit_dm(as, ai, (dest & 15), (left & 15)); 1341 } 1342 1343 static void asm_callround(ASMState *as, IRIns *ir, int id) 1344 { 1345 /* The modified regs must match with the *.dasc implementation. 
*/ 1346 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| 1347 RID2RSET(RID_R3)|RID2RSET(RID_R12); 1348 RegSet of; 1349 Reg dest, src; 1350 ra_evictset(as, drop); 1351 dest = ra_dest(as, ir, RSET_FPR); 1352 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); 1353 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : 1354 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : 1355 (void *)lj_vm_trunc_sf); 1356 /* Workaround to protect argument GPRs from being used for remat. */ 1357 of = as->freeset; 1358 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); 1359 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); 1360 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ 1361 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); 1362 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); 1363 } 1364 1365 static void asm_fpmath(ASMState *as, IRIns *ir) 1366 { 1367 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) 1368 return; 1369 if (ir->op2 <= IRFPM_TRUNC) 1370 asm_callround(as, ir, ir->op2); 1371 else if (ir->op2 == IRFPM_SQRT) 1372 asm_fpunary(as, ir, ARMI_VSQRT_D); 1373 else 1374 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); 1375 } 1376 #endif 1377 1378 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) 1379 { 1380 IRIns *ir; 1381 if (irref_isk(rref)) 1382 return 0; /* Don't swap constants to the left. */ 1383 if (irref_isk(lref)) 1384 return 1; /* But swap constants to the right. */ 1385 ir = IR(rref); 1386 if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || 1387 (ir->o == IR_ADD && ir->op1 == ir->op2)) 1388 return 0; /* Don't swap fusable operands to the left. */ 1389 ir = IR(lref); 1390 if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || 1391 (ir->o == IR_ADD && ir->op1 == ir->op2)) 1392 return 1; /* But swap fusable operands to the right. */ 1393 return 0; /* Otherwise don't swap. */ 1394 } 1395 1396 static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) 1397 { 1398 IRRef lref = ir->op1, rref = ir->op2; 1399 Reg left, dest = ra_dest(as, ir, RSET_GPR); 1400 uint32_t m; 1401 if (asm_swapops(as, lref, rref)) { 1402 IRRef tmp = lref; lref = rref; rref = tmp; 1403 if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC) 1404 ai ^= (ARMI_SUB^ARMI_RSB); 1405 } 1406 left = ra_hintalloc(as, lref, dest, RSET_GPR); 1407 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); 1408 if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ 1409 asm_guardcc(as, CC_VS); 1410 ai |= ARMI_S; 1411 } 1412 emit_dn(as, ai^m, dest, left); 1413 } 1414 1415 static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) 1416 { 1417 if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ 1418 as->flagmcp = NULL; 1419 as->mcp++; 1420 ai |= ARMI_S; 1421 } 1422 asm_intop(as, ir, ai); 1423 } 1424 1425 static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1426 { 1427 Reg dest = ra_dest(as, ir, RSET_GPR); 1428 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1429 emit_dn(as, ai|ARMI_K12|0, dest, left); 1430 } 1431 1432 /* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ 1433 static void asm_intmul(ASMState *as, IRIns *ir) 1434 { 1435 Reg dest = ra_dest(as, ir, RSET_GPR); 1436 Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); 1437 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); 1438 Reg tmp = RID_NONE; 1439 /* ARMv5 restriction: dest != left and dest_hi != left. 
*/ 1440 if (dest == left && left != right) { left = right; right = dest; } 1441 if (irt_isguard(ir->t)) { /* IR_MULOV */ 1442 if (!(as->flags & JIT_F_ARMV6) && dest == left) 1443 tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left)); 1444 asm_guardcc(as, CC_NE); 1445 emit_nm(as, ARMI_TEQ|ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest); 1446 emit_dnm(as, ARMI_SMULL|ARMF_S(right), dest, RID_TMP, left); 1447 } else { 1448 if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP; 1449 emit_nm(as, ARMI_MUL|ARMF_S(right), dest, left); 1450 } 1451 /* Only need this for the dest == left == right case. */ 1452 if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); 1453 } 1454 1455 static void asm_add(ASMState *as, IRIns *ir) 1456 { 1457 #if !LJ_SOFTFP 1458 if (irt_isnum(ir->t)) { 1459 if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) 1460 asm_fparith(as, ir, ARMI_VADD_D); 1461 return; 1462 } 1463 #endif 1464 asm_intop_s(as, ir, ARMI_ADD); 1465 } 1466 1467 static void asm_sub(ASMState *as, IRIns *ir) 1468 { 1469 #if !LJ_SOFTFP 1470 if (irt_isnum(ir->t)) { 1471 if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) 1472 asm_fparith(as, ir, ARMI_VSUB_D); 1473 return; 1474 } 1475 #endif 1476 asm_intop_s(as, ir, ARMI_SUB); 1477 } 1478 1479 static void asm_mul(ASMState *as, IRIns *ir) 1480 { 1481 #if !LJ_SOFTFP 1482 if (irt_isnum(ir->t)) { 1483 asm_fparith(as, ir, ARMI_VMUL_D); 1484 return; 1485 } 1486 #endif 1487 asm_intmul(as, ir); 1488 } 1489 1490 #define asm_addov(as, ir) asm_add(as, ir) 1491 #define asm_subov(as, ir) asm_sub(as, ir) 1492 #define asm_mulov(as, ir) asm_mul(as, ir) 1493 1494 #if !LJ_SOFTFP 1495 #define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) 1496 #define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi) 1497 #define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) 1498 #define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) 1499 #define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) 1500 #endif 1501 1502 #define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi) 1503 1504 static void asm_neg(ASMState *as, IRIns *ir) 1505 { 1506 #if !LJ_SOFTFP 1507 if (irt_isnum(ir->t)) { 1508 asm_fpunary(as, ir, ARMI_VNEG_D); 1509 return; 1510 } 1511 #endif 1512 asm_intneg(as, ir, ARMI_RSB); 1513 } 1514 1515 static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) 1516 { 1517 if (as->flagmcp == as->mcp) { /* Try to drop cmp r, #0. */ 1518 uint32_t cc = (as->mcp[1] >> 28); 1519 as->flagmcp = NULL; 1520 if (cc <= CC_NE) { 1521 as->mcp++; 1522 ai |= ARMI_S; 1523 } else if (cc == CC_GE) { 1524 *++as->mcp ^= ((CC_GE^CC_PL) << 28); 1525 ai |= ARMI_S; 1526 } else if (cc == CC_LT) { 1527 *++as->mcp ^= ((CC_LT^CC_MI) << 28); 1528 ai |= ARMI_S; 1529 } /* else: other conds don't work with bit ops. */ 1530 } 1531 if (ir->op2 == 0) { 1532 Reg dest = ra_dest(as, ir, RSET_GPR); 1533 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); 1534 emit_d(as, ai^m, dest); 1535 } else { 1536 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. 
*/ 1537 asm_intop(as, ir, ai); 1538 } 1539 } 1540 1541 #define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) 1542 1543 static void asm_bswap(ASMState *as, IRIns *ir) 1544 { 1545 Reg dest = ra_dest(as, ir, RSET_GPR); 1546 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1547 if ((as->flags & JIT_F_ARMV6)) { 1548 emit_dm(as, ARMI_REV, dest, left); 1549 } else { 1550 Reg tmp2 = dest; 1551 if (tmp2 == left) 1552 tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left)); 1553 emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP); 1554 emit_dm(as, ARMI_MOV|ARMF_SH(ARMSH_ROR, 8), tmp2, left); 1555 emit_dn(as, ARMI_BIC|ARMI_K12|256*8|255, RID_TMP, RID_TMP); 1556 emit_dnm(as, ARMI_EOR|ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left); 1557 } 1558 } 1559 1560 #define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) 1561 #define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) 1562 #define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) 1563 1564 static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1565 { 1566 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1567 /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */ 1568 /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */ 1569 Reg dest = ra_dest(as, ir, RSET_GPR); 1570 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1571 int32_t shift = (IR(ir->op2)->i & 31); 1572 emit_dm(as, ARMI_MOV|ARMF_SH(sh, shift), dest, left); 1573 } else { 1574 Reg dest = ra_dest(as, ir, RSET_GPR); 1575 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1576 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); 1577 emit_dm(as, ARMI_MOV|ARMF_RSH(sh, right), dest, left); 1578 } 1579 } 1580 1581 #define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) 1582 #define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) 1583 #define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) 1584 #define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) 1585 #define asm_brol(as, ir) lua_assert(0) 1586 1587 static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1588 { 1589 uint32_t kcmp = 0, kmov = 0; 1590 Reg dest = ra_dest(as, ir, RSET_GPR); 1591 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1592 Reg right = 0; 1593 if (irref_isk(ir->op2)) { 1594 kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i); 1595 if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i); 1596 } 1597 if (!kmov) { 1598 kcmp = 0; 1599 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); 1600 } 1601 if (kmov || dest != right) { 1602 emit_dm(as, ARMF_CC(ARMI_MOV, cc)^kmov, dest, right); 1603 cc ^= 1; /* Must use opposite conditions for paired moves. */ 1604 } else { 1605 cc ^= (CC_LT^CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */ 1606 } 1607 if (dest != left) emit_dm(as, ARMF_CC(ARMI_MOV, cc), dest, left); 1608 emit_nm(as, ARMI_CMP^kcmp, left, right); 1609 } 1610 1611 #if LJ_SOFTFP 1612 static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc) 1613 { 1614 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; 1615 RegSet drop = RSET_SCRATCH; 1616 Reg r; 1617 IRRef args[4]; 1618 args[0] = ir->op1; args[1] = (ir+1)->op1; 1619 args[2] = ir->op2; args[3] = (ir+1)->op2; 1620 /* __aeabi_cdcmple preserves r0-r3. 
*/ 1621 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); 1622 if (ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); 1623 if (!rset_test(as->freeset, RID_R2) && 1624 regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2); 1625 if (!rset_test(as->freeset, RID_R3) && 1626 regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3); 1627 ra_evictset(as, drop); 1628 ra_destpair(as, ir); 1629 emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETHI, RID_R3); 1630 emit_dm(as, ARMF_CC(ARMI_MOV, cc), RID_RETLO, RID_R2); 1631 emit_call(as, (void *)ci->func); 1632 for (r = RID_R0; r <= RID_R3; r++) 1633 ra_leftov(as, r, args[r-RID_R0]); 1634 } 1635 #else 1636 static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) 1637 { 1638 Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); 1639 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1640 right = ((left >> 8) & 15); left &= 15; 1641 if (dest != left) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc^1), dest, left); 1642 if (dest != right) emit_dm(as, ARMF_CC(ARMI_VMOV_D, cc), dest, right); 1643 emit_d(as, ARMI_VMRS, 0); 1644 emit_dm(as, ARMI_VCMP_D, left, right); 1645 } 1646 #endif 1647 1648 static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) 1649 { 1650 #if LJ_SOFTFP 1651 UNUSED(fcc); 1652 #else 1653 if (irt_isnum(ir->t)) 1654 asm_fpmin_max(as, ir, fcc); 1655 else 1656 #endif 1657 asm_intmin_max(as, ir, cc); 1658 } 1659 1660 #define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI) 1661 #define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO) 1662 1663 /* -- Comparisons --------------------------------------------------------- */ 1664 1665 /* Map of comparisons to flags. ORDER IR. */ 1666 static const uint8_t asm_compmap[IR_ABC+1] = { 1667 /* op FP swp int cc FP cc */ 1668 /* LT */ CC_GE + (CC_HS << 4), 1669 /* GE x */ CC_LT + (CC_HI << 4), 1670 /* LE */ CC_GT + (CC_HI << 4), 1671 /* GT x */ CC_LE + (CC_HS << 4), 1672 /* ULT x */ CC_HS + (CC_LS << 4), 1673 /* UGE */ CC_LO + (CC_LO << 4), 1674 /* ULE x */ CC_HI + (CC_LO << 4), 1675 /* UGT */ CC_LS + (CC_LS << 4), 1676 /* EQ */ CC_NE + (CC_NE << 4), 1677 /* NE */ CC_EQ + (CC_EQ << 4), 1678 /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ 1679 }; 1680 1681 #if LJ_SOFTFP 1682 /* FP comparisons. */ 1683 static void asm_sfpcomp(ASMState *as, IRIns *ir) 1684 { 1685 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; 1686 RegSet drop = RSET_SCRATCH; 1687 Reg r; 1688 IRRef args[4]; 1689 int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1); 1690 args[swp^0] = ir->op1; args[swp^1] = (ir+1)->op1; 1691 args[swp^2] = ir->op2; args[swp^3] = (ir+1)->op2; 1692 /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */ 1693 for (r = RID_R0; r <= RID_R3; r++) 1694 if (!rset_test(as->freeset, r) && 1695 regcost_ref(as->cost[r]) == args[r-RID_R0]) rset_clear(drop, r); 1696 ra_evictset(as, drop); 1697 asm_guardcc(as, (asm_compmap[ir->o] >> 4)); 1698 emit_call(as, (void *)ci->func); 1699 for (r = RID_R0; r <= RID_R3; r++) 1700 ra_leftov(as, r, args[r-RID_R0]); 1701 } 1702 #else 1703 /* FP comparisons. 
#if LJ_SOFTFP
/* FP comparisons. */
static void asm_sfpcomp(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
  RegSet drop = RSET_SCRATCH;
  Reg r;
  IRRef args[4];
  int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1);
  args[swp^0] = ir->op1; args[swp^1] = (ir+1)->op1;
  args[swp^2] = ir->op2; args[swp^3] = (ir+1)->op2;
  /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */
  for (r = RID_R0; r <= RID_R3; r++)
    if (!rset_test(as->freeset, r) &&
        regcost_ref(as->cost[r]) == args[r-RID_R0]) rset_clear(drop, r);
  ra_evictset(as, drop);
  asm_guardcc(as, (asm_compmap[ir->o] >> 4));
  emit_call(as, (void *)ci->func);
  for (r = RID_R0; r <= RID_R3; r++)
    ra_leftov(as, r, args[r-RID_R0]);
}
#else
/* FP comparisons. */
static void asm_fpcomp(ASMState *as, IRIns *ir)
{
  Reg left, right;
  ARMIns ai;
  int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
  if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
    left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15);
    right = 0;
    ai = ARMI_VCMPZ_D;
  } else {
    left = ra_alloc2(as, ir, RSET_FPR);
    if (swp) {
      right = (left & 15); left = ((left >> 8) & 15);
    } else {
      right = ((left >> 8) & 15); left &= 15;
    }
    ai = ARMI_VCMP_D;
  }
  asm_guardcc(as, (asm_compmap[ir->o] >> 4));
  emit_d(as, ARMI_VMRS, 0);
  emit_dm(as, ai, left, right);
}
#endif

/* Integer comparisons. */
static void asm_intcomp(ASMState *as, IRIns *ir)
{
  ARMCC cc = (asm_compmap[ir->o] & 15);
  IRRef lref = ir->op1, rref = ir->op2;
  Reg left;
  uint32_t m;
  int cmpprev0 = 0;
  lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
  if (asm_swapops(as, lref, rref)) {
    Reg tmp = lref; lref = rref; rref = tmp;
    if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
    else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
  }
  if (irref_isk(rref) && IR(rref)->i == 0) {
    IRIns *irl = IR(lref);
    cmpprev0 = (irl+1 == ir);
    /* Combine comp(BAND(left, right), 0) into tst left, right. */
    if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
      IRRef blref = irl->op1, brref = irl->op2;
      uint32_t m2 = 0;
      Reg bleft;
      if (asm_swapops(as, blref, brref)) {
        Reg tmp = blref; blref = brref; brref = tmp;
      }
      if (irref_isk(brref)) {
        m2 = emit_isk12(ARMI_AND, IR(brref)->i);
        if ((m2 & (ARMI_AND^ARMI_BIC)))
          goto notst;  /* Not beneficial if we miss a constant operand. */
      }
      if (cc == CC_GE) cc = CC_PL;
      else if (cc == CC_LT) cc = CC_MI;
      else if (cc > CC_NE) goto notst;  /* Other conds don't work with tst. */
      bleft = ra_alloc1(as, blref, RSET_GPR);
      if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft));
      asm_guardcc(as, cc);
      emit_n(as, ARMI_TST^m2, bleft);
      return;
    }
  }
notst:
  left = ra_alloc1(as, lref, RSET_GPR);
  m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left));
  asm_guardcc(as, cc);
  emit_n(as, ARMI_CMP^m, left);
  /* Signed comparison with zero and referencing previous ins? */
  if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE))
    as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
}

static void asm_comp(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t))
    asm_fpcomp(as, ir);
  else
#endif
    asm_intcomp(as, ir);
}

#define asm_equal(as, ir)	asm_comp(as, ir)
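/* A rough sketch of what the 64 bit comparison below emits for a signed
** guard such as "assert((hi:lo) < (rhs_hi:rhs_lo))", read in memory order
** once the backwards emission is accounted for:
**
**   cmp   lefthi, rhs_hi
**   blgt  ->exit          @ hiword alone already fails the guard
**   bne   l_around        @ hiwords differ: result decided, skip loword
**   cmp   leftlo, rhs_lo
**   blhs  ->exit          @ loword compared unsigned
**   l_around:
**
** The unsigned case instead predicates the loword compare on EQ (cmpeq)
** rather than branching around it.
*/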
#if LJ_HASFFI
/* 64 bit integer comparisons. */
static void asm_int64comp(ASMState *as, IRIns *ir)
{
  int signedcomp = (ir->o <= IR_GT);
  ARMCC cclo, cchi;
  Reg leftlo, lefthi;
  uint32_t mlo, mhi;
  RegSet allow = RSET_GPR, oldfree;

  /* Always use unsigned comparison for loword. */
  cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15;
  leftlo = ra_alloc1(as, ir->op1, allow);
  oldfree = as->freeset;
  mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo));
  allow &= ~(oldfree & ~as->freeset);  /* Update for allocs of asm_fuseopm. */

  /* Use signed or unsigned comparison for hiword. */
  cchi = asm_compmap[ir->o] & 15;
  lefthi = ra_alloc1(as, (ir+1)->op1, allow);
  mhi = asm_fuseopm(as, ARMI_CMP, (ir+1)->op2, rset_clear(allow, lefthi));

  /* All register allocations must be performed _before_ this point. */
  if (signedcomp) {
    MCLabel l_around = emit_label(as);
    asm_guardcc(as, cclo);
    emit_n(as, ARMI_CMP^mlo, leftlo);
    emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around);
    if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6;  /* GE -> GT, LE -> LT */
    asm_guardcc(as, cchi);
  } else {
    asm_guardcc(as, cclo);
    emit_n(as, ARMF_CC(ARMI_CMP, CC_EQ)^mlo, leftlo);
  }
  emit_n(as, ARMI_CMP^mhi, lefthi);
}
#endif

/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */

/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
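/* Sketch of the arithmetic HIOP cases handled below (FFI 64 bit add shown;
** sub and neg are analogous with sbc/rsc): the loword op is emitted with
** the S flag and the hiword op consumes the carry, so the memory order is
** roughly
**
**   adds lo_dest, lo_left, lo_right
**   adc  hi_dest, hi_left, hi_right
*/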
static void asm_hiop(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI || LJ_SOFTFP
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  if ((ir-1)->o <= IR_NE) {  /* 64 bit integer or FP comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
#if LJ_SOFTFP
    if (!irt_isint(ir->t)) {
      asm_sfpcomp(as, ir-1);
      return;
    }
#endif
#if LJ_HASFFI
    asm_int64comp(as, ir-1);
#endif
    return;
#if LJ_SOFTFP
  } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
    as->curins--;  /* Always skip the loword min/max. */
    if (uselo || usehi)
      asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
    return;
#elif LJ_HASFFI
  } else if ((ir-1)->o == IR_CONV) {
    as->curins--;  /* Always skip the CONV. */
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
#endif
  } else if ((ir-1)->o == IR_XSTORE) {
    if ((ir-1)->r != RID_SINK)
      asm_xstore_(as, ir, 4);
    return;
  }
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
#if LJ_HASFFI
  case IR_ADD:
    as->curins--;
    asm_intop(as, ir, ARMI_ADC);
    asm_intop(as, ir-1, ARMI_ADD|ARMI_S);
    break;
  case IR_SUB:
    as->curins--;
    asm_intop(as, ir, ARMI_SBC);
    asm_intop(as, ir-1, ARMI_SUB|ARMI_S);
    break;
  case IR_NEG:
    as->curins--;
    asm_intneg(as, ir, ARMI_RSC);
    asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
    break;
#endif
#if LJ_SOFTFP
  case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
  case IR_STRTO:
    if (!uselo)
      ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
    break;
#endif
  case IR_CALLN:
  case IR_CALLS:
  case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
#if LJ_SOFTFP
  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
#endif
  case IR_CNEWI:
    /* Nothing to do here. Handled by lo op itself. */
    break;
  default: lua_assert(0); break;
  }
#else
  UNUSED(as); UNUSED(ir); lua_assert(0);
#endif
}

/* -- Profiling ----------------------------------------------------------- */

static void asm_prof(ASMState *as, IRIns *ir)
{
  UNUSED(ir);
  asm_guardcc(as, CC_NE);
  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
}

/* -- Stack handling ------------------------------------------------------ */

/* Check Lua stack size for overflow. Use exit handler as fallback. */
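/* Rough shape of the overflow check emitted below for a root trace, in
** memory order (the cur_L load materializes the global_State address as a
** constant; side traces reload BASE and cur_L differently):
**
**   ldr  tmp, [...cur_L]                           @ tmp = g->cur_L
**   ldr  tmp, [tmp, #offsetof(lua_State, maxstack)]
**   sub  tmp, tmp, BASE
**   cmp  tmp, #8*topslot
**   blls ->exit                                    @ not enough stack left
*/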
static void asm_stack_check(ASMState *as, BCReg topslot,
			    IRIns *irp, RegSet allow, ExitNo exitno)
{
  Reg pbase;
  uint32_t k;
  if (irp) {
    if (!ra_hasspill(irp->s)) {
      pbase = irp->r;
      lua_assert(ra_hasreg(pbase));
    } else if (allow) {
      pbase = rset_pickbot(allow);
    } else {
      pbase = RID_RET;
      emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0);  /* Restore temp. register. */
    }
  } else {
    pbase = RID_BASE;
  }
  emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
  k = emit_isk12(0, (int32_t)(8*topslot));
  lua_assert(k);
  emit_n(as, ARMI_CMP^k, RID_TMP);
  emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
  emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
	   (int32_t)offsetof(lua_State, maxstack));
  if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
    int32_t i = i32ptr(&J2G(as->J)->cur_L);
    if (ra_hasspill(irp->s))
      emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
    emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
    if (ra_hasspill(irp->s) && !allow)
      emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0);  /* Save temp. register. */
    emit_loadi(as, RID_TMP, (i & ~4095));
  } else {
    emit_getgl(as, RID_TMP, cur_L);
  }
}

/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {
#if LJ_SOFTFP
      RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
      Reg tmp;
      lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
		      rset_exclude(RSET_GPREVEN, RID_BASE));
      emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
      if (rset_test(as->freeset, tmp+1)) odd = RID2RSET(tmp+1);
      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd);
      emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs+4);
#else
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
#endif
    } else {
      RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
      Reg type;
      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
      if (!irt_ispri(ir->t)) {
	Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
	emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
	if (rset_test(as->freeset, src+1)) odd = RID2RSET(src+1);
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
	type = ra_allock(as, (int32_t)(*flinks--), odd);
#if LJ_SOFTFP
      } else if ((sn & SNAP_SOFTFPNUM)) {
	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
#endif
      } else {
	type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
      }
      emit_lso(as, ARMI_STR, type, RID_BASE, ofs+4);
    }
    checkmclim(as);
  }
  lua_assert(map + nent == flinks);
}

/* -- GC handling --------------------------------------------------------- */

/* Check GC threshold and do one or more GC steps. */
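/* What the GC check below boils down to, in memory order (a sketch; the
** global_State address and the call argument moves are materialized by the
** register allocator and may differ in detail):
**
**   ldr  tmp, [g, #offsetof(global_State, gc.total)]
**   ldr  tmp2, [g, #offsetof(global_State, gc.threshold)]
**   cmp  tmp, tmp2
**   bls  l_end                  @ gc.total below threshold: skip the step
**   ... r0 = g, r1 = gcsteps ...
**   bl   lj_gc_step_jit
**   cmp  r0, #0
**   blne ->exit                 @ exit if lj_gc_step_jit returned nonzero
**   l_end:
*/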
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp1, tmp2;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  emit_n(as, ARMI_CMP|ARMI_K12|0, RID_RET);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  tmp1 = ra_releasetmp(as, ASMREF_TMP1);
  tmp2 = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp2, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end);
  emit_nm(as, ARMI_CMP, RID_TMP, tmp2);
  emit_lso(as, ARMI_LDR, tmp2, tmp1,
	   (int32_t)offsetof(global_State, gc.threshold));
  emit_lso(as, ARMI_LDR, RID_TMP, tmp1,
	   (int32_t)offsetof(global_State, gc.total));
  ra_allockreg(as, i32ptr(J2G(as->J)), tmp1);
  as->gcsteps = 0;
  checkmclim(as);
}

/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    /* asm_guardcc already inverted the bcc and patched the final bl. */
    p[-2] |= ((uint32_t)(target-p) & 0x00ffffffu);
  } else {
    p[-1] = ARMI_B | ((uint32_t)((target-p)-1) & 0x00ffffffu);
  }
}

/* -- Head of trace ------------------------------------------------------- */

/* Reload L register from g->cur_L. */
static void asm_head_lreg(ASMState *as)
{
  IRIns *ir = IR(ASMREF_L);
  if (ra_used(ir)) {
    Reg r = ra_dest(as, ir, RSET_GPR);
    emit_getgl(as, r, cur_L);
    ra_evictk(as);
  }
}

/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir;
  asm_head_lreg(as);
  ir = IR(REF_BASE);
  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
    ra_spill(as, ir);
  ra_destreg(as, ir, RID_BASE);
}

/* Coalesce BASE register for a side trace. */
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir;
  asm_head_lreg(as);
  ir = IR(REF_BASE);
  if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
    ra_spill(as, ir);
  if (ra_hasspill(irp->s)) {
    rset_clear(allow, ra_dest(as, ir, allow));
  } else {
    Reg r = irp->r;
    lua_assert(ra_hasreg(r));
    rset_clear(allow, r);
    if (r != ir->r && !rset_test(as->freeset, r))
      ra_restore(as, regcost_ref(as->cost[r]));
    ra_destreg(as, ir, r);
  }
  return allow;
}

/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  int32_t spadj = as->T->spadjust;
  if (spadj == 0) {
    as->mctop = --p;
  } else {
    /* Patch stack adjustment. */
    uint32_t k = emit_isk12(ARMI_ADD, spadj);
    lua_assert(k);
    p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu);
}

/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  if (as->loopref) {
    as->invmcp = as->mcp = p;
  } else {
    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
    as->invmcp = NULL;
  }
  *p = 0;  /* Prevent load/store merging. */
}

/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
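/* Worked example for the slot accounting below: with the hard-float EABI
** (non-vararg call) a double goes into a VFP argument register while any
** remain and otherwise spills to an even, 8 byte aligned stack slot pair;
** a float may back-fill the odd single-precision half of a used register.
** With the soft-float ABI (or varargs) a double instead takes an even/odd
** GPR pair (r0:r1 or r2:r3) or two stack slots.
*/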
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX*2];
  uint32_t i, nargs = CCI_XNARGS(ci);
  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++) {
    if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
      if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
	if (irt_isnum(IR(args[i])->t)) {
	  if (nfpr > 0) nfpr--;
	  else fprodd = 0, nslots = (nslots + 3) & ~1;
	} else {
	  if (fprodd) fprodd--;
	  else if (nfpr > 0) fprodd = 1, nfpr--;
	  else nslots++;
	}
      } else if (irt_isnum(IR(args[i])->t)) {
	ngpr &= ~1;
	if (ngpr > 0) ngpr -= 2; else nslots += 2;
      } else {
	if (ngpr > 0) ngpr--; else nslots++;
      }
    } else {
      if (ngpr > 0) ngpr--; else nslots++;
    }
  }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return REGSP_HINT(RID_RET);
}

static void asm_setup_target(ASMState *as)
{
  /* May need extra exit for asm_stack_check on side traces. */
  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}

/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *cstart = NULL, *cend = p;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  MCode *px = exitstub_addr(J, exitno) - 2;
  for (; p < pe; p++) {
    /* Look for bl_cc exitstub, replace with b_cc target. */
    uint32_t ins = *p;
    if ((ins & 0x0f000000u) == 0x0b000000u && ins < 0xf0000000u &&
	((ins ^ (px-p)) & 0x00ffffffu) == 0) {
      *p = (ins & 0xfe000000u) | (((target-p)-2) & 0x00ffffffu);
      cend = p+1;
      if (!cstart) cstart = p;
    }
  }
  lua_assert(cstart != NULL);
  lj_mcode_sync(cstart, cend);
  lj_mcode_patch(J, mcarea, 1);
}