lj_emit_x86.h (14667B)
/*
** x86/x64 instruction emitter.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Emit basic instructions --------------------------------------------- */

#define MODRM(mode, r1, r2)  ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))

#if LJ_64
#define REXRB(p, rr, rb) \
    { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
      if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX   0x200
#define REX_64      (FORCE_REX|0x080000)
#define VEX_64      0x800000
#else
#define REXRB(p, rr, rb)  ((void)0)
#define FORCE_REX   0
#define REX_64      0
#define VEX_64      0
#endif
#if LJ_GC64
#define REX_GC64    REX_64
#else
#define REX_GC64    0
#endif

#define emit_i8(as, i)   (*--as->mcp = (MCode)(i))
#define emit_i32(as, i)  (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
#define emit_u32(as, u)  (*(uint32_t *)(as->mcp-4) = (u), as->mcp -= 4)

#define emit_x87op(as, xo) \
  (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2)

/* op */
static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
                                 MCode *p, int delta)
{
  int n = (int8_t)xo;
  if (n == -60) {  /* VEX-encoded instruction */
#if LJ_64
    xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
    return p+delta-5;
  }
#if defined(__GNUC__)
  if (__builtin_constant_p(xo) && n == -2)
    p[delta-2] = (MCode)(xo >> 24);
  else if (__builtin_constant_p(xo) && n == -3)
    *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16);
  else
#endif
    *(uint32_t *)(p+delta-5) = (uint32_t)xo;
  p += n + delta;
#if LJ_64
  {
    uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
    if (rex != 0x40) {
      rex |= (rr >> 16);
      if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
      else if ((xo & 0xffffff) == 0x6600fd) { *p = (MCode)rex; rex = 0x66; }
      *--p = (MCode)rex;
    }
  }
#else
  UNUSED(rr); UNUSED(rb); UNUSED(rx);
#endif
  return p;
}
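
/* Note on the emitter core above: machine code is generated backwards, so
** as->mcp always points at the lowest, earliest-executing byte emitted so
** far and every emit_* helper pre-decrements it.  emit_op() takes the opcode
** length from the sign-extended low byte of the 32 bit x86Op: -2, -3 and -4
** select one-, two- and three-byte opcodes stored in the upper bytes, while
** -60 (0xc4, the three-byte VEX escape) marks VEX-encoded opcodes, whose
** inverted R/X/B bits are XORed into the VEX prefix instead of emitting a
** separate REX prefix.
*/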

/* op + modrm */
#define emit_opm(xo, mode, rr, rb, p, delta) \
  (p[(delta)-1] = MODRM((mode), (rr), (rb)), \
   emit_op((xo), (rr), (rb), 0, (p), (delta)))

/* op + modrm + sib */
#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \
  (p[-1] = MODRM((scale), (rx), (rb)), \
   p[-2] = MODRM((mode), (rr), RID_ESP), \
   emit_op((xo), (rr), (rb), (rx), (p), -1))

/* op r1, r2 */
static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
{
  MCode *p = as->mcp;
  as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0);
}

#if LJ_64 && defined(LUA_USE_ASSERT)
/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
static int32_t ptr2addr(const void *p)
{
  lua_assert((uintptr_t)p < (uintptr_t)0x80000000);
  return i32ptr(p);
}
#else
#define ptr2addr(p)  (i32ptr((p)))
#endif

/* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ra_hasreg(rb)) {
    if (LJ_GC64 && rb == RID_RIP) {
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = ofs;
    } else if (ofs == 0 && (rb&7) != RID_EBP) {
      mode = XM_OFS0;
    } else if (checki8(ofs)) {
      *--p = (MCode)ofs;
      mode = XM_OFS8;
    } else {
      p -= 4;
      *(int32_t *)p = ofs;
      mode = XM_OFS32;
    }
    if ((rb&7) == RID_ESP)
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
  } else {
    *(int32_t *)(p-4) = ofs;
#if LJ_64
    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
    p -= 5;
    rb = RID_ESP;
#else
    p -= 4;
    rb = RID_EBP;
#endif
    mode = XM_OFS0;
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}

/* op r, [base+idx*scale+ofs] */
static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
                       x86Mode scale, int32_t ofs)
{
  MCode *p = as->mcp;
  x86Mode mode;
  if (ofs == 0 && (rb&7) != RID_EBP) {
    mode = XM_OFS0;
  } else if (checki8(ofs)) {
    mode = XM_OFS8;
    *--p = (MCode)ofs;
  } else {
    mode = XM_OFS32;
    p -= 4;
    *(int32_t *)p = ofs;
  }
  as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p);
}

/* op r, i */
static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  MCode *p = as->mcp;
  x86Op xo;
  if (checki8(i)) {
    *--p = (MCode)i;
    xo = XG_TOXOi8(xg);
  } else {
    p -= 4;
    *(int32_t *)p = i;
    xo = XG_TOXOi(xg);
  }
  as->mcp = emit_opm(xo, XM_REG, (Reg)(xg & 7) | (rb & REX_64), rb, p, 0);
}

/* op [base+ofs], i */
static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
                       int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_rmro(as, xo, (Reg)(xg & 7), rb, ofs);
}

#define emit_shifti(as, xg, r, i) \
  (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r)))

/* op r, rm/mrm */
static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
{
  MCode *p = as->mcp;
  x86Mode mode = XM_REG;
  if (rb == RID_MRM) {
    rb = as->mrm.base;
    if (rb == RID_NONE) {
      rb = RID_EBP;
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
      if (as->mrm.idx != RID_NONE)
        goto mrmidx;
#if LJ_64
      *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
      rb = RID_ESP;
#endif
    } else if (LJ_GC64 && rb == RID_RIP) {
      lua_assert(as->mrm.idx == RID_NONE);
      mode = XM_OFS0;
      p -= 4;
      *(int32_t *)p = as->mrm.ofs;
    } else {
      if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
        mode = XM_OFS0;
      } else if (checki8(as->mrm.ofs)) {
        *--p = (MCode)as->mrm.ofs;
        mode = XM_OFS8;
      } else {
        p -= 4;
        *(int32_t *)p = as->mrm.ofs;
        mode = XM_OFS32;
      }
      if (as->mrm.idx != RID_NONE) {
      mrmidx:
        as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p);
        return;
      }
      if ((rb&7) == RID_ESP)
        *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
    }
  }
  as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
}
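
/* Illustrative sketch only: passing RID_MRM as the rm operand makes
** emit_mrm() use the fused memory operand described by as->mrm instead of a
** plain register.  Assuming the field layout implied by its uses above
** (base/idx/scale/ofs, cf. x86ModRM in lj_target_x86.h), something like
**
**   as->mrm.base = (uint8_t)rb;      base register or RID_NONE
**   as->mrm.idx = (uint8_t)rx;       index register or RID_NONE
**   as->mrm.scale = XM_SCALE8;       index scale
**   as->mrm.ofs = 16;                displacement
**   emit_mrm(as, XO_MOV, rr|REX_64, RID_MRM);
**
** would encode "mov rr, qword [rb+rx*8+16]".  The operand fusion code in
** the backend normally fills in as->mrm before calling emit_mrm().
*/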

/* op rm/mrm, i */
static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
{
  x86Op xo;
  if (checki8(i)) {
    emit_i8(as, i);
    xo = XG_TOXOi8(xg);
  } else {
    emit_i32(as, i);
    xo = XG_TOXOi(xg);
  }
  emit_mrm(as, xo, (Reg)(xg & 7) | (rb & REX_64), (rb & ~REX_64));
}

/* -- Emit loads/stores --------------------------------------------------- */

/* mov [base+ofs], i */
static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
{
  emit_i32(as, i);
  emit_rmro(as, XO_MOVmi, 0, base, ofs);
}

/* mov [base+ofs], r */
#define emit_movtomro(as, r, base, ofs) \
  emit_rmro(as, XO_MOVto, (r), (base), (ofs))

/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
  emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
#define emit_getgl(as, r, field)  emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
#define emit_setgl(as, r, field)  emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)

#define emit_setvmstate(as, i) \
  (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))

/* mov r, i / xor r, r */
static void emit_loadi(ASMState *as, Reg r, int32_t i)
{
  /* XOR r,r is shorter, but modifies the flags. This is bad for HIOP. */
  if (i == 0 && !(LJ_32 && (IR(as->curins)->o == IR_HIOP ||
                            (as->curins+1 < as->T->nins &&
                             IR(as->curins+1)->o == IR_HIOP)))) {
    emit_rr(as, XO_ARITH(XOg_XOR), r, r);
  } else {
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = i;
    p[-5] = (MCode)(XI_MOVri+(r&7));
    p -= 5;
    REXRB(p, 0, r);
    as->mcp = p;
  }
}

#if LJ_GC64
#define dispofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
#define mcpofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define mctopofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
/* mov r, addr */
#define emit_loada(as, r, addr) \
  emit_loadu64(as, (r), (uintptr_t)(addr))
#else
/* mov r, addr */
#define emit_loada(as, r, addr) \
  emit_loadi(as, (r), ptr2addr((addr)))
#endif
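
/* Note on the constant loads below: emit_loadu64() picks the shortest
** usable encoding.  A zero-extended 32 bit value uses the plain 5 byte
** "mov r32, imm32" (0xb8+r), a sign-extended negative value uses the 7 byte
** REX.W "mov r/m64, imm32" form, and under LJ_GC64 addresses reachable from
** the dispatch table or from the machine code area are built with a single
** LEA off RID_DISPATCH or RID_RIP.  Only as a last resort is the full
** 10 byte "mov r64, imm64" (REX.W + 0xb8+r + imm64) emitted.
*/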

#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */
static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
{
  if (checku32(u64)) {  /* 32 bit load clears upper 32 bits. */
    emit_loadi(as, r, (int32_t)u64);
  } else if (checki32((int64_t)u64)) {  /* Sign-extended 32 bit load. */
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = (int32_t)u64;
    as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
#if LJ_GC64
  } else if (checki32(dispofs(as, u64))) {
    emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
  } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
    /* Since as->realign assumes the code size doesn't change, check
    ** RIP-relative addressing reachability for both as->mcp and as->mctop.
    */
    emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
#endif
  } else {  /* Full-size 64 bit load. */
    MCode *p = as->mcp;
    *(uint64_t *)(p-8) = u64;
    p[-9] = (MCode)(XI_MOVri+(r&7));
    p[-10] = 0x48 + ((r>>3)&1);
    p -= 10;
    as->mcp = p;
  }
}
#endif

/* op r, [addr] */
static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
#if LJ_GC64
  if (checki32(dispofs(as, addr))) {
    emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
  } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
    emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
  } else if (!checki32((intptr_t)addr) && (xo == XO_MOV || xo == XO_MOVSD)) {
    emit_rmro(as, xo, rr, rr, 0);
    emit_loadu64(as, rr, (uintptr_t)addr);
  } else
#endif
  {
    MCode *p = as->mcp;
    *(int32_t *)(p-4) = ptr2addr(addr);
#if LJ_64
    p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
#else
    as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
#endif
  }
}

/* Load 64 bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
  Reg r64;
  x86Op xo;
  const uint64_t *k = &ir_k64(ir)->u64;
  if (rset_test(RSET_FPR, r)) {
    r64 = r;
    xo = XO_MOVSD;
  } else {
    r64 = r | REX_64;
    xo = XO_MOV;
  }
  if (*k == 0) {
    emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
#if LJ_GC64
  } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
             (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
    emit_rma(as, xo, r64, k);
  } else {
    if (ir->i) {
      lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
    } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
      emit_loadu64(as, r, *k);
      return;
    } else {
      /* If all else fails, add the FP constant at the MCode area bottom. */
      while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
      *(uint64_t *)as->mcbot = *k;
      ir->i = (int32_t)(as->mctop - as->mcbot);
      as->mcbot += 8;
      as->mclim = as->mcbot + MCLIM_REDZONE;
    }
    emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
#else
  } else {
    emit_rma(as, xo, r64, k);
#endif
  }
}

/* -- Emit control-flow instructions -------------------------------------- */

/* Label for short jumps. */
typedef MCode *MCLabel;

#if LJ_32 && LJ_HASFFI
/* jmp short target */
static void emit_sjmp(ASMState *as, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lua_assert(delta == (int8_t)delta);
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = XI_JMPs;
  as->mcp = p - 2;
}
#endif

/* jcc short target */
static void emit_sjcc(ASMState *as, int cc, MCLabel target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - p;
  lua_assert(delta == (int8_t)delta);
  p[-1] = (MCode)(int8_t)delta;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
}

/* jcc short (pending target) */
static MCLabel emit_sjcc_label(ASMState *as, int cc)
{
  MCode *p = as->mcp;
  p[-1] = 0;
  p[-2] = (MCode)(XI_JCCs+(cc&15));
  as->mcp = p - 2;
  return p;
}

/* Fixup jcc short target. */
static void emit_sfixup(ASMState *as, MCLabel source)
{
  source[-1] = (MCode)(as->mcp-source);
}

/* Return label pointing to current PC. */
#define emit_label(as)  ((as)->mcp)
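
/* Usage note for the short-branch helpers above: since code is emitted
** backwards, a forward branch is normally formed by taking a label first
** and emitting the jcc afterwards, roughly:
**
**   MCLabel l_end = emit_label(as);   (already-emitted code starts here)
**   ... emit the instructions to be skipped ...
**   emit_sjcc(as, CC_E, l_end);       (placed in front of them)
**
** emit_sjcc_label()/emit_sfixup() handle the opposite case, where the
** target position is only known later.  The 8 bit displacement is relative
** to the byte following the 2 byte instruction, i.e. the old as->mcp.
*/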

/* Compute relative 32 bit offset for jump and call instructions. */
static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
{
  ptrdiff_t delta = target - p;
  lua_assert(delta == (int32_t)delta);
  return (int32_t)delta;
}

/* jcc target */
static void emit_jcc(ASMState *as, int cc, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = (MCode)(XI_JCCn+(cc&15));
  p[-6] = 0x0f;
  as->mcp = p - 6;
}

/* jmp target */
static void emit_jmp(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = XI_JMP;
  as->mcp = p - 5;
}

/* call target */
static void emit_call_(ASMState *as, MCode *target)
{
  MCode *p = as->mcp;
#if LJ_64
  if (target-p != (int32_t)(target-p)) {
    /* Assumes RID_RET is never an argument to calls and always clobbered. */
    emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
    emit_loadu64(as, RID_RET, (uint64_t)target);
    return;
  }
#endif
  *(int32_t *)(p-4) = jmprel(p, target);
  p[-5] = XI_CALL;
  as->mcp = p - 5;
}

#define emit_call(as, f)  emit_call_(as, (MCode *)(void *)(f))

/* -- Emit generic operations --------------------------------------------- */

/* Use 64 bit operations to handle 64 bit IR types. */
#if LJ_64
#define REX_64IR(ir, r)  ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
#define VEX_64IR(ir, r)  ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
#else
#define REX_64IR(ir, r)  (r)
#define VEX_64IR(ir, r)  (r)
#endif

/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  UNUSED(ir);
  if (dst < RID_MAX_GPR)
    emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
  else
    emit_rr(as, XO_MOVAPS, dst, src);
}

/* Generic load of register with base and (small) offset address. */
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
}

/* Generic store of register with base and (small) offset address. */
static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r < RID_MAX_GPR)
    emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
  else
    emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
}

/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (ofs) {
    if ((as->flags & JIT_F_LEA_AGU))
      emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
    else
      emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
  }
}

#define emit_spsub(as, ofs)  emit_addptr(as, RID_ESP|REX_64, -(ofs))

/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref)  ((ref) <= REF_BASE)