lj_opt_split.c
/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_dispatch.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
**   0100    p32 ADD    base  +8
**   0101    i64 XLOAD  0100
**   0102    i64 ADD    0101  +1
**   0103    p32 ADD    base  +16
**   0104    i64 XSTORE 0103  0102
**
**         mov rax, [esi+0x8]
**         add rax, +0x01
**         mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
**   0100    p32 ADD    base  +8
**   0101    int XLOAD  0100
**   0102    p32 ADD    base  +12
**   0103    int XLOAD  0102
**   0104    int ADD    0101  +1
**   0105    int HIOP   0103  +0
**   0106    p32 ADD    base  +16
**   0107    int XSTORE 0106  0104
**   0108    int HIOP   0106  0105
**
**         mov eax, [esi+0x8]
**         mov ecx, [esi+0xc]
**         add eax, +0x01
**         adc ecx, +0x00
**         mov [esi+0x10], eax
**         mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
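
/* As an illustrative sketch (reference numbers are made up, not from the
** source), on a LJ_LE soft-float target a call like ldexp(x, n) with a
** split 64 bit x thus becomes a CARG chain feeding the CALLN, plus a HIOP
** carrying the hiword half of the result:
**
**   0101    nil CARG   x.lo  x.hi
**   0102    nil CARG   0101  n
**   0103    int CALLN  0102  ldexp
**   0104    sfp HIOP   0103  0103
*/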
/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}

#if LJ_HASFFI
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
			    IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
	J->cur.nins--;
	ir->prev = lo;
	return hi;
      } else {
	TRef k1, k2;
	IRRef t1, t2, t3, t4;
	J->cur.nins--;
	k1 = lj_ir_kint(J, k);
	k2 = lj_ir_kint(J, (-k & 31));
	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
	return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
	return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
	IRRef t1 = ir->prev, t2;
	lua_assert(op == IR_BSHR || op == IR_BSAR);
	nir->o = IR_BSHR;
	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
	return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
	if (k == 32)
	  J->cur.nins--;
	else
	  lo = ir->prev;
	ir->prev = lj_ir_kint(J, 0);
	return lo;
      } else {
	lua_assert(op == IR_BSHR || op == IR_BSAR);
	if (k == 32) {
	  J->cur.nins--;
	  ir->prev = hi;
	} else {
	  nir->op1 = hi;
	}
	if (op == IR_BSHR)
	  return lj_ir_kint(J, 0);
	else
	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
		       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
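
/* The constant-shift decomposition above follows the usual double-word
** identities (sketch for 0 < k < 32; for k >= 32 one half collapses,
** e.g. SHL by exactly 32 just moves lo to hi and zeroes lo):
**
**   SHL: res.lo = lo << k
**        res.hi = (hi << k) | (lo >> (32-k))
**   SHR: res.lo = (lo >> k) | (hi << (32-k))
**        res.hi = hi >> k
**   SAR: like SHR, but res.hi uses an arithmetic shift
**
** Non-constant shift counts fall back to the lj_carith_*64 helpers.
*/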
static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
			 IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	J->cur.nins--;
	ir->prev = nir->op1;
      } else if (op == IR_BXOR) {
	nir->o = IR_BNOT;
	nir->op2 = 0;
      } else {
	J->cur.nins--;
	ir->prev = kref;
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	return hi;
      } else if (op == IR_BXOR) {
	return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
	return kref;
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
#endif

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
	break;
      case IR_FPMATH:
	/* Try to rejoin pow from EXP2, MUL and LOG2. */
	if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
	  IRIns *irp = IR(nir->op1);
	  if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
	    IRIns *irm4 = IR(irp->op1);
	    IRIns *irm3 = IR(irm4->op1);
	    IRIns *irm12 = IR(irm3->op1);
	    IRIns *irl1 = IR(irm12->op1);
	    if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
		irl1->op2 == IRCALL_lj_vm_log2) {
	      IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
	      IRRef arg3 = irm3->op2, arg4 = irm4->op2;
	      J->cur.nins--;
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
	      ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
	      hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
	      break;
	    }
	  }
	}
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_ATAN2:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_FLOAD:
	lua_assert(ir->op1 == REF_NIL);
	hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
	nir->op2 += LJ_BE*4;
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
	inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
      }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lua_assert(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_BNOT:
	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
	break;
      case IR_BSWAP:
	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
	hi = nref;
	break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
	hi = split_bitop(J, hisubst, nir, ir);
	break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
	hi = split_bitshift(J, hisubst, oir, nir, ir);
	break;
      case IR_FLOAD:
	lua_assert(ir->op2 == IRFL_CDATA_INT64);
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
      }
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
      }
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lua_assert(st == IRT_NUM && irt_isint(ir->t));
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
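
/* Note how a backend later consumes the paired instructions: e.g. for a
** 64 bit ADD followed by its HIOP it emits the standard double-word
** addition (sketch only; the actual lowering is target-specific):
**
**   res.lo = a.lo + b.lo
**   res.hi = a.hi + b.hi + carry(a.lo + b.lo)
**
** i.e. the add/adc pair shown in the example at the top of this file.
*/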
/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
	return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
	st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif