ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

lj_opt_split.c (26949B)


      1 /*
      2 ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
      3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      4 */
      5 
      6 #define lj_opt_split_c
      7 #define LUA_CORE
      8 
      9 #include "lj_obj.h"
     10 
     11 #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
     12 
     13 #include "lj_err.h"
     14 #include "lj_buf.h"
     15 #include "lj_ir.h"
     16 #include "lj_jit.h"
     17 #include "lj_ircall.h"
     18 #include "lj_iropt.h"
     19 #include "lj_dispatch.h"
     20 #include "lj_vm.h"
     21 
     22 /* SPLIT pass:
     23 **
     24 ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
     25 ** instructions. It's only active for soft-float targets or for 32 bit CPUs
     26 ** which lack native 64 bit integer operations (the FFI is currently the
     27 ** only emitter for 64 bit integer instructions).
     28 **
     29 ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
     30 ** backend simple. Only a small amount of extra functionality needs to be
     31 ** implemented. This is much easier than adding support for allocating
     32 ** register pairs to each backend (believe me, I tried). A few simple, but
     33 ** important optimizations can be performed by the SPLIT pass, which would
     34 ** be tedious to do in the backend.
     35 **
     36 ** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
     40 ** inferred from the previous instruction.
     41 **
     42 ** The operands of HIOP hold the hiword input references. The output of HIOP
     43 ** is the hiword output reference, which is also used to hold the hiword
     44 ** register or spill slot information. The register allocator treats this
     45 ** instruction independently of any other instruction, which improves code
     46 ** quality compared to using fixed register pairs.
     47 **
     48 ** It's easier to split up some instructions into two regular 32 bit
     49 ** instructions. E.g. XLOAD is split up into two XLOADs with two different
     50 ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
     51 ** constants, too. Some hiword instructions can be entirely omitted, e.g.
     52 ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
     53 ** are split up into two 32 bit arguments each.
     54 **
     55 ** On soft-float targets, floating-point instructions are directly converted
     56 ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
     57 ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
     58 **
     59 ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
     60 ** two int64_t fields:
     61 **
     62 ** 0100    p32 ADD    base  +8
     63 ** 0101    i64 XLOAD  0100
     64 ** 0102    i64 ADD    0101  +1
     65 ** 0103    p32 ADD    base  +16
     66 ** 0104    i64 XSTORE 0103  0102
     67 **
     68 **         mov rax, [esi+0x8]
     69 **         add rax, +0x01
     70 **         mov [esi+0x10], rax
     71 **
     72 ** Here's the transformed IR and the x86 machine code after the SPLIT pass:
     73 **
     74 ** 0100    p32 ADD    base  +8
     75 ** 0101    int XLOAD  0100
     76 ** 0102    p32 ADD    base  +12
     77 ** 0103    int XLOAD  0102
     78 ** 0104    int ADD    0101  +1
     79 ** 0105    int HIOP   0103  +0
     80 ** 0106    p32 ADD    base  +16
     81 ** 0107    int XSTORE 0106  0104
     82 ** 0108    int HIOP   0106  0105
     83 **
     84 **         mov eax, [esi+0x8]
     85 **         mov ecx, [esi+0xc]
     86 **         add eax, +0x01
     87 **         adc ecx, +0x00
     88 **         mov [esi+0x10], eax
     89 **         mov [esi+0x14], ecx
     90 **
     91 ** You may notice the reassociated hiword address computation, which is
     92 ** later fused into the mov operands by the assembler.
     93 */
     94 
     95 /* Some local macros to save typing. Undef'd at the end. */
     96 #define IR(ref)		(&J->cur.ir[(ref)])
     97 
     98 /* Directly emit the transformed IR without updating chains etc. */
     99 static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
    100 {
    101   IRRef nref = lj_ir_nextins(J);
    102   IRIns *ir = IR(nref);
    103   ir->ot = ot;
    104   ir->op1 = op1;
    105   ir->op2 = op2;
    106   return nref;
    107 }
    108 
    109 #if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
  /* Pack the two 32 bit halves of the number into one CARG pair. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    /* Guarded conversion: convert the int back to a number and compare
    ** both halves with the original. tmp is the i2d loword result,
    ** tmp+1 is its HIOP hiword (hi ref = lo ref + 1 by construction).
    */
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
    128 
    129 /* Emit a CALLN with one split 64 bit argument. */
    130 static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
    131 			  IRIns *ir, IRCallID id)
    132 {
    133   IRRef tmp, op1 = ir->op1;
    134   J->cur.nins--;
    135 #if LJ_LE
    136   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
    137 #else
    138   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
    139 #endif
    140   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
    141   return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    142 }
    143 #endif
    144 
    145 /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
    146 static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
    147 			   IRIns *ir, IRCallID id)
    148 {
    149   IRRef tmp, op1 = ir->op1, op2 = ir->op2;
    150   J->cur.nins--;
    151 #if LJ_LE
    152   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
    153 #else
    154   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
    155 #endif
    156   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
    157   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
    158   return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    159 }
    160 
    161 /* Emit a CALLN with two split 64 bit arguments. */
    162 static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
    163 			   IRIns *ir, IRCallID id)
    164 {
    165   IRRef tmp, op1 = ir->op1, op2 = ir->op2;
    166   J->cur.nins--;
    167 #if LJ_LE
    168   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
    169   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
    170   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
    171 #else
    172   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
    173   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
    174   tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
    175 #endif
    176   ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
    177   return split_emit(J,
    178     IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    179     tmp, tmp);
    180 }
    181 
    182 /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
    183 static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
    184 {
    185   IRRef nref = oir[ref].prev;
    186   IRIns *ir = IR(nref);
    187   int32_t ofs = 4;
    188   if (ir->o == IR_KPTR)
    189     return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
    190   if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    191     /* Reassociate address. */
    192     ofs += IR(ir->op2)->i;
    193     nref = ir->op1;
    194     if (ofs == 0) return nref;
    195   }
    196   return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
    197 }
    198 
    199 #if LJ_HASFFI
/* Split a 64 bit shift/rotate into 32 bit IR ops. Constant shift amounts
** are open-coded; variable shifts fall back to a carith helper call.
** Returns the hiword ref; the loword substitution is left in ir->prev.
*/
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
			    IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      /* Normalize to a left rotate: ror by k == rol by (64-k). */
      if (op == IR_BROR) k = (-k & 63);
      /* Rotating by >= 32 swaps the two words. */
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
	J->cur.nins--;
	ir->prev = lo;
	return hi;
      } else {
	TRef k1, k2;
	IRRef t1, t2, t3, t4;
	J->cur.nins--;
	/* Each result word combines shifted-in bits from both inputs. */
	k1 = lj_ir_kint(J, k);
	k2 = lj_ir_kint(J, (-k & 31));
	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
	return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
	return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
	IRRef t1 = ir->prev, t2;
	lua_assert(op == IR_BSHR || op == IR_BSAR);
	nir->o = IR_BSHR;  /* Loword always uses a logical shift. */
	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
	return split_emit(J, IRTI(op), hi, kref);  /* Hi keeps BSHR/BSAR. */
      }
    } else {  /* k >= 32: one word is shifted out completely. */
      if (op == IR_BSHL) {
	if (k == 32)
	  J->cur.nins--;
	else
	  lo = ir->prev;
	ir->prev = lj_ir_kint(J, 0);  /* Loword becomes zero. */
	return lo;
      } else {
	lua_assert(op == IR_BSHR || op == IR_BSAR);
	if (k == 32) {
	  J->cur.nins--;
	  ir->prev = hi;  /* Loword is the old hiword. */
	} else {
	  nir->op1 = hi;
	}
	if (op == IR_BSHR)
	  return lj_ir_kint(J, 0);  /* Logical shift: hiword becomes zero. */
	else
	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));  /* Sign fill. */
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
		       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
    270 
/* Split a 64 bit BAND/BOR/BXOR into two 32 bit ops. Halves whose constant
** operand is all-zeros or all-ones are simplified (identity, absorbing
** constant, or BNOT). Returns the hiword ref.
*/
static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
			 IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      /* Normalize so that k == 0 means the op is an identity. */
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	J->cur.nins--;
	ir->prev = nir->op1;  /* Identity: pass through loword. */
      } else if (op == IR_BXOR) {
	nir->o = IR_BNOT;  /* x ^ ~0 == ~x. */
	nir->op2 = 0;
      } else {
	J->cur.nins--;
	ir->prev = kref;  /* Absorbing: result is the constant itself. */
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	return hi;  /* Identity: pass through hiword. */
      } else if (op == IR_BXOR) {
	return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
	return kref;  /* Absorbing: constant hiword. */
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
    309 #endif
    310 
    311 /* Substitute references of a snapshot. */
    312 static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
    313 {
    314   SnapEntry *map = &J->cur.snapmap[snap->mapofs];
    315   MSize n, nent = snap->nent;
    316   for (n = 0; n < nent; n++) {
    317     SnapEntry sn = map[n];
    318     IRIns *ir = &oir[snap_ref(sn)];
    319     if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
    320       map[n] = ((sn & 0xffff0000) | ir->prev);
    321   }
    322 }
    323 
/* Transform the old IR to the new IR.
** The old IR is copied to a scratch buffer; the loword substitution for
** each old ref is kept in its 'prev' field, the hiword substitution in
** the parallel hisubst[] table. The current trace IR is then re-emitted
** from scratch with 64 bit instructions split up.
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  /* Scratch space: one IRIns copy plus one hiword ref per instruction. */
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;  /* 64 bit constants take two IR slots -- skip the second. */
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      /* Rewrite each snapshot's refs as soon as its anchor is reached. */
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
	break;
      case IR_FPMATH:
	/* Try to rejoin pow from EXP2, MUL and LOG2. */
	if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
	  IRIns *irp = IR(nir->op1);
	  if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
	    IRIns *irm4 = IR(irp->op1);
	    IRIns *irm3 = IR(irm4->op1);
	    IRIns *irm12 = IR(irm3->op1);
	    IRIns *irl1 = IR(irm12->op1);
	    if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
		irl1->op2 == IRCALL_lj_vm_log2) {
	      IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
	      IRRef arg3 = irm3->op2, arg4 = irm4->op2;
	      J->cur.nins--;
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
	      ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
	      hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
	      break;
	    }
	  }
	}
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_ATAN2:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	/* Hiword: BXOR (NEG) or BAND (ABS) with the hiword of op2. */
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_FLOAD:
	lua_assert(ir->op1 == REF_NIL);
	/* Read the hiword of the GG_State field now and use it as a
	** constant; the remaining FLOAD fetches only the loword.
	*/
	hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
	nir->op2 += LJ_BE*4;
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
	inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
	}
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lua_assert(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
	}
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_BNOT:
	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
	break;
      case IR_BSWAP:
	/* Byte-swapping 64 bits also swaps the two words. */
	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
	hi = nref;
	break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
	hi = split_bitop(J, hisubst, nir, ir);
	break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
	hi = split_bitshift(J, hisubst, oir, nir, ir);
	break;
      case IR_FLOAD:
	lua_assert(ir->op2 == IRFL_CDATA_INT64);
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
	}
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
	}
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
      /* Replace TOBIT with a soft-float helper call on the split number. */
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      /* op2 is a split 64 bit constant key: restore the original ref. */
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lua_assert(st == IRT_NUM && irt_isint(ir->t));
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      /* Rewrite the copied call: its first arg may need hi/lo chaining. */
      hiref = hisubst[ir->op1];
      if (hiref) {
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
	/* Count the preceding CARGs to find the argument position. */
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
    811 
    812 /* Protected callback for split pass. */
    813 static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
    814 {
    815   jit_State *J = (jit_State *)ud;
    816   split_ir(J);
    817   UNUSED(L); UNUSED(dummy);
    818   return NULL;
    819 }
    820 
    821 #if defined(LUA_USE_ASSERT) || LJ_SOFTFP
    822 /* Slow, but sure way to check whether a SPLIT pass is needed. */
    823 static int split_needsplit(jit_State *J)
    824 {
    825   IRIns *ir, *irend;
    826   IRRef ref;
    827   for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    828     if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
    829       return 1;
    830   if (LJ_SOFTFP) {
    831     for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
    832       if ((IR(ref)->op2 & IRSLOAD_CONVERT))
    833 	return 1;
    834     if (J->chain[IR_TOBIT])
    835       return 1;
    836   }
    837   for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    838     IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    839     if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
    840 	st == IRT_I64 || st == IRT_U64)
    841       return 1;
    842   }
    843   return 0;  /* Nope. */
    844 }
    845 #endif
    846 
    847 /* SPLIT pass. */
    848 void lj_opt_split(jit_State *J)
    849 {
    850 #if LJ_SOFTFP
    851   if (!J->needsplit)
    852     J->needsplit = split_needsplit(J);
    853 #else
    854   lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
    855 #endif
    856   if (J->needsplit) {
    857     int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    858     if (errcode) {
    859       /* Completely reset the trace to avoid inconsistent dump on abort. */
    860       J->cur.nins = J->cur.nk = REF_BASE;
    861       J->cur.nsnap = 0;
    862       lj_err_throw(J->L, errcode);  /* Propagate errors. */
    863     }
    864   }
    865 }
    866 
    867 #undef IR
    868 
    869 #endif