ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

lj_snap.c (28693B)


      1 /*
      2 ** Snapshot handling.
      3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      4 */
      5 
      6 #define lj_snap_c
      7 #define LUA_CORE
      8 
      9 #include "lj_obj.h"
     10 
     11 #if LJ_HASJIT
     12 
     13 #include "lj_gc.h"
     14 #include "lj_tab.h"
     15 #include "lj_state.h"
     16 #include "lj_frame.h"
     17 #include "lj_bc.h"
     18 #include "lj_ir.h"
     19 #include "lj_jit.h"
     20 #include "lj_iropt.h"
     21 #include "lj_trace.h"
     22 #include "lj_snap.h"
     23 #include "lj_target.h"
     24 #if LJ_HASFFI
     25 #include "lj_ctype.h"
     26 #include "lj_cdata.h"
     27 #endif
     28 
     29 /* Pass IR on to next optimization in chain (FOLD).
        ** Expands to a comma expression: lj_ir_set() is invoked with the operands
        ** and the value of the whole expression is the result of lj_opt_fold().
        ** Uses a variable named J from the enclosing scope.
        */
     30 #define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
     31 
     32 /* Emit raw IR without passing through optimizations.
        ** Same shape as emitir(), but the expression ends in lj_ir_emit().
        */
     33 #define emitir_raw(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
     34 
     35 /* -- Snapshot buffer allocation ------------------------------------------ */
     36 
     37 /* Grow snapshot buffer. */
     38 void lj_snap_grow_buf_(jit_State *J, MSize need)
     39 {
     40   MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
     41   if (need > maxsnap)
     42     lj_trace_err(J, LJ_TRERR_SNAPOV);
     43   lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
     44   J->cur.snap = J->snapbuf;
     45 }
     46 
     47 /* Grow snapshot map buffer. */
     48 void lj_snap_grow_map_(jit_State *J, MSize need)
     49 {
     50   if (need < 2*J->sizesnapmap)
     51     need = 2*J->sizesnapmap;
     52   else if (need < 64)
     53     need = 64;
     54   J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
     55 		    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
     56   J->cur.snapmap = J->snapmapbuf;
     57   J->sizesnapmap = need;
     58 }
     59 
     60 /* -- Snapshot generation ------------------------------------------------- */
     61 
     62 /* Add all modified slots to the snapshot. */
     63 static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
     64 {
     65   IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
     66   BCReg s;
     67   MSize n = 0;
     68   for (s = 0; s < nslots; s++) {
     69     TRef tr = J->slot[s];
     70     IRRef ref = tref_ref(tr);
     71 #if LJ_FR2
     72     if (s == 1) continue;
     73     if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
     74       TValue *base = J->L->base - J->baseslot;
     75       tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
     76       ref = tref_ref(tr);
     77     }
     78 #endif
     79     if (ref) {
     80       SnapEntry sn = SNAP_TR(s, tr);
     81       IRIns *ir = &J->cur.ir[ref];
     82       if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
     83 	  ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
     84 	/* No need to snapshot unmodified non-inherited slots. */
     85 	if (!(ir->op2 & IRSLOAD_INHERIT))
     86 	  continue;
     87 	/* No need to restore readonly slots and unmodified non-parent slots. */
     88 	if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
     89 	    (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
     90 	  sn |= SNAP_NORESTORE;
     91       }
     92       if (LJ_SOFTFP && irt_isnum(ir->t))
     93 	sn |= SNAP_SOFTFPNUM;
     94       map[n++] = sn;
     95     }
     96   }
     97   return n;
     98 }
     99 
    100 /* Add frame links at the end of the snapshot.
        ** Walks the frames from L->base-1 downwards until the limit derived from
        ** J->baseslot is reached and tracks the highest stack extent needed by
        ** the Lua/continuation frames seen. That extent, relative to the limit
        ** and truncated to 8 bits, is stored in *topslot.
        ** With LJ_FR2 only a packed PC/base-slot word is written (2 entries);
        ** otherwise the current PC plus one or two entries per frame are written.
        ** Returns the number of map entries written.
        */
    101 static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
    102 {
    103   cTValue *frame = J->L->base - 1;
    104   cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
    105   GCfunc *fn = frame_func(frame);
          /* Initial top: frame size of a Lua function, else the current L->top. */
    106   cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
    107 #if LJ_FR2
          /* The PC is shifted up by 8 bits; the low bits hold baseslot-2. */
    108   uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
    109   lua_assert(2 <= J->baseslot && J->baseslot <= 257);
    110   memcpy(map, &pcbase, sizeof(uint64_t));
    111 #else
    112   MSize f = 0;
    113   map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
    114 #endif
    115   while (frame > lim) {  /* Backwards traversal of all frames above base. */
    116     if (frame_islua(frame)) {
    117 #if !LJ_FR2
    118       map[f++] = SNAP_MKPC(frame_pc(frame));
    119 #endif
    120       frame = frame_prevl(frame);
    121     } else if (frame_iscont(frame)) {
    122 #if !LJ_FR2
    123       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
    124       map[f++] = SNAP_MKPC(frame_contpc(frame));
    125 #endif
    126       frame = frame_prevd(frame);
    127     } else {
    128       lua_assert(!frame_isc(frame));
    129 #if !LJ_FR2
    130       map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
    131 #endif
    132       frame = frame_prevd(frame);
    133       continue;  /* Skips the ftop update below for this kind of frame. */
    134     }
          /* Raise ftop if the frame reached now extends further up the stack. */
    135     if (frame + funcproto(frame_func(frame))->framesize > ftop)
    136       ftop = frame + funcproto(frame_func(frame))->framesize;
    137   }
    138   *topslot = (uint8_t)(ftop - lim);
    139 #if LJ_FR2
    140   lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
    141   return 2;
    142 #else
    143   lua_assert(f == (MSize)(1 + J->framedepth));
    144   return f;
    145 #endif
    146 }
    147 
    148 /* Take a snapshot of the current stack. */
    149 static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
    150 {
    151   BCReg nslots = J->baseslot + J->maxslot;
    152   MSize nent;
    153   SnapEntry *p;
    154   /* Conservative estimate. */
    155   lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
    156   p = &J->cur.snapmap[nsnapmap];
    157   nent = snapshot_slots(J, p, nslots);
    158   snap->nent = (uint8_t)nent;
    159   nent += snapshot_framelinks(J, p + nent, &snap->topslot);
    160   snap->mapofs = (uint16_t)nsnapmap;
    161   snap->ref = (IRRef1)J->cur.nins;
    162   snap->nslots = (uint8_t)nslots;
    163   snap->count = 0;
    164   J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
    165 }
    166 
    167 /* Add or merge a snapshot. */
    168 void lj_snap_add(jit_State *J)
    169 {
    170   MSize nsnap = J->cur.nsnap;
    171   MSize nsnapmap = J->cur.nsnapmap;
    172   /* Merge if no ins. inbetween or if requested and no guard inbetween. */
    173   if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
    174       (J->mergesnap && !irt_isguard(J->guardemit))) {
    175     if (nsnap == 1) {  /* But preserve snap #0 PC. */
    176       emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
    177       goto nomerge;
    178     }
    179     nsnapmap = J->cur.snap[--nsnap].mapofs;
    180   } else {
    181   nomerge:
    182     lj_snap_grow_buf(J, nsnap+1);
    183     J->cur.nsnap = (uint16_t)(nsnap+1);
    184   }
    185   J->mergesnap = 0;
    186   J->guardemit.irt = 0;
    187   snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
    188 }
    189 
    190 /* -- Snapshot modification ----------------------------------------------- */
    191 
    192 #define SNAP_USEDEF_SLOTS	(LJ_MAX_JSLOTS+LJ_STACK_EXTRA)  /* Capacity of a udf[] scratch array. */
    193 
    194 /* Find unused slots with reaching-definitions bytecode data-flow analysis.
        ** udf[] is indexed by slot number relative to the current base. On return
        ** an entry of 0 marks a slot that was read before being overwritten (or
        ** that is referenced by an open upvalue); a non-zero entry marks a slot
        ** for which no such use was seen. The scan starts at pc and ends at the
        ** first jump, return, tail call or BCMfunc-mode instruction.
        ** Returns the lowest slot the callers treat as eligible for removal:
        ** the jump's minslot clamped to maxslot, 0 after a return or tail call,
        ** or maxslot when nothing is eligible.
        */
    195 static BCReg snap_usedef(jit_State *J, uint8_t *udf,
    196 			 const BCIns *pc, BCReg maxslot)
    197 {
    198   BCReg s;
    199   GCobj *o;
    200 
    201   if (maxslot == 0) return 0;
    202 #ifdef LUAJIT_USE_VALGRIND
    203   /* Avoid errors for harmless reads beyond maxslot. */
    204   memset(udf, 1, SNAP_USEDEF_SLOTS);
    205 #else
    206   memset(udf, 1, maxslot);
    207 #endif
    208 
    209   /* Treat open upvalues as used. */
    210   o = gcref(J->L->openupval);
    211   while (o) {
            /* Stop at the first upvalue that points below the current base. */
    212     if (uvval(gco2uv(o)) < J->L->base) break;
    213     udf[uvval(gco2uv(o)) - J->L->base] = 0;
    214     o = gcref(o->gch.nextgc);
    215   }
    216 
          /* USE clears bit 0: an untouched slot (1) becomes 0 = used.
          ** DEF multiplies by 3: 0 stays 0 (an earlier use wins), 1 becomes 3,
          ** so a later USE of an already redefined slot still leaves it non-zero.
          */
    217 #define USE_SLOT(s)		udf[(s)] &= ~1
    218 #define DEF_SLOT(s)		udf[(s)] *= 3
    219 
    220   /* Scan through following bytecode and check for uses/defs. */
    221   lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
    222   for (;;) {
    223     BCIns ins = *pc++;
    224     BCOp op = bc_op(ins);
            /* Operands are processed in the order B, C, A. */
    225     switch (bcmode_b(op)) {
    226     case BCMvar: USE_SLOT(bc_b(ins)); break;
    227     default: break;
    228     }
    229     switch (bcmode_c(op)) {
    230     case BCMvar: USE_SLOT(bc_c(ins)); break;
    231     case BCMrbase:
    232       lua_assert(op == BC_CAT);
              /* Slots B..C are read; everything above them is marked defined. */
    233       for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
    234       for (; s < maxslot; s++) DEF_SLOT(s);
    235       break;
    236     case BCMjump:
    237     handle_jump: {
    238       BCReg minslot = bc_a(ins);
    239       if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
    240       else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
              /* UCLO does not end the scan: continue at its jump target. */
    241       else if (op == BC_UCLO) { pc += bc_j(ins); break; }
    242       for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
    243       return minslot < maxslot ? minslot : maxslot;
    244       }
    245     case BCMlit:
    246       if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
    247 	goto handle_jump;
    248       } else if (bc_isret(op)) {
              /* Only the returned range A..top-1 is used; all other slots are dead. */
    249 	BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
    250 	for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
    251 	for (; s < top; s++) USE_SLOT(s);
    252 	for (; s < maxslot; s++) DEF_SLOT(s);
    253 	return 0;
    254       }
    255       break;
    256     case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    257     default: break;
    258     }
    259     switch (bcmode_a(op)) {
    260     case BCMvar: USE_SLOT(bc_a(ins)); break;
    261     case BCMdst:
              /* ISTC/ISFC are excluded: their destination is not marked defined. */
    262        if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
    263        break;
    264     case BCMbase:
    265       if (op >= BC_CALLM && op <= BC_VARG) {
              /* Open-ended argument lists use every slot up to maxslot. */
    266 	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
    267 		    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
    268 	if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
              /* ITERC/ITERN also read the three slots below A. */
    269 	s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
    270 	for (; s < top; s++) USE_SLOT(s);
    271 	for (; s < maxslot; s++) DEF_SLOT(s);
    272 	if (op == BC_CALLT || op == BC_CALLMT) {
              /* Tail calls end the scan; the slots below A are marked defined. */
    273 	  for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
    274 	  return 0;
    275 	}
    276       } else if (op == BC_KNIL) {
    277 	for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
    278       } else if (op == BC_TSETM) {
    279 	for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
    280       }
    281       break;
    282     default: break;
    283     }
    284     lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
    285   }
    286 
    287 #undef USE_SLOT
    288 #undef DEF_SLOT
    289 
    290   return 0;  /* unreachable */
    291 }
    292 
    293 /* Purge dead slots before the next snapshot. */
    294 void lj_snap_purge(jit_State *J)
    295 {
    296   uint8_t udf[SNAP_USEDEF_SLOTS];
    297   BCReg maxslot = J->maxslot;
    298   BCReg s = snap_usedef(J, udf, J->pc, maxslot);
    299   for (; s < maxslot; s++)
    300     if (udf[s] != 0)
    301       J->base[s] = 0;  /* Purge dead slots. */
    302 }
    303 
    304 /* Shrink last snapshot. */
    305 void lj_snap_shrink(jit_State *J)
    306 {
    307   SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
    308   SnapEntry *map = &J->cur.snapmap[snap->mapofs];
    309   MSize n, m, nlim, nent = snap->nent;
    310   uint8_t udf[SNAP_USEDEF_SLOTS];
    311   BCReg maxslot = J->maxslot;
    312   BCReg baseslot = J->baseslot;
    313   BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
    314   maxslot += baseslot;
    315   minslot += baseslot;
    316   snap->nslots = (uint8_t)maxslot;
    317   for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    318     BCReg s = snap_slot(map[n]);
    319     if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
    320       map[m++] = map[n];  /* Only copy used slots. */
    321   }
    322   snap->nent = (uint8_t)m;
    323   nlim = J->cur.nsnapmap - snap->mapofs - 1;
    324   while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
    325   J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
    326 }
    327 
    328 /* -- Snapshot access ----------------------------------------------------- */
    329 
    330 /* Initialize a Bloom Filter with all renamed refs.
    331 ** There are very few renames (often none), so the filter has
    332 ** very few bits set. This makes it suitable for negative filtering.
    333 */
    334 static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
    335 {
    336   BloomFilter rfilt = 0;
    337   IRIns *ir;
    338   for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    339     if (ir->op2 <= lim)
    340       bloomset(rfilt, ir->op1);
    341   return rfilt;
    342 }
    343 
    344 /* Process matching renames to find the original RegSP. */
    345 static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
    346 {
    347   IRIns *ir;
    348   for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    349     if (ir->op1 == ref && ir->op2 <= lim)
    350       rs = ir->prev;
    351   return rs;
    352 }
    353 
    354 /* Copy RegSP from parent snapshot to the parent links of the IR. */
    355 IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
    356 {
    357   SnapShot *snap = &T->snap[snapno];
    358   SnapEntry *map = &T->snapmap[snap->mapofs];
    359   BloomFilter rfilt = snap_renamefilter(T, snapno);
    360   MSize n = 0;
    361   IRRef ref = 0;
    362   for ( ; ; ir++) {
    363     uint32_t rs;
    364     if (ir->o == IR_SLOAD) {
    365       if (!(ir->op2 & IRSLOAD_PARENT)) break;
    366       for ( ; ; n++) {
    367 	lua_assert(n < snap->nent);
    368 	if (snap_slot(map[n]) == ir->op1) {
    369 	  ref = snap_ref(map[n++]);
    370 	  break;
    371 	}
    372       }
    373     } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
    374       ref++;
    375     } else if (ir->o == IR_PVAL) {
    376       ref = ir->op1 + REF_BIAS;
    377     } else {
    378       break;
    379     }
    380     rs = T->ir[ref].prev;
    381     if (bloomtest(rfilt, ref))
    382       rs = snap_renameref(T, snapno, ref, rs);
    383     ir->prev = (uint16_t)rs;
    384     lua_assert(regsp_used(rs));
    385   }
    386   return ir;
    387 }
    388 
    389 /* -- Snapshot replay ----------------------------------------------------- */
    390 
    391 /* Replay constant from parent trace. */
    392 static TRef snap_replay_const(jit_State *J, IRIns *ir)
    393 {
    394   /* Only have to deal with constants that can occur in stack slots. */
    395   switch ((IROp)ir->o) {
    396   case IR_KPRI: return TREF_PRI(irt_type(ir->t));
    397   case IR_KINT: return lj_ir_kint(J, ir->i);
    398   case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
    399   case IR_KNUM: case IR_KINT64:
    400     return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
    401   case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
    402   default: lua_assert(0); return TREF_NIL; break;
    403   }
    404 }
    405 
    406 /* De-duplicate parent reference. */
    407 static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
    408 {
    409   MSize j;
    410   for (j = 0; j < nmax; j++)
    411     if (snap_ref(map[j]) == ref)
    412       return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
    413   return 0;
    414 }
    415 
    416 /* Emit parent reference with de-duplication. */
    417 static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
    418 		      BloomFilter seen, IRRef ref)
    419 {
    420   IRIns *ir = &T->ir[ref];
    421   TRef tr;
    422   if (irref_isk(ref))
    423     tr = snap_replay_const(J, ir);
    424   else if (!regsp_used(ir->prev))
    425     tr = 0;
    426   else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    427     tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
    428   return tr;
    429 }
    430 
    431 /* Check whether a sunk store corresponds to an allocation. Slow path. */
    432 static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
    433 {
    434   if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
    435       irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    436     IRIns *irk = &T->ir[irs->op1];
    437     if (irk->o == IR_AREF || irk->o == IR_HREFK)
    438       irk = &T->ir[irk->op1];
    439     return (&T->ir[irk->op1] == ira);
    440   }
    441   return 0;
    442 }
    443 
    444 /* Check whether a sunk store corresponds to an allocation. Fast path. */
    445 static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
    446 {
    447   if (irs->s != 255)
    448     return (ira + irs->s == irs);  /* Fast check. */
    449   return snap_sunk_store2(T, ira, irs);
    450 }
    451 
    452 /* Replay snapshot state to setup side trace.
        ** Rebuilds J->slot[] for a side trace from snapshot J->exitno of the
        ** parent trace T, emitting the IR needed to inherit the parent's state,
        ** then takes the side trace's initial snapshot via lj_snap_add().
        ** Works in up to three passes over the snapshot entries:
        **   1. constants and parent SLOADs,
        **   2. PVALs needed by sunk or register-less instructions,
        **   3. re-emission of sunk allocations and their sunk stores.
        ** Passes 2 and 3 only run when pass 1 found a ref without register/spill.
        */
    453 void lj_snap_replay(jit_State *J, GCtrace *T)
    454 {
    455   SnapShot *snap = &T->snap[J->exitno];
    456   SnapEntry *map = &T->snapmap[snap->mapofs];
    457   MSize n, nent = snap->nent;
    458   BloomFilter seen = 0;
    459   int pass23 = 0;
    460   J->framedepth = 0;
    461   /* Emit IR for slots inherited from parent snapshot. */
    462   for (n = 0; n < nent; n++) {
    463     SnapEntry sn = map[n];
    464     BCReg s = snap_slot(sn);
    465     IRRef ref = snap_ref(sn);
    466     IRIns *ir = &T->ir[ref];
    467     TRef tr;
    468     /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    469     if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
    470       goto setslot;
    471     bloomset(seen, ref);
    472     if (irref_isk(ref)) {
    473       tr = snap_replay_const(J, ir);
    474     } else if (!regsp_used(ir->prev)) {
              /* No register or spill slot: defer to passes 2/3. The slot number
              ** itself is stored as a placeholder and recognized there.
              */
    475       pass23 = 1;
    476       lua_assert(s != 0);
    477       tr = s;
    478     } else {
    479       IRType t = irt_type(ir->t);
    480       uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
    481       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
              /* A read-only parent SLOAD stays read-only in the side trace. */
    482       if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
    483       tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    484     }
    485   setslot:
    486     J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
            /* Frame/continuation entries other than slot 0 add to the depth. */
    487     J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    488     if ((sn & SNAP_FRAME))
    489       J->baseslot = s+1;
    490   }
    491   if (pass23) {
    492     IRIns *irlast = &T->ir[snap->ref];
            /* Pass 3 only runs if pass 2 finds a sunk allocation to replay. */
    493     pass23 = 0;
    494     /* Emit dependent PVALs. */
    495     for (n = 0; n < nent; n++) {
    496       SnapEntry sn = map[n];
    497       IRRef refp = snap_ref(sn);
    498       IRIns *ir = &T->ir[refp];
    499       if (regsp_reg(ir->r) == RID_SUNK) {
              /* Skip entries whose slot no longer holds its own placeholder. */
    500 	if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
    501 	pass23 = 1;
    502 	lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
    503 		   ir->o == IR_CNEW || ir->o == IR_CNEWI);
              /* Operands at or above T->nk are passed to snap_pref(). */
    504 	if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
    505 	if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
    506 	if (LJ_HASFFI && ir->o == IR_CNEWI) {
    507 	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
    508 	    snap_pref(J, T, map, nent, seen, (ir+1)->op2);
    509 	} else {
    510 	  IRIns *irs;
              /* Request the stored values of all stores sunk into this allocation. */
    511 	  for (irs = ir+1; irs < irlast; irs++)
    512 	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
                /* A 0 result means the value has no reg/spill: use its operand. */
    513 	      if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
    514 		snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
    515 	      else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
    516 		       irs+1 < irlast && (irs+1)->o == IR_HIOP)
    517 		snap_pref(J, T, map, nent, seen, (irs+1)->op2);
    518 	    }
    519 	}
    520       } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
              /* Register-less int-to-number CONV: the slot gets the int operand. */
    521 	lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
    522 	J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
    523       }
    524     }
    525     /* Replay sunk instructions. */
    526     for (n = 0; pass23 && n < nent; n++) {
    527       SnapEntry sn = map[n];
    528       IRRef refp = snap_ref(sn);
    529       IRIns *ir = &T->ir[refp];
    530       if (regsp_reg(ir->r) == RID_SUNK) {
    531 	TRef op1, op2;
    532 	if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
              /* The stored value is used as an index into J->slot[]. */
    533 	  J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
    534 	  continue;
    535 	}
    536 	op1 = ir->op1;
    537 	if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
    538 	op2 = ir->op2;
    539 	if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
    540 	if (LJ_HASFFI && ir->o == IR_CNEWI) {
    541 	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
    542 	    lj_needsplit(J);  /* Emit joining HIOP. */
    543 	    op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
    544 			     snap_pref(J, T, map, nent, seen, (ir+1)->op2));
    545 	  }
              /* Re-emit the CNEWI with the MARK and PHI type flags cleared. */
    546 	  J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
    547 	} else {
    548 	  IRIns *irs;
    549 	  TRef tr = emitir(ir->ot, op1, op2);
    550 	  J->slot[snap_slot(sn)] = tr;
              /* Re-emit every store that was sunk into this allocation. */
    551 	  for (irs = ir+1; irs < irlast; irs++)
    552 	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
    553 	      IRIns *irr = &T->ir[irs->op1];
    554 	      TRef val, key = irr->op2, tmp = tr;
    555 	      if (irr->o != IR_FREF) {
                  /* Rebuild the key constant (a KSLOT for HREFK). */
    556 		IRIns *irk = &T->ir[key];
    557 		if (irr->o == IR_HREFK)
    558 		  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
    559 				    irk->op2);
    560 		else
    561 		  key = snap_replay_const(J, irk);
    562 		if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                    /* Re-emit the instruction the reference was based on. */
    563 		  IRIns *irf = &T->ir[irr->op1];
    564 		  tmp = emitir(irf->ot, tmp, irf->op2);
    565 		}
    566 	      }
    567 	      tmp = emitir(irr->ot, tmp, key);
    568 	      val = snap_pref(J, T, map, nent, seen, irs->op2);
    569 	      if (val == 0) {
                  /* The value is a register-less CONV: redo the conversion. */
    570 		IRIns *irc = &T->ir[irs->op2];
    571 		lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
    572 		val = snap_pref(J, T, map, nent, seen, irc->op1);
    573 		val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
    574 	      } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
    575 			 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                  /* Split 64 bit store: join both halves into a single value. */
    576 		IRType t = IRT_I64;
    577 		if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
    578 		  t = IRT_NUM;
    579 		lj_needsplit(J);
    580 		if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                    /* Both halves are constants: build one 64 bit constant. */
    581 		  uint64_t k = (uint32_t)T->ir[irs->op2].i +
    582 			       ((uint64_t)T->ir[(irs+1)->op2].i << 32);
    583 		  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
    584 		} else {
    585 		  val = emitir_raw(IRT(IR_HIOP, t), val,
    586 			  snap_pref(J, T, map, nent, seen, (irs+1)->op2));
    587 		}
    588 		tmp = emitir(IRT(irs->o, t), tmp, val);
    589 		continue;
    590 	      }
    591 	      tmp = emitir(irs->ot, tmp, val);
    592 	    } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
    593 	      emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
    594 	    }
    595 	}
    596       }
    597     }
    598   }
    599   J->base = J->slot + J->baseslot;
    600   J->maxslot = snap->nslots - J->baseslot;
    601   lj_snap_add(J);
    602   if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    603     emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
    604 }
    605 
    606 /* -- Snapshot restore ---------------------------------------------------- */
    607 
    608 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
    609 			SnapNo snapno, BloomFilter rfilt,
    610 			IRIns *ir, TValue *o);  /* Forward declaration; defined further down. */
    611 
    612 /* Restore a value from the trace exit state.
        ** Writes the value of parent ref `ref`, as of snapshot `snapno`, to *o.
        ** Constants are materialized directly; other refs are read from the spill
        ** slot or register named by their RegSP, after applying any rename that
        ** the filter `rfilt` indicates.
        */
    613 static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
    614 			    SnapNo snapno, BloomFilter rfilt,
    615 			    IRRef ref, TValue *o)
    616 {
    617   IRIns *ir = &T->ir[ref];
    618   IRType1 t = ir->t;
    619   RegSP rs = ir->prev;
    620   if (irref_isk(ref)) {  /* Restore constant slot. */
    621     lj_ir_kvalue(J->L, o, ir);
    622     return;
    623   }
          /* A filter hit may be a false positive; the rename scan settles it. */
    624   if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    625     rs = snap_renameref(T, snapno, ref, rs);
    626   if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    627     int32_t *sps = &ex->spill[regsp_spill(rs)];
    628     if (irt_isinteger(t)) {
    629       setintV(o, *sps);
    630 #if !LJ_SOFTFP
    631     } else if (irt_isnum(t)) {
              /* The number is read as a 64 bit value starting at this spill slot. */
    632       o->u64 = *(uint64_t *)sps;
    633 #endif
    634 #if LJ_64 && !LJ_GC64
    635     } else if (irt_islightud(t)) {
    636       /* 64 bit lightuserdata which may escape already has the tag bits. */
    637       o->u64 = *(uint64_t *)sps;
    638 #endif
    639     } else {
    640       lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
    641       setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    642     }
    643   } else {  /* Restore from register. */
    644     Reg r = regsp_reg(rs);
    645     if (ra_noreg(r)) {
              /* No register either: restore the integer operand of the
              ** int-to-number CONV and convert it here (only if LJ_DUALNUM).
              */
    646       lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
    647       snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
    648       if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
    649       return;
    650     } else if (irt_isinteger(t)) {
    651       setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
    652 #if !LJ_SOFTFP
    653     } else if (irt_isnum(t)) {
    654       setnumV(o, ex->fpr[r-RID_MIN_FPR]);
    655 #endif
    656 #if LJ_64 && !LJ_GC64
    657     } else if (irt_is64(t)) {
    658       /* 64 bit values that already have the tag bits. */
    659       o->u64 = ex->gpr[r-RID_MIN_GPR];
    660 #endif
    661     } else if (irt_ispri(t)) {
              /* Primitives are fully described by their type. */
    662       setpriV(o, irt_toitype(t));
    663     } else {
    664       setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    665     }
    666   }
    667 }
    668 
    669 #if LJ_HASFFI
    670 /* Restore raw data from the trace exit state.
        ** Copies sz bytes (1, 2, 4 or 8) holding the value of parent ref `ref`,
        ** as of snapshot `snapno`, to dst. The source is a constant, a spill
        ** slot or a saved register, depending on the ref and its RegSP.
        */
    671 static void snap_restoredata(GCtrace *T, ExitState *ex,
    672 			     SnapNo snapno, BloomFilter rfilt,
    673 			     IRRef ref, void *dst, CTSize sz)
    674 {
    675   IRIns *ir = &T->ir[ref];
    676   RegSP rs = ir->prev;
    677   int32_t *src;
    678   uint64_t tmp;
    679   if (irref_isk(ref)) {
    680     if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
              /* The 64 bit payload is read from the IR slot after the constant. */
    681       src = (int32_t *)&ir[1];
    682     } else if (sz == 8) {
              /* Zero-extend a 32 bit constant to the requested 8 bytes. */
    683       tmp = (uint64_t)(uint32_t)ir->i;
    684       src = (int32_t *)&tmp;
    685     } else {
    686       src = &ir->i;
    687     }
    688   } else {
    689     if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    690       rs = snap_renameref(T, snapno, ref, rs);
    691     if (ra_hasspill(regsp_spill(rs))) {
    692       src = &ex->spill[regsp_spill(rs)];
    693       if (sz == 8 && !irt_is64(ir->t)) {
              /* Zero-extend a non-64 bit spilled value to 8 bytes. */
    694 	tmp = (uint64_t)(uint32_t)*src;
    695 	src = (int32_t *)&tmp;
    696       }
    697     } else {
    698       Reg r = regsp_reg(rs);
    699       if (ra_noreg(r)) {
    700 	/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
    701 	lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
              /* Fetch the 4 byte integer operand into dst, then convert in place. */
    702 	snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
    703 	*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
    704 	return;
    705       }
    706       src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
    707 #if !LJ_SOFTFP
              /* Register ids from RID_MAX_GPR up are read from the FPR save area. */
    708       if (r >= RID_MAX_GPR) {
    709 	src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
    710 #if LJ_TARGET_PPC
    711 	if (sz == 4) {  /* PPC FPRs are always doubles. */
    712 	  *(float *)dst = (float)*(double *)src;
    713 	  return;
    714 	}
    715 #else
              /* Big-endian: a 4 byte value is the second word of the saved FPR. */
    716 	if (LJ_BE && sz == 4) src++;
    717 #endif
    718       }
    719 #endif
    720     }
    721   }
          /* Copy sz bytes; the 1 and 2 byte cases truncate the 32 bit source. */
    722   lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
    723   if (sz == 4) *(int32_t *)dst = *src;
    724   else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
    725   else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
    726   else *(int16_t *)dst = (int16_t)*src;
    727 }
    728 #endif
    729 
    730 /* Unsink allocation from the trace exit state. Unsink sunk stores.
        ** Re-creates the object of the sunk allocation `ir` (TNEW, TDUP, CNEW or
        ** CNEWI) of trace T, stores it in *o and replays the stores sunk into it
        ** up to the instruction at snapshot `snapno`'s ref, taking the stored
        ** values from the exit state `ex`.
        */
    731 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
    732 			SnapNo snapno, BloomFilter rfilt,
    733 			IRIns *ir, TValue *o)
    734 {
    735   lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
    736 	     ir->o == IR_CNEW || ir->o == IR_CNEWI);
    737 #if LJ_HASFFI
    738   if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    739     CTState *cts = ctype_cts(J->L);
            /* The ctype id is the integer constant referenced by op1. */
    740     CTypeID id = (CTypeID)T->ir[ir->op1].i;
    741     CTSize sz;
    742     CTInfo info = lj_ctype_info(cts, id, &sz);
    743     GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    744     setcdataV(J->L, o, cd);
    745     if (ir->o == IR_CNEWI) {
              /* CNEWI: op2 is the immediate payload of the cdata. */
    746       uint8_t *p = (uint8_t *)cdataptr(cd);
    747       lua_assert(sz == 4 || sz == 8);
    748       if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
                /* Split 64 bit payload: the HIOP half is restored separately. */
    749 	snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
    750 	if (LJ_BE) p += 4;
    751 	sz = 4;
    752       }
    753       snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    754     } else {
              /* CNEW: replay the XSTOREs that were sunk into this allocation. */
    755       IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
    756       for (irs = ir+1; irs < irlast; irs++)
    757 	if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
                /* iro is the constant offset operand of the store's address ADD. */
    758 	  IRIns *iro = &T->ir[T->ir[irs->op1].op2];
    759 	  uint8_t *p = (uint8_t *)cd;
    760 	  CTSize szs;
    761 	  lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
    762 	  lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
                /* The store width follows from the IR type of the store. */
    763 	  if (irt_is64(irs->t)) szs = 8;
    764 	  else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
    765 	  else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
    766 	  else szs = 4;
                /* The offset is applied to the cdata object pointer itself. */
    767 	  if (LJ_64 && iro->o == IR_KINT64)
    768 	    p += (int64_t)ir_k64(iro)->u64;
    769 	  else
    770 	    p += iro->i;
    771 	  lua_assert(p >= (uint8_t *)cdataptr(cd) &&
    772 		     p + szs <= (uint8_t *)cdataptr(cd) + sz);
    773 	  if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
    774 	    lua_assert(szs == 4);
    775 	    snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
    776 	    if (LJ_BE) p += 4;
    777 	  }
    778 	  snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
    779 	}
    780     }
    781   } else
    782 #endif
    783   {
    784     IRIns *irs, *irlast;
            /* TNEW passes op1/op2 on as arguments; TDUP copies a constant table. */
    785     GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
    786 				  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    787     settabV(J->L, o, t);
    788     irlast = &T->ir[T->snap[snapno].ref];
    789     for (irs = ir+1; irs < irlast; irs++)
    790       if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
    791 	IRIns *irk = &T->ir[irs->op1];
    792 	TValue tmp, *val;
    793 	lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
    794 		   irs->o == IR_FSTORE);
    795 	if (irk->o == IR_FREF) {
                /* The only field store expected here sets the metatable. */
    796 	  lua_assert(irk->op2 == IRFL_TAB_META);
    797 	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
                /* NOTE(review): tabV(&tmp) assumes the restored value is a table.
                ** Confirm that a sunk store of a nil/NULL metatable cannot occur.
                */
    798 	  /* NOBARRIER: The table is new (marked white). */
    799 	  setgcref(t->metatable, obj2gco(tabV(&tmp)));
    800 	} else {
                /* The key is a constant, possibly wrapped in a KSLOT. */
    801 	  irk = &T->ir[irk->op2];
    802 	  if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
    803 	  lj_ir_kvalue(J->L, &tmp, irk);
    804 	  val = lj_tab_set(J->L, t, &tmp);
    805 	  /* NOBARRIER: The table is new (marked white). */
    806 	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
    807 	  if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
                  /* Soft-float split number: the HIOP holds the high word. */
    808 	    snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
    809 	    val->u32.hi = tmp.u32.lo;
    810 	  }
    811 	}
    812       }
    813   }
    814 }
    815 
    816 /* Restore interpreter state from exit state with the help of a snapshot.
        **
        ** exptr is treated as an ExitState pointer. The snapshot is selected by
        ** J->exitno within the trace referenced by J->parent. The function sets
        ** the C frame PC to pc+1, grows the Lua stack if the snapshot needs more
        ** slots, refills the stack slots listed in the snapshot map and finally
        ** updates L->base (see the LJ_FR2 / !LJ_FR2 paths) and L->top.
        **
        ** Returns the bytecode PC read from the snapshot map entry that follows
        ** the nent slot entries.
        */
    817 const BCIns *lj_snap_restore(jit_State *J, void *exptr)
    818 {
    819   ExitState *ex = (ExitState *)exptr;
    820   SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
    821   GCtrace *T = traceref(J, J->parent);  /* Trace that owns the snapshot. */
    822   SnapShot *snap = &T->snap[snapno];
    823   MSize n, nent = snap->nent;
    824   SnapEntry *map = &T->snapmap[snap->mapofs];  /* First slot entry. */
    825 #if !LJ_FR2 || defined(LUA_USE_ASSERT)
          /* Cursor into the map, positioned via snap_nextofs(). It is walked
          ** backwards for frame links in the !LJ_FR2 path and is compared
          ** against map+nent by the assertion after the slot loop.
          */
    826   SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
    827 #endif
    828 #if !LJ_FR2
    829   ptrdiff_t ftsz0;
    830 #endif
    831   TValue *frame;
    832   BloomFilter rfilt = snap_renamefilter(T, snapno);  /* Passed to all restore helpers. */
    833   const BCIns *pc = snap_pc(&map[nent]);  /* Entry right after the slot entries. */
    834   lua_State *L = J->L;
    835 
    836   /* Set interpreter PC to the next PC to get correct error messages. */
    837   setcframe_pc(cframe_raw(L->cframe), pc+1);
    838 
    839   /* Make sure the stack is big enough for the slots from the snapshot. */
    840   if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
            /* NOTE(review): L->top is set before growing, presumably so that
            ** lj_state_growstack() sees a consistent top -- confirm.
            */
    841     L->top = curr_topL(L);
    842     lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
    843   }
    844 
    845   /* Fill stack slots with data from the registers and spill slots.
          ** Slot numbers from snap_slot() are used as indexes relative to frame.
          */
    846   frame = L->base-1-LJ_FR2;
    847 #if !LJ_FR2
    848   ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
    849 #endif
    850   for (n = 0; n < nent; n++) {
    851     SnapEntry sn = map[n];
    852     if (!(sn & SNAP_NORESTORE)) {  /* Entries flagged NORESTORE are left alone. */
    853       TValue *o = &frame[snap_slot(sn)];
    854       IRRef ref = snap_ref(sn);
    855       IRIns *ir = &T->ir[ref];
    856       if (ir->r == RID_SUNK) {
                /* Sunk instruction: either copy the value from an earlier entry
                ** with the same ref, or rebuild it via snap_unsink().
                */
    857 	MSize j;
    858 	for (j = 0; j < n; j++)
    859 	  if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
    860 	    copyTV(L, o, &frame[snap_slot(map[j])]);
    861 	    goto dupslot;
    862 	  }
    863 	snap_unsink(J, T, ex, snapno, rfilt, ir, o);
    864       dupslot:
    865 	continue;
    866       }
    867       snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
              /* Soft-float case: the value restored from ref+1 supplies the
              ** high word of the slot (its low word is copied into o->u32.hi).
              */
    868       if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
    869 	TValue tmp;
    870 	snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
    871 	o->u32.hi = tmp.u32.lo;
    872 #if !LJ_FR2
    873       } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
    874 	/* Overwrite tag with frame link. */
                /* Slot 0 gets the saved ftsz0; any other slot takes the entry at
                ** flinks and moves the cursor one entry back. L->base is then
                ** set to the slot following this one.
                */
    875 	setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
    876 	L->base = o+1;
    877 #endif
    878       }
    879     }
    880   }
    881 #if LJ_FR2
          /* Advance the base by the low byte of the map word selected by
          ** nent+LJ_BE. NOTE(review): presumably a base slot offset stored next
          ** to the PC -- confirm against the snapshot map layout.
          */
    882   L->base += (map[nent+LJ_BE] & 0xff);
    883 #endif
          /* The flinks cursor must now sit on the entry right after the slots. */
    884   lua_assert(map + nent == flinks);
    885 
    886   /* Compute current stack top.
          ** The opcodes listed below and every opcode >= BC_FUNCF take the top
          ** from the snapshot's slot count; all other opcodes use curr_topL(L).
          */
    887   switch (bc_op(*pc)) {
    888   default:
    889     if (bc_op(*pc) < BC_FUNCF) {
    890       L->top = curr_topL(L);
    891       break;
    892     }
    893     /* fallthrough */
    894   case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    895     L->top = frame + snap->nslots;
    896     break;
    897   }
    898   return pc;
    899 }
    900 
    901 #undef emitir_raw
    902 #undef emitir
    903 
    904 #endif