lj_snap.c
/*
** Snapshot handling.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
		    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
#if LJ_FR2
    if (s == 1) continue;
    if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
      TValue *base = J->L->base - J->baseslot;
      tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
      ref = tref_ref(tr);
    }
#endif
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
	  ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
	/* No need to snapshot unmodified non-inherited slots. */
	if (!(ir->op2 & IRSLOAD_INHERIT))
	  continue;
	/* No need to restore readonly slots and unmodified non-parent slots. */
	if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
	    (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
	  sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP && irt_isnum(ir->t))
	sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}
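
/* Editor's note: a sketch of the SnapEntry layout, not part of the original
** source (see lj_snap.h for the authoritative encoding):
**
**   +--------+--------+----------------+
**   |  slot  | flags  |     IR ref     |
**   | 8 bits | 8 bits |    16 bits     |
**   +--------+--------+----------------+
**
** SNAP_TR() above copies the SNAP_CONT/SNAP_FRAME bits straight out of the
** TRef, which only works because they occupy the same bit positions as
** TREF_CONT/TREF_FRAME (cf. the "Same as TREF_* flags" comment in
** lj_snap_replay() below).
*/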

/* Add frame links at the end of the snapshot. */
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
  GCfunc *fn = frame_func(frame);
  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
  uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
  lua_assert(2 <= J->baseslot && J->baseslot <= 257);
  memcpy(map, &pcbase, sizeof(uint64_t));
#else
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
#endif
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
      frame = frame_prevl(frame);
    } else if (frame_iscont(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
      frame = frame_prevd(frame);
    } else {
      lua_assert(!frame_isc(frame));
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
      frame = frame_prevd(frame);
      continue;
    }
    if (frame + funcproto(frame_func(frame))->framesize > ftop)
      ftop = frame + funcproto(frame_func(frame))->framesize;
  }
  *topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
  lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
  return 2;
#else
  lua_assert(f == (MSize)(1 + J->framedepth));
  return f;
#endif
}

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->nent = (uint8_t)nent;
  nent += snapshot_framelinks(J, p + nent, &snap->topslot);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no ins. in between, or if requested and no guard in between. */
  if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
      (J->mergesnap && !irt_isguard(J->guardemit))) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}
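
/* Editor's note: an illustration of the two merge cases in lj_snap_add()
** above, not part of the original source.  Case 1: the previous snapshot
** still points at the end of the IR (snap[nsnap-1].ref == J->cur.nins), so
** no instruction was emitted since it was taken and it can simply be
** overwritten in place.  Case 2: J->mergesnap is set and only non-guard IR
** was emitted since the last snapshot, so no side exit can ever target the
** old snapshot and replacing it is equally safe.
*/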

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS  (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
			 const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)  udf[(s)] &= ~1
#define DEF_SLOT(s)  udf[(s)] *= 3

  /* Scan through following bytecode and check for uses/defs. */
  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lua_assert(op == BC_CAT);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
	goto handle_jump;
      } else if (bc_isret(op)) {
	BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
	for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
		    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
	if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
	s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	if (op == BC_CALLT || op == BC_CALLMT) {
	  for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	  return 0;
	}
      } else if (op == BC_KNIL) {
	for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
	for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* Unreachable. */
}
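
/* Editor's note: how the udf[] encoding works, not part of the original
** source.  Every slot starts at 1.  USE_SLOT clears bit 0 and DEF_SLOT
** multiplies by 3, so udf[s] can only reach 0 if the *first* event seen for
** slot s is a use (1 -> 0, and 0 is a fixed point of both operations).  If
** a definition comes first (1 -> 3), the value stays nonzero no matter what
** follows.  Hence the callers below read udf[s] == 0 as "live at the
** snapshot" and udf[s] != 0 as "overwritten before use, i.e. dead".
*/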

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg baseslot = J->baseslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
}

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}
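
/* Editor's note: a usage sketch for the rename filter, not part of the
** original source.  The filter is only trusted for its negative answer:
**
**   if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
**     rs = snap_renameref(T, snapno, ref, rs);
**
** bloomtest() == 0 proves the ref was never renamed, so the common case
** skips the linear scan over the trailing IR_RENAME instructions entirely;
** a (rare) false positive merely costs one redundant scan.
*/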

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
	lua_assert(n < snap->nent);
	if (snap_slot(map[n]) == ir->op1) {
	  ref = snap_ref(map[n++]);
	  break;
	}
      }
    } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lua_assert(regsp_used(rs));
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: case IR_KINT64:
    return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lua_assert(0); return TREF_NIL;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
		      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = &T->ir[irs->op1];
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = &T->ir[irk->op1];
    return (&T->ir[irk->op1] == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(T, ira, irs);
}
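
/* Editor's note: why the fast path above works, not part of the original
** source.  Allocation sinking (lj_opt_sink.c) stores the distance from a
** sunk store back to its allocation in the spare irs->s field, so for a
** made-up fragment like
**
**   0010 TNEW                 (ira)
**   0013 ASTORE 0012 0011     (irs, with irs->s == 3)
**
** the check reduces to one pointer comparison: ira + 3 == irs.  The value
** 255 means the distance did not fit and snap_sunk_store2() re-derives the
** allocation from the store's key chain instead.
*/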

/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
	pass23 = 1;
	lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
		   ir->o == IR_CNEW || ir->o == IR_CNEWI);
	if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
	if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
	    snap_pref(J, T, map, nent, seen, (ir+1)->op2);
	} else {
	  IRIns *irs;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
		snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
	      else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
		       irs+1 < irlast && (irs+1)->o == IR_HIOP)
		snap_pref(J, T, map, nent, seen, (irs+1)->op2);
	    }
	}
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
	lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	TRef op1, op2;
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
	  J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
	  continue;
	}
	op1 = ir->op1;
	if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
	op2 = ir->op2;
	if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
	    lj_needsplit(J);  /* Emit joining HIOP. */
	    op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
			     snap_pref(J, T, map, nent, seen, (ir+1)->op2));
	  }
	  J->slot[snap_slot(sn)] =
	    emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
	} else {
	  IRIns *irs;
	  TRef tr = emitir(ir->ot, op1, op2);
	  J->slot[snap_slot(sn)] = tr;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      IRIns *irr = &T->ir[irs->op1];
	      TRef val, key = irr->op2, tmp = tr;
	      if (irr->o != IR_FREF) {
		IRIns *irk = &T->ir[key];
		if (irr->o == IR_HREFK)
		  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
				    irk->op2);
		else
		  key = snap_replay_const(J, irk);
		if (irr->o == IR_HREFK || irr->o == IR_AREF) {
		  IRIns *irf = &T->ir[irr->op1];
		  tmp = emitir(irf->ot, tmp, irf->op2);
		}
	      }
	      tmp = emitir(irr->ot, tmp, key);
	      val = snap_pref(J, T, map, nent, seen, irs->op2);
	      if (val == 0) {
		IRIns *irc = &T->ir[irs->op2];
		lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
		val = snap_pref(J, T, map, nent, seen, irc->op1);
		val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
	      } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
			 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
		IRType t = IRT_I64;
		if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
		  t = IRT_NUM;
		lj_needsplit(J);
		if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
		  uint64_t k = (uint32_t)T->ir[irs->op2].i +
			       ((uint64_t)T->ir[(irs+1)->op2].i << 32);
		  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
		} else {
		  val = emitir_raw(IRT(IR_HIOP, t), val,
			    snap_pref(J, T, map, nent, seen, (irs+1)->op2));
		}
		tmp = emitir(IRT(irs->o, t), tmp, val);
		continue;
	      }
	      tmp = emitir(irs->ot, tmp, val);
	    } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
	      emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
	    }
	}
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}
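
/* Editor's note: a summary of the replay passes above, not part of the
** original source.  Pass 1 turns inherited slots into constants, SLOADs or
** (for sunk values without a register/spill) a placeholder holding the
** slot number.  Pass 2 pulls the operands that sunk allocations and their
** stores depend on out of the parent trace as IR_PVALs.  Pass 3 re-emits
** each sunk allocation plus its stores in the side trace, which leaves the
** sink optimization free to sink them again or keep them.
*/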

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
			    SnapNo snapno, BloomFilter rfilt,
			    IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
#endif
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}
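
/* Editor's note: the restore order above, not part of the original source.
** A value at a side exit lives in one of four places, checked in turn:
** 1. the IR itself for constants (rematerialized, no exit state needed);
** 2. a spill slot read from ex->spill[]; 3. a register read from
** ex->gpr[]/ex->fpr[] as dumped by the exit stub; 4. nowhere at all, which
** is only legal for a number-from-integer conversion that is simply redone
** from its integer operand.
*/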

#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
			     SnapNo snapno, BloomFilter rfilt,
			     IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = (int32_t *)&ir[1];
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
	tmp = (uint64_t)(uint32_t)*src;
	src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
	/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
	lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
	*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
	return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
	src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
	if (sz == 4) {  /* PPC FPRs are always doubles. */
	  *(float *)dst = (float)*(double *)src;
	  return;
	}
#else
	if (LJ_BE && sz == 4) src++;
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif

/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
	     ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz;
    CTInfo info = lj_ctype_info(cts, id, &sz);
    GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
	snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
	if (LJ_BE) p += 4;
	sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
	if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	  IRIns *iro = &T->ir[T->ir[irs->op1].op2];
	  uint8_t *p = (uint8_t *)cd;
	  CTSize szs;
	  lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
	  lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
	  if (irt_is64(irs->t)) szs = 8;
	  else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
	  else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
	  else szs = 4;
	  if (LJ_64 && iro->o == IR_KINT64)
	    p += (int64_t)ir_k64(iro)->u64;
	  else
	    p += iro->i;
	  lua_assert(p >= (uint8_t *)cdataptr(cd) &&
		     p + szs <= (uint8_t *)cdataptr(cd) + sz);
	  if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    lua_assert(szs == 4);
	    snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p, 4);
	    if (LJ_BE) p += 4;
	  }
	  snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
	}
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
				  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	IRIns *irk = &T->ir[irs->op1];
	TValue tmp, *val;
	lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
		   irs->o == IR_FSTORE);
	if (irk->o == IR_FREF) {
	  lua_assert(irk->op2 == IRFL_TAB_META);
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  setgcref(t->metatable, obj2gco(tabV(&tmp)));
	} else {
	  irk = &T->ir[irk->op2];
	  if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
	  lj_ir_kvalue(J->L, &tmp, irk);
	  val = lj_tab_set(J->L, t, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
	  if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
	    val->u32.hi = tmp.u32.lo;
	  }
	}
      }
  }
}
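
/* Editor's note: what unsinking amounts to, not part of the original
** source.  For Lua code like
**
**   local p = {x = x}    -- TNEW + HSTORE sunk by -Osink inside a loop
**
** the exit handler performs the allocation for real (lj_tab_new/lj_tab_dup
** or lj_cdata_newx), then replays every sunk store into the fresh object,
** fetching the stored values from the exit state via snap_restoreval() or,
** for raw cdata payloads, snap_restoredata().
*/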

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
  ptrdiff_t ftsz0;
#endif
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(&map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1-LJ_FR2;
#if !LJ_FR2
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
#endif
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
	MSize j;
	for (j = 0; j < n; j++)
	  if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
	    copyTV(L, o, &frame[snap_slot(map[j])]);
	    goto dupslot;
	  }
	snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
	continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
	TValue tmp;
	snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
	o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	/* Overwrite tag with frame link. */
	setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
	L->base = o+1;
#endif
      }
    }
  }
#if LJ_FR2
  L->base += (map[nent+LJ_BE] & 0xff);
#endif
  lua_assert(map + nent == flinks);

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}

#undef emitir_raw
#undef emitir

#endif