vm_x64.dasc (133620B)
|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
|
|.arch x64
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|//-----------------------------------------------------------------------
|
|.if WIN
|.define X64WIN, 1  // Windows/x64 calling conventions.
|.endif
|
|// Fixed register assignments for the interpreter.
|// This is very fragile and has many dependencies. Caveat emptor.
|//
|// Aliasing to keep in mind when setting up C calls (see the CARG* defines
|// below): BASE (rdx) is CARG2 on Windows/x64 and CARG3 on POSIX, and
|// RA (rcx) is CARG1 on Windows/x64 and CARG4 on POSIX. Code that loads
|// those arguments must consume or save BASE/RA first.
|.define BASE, rdx  // Not C callee-save, refetched anyway.
|.if X64WIN
|.define KBASE, rdi  // Must be C callee-save.
|.define PC, rsi  // Must be C callee-save.
|.define DISPATCH, rbx  // Must be C callee-save.
|.define KBASEd, edi
|.define PCd, esi
|.define DISPATCHd, ebx
|.else
|.define KBASE, r15  // Must be C callee-save.
|.define PC, rbx  // Must be C callee-save.
|.define DISPATCH, r14  // Must be C callee-save.
|.define KBASEd, r15d
|.define PCd, ebx
|.define DISPATCHd, r14d
|.endif
|
|// Operand registers. The d/W/H/L suffixes name the 32 bit, 16 bit,
|// high byte and low byte views of the same register.
|.define RA, rcx
|.define RAd, ecx
|.define RAH, ch
|.define RAL, cl
|.define RB, rbp  // Must be rbp (C callee-save).
|.define RBd, ebp
|.define RC, rax  // Must be rax.
|.define RCd, eax
|.define RCW, ax
|.define RCH, ah
|.define RCL, al
|.define OP, RBd  // OP shares the register with RB.
|.define RD, RC  // RD shares the register with RC.
|.define RDd, RCd
|.define RDW, RCW
|.define RDL, RCL
|.define TMPR, r10
|.define TMPRd, r10d
|.define ITYPE, r11  // Scratch register used by the tag macros.
|.define ITYPEd, r11d
|
|.if X64WIN
|.define CARG1, rcx  // x64/WIN64 C call arguments.
|.define CARG2, rdx
|.define CARG3, r8
|.define CARG4, r9
|.define CARG1d, ecx
|.define CARG2d, edx
|.define CARG3d, r8d
|.define CARG4d, r9d
|.else
|.define CARG1, rdi  // x64/POSIX C call arguments.
|.define CARG2, rsi
|.define CARG3, rdx
|.define CARG4, rcx
|.define CARG5, r8
|.define CARG6, r9
|.define CARG1d, edi
|.define CARG2d, esi
|.define CARG3d, edx
|.define CARG4d, ecx
|.define CARG5d, r8d
|.define CARG6d, r9d
|.endif
|
|// Type definitions. Some of these are only used for documentation.
|.type L, lua_State
|.type GL, global_State
|.type TVALUE, TValue
|.type GCOBJ, GCobj
|.type STR, GCstr
|.type TAB, GCtab
|.type LFUNC, GCfuncL
|.type CFUNC, GCfuncC
|.type PROTO, GCproto
|.type UPVAL, GCupval
|.type NODE, Node
|.type NARGS, int
|.type TRACE, GCtrace
|.type SBUF, SBuf
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|//-----------------------------------------------------------------------
|.if X64WIN  // x64/Windows stack layout
|
|.define CFRAME_SPACE, aword*5  // Delta for rsp (see <--).
|// saveregs pushes rbp, rdi, rsi, rbx (in that order) and then reserves
|// CFRAME_SPACE bytes; restoreregs undoes this in reverse order.
|.macro saveregs_
|  push rdi; push rsi; push rbx
|  sub rsp, CFRAME_SPACE
|.endmacro
|.macro saveregs
|  push rbp; saveregs_
|.endmacro
|.macro restoreregs
|  add rsp, CFRAME_SPACE
|  pop rbx; pop rsi; pop rdi; pop rbp
|.endmacro
|
|// All slots below are addressed relative to rsp while in the interpreter.
|.define SAVE_CFRAME, aword [rsp+aword*13]
|.define SAVE_PC, aword [rsp+aword*12]
|.define SAVE_L, aword [rsp+aword*11]
|.define SAVE_ERRF, dword [rsp+dword*21]
|.define SAVE_NRES, dword [rsp+dword*20]
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
|.define SAVE_RET, aword [rsp+aword*9]  //<-- rsp entering interpreter.
|.define SAVE_R4, aword [rsp+aword*8]
|.define SAVE_R3, aword [rsp+aword*7]
|.define SAVE_R2, aword [rsp+aword*6]
|.define SAVE_R1, aword [rsp+aword*5]  //<-- rsp after register saves.
|.define ARG5, aword [rsp+aword*4]
|.define CSAVE_4, aword [rsp+aword*3]
|.define CSAVE_3, aword [rsp+aword*2]
|.define CSAVE_2, aword [rsp+aword*1]
|.define CSAVE_1, aword [rsp]  //<-- rsp while in interpreter.
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
|
|.define ARG5d, dword [rsp+dword*8]
|.define TMP1, ARG5  // TMP1 overlaps ARG5
|.define TMP1d, ARG5d
|.define TMP1hi, dword [rsp+dword*9]
|.define MULTRES, TMP1d  // MULTRES overlaps TMP1d.
|
|//-----------------------------------------------------------------------
|.else  // x64/POSIX stack layout
|
|.define CFRAME_SPACE, aword*5  // Delta for rsp (see <--).
|// saveregs pushes rbp, rbx, r15, r14 (plus r13, r12 if NO_UNWIND) and
|// then reserves CFRAME_SPACE bytes; restoreregs undoes this in reverse.
|.macro saveregs_
|  push rbx; push r15; push r14
|.if NO_UNWIND
|  push r13; push r12
|.endif
|  sub rsp, CFRAME_SPACE
|.endmacro
|.macro saveregs
|  push rbp; saveregs_
|.endmacro
|.macro restoreregs
|  add rsp, CFRAME_SPACE
|.if NO_UNWIND
|  pop r12; pop r13
|.endif
|  pop r14; pop r15; pop rbx; pop rbp
|.endmacro
|
|//----- 16 byte aligned,
|.if NO_UNWIND
|.define SAVE_RET, aword [rsp+aword*11]  //<-- rsp entering interpreter.
|.define SAVE_R4, aword [rsp+aword*10]
|.define SAVE_R3, aword [rsp+aword*9]
|.define SAVE_R2, aword [rsp+aword*8]
|.define SAVE_R1, aword [rsp+aword*7]
|.define SAVE_RU2, aword [rsp+aword*6]
|.define SAVE_RU1, aword [rsp+aword*5]  //<-- rsp after register saves.
|.else
|.define SAVE_RET, aword [rsp+aword*9]  //<-- rsp entering interpreter.
|.define SAVE_R4, aword [rsp+aword*8]
|.define SAVE_R3, aword [rsp+aword*7]
|.define SAVE_R2, aword [rsp+aword*6]
|.define SAVE_R1, aword [rsp+aword*5]  //<-- rsp after register saves.
|.endif
|.define SAVE_CFRAME, aword [rsp+aword*4]
|.define SAVE_PC, aword [rsp+aword*3]
|.define SAVE_L, aword [rsp+aword*2]
|.define SAVE_ERRF, dword [rsp+dword*3]
|.define SAVE_NRES, dword [rsp+dword*2]
|.define TMP1, aword [rsp]  //<-- rsp while in interpreter.
|//----- 16 byte aligned
|
|.define TMP1d, dword [rsp]
|.define TMP1hi, dword [rsp+dword*1]
|.define MULTRES, TMP1d  // MULTRES overlaps TMP1d.
|
|.endif
|
|//-----------------------------------------------------------------------
|
|// Instruction headers.
|// On entry to an instruction (after ins_NEXT) RA holds byte 1 of the
|// instruction and RC holds its upper 16 bits. The headers split those
|// 16 bits further where needed: RB = high byte, RC = low byte.
|.macro ins_A; .endmacro
|.macro ins_AD; .endmacro
|.macro ins_AJ; .endmacro
|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
|.macro ins_AB_; movzx RBd, RCH; .endmacro
|.macro ins_A_C; movzx RCd, RCL; .endmacro
|.macro ins_AND; not RD; .endmacro  // Bitwise complement of RD.
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|// Loads the 32 bit instruction at [PC], extracts OP (byte 0) and RA
|// (byte 1), advances PC by 4, leaves the upper 16 bits in RC and jumps
|// through the dispatch table indexed by OP.
|.macro ins_NEXT
|  mov RCd, [PC]
|  movzx RAd, RCH
|  movzx OP, RCL
|  add PC, 4
|  shr RCd, 16
|  jmp aword [DISPATCH+OP*8]
|.endmacro
|
|// Instruction footer.
|.if 1
|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|  .define ins_next, ins_NEXT
|  .define ins_next_, ins_NEXT
|.else
|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|  // Affects only certain kinds of benchmarks (and only with -j off).
|  // Around 10%-30% slower on Core2, a lot slower on P4.
|  .macro ins_next
|    jmp ->ins_next
|  .endmacro
|  .macro ins_next_
|  ->ins_next:
|    ins_NEXT
|  .endmacro
|.endif
|
|// Call decode and dispatch.
|// ins_callt: load the first instruction of the Lua function in RB and
|// dispatch to it. Only OP and RA are decoded here; RD is left untouched.
|.macro ins_callt
|  // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
|  mov PC, LFUNC:RB->pc
|  mov RAd, [PC]
|  movzx OP, RAL
|  movzx RAd, RAH
|  add PC, 4
|  jmp aword [DISPATCH+OP*8]
|.endmacro
|
|// ins_call: store the caller PC in the frame slot at [BASE-8], then
|// dispatch like ins_callt.
|.macro ins_call
|  // BASE = new base, RB = LFUNC, RD = nargs+1
|  mov [BASE-8], PC
|  ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to clear or set tags.
|// cleartp zeroes the upper 17 bits of reg. settp ORs (tp << 47) into the
|// value; the two-operand form clobbers ITYPE.
|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro
|.macro settp, reg, tp
|  mov64 ITYPE, ((uint64_t)tp<<47)
|  or reg, ITYPE
|.endmacro
|.macro settp, dst, reg, tp
|  mov64 dst, ((uint64_t)tp<<47)
|  or dst, reg
|.endmacro
|.macro setint, reg
|  settp reg, LJ_TISNUM
|.endmacro
|.macro setint, dst, reg
|  settp dst, reg, LJ_TISNUM
|.endmacro
|
|// Macros to test operand types.
|// All of them extract the tag with an arithmetic shift right by 47 into
|// ITYPE (clobbered) and branch to target on mismatch.
|// checktp additionally clears the tag bits of reg; checktp_nc does not.
|.macro checktp_nc, reg, tp, target
|  mov ITYPE, reg
|  sar ITYPE, 47
|  cmp ITYPEd, tp
|  jne target
|.endmacro
|.macro checktp, reg, tp, target
|  mov ITYPE, reg
|  cleartp reg
|  sar ITYPE, 47
|  cmp ITYPEd, tp
|  jne target
|.endmacro
|.macro checktptp, src, tp, target
|  mov ITYPE, src
|  sar ITYPE, 47
|  cmp ITYPEd, tp
|  jne target
|.endmacro
|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
|
|// checknumx compares the tag against LJ_TISNUM and applies the given
|// conditional jump; the wrappers below only differ in that condition.
|.macro checknumx, reg, target, jump
|  mov ITYPE, reg
|  sar ITYPE, 47
|  cmp ITYPEd, LJ_TISNUM
|  jump target
|.endmacro
|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro
|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
|.macro checknumber, src, target; checknumx src, target, ja; .endmacro
|
|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro
|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro
|
|// These operands must be used with movzx.
|// They address the fields of the instruction just before PC, i.e. the
|// instruction that was fetched last.
|.define PC_OP, byte [PC-4]
|.define PC_RA, byte [PC-3]
|.define PC_RB, byte [PC-1]
|.define PC_RC, byte [PC-2]
|.define PC_RD, word [PC-2]
|
|// Add a biased jump offset (in instructions) to PC.
|.macro branchPC, reg
|  lea PC, [PC+reg*4-BCBIAS_J*4]
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// Decrement hashed hotcount and trigger trace recorder if zero.
|// The counter slot is selected from (PC >> 1) & HOTCOUNT_PCMASK; the jb
|// is taken when the 16 bit subtraction borrows. Clobbers reg.
|.macro hotloop, reg
|  mov reg, PCd
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
|  jb ->vm_hotloop
|.endmacro
|
|.macro hotcall, reg
|  mov reg, PCd
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
|  jb ->vm_hotcall
|.endmacro
|
|// Set current VM state.
|// Stores the complement of LJ_VMST_<st> into g->vmstate via DISPATCH.
|.macro set_vmstate, st
|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
|// Each macro builds the 64 bit pattern in tmp and moves it into reg.
|.macro sseconst_abs, reg, tmp  // Synthesize abs mask.
|  mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
|.endmacro
|
|.macro sseconst_hi, reg, tmp, val  // Synthesize hi-32 bit const.
|  mov64 tmp, U64x(val,00000000); movd reg, tmp
|.endmacro
|
|.macro sseconst_sign, reg, tmp  // Synthesize sign mask.
|  sseconst_hi reg, tmp, 80000000
|.endmacro
|.macro sseconst_1, reg, tmp  // Synthesize 1.0.
|  sseconst_hi reg, tmp, 3ff00000
|.endmacro
|.macro sseconst_m1, reg, tmp  // Synthesize -1.0.
|  sseconst_hi reg, tmp, bff00000
|.endmacro
|.macro sseconst_2p52, reg, tmp  // Synthesize 2^52.
|  sseconst_hi reg, tmp, 43300000
|.endmacro
|.macro sseconst_tobit, reg, tmp  // Synthesize 2^52 + 2^51.
|  sseconst_hi reg, tmp, 43380000
|.endmacro
|
|// Move table write barrier back. Overwrites reg.
|// Clears the black bit of tab and pushes tab onto the front of the
|// g->gc.grayagain list (the old list head goes into tab->gclist).
|.macro barrierback, tab, reg
|  and byte tab->marked, (uint8_t)~LJ_GC_BLACK  // black2gray(tab)
|  mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
|  mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
|  mov tab->gclist, reg
|.endmacro
|
|//-----------------------------------------------------------------------

/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
  |.code_sub
  |
  |//-----------------------------------------------------------------------
  |//-- Return handling ----------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// vm_returnp: return through a frame that is neither a Lua nor a C frame.
  |// Frames without the FRAME_P bit go to the continuation dispatcher.
  |->vm_returnp:
  |  test PCd, FRAME_P
  |  jz ->cont_dispatch
  |
  |  // Return from pcall or xpcall fast func.
  |  and PC, -8
  |  sub BASE, PC  // Restore caller base.
  |  lea RA, [RA+PC-8]  // Rebase RA and prepend one result.
  |  mov PC, [BASE-8]  // Fetch PC of previous frame.
  |  // Prepending may overwrite the pcall frame, so do it at the end.
  |  mov_true ITYPE
  |  mov aword [BASE+RA], ITYPE  // Prepend true to results.
  |
  |// vm_returnc: increments RD, stores it in MULTRES and either returns to
  |// Lua (BC_RET_Z) or falls through to vm_return. An RD that wraps to zero
  |// is treated as a yield.
  |->vm_returnc:
  |  add RDd, 1  // RD = nresults+1
  |  jz ->vm_unwind_yield
  |  mov MULTRES, RDd
  |  test PC, FRAME_TYPE
  |  jz ->BC_RET_Z  // Handle regular return to Lua.
  |
  |// vm_return: return from a non-Lua frame. C frames are handled here:
  |// the results are moved down to start at BASE-16, adjusted to the number
  |// of results wanted (SAVE_NRES) and the C frame is left with status 0.
  |->vm_return:
  |  // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
  |  xor PC, FRAME_C
  |  test PCd, FRAME_TYPE
  |  jnz ->vm_returnp
  |
  |  // Return to C.
  |  set_vmstate C
  |  and PC, -8
  |  sub PC, BASE
  |  neg PC  // Previous base = BASE - delta.
  |
  |  sub RDd, 1
  |  jz >2
  |1:  // Move results down.
  |  mov RB, [BASE+RA]
  |  mov [BASE-16], RB
  |  add BASE, 8
  |  sub RDd, 1
  |  jnz <1
  |2:
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, PC
  |3:
  |  mov RDd, MULTRES
  |  mov RAd, SAVE_NRES  // RA = wanted nresults+1
  |4:
  |  cmp RAd, RDd
  |  jne >6  // More/less results wanted?
  |5:
  |  sub BASE, 16
  |  mov L:RB->top, BASE
  |
  |// vm_leave_cp: unlink this C frame (L->cframe = SAVE_CFRAME), return 0.
  |->vm_leave_cp:
  |  mov RA, SAVE_CFRAME  // Restore previous C frame.
  |  mov L:RB->cframe, RA
  |  xor eax, eax  // Ok return status for vm_pcall.
  |
  |// vm_leave_unw: restore the saved registers and return; eax is the status.
  |->vm_leave_unw:
  |  restoreregs
  |  ret
  |
  |6:
  |  // Flags are still those of 'cmp RAd, RDd' at label 4.
  |  jb >7  // Less results wanted?
  |  // More results wanted. Check stack size and fill up results with nil.
  |  cmp BASE, L:RB->maxstack
  |  ja >8
  |  mov aword [BASE-16], LJ_TNIL
  |  add BASE, 8
  |  add RDd, 1
  |  jmp <4
  |
  |7:  // Less results wanted.
  |  test RAd, RAd
  |  jz <5  // But check for LUA_MULTRET+1.
  |  sub RA, RD  // Negative result!
  |  lea BASE, [BASE+RA*8]  // Correct top.
  |  jmp <5
  |
  |8:  // Corner case: need to grow stack for filling up results.
  |  // This can happen if:
  |  // - A C function grows the stack (a lot).
  |  // - The GC shrinks the stack in between.
  |  // - A return back from a lua_call() with (high) nresults adjustment.
  |  mov L:RB->top, BASE  // Save current top held in BASE (yes).
  |  mov MULTRES, RDd  // Need to fill only remainder with nil.
  |  mov CARG2d, RAd
  |  mov CARG1, L:RB
  |  call extern lj_state_growstack  // (lua_State *L, int n)
  |  mov BASE, L:RB->top  // Need the (realloced) L->top in BASE.
  |  jmp <3
  |
  |// vm_unwind_yield: leave the C frame with status LUA_YIELD. Only al is
  |// written here; the rest of eax is zero on the path from vm_returnc
  |// (RD was just incremented to zero there).
  |->vm_unwind_yield:
  |  mov al, LUA_YIELD
  |  jmp ->vm_unwind_c_eh
  |
  |->vm_unwind_c:  // Unwind C stack, return from vm_pcall.
  |  // (void *cframe, int errcode)
  |  mov eax, CARG2d  // Error return status for vm_pcall.
  |  mov rsp, CARG1
  |->vm_unwind_c_eh:  // Landing pad for external unwinder.
  |  mov L:RB, SAVE_L
  |  mov GL:RB, L:RB->glref
  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
  |  jmp ->vm_leave_unw
  |
  |// vm_unwind_rethrow: tail-calls lj_err_throw with the error code in eax.
  |// On X64WIN no code is emitted and the label falls through to
  |// vm_unwind_ff.
  |->vm_unwind_rethrow:
  |.if not X64WIN
  |  mov CARG1, SAVE_L
  |  mov CARG2d, eax
  |  restoreregs
  |  jmp extern lj_err_throw  // (lua_State *L, int errcode)
  |.endif
  |
  |->vm_unwind_ff:  // Unwind C stack, return from ff pcall.
  |  // (void *cframe)
  |  and CARG1, CFRAME_RAWMASK
  |  mov rsp, CARG1
  |->vm_unwind_ff_eh:  // Landing pad for external unwinder.
  |  mov L:RB, SAVE_L
  |  mov RDd, 1+1  // Really 1+2 results, incr. later.
  |  mov BASE, L:RB->base
  |  mov DISPATCH, L:RB->glref  // Setup pointer to dispatch table.
  |  add DISPATCH, GG_G2DISP
  |  mov PC, [BASE-8]  // Fetch PC of previous frame.
  |  mov_false RA
  |  mov RB, [BASE]
  |  mov [BASE-16], RA  // Prepend false to error message.
  |  mov [BASE-8], RB
  |  mov RA, -16  // Results start at BASE+RA = BASE-16.
  |  set_vmstate INTERP
  |  jmp ->vm_returnc  // Increments RD/MULTRES and returns.
  |
  |//-----------------------------------------------------------------------
  |//-- Grow stack for calls -----------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// The three entry points below differ only in how the requested size
  |// (CARG2) and L->top are set up before calling lj_state_growstack.
  |->vm_growstack_c:  // Grow stack for C function.
  |  mov CARG2d, LUA_MINSTACK
  |  jmp >2
  |
  |->vm_growstack_v:  // Grow stack for vararg Lua function.
  |  sub RD, 16  // LJ_FR2
  |  jmp >1
  |
  |->vm_growstack_f:  // Grow stack for fixarg Lua function.
538 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC 539 | lea RD, [BASE+NARGS:RD*8-8] 540 |1: 541 | movzx RAd, byte [PC-4+PC2PROTO(framesize)] 542 | add PC, 4 // Must point after first instruction. 543 | mov L:RB->base, BASE 544 | mov L:RB->top, RD 545 | mov SAVE_PC, PC 546 | mov CARG2, RA 547 |2: 548 | // RB = L, L->base = new base, L->top = top 549 | mov CARG1, L:RB 550 | call extern lj_state_growstack // (lua_State *L, int n) 551 | mov BASE, L:RB->base 552 | mov RD, L:RB->top 553 | mov LFUNC:RB, [BASE-16] 554 | cleartp LFUNC:RB 555 | sub RD, BASE 556 | shr RDd, 3 557 | add NARGS:RDd, 1 558 | // BASE = new base, RB = LFUNC, RD = nargs+1 559 | ins_callt // Just retry the call. 560 | 561 |//----------------------------------------------------------------------- 562 |//-- Entry points into the assembler VM --------------------------------- 563 |//----------------------------------------------------------------------- 564 | 565 |->vm_resume: // Setup C frame and resume thread. 566 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) 567 | saveregs 568 | mov L:RB, CARG1 // Caveat: CARG1 may be RA. 569 | mov SAVE_L, CARG1 570 | mov RA, CARG2 571 | mov PCd, FRAME_CP 572 | xor RDd, RDd 573 | lea KBASE, [esp+CFRAME_RESUME] 574 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 575 | add DISPATCH, GG_G2DISP 576 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 577 | mov SAVE_CFRAME, RD 578 | mov SAVE_NRES, RDd 579 | mov SAVE_ERRF, RDd 580 | mov L:RB->cframe, KBASE 581 | cmp byte L:RB->status, RDL 582 | je >2 // Initial resume (like a call). 583 | 584 | // Resume after yield (like a return). 
585 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 586 | set_vmstate INTERP 587 | mov byte L:RB->status, RDL 588 | mov BASE, L:RB->base 589 | mov RD, L:RB->top 590 | sub RD, RA 591 | shr RDd, 3 592 | add RDd, 1 // RD = nresults+1 593 | sub RA, BASE // RA = resultofs 594 | mov PC, [BASE-8] 595 | mov MULTRES, RDd 596 | test PCd, FRAME_TYPE 597 | jz ->BC_RET_Z 598 | jmp ->vm_return 599 | 600 |->vm_pcall: // Setup protected C frame and enter VM. 601 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) 602 | saveregs 603 | mov PCd, FRAME_CP 604 | mov SAVE_ERRF, CARG4d 605 | jmp >1 606 | 607 |->vm_call: // Setup C frame and enter VM. 608 | // (lua_State *L, TValue *base, int nres1) 609 | saveregs 610 | mov PCd, FRAME_C 611 | 612 |1: // Entry point for vm_pcall above (PC = ftype). 613 | mov SAVE_NRES, CARG3d 614 | mov L:RB, CARG1 // Caveat: CARG1 may be RA. 615 | mov SAVE_L, CARG1 616 | mov RA, CARG2 617 | 618 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 619 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 620 | mov SAVE_CFRAME, KBASE 621 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 622 | add DISPATCH, GG_G2DISP 623 | mov L:RB->cframe, rsp 624 | 625 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). 626 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 627 | set_vmstate INTERP 628 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 629 | add PC, RA 630 | sub PC, BASE // PC = frame delta + frame type 631 | 632 | mov RD, L:RB->top 633 | sub RD, RA 634 | shr NARGS:RDd, 3 635 | add NARGS:RDd, 1 // RD = nargs+1 636 | 637 |->vm_call_dispatch: 638 | mov LFUNC:RB, [RA-16] 639 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. 640 | 641 |->vm_call_dispatch_f: 642 | mov BASE, RA 643 | ins_call 644 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC 645 | 646 |->vm_cpcall: // Setup protected C frame, call C. 
  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
  |  saveregs
  |  mov L:RB, CARG1  // Caveat: CARG1 may be RA.
  |  mov SAVE_L, CARG1
  |  mov SAVE_PC, L:RB  // Any value outside of bytecode is ok.
  |
  |  mov KBASE, L:RB->stack  // Compute -savestack(L, L->top).
  |  sub KBASE, L:RB->top
  |  mov DISPATCH, L:RB->glref  // Setup pointer to dispatch table.
  |  mov SAVE_ERRF, 0  // No error function.
  |  mov SAVE_NRES, KBASEd  // Neg. delta means cframe w/o frame.
  |  add DISPATCH, GG_G2DISP
  |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
  |
  |  mov KBASE, L:RB->cframe  // Add our C frame to cframe chain.
  |  mov SAVE_CFRAME, KBASE
  |  mov L:RB->cframe, rsp
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |
  |  // CARG1..CARG3 are still the incoming arguments and are passed through.
  |  call CARG4  // (lua_State *L, lua_CFunction func, void *ud)
  |  // TValue * (new base) or NULL returned in rax (RC).
  |  test RC, RC
  |  jz ->vm_leave_cp  // No base? Just remove C frame.
  |  mov RA, RC
  |  mov PCd, FRAME_CP
  |  jmp <2  // Else continue with the call.
  |
  |//-----------------------------------------------------------------------
  |//-- Metamethod handling ------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |//-- Continuation dispatch ----------------------------------------------
  |
  |// cont_dispatch: return from a metamethod call into its continuation.
  |// The continuation address is read from [RB-32] and the saved PC from
  |// [RB-24], where RB is the meta base.
  |->cont_dispatch:
  |  // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
  |  add RA, BASE
  |  and PC, -8
  |  mov RB, BASE
  |  sub BASE, PC  // Restore caller BASE.
  |  mov aword [RA+RD*8-8], LJ_TNIL  // Ensure one valid arg.
  |  mov RC, RA  // ... in [RC]
  |  mov PC, [RB-24]  // Restore PC from [cont|PC].
  |  mov RA, qword [RB-32]  // May be negative on WIN64 with debug.
  |.if FFI
  |  cmp RA, 1
  |  jbe >1
  |.endif
  |  // Reload KBASE from the prototype of the Lua function at [BASE-16].
  |  mov LFUNC:KBASE, [BASE-16]
  |  cleartp LFUNC:KBASE
  |  mov KBASE, LFUNC:KBASE->pc
  |  mov KBASE, [KBASE+PC2PROTO(k)]
  |  // BASE = base, RC = result, RB = meta base
  |  jmp RA  // Jump to continuation.
  |
  |.if FFI
  |1:
  |  // Flags are still those of 'cmp RA, 1' above.
  |  je ->cont_ffi_callback  // cont = 1: return from FFI callback.
  |  // cont = 0: Tail call from C function.
  |  sub RB, BASE
  |  shr RBd, 3
  |  lea RDd, [RBd-3]
  |  jmp ->vm_call_tail
  |.endif
  |
  |// cont_cat: continuation for the concatenation metamethod. If operands
  |// are left, the result is stored at mbase-32 and BC_CAT_Z is re-entered
  |// with the lj_meta_cat arguments already set up; else goes to cont_ra.
  |->cont_cat:  // BASE = base, RC = result, RB = mbase
  |  movzx RAd, PC_RB
  |  sub RB, 32
  |  lea RA, [BASE+RA*8]
  |  sub RA, RB
  |  je ->cont_ra
  |  neg RA
  |  shr RAd, 3
  |.if X64WIN
  |  // CARG1 is RA on X64WIN, so RA is consumed before CARG1 is loaded.
  |  mov CARG3d, RAd
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE
  |  mov RC, [RC]
  |  mov [RB], RC
  |  mov CARG2, RB
  |.else
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE
  |  mov CARG3d, RAd
  |  mov RA, [RC]
  |  mov [RB], RA
  |  mov CARG2, RB
  |.endif
  |  jmp ->BC_CAT_Z
  |
  |//-- Table indexing metamethods -----------------------------------------
  |
  |// The vmeta_tget* entries build TValue pointers for the table (RB) and
  |// the key (RC) and share the call to lj_meta_tget at label 2. Keys that
  |// are not stack slots are materialized in TMP1.
  |->vmeta_tgets:
  |  settp STR:RC, LJ_TSTR  // STR:RC = GCstr *
  |  mov TMP1, STR:RC
  |  lea RC, TMP1
  |  cmp PC_OP, BC_GGET
  |  jne >1
  |  settp TAB:RA, TAB:RB, LJ_TTAB  // TAB:RB = GCtab *
  |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
  |  mov [RB], TAB:RA
  |  jmp >2
  |
  |->vmeta_tgetb:
  |  movzx RCd, PC_RC
  |.if DUALNUM
  |  setint RC
  |  mov TMP1, RC
  |.else
  |  cvtsi2sd xmm0, RCd
  |  movsd TMP1, xmm0
  |.endif
  |  lea RC, TMP1
  |  jmp >1
  |
  |->vmeta_tgetv:
  |  movzx RCd, PC_RC  // Reload TValue *k from RC.
  |  lea RC, [BASE+RC*8]
  |1:
  |  movzx RBd, PC_RB  // Reload TValue *t from RB.
  |  lea RB, [BASE+RB*8]
  |2:
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE  // Caveat: CARG2/CARG3 may be BASE.
  |  mov CARG2, RB
  |  mov CARG3, RC
  |  mov L:RB, L:CARG1
  |  mov SAVE_PC, PC
  |  call extern lj_meta_tget  // (lua_State *L, TValue *o, TValue *k)
  |  // TValue * (finished) or NULL (metamethod) returned in rax (RC).
  |  mov BASE, L:RB->base
  |  test RC, RC
  |  jz >3
  |// cont_ra: copy the TValue at [RC] into stack slot RA of the current
  |// instruction and continue with the next instruction.
  |->cont_ra:  // BASE = base, RC = result
  |  movzx RAd, PC_RA
  |  mov RB, [RC]
  |  mov [BASE+RA*8], RB
  |  ins_next
  |
  |3:  // Call __index metamethod.
  |  // BASE = base, L->top = new base, stack = cont/func/t/k
  |  mov RA, L:RB->top
  |  mov [RA-24], PC  // [cont|PC]
  |  lea PC, [RA+FRAME_CONT]
  |  sub PC, BASE
  |  mov LFUNC:RB, [RA-16]  // Guaranteed to be a function here.
  |  mov NARGS:RDd, 2+1  // 2 args for func(t, k).
  |  cleartp LFUNC:RB
  |  jmp ->vm_call_dispatch_f
  |
  |// vmeta_tgetr: calls lj_tab_getinth. BASE is kept in RB across the call
  |// instead of being refetched from L.
  |->vmeta_tgetr:
  |  mov CARG1, TAB:RB
  |  mov RB, BASE  // Save BASE.
  |  mov CARG2d, RCd  // Caveat: CARG2 == BASE
  |  call extern lj_tab_getinth  // (GCtab *t, int32_t key)
  |  // cTValue * or NULL returned in rax (RC).
  |  movzx RAd, PC_RA
  |  mov BASE, RB  // Restore BASE.
  |  test RC, RC
  |  jnz ->BC_TGETR_Z
  |  mov ITYPE, LJ_TNIL
  |  jmp ->BC_TGETR2_Z
  |
  |//-----------------------------------------------------------------------
  |
  |// The vmeta_tset* entries mirror vmeta_tget* and share the call to
  |// lj_meta_tset at label 2.
  |->vmeta_tsets:
  |  settp STR:RC, LJ_TSTR  // STR:RC = GCstr *
  |  mov TMP1, STR:RC
  |  lea RC, TMP1
  |  cmp PC_OP, BC_GSET
  |  jne >1
  |  settp TAB:RA, TAB:RB, LJ_TTAB  // TAB:RB = GCtab *
  |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
  |  mov [RB], TAB:RA
  |  jmp >2
  |
  |->vmeta_tsetb:
  |  movzx RCd, PC_RC
  |.if DUALNUM
  |  setint RC
  |  mov TMP1, RC
  |.else
  |  cvtsi2sd xmm0, RCd
  |  movsd TMP1, xmm0
  |.endif
  |  lea RC, TMP1
  |  jmp >1
  |
  |->vmeta_tsetv:
  |  movzx RCd, PC_RC  // Reload TValue *k from RC.
  |  lea RC, [BASE+RC*8]
  |1:
  |  movzx RBd, PC_RB  // Reload TValue *t from RB.
  |  lea RB, [BASE+RB*8]
  |2:
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE  // Caveat: CARG2/CARG3 may be BASE.
  |  mov CARG2, RB
  |  mov CARG3, RC
  |  mov L:RB, L:CARG1
  |  mov SAVE_PC, PC
  |  call extern lj_meta_tset  // (lua_State *L, TValue *o, TValue *k)
  |  // TValue * (finished) or NULL (metamethod) returned in rax (RC).
  |  mov BASE, L:RB->base
  |  test RC, RC
  |  jz >3
  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
  |  // Store the value from stack slot RA into the returned slot.
  |  movzx RAd, PC_RA
  |  mov RB, [BASE+RA*8]
  |  mov [RC], RB
  |->cont_nop:  // BASE = base, (RC = result)
  |  ins_next
  |
  |3:  // Call __newindex metamethod.
  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
  |  mov RA, L:RB->top
  |  mov [RA-24], PC  // [cont|PC]
  |  movzx RCd, PC_RA
  |  // Copy value to third argument.
  |  mov RB, [BASE+RC*8]
  |  mov [RA+16], RB
  |  lea PC, [RA+FRAME_CONT]
  |  sub PC, BASE
  |  mov LFUNC:RB, [RA-16]  // Guaranteed to be a function here.
  |  mov NARGS:RDd, 3+1  // 3 args for func(t, k, v).
  |  cleartp LFUNC:RB
  |  jmp ->vm_call_dispatch_f
  |
  |// vmeta_tsetr: calls lj_tab_setinth. On both ABIs BASE ends up in RB
  |// before the call and is restored from RB afterwards.
  |->vmeta_tsetr:
  |.if X64WIN
  |  mov L:CARG1, SAVE_L
  |  mov CARG3d, RCd
  |  mov L:CARG1->base, BASE
  |  xchg CARG2, TAB:RB  // Caveat: CARG2 == BASE.
  |.else
  |  mov L:CARG1, SAVE_L
  |  mov CARG2, TAB:RB
  |  mov L:CARG1->base, BASE
  |  mov RB, BASE  // Save BASE.
  |  mov CARG3d, RCd  // Caveat: CARG3 == BASE.
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
  |  // TValue * returned in rax (RC).
  |  movzx RAd, PC_RA
  |  mov BASE, RB  // Restore BASE.
  |  jmp ->BC_TSETR_Z
  |
  |//-- Comparison metamethods ---------------------------------------------
  |
  |// vmeta_comp: calls lj_meta_comp on the operands in slots RA and RD.
  |// A result of 0 or 1 selects between falling through and taking the
  |// branch in the following instruction; any larger value is a TValue *
  |// for a metamethod call via vmeta_binop.
  |->vmeta_comp:
  |  movzx RDd, PC_RD
  |  movzx RAd, PC_RA
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE  // Caveat: CARG2/CARG3 == BASE.
  |.if X64WIN
  |  lea CARG3, [BASE+RD*8]
  |  lea CARG2, [BASE+RA*8]
  |.else
  |  lea CARG2, [BASE+RA*8]
  |  lea CARG3, [BASE+RD*8]
  |.endif
  |  mov CARG1, L:RB  // Caveat: CARG1/CARG4 == RA.
  |  movzx CARG4d, PC_OP
  |  mov SAVE_PC, PC
  |  call extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
  |  // 0/1 or TValue * (metamethod) returned in rax (RC).
  |3:
  |  mov BASE, L:RB->base
  |  cmp RC, 1
  |  ja ->vmeta_binop
  |4:
  |  lea PC, [PC+4]  // lea keeps the flags of the preceding cmp intact.
  |  jb >6
  |5:
  |  movzx RDd, PC_RD
  |  branchPC RD
  |6:
  |  ins_next
  |
  |->cont_condt:  // BASE = base, RC = result
  |  add PC, 4
  |  mov ITYPE, [RC]
  |  sar ITYPE, 47
  |  cmp ITYPEd, LJ_TISTRUECOND  // Branch if result is true.
  |  jb <5
  |  jmp <6
  |
  |->cont_condf:  // BASE = base, RC = result
  |  mov ITYPE, [RC]
  |  sar ITYPE, 47
  |  cmp ITYPEd, LJ_TISTRUECOND  // Branch if result is false.
  |  jmp <4
  |
  |// vmeta_equal: calls lj_meta_equal(L, RA, RD, RB) and shares the result
  |// handling at label 3 above. PC is moved back by one instruction first.
  |->vmeta_equal:
  |  cleartp TAB:RD
  |  sub PC, 4
  |.if X64WIN
  |  mov CARG3, RD
  |  mov CARG4d, RBd
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE  // Caveat: CARG2 == BASE.
  |  mov CARG2, RA
  |  mov CARG1, L:RB  // Caveat: CARG1 == RA.
  |.else
  |  mov CARG2, RA
  |  mov CARG4d, RBd  // Caveat: CARG4 == RA.
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE  // Caveat: CARG3 == BASE.
  |  mov CARG3, RD
  |  mov CARG1, L:RB
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
  |  // 0/1 or TValue * (metamethod) returned in rax (RC).
  |  jmp <3
  |
  |// vmeta_equal_cd: only emitted with FFI; passes the instruction word
  |// before the (decremented) PC to lj_meta_equal_cd.
  |->vmeta_equal_cd:
  |.if FFI
  |  sub PC, 4
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE
  |  mov CARG1, L:RB
  |  mov CARG2d, dword [PC-4]
  |  mov SAVE_PC, PC
  |  call extern lj_meta_equal_cd  // (lua_State *L, BCIns ins)
  |  // 0/1 or TValue * (metamethod) returned in rax (RC).
  |  jmp <3
  |.endif
  |
  |// vmeta_istype: calls lj_meta_istype(L, RA, RD), refetches BASE and
  |// continues with the next instruction (label 6 above).
  |->vmeta_istype:
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE  // Caveat: CARG2/CARG3 may be BASE.
  |  mov CARG2d, RAd
  |  mov CARG3d, RDd
  |  mov L:CARG1, L:RB
  |  mov SAVE_PC, PC
  |  call extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
  |  mov BASE, L:RB->base
  |  jmp <6
  |
  |//-- Arithmetic metamethods ---------------------------------------------
  |
  |// The entries below turn the operand indices into TValue pointers
  |// (BASE-relative for stack slots, KBASE-relative for constants) and
  |// share the call to lj_meta_arith at label 2. The *o variants reload
  |// RB/RC from the instruction when DUALNUM is enabled.
  |->vmeta_arith_vno:
  |.if DUALNUM
  |  movzx RBd, PC_RB
  |  movzx RCd, PC_RC
  |.endif
  |->vmeta_arith_vn:
  |  lea RC, [KBASE+RC*8]
  |  jmp >1
  |
  |->vmeta_arith_nvo:
  |.if DUALNUM
  |  movzx RBd, PC_RB
  |  movzx RCd, PC_RC
  |.endif
  |->vmeta_arith_nv:
  |  // Operands are swapped: RB = constant, RC = stack slot.
  |  lea TMPR, [KBASE+RC*8]
  |  lea RC, [BASE+RB*8]
  |  mov RB, TMPR
  |  jmp >2
  |
  |->vmeta_unm:
  |  // Unary minus passes the same operand twice.
  |  lea RC, [BASE+RD*8]
  |  mov RB, RC
  |  jmp >2
  |
  |->vmeta_arith_vvo:
  |.if DUALNUM
  |  movzx RBd, PC_RB
  |  movzx RCd, PC_RC
  |.endif
  |->vmeta_arith_vv:
  |  lea RC, [BASE+RC*8]
  |1:
  |  lea RB, [BASE+RB*8]
  |2:
  |  lea RA, [BASE+RA*8]
  |.if X64WIN
  |  mov CARG3, RB
  |  mov CARG4, RC
  |  movzx RCd, PC_OP
  |  mov ARG5d, RCd  // Fifth argument is passed on the stack.
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, BASE  // Caveat: CARG2 == BASE.
  |  mov CARG2, RA
  |  mov CARG1, L:RB  // Caveat: CARG1 == RA.
  |.else
  |  movzx CARG5d, PC_OP
  |  mov CARG2, RA
  |  mov CARG4, RC  // Caveat: CARG4 == RA.
  |  mov L:CARG1, SAVE_L
  |  mov L:CARG1->base, BASE  // Caveat: CARG3 == BASE.
  |  mov CARG3, RB
  |  mov L:RB, L:CARG1
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
  |  // NULL (finished) or TValue * (metamethod) returned in rax (RC).
  |  mov BASE, L:RB->base
  |  test RC, RC
  |  jz ->cont_nop
  |
  |  // Call metamethod for binary op.
1051 |->vmeta_binop: // Sets up a continuation frame and dispatches the metamethod call.
1052 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1053 | mov RA, RC // RA = new base.
1054 | sub RC, BASE // RC = byte offset of the new base from BASE.
1055 | mov [RA-24], PC // [cont|PC]
1056 | lea PC, [RC+FRAME_CONT] // PC = that offset plus FRAME_CONT.
1057 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2). RD aliases RC, so this follows the lea.
1058 | jmp ->vm_call_dispatch
1059 |
1060 |->vmeta_len: // Calls lj_meta_len on the operand slot named by the RD operand.
1061 | movzx RDd, PC_RD
1062 | mov L:RB, SAVE_L
1063 | mov L:RB->base, BASE
1064 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE on X64WIN (L->base is stored above).
1065 | mov L:CARG1, L:RB
1066 | mov SAVE_PC, PC
1067 | call extern lj_meta_len // (lua_State *L, TValue *o)
1068 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1069 | mov BASE, L:RB->base
1070 #if LJ_52
1071 | test RC, RC
1072 | jne ->vmeta_binop // Binop call for compatibility.
1073 | movzx RDd, PC_RD // NULL: reload the operand, strip its tag and retry at BC_LEN_Z.
1074 | mov TAB:CARG1, [BASE+RD*8]
1075 | cleartp TAB:CARG1
1076 | jmp ->BC_LEN_Z
1077 #else
1078 | jmp ->vmeta_binop // Binop call for compatibility.
1079 #endif
1080 |
1081 |//-- Call metamethod ----------------------------------------------------
1082 |
1083 |->vmeta_call_ra:
1084 | lea RA, [BASE+RA*8+16] // Turn the slot index in RA into the new base address.
1085 |->vmeta_call: // Resolve and call __call metamethod.
1086 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1088 | mov RB, RA // RB (rbp) is C callee-save, so the new base survives the call.
1089 |.if X64WIN
1090 | mov L:TMPR, SAVE_L
1091 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092 | lea CARG2, [RA-16]
1093 | lea CARG3, [RA+NARGS:RD*8-8]
1094 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1095 |.else
1096 | mov L:CARG1, SAVE_L
1097 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098 | lea CARG2, [RA-16]
1099 | lea CARG3, [RA+NARGS:RD*8-8]
1100 |.endif
1101 | mov SAVE_PC, PC
1102 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1103 | mov RA, RB // Restore the new base.
1104 | mov L:RB, SAVE_L
1105 | mov BASE, L:RB->base
1106 | mov NARGS:RDd, TMP1d
1107 | mov LFUNC:RB, [RA-16] // Reload the function slot after the C call.
1108 | add NARGS:RDd, 1 // One more argument (presumably the one lj_meta_call inserted -- confirm).
1109 | // This is fragile. L->base must not move, KBASE must always be defined.
1110 | cmp KBASE, BASE // Continue with CALLT if flag set.
1111 | je ->BC_CALLT_Z // NOTE(review): RB is not yet untagged on this path; confirm BC_CALLT_Z does the cleartp.
1112 | cleartp LFUNC:RB
1113 | mov BASE, RA
1114 | ins_call // Otherwise call resolved metamethod.
1115 |
1116 |//-- Argument coercion for 'for' statement ------------------------------
1117 |
1118 |->vmeta_for: // Calls lj_meta_for on the slots at RA, then re-dispatches the instruction at PC-4.
1119 | mov L:RB, SAVE_L
1120 | mov L:RB->base, BASE
1121 | mov CARG2, RA // Caveat: CARG2 == BASE on X64WIN (L->base is stored above).
1122 | mov L:CARG1, L:RB // Caveat: CARG1 == RA on X64WIN, hence written after CARG2.
1123 | mov SAVE_PC, PC
1124 | call extern lj_meta_for // (lua_State *L, TValue *base)
1125 | mov BASE, L:RB->base
1126 | mov RCd, [PC-4] // Re-decode the 32 bit word at PC-4:
1127 | movzx RAd, RCH //   second byte -> RA,
1128 | movzx OP, RCL //   low byte -> OP,
1129 | shr RCd, 16 //   upper 16 bits stay in RC (= RD).
1130 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1131 |
1132 |//-----------------------------------------------------------------------
1133 |//-- Fast functions -----------------------------------------------------
1134 |//-----------------------------------------------------------------------
1135 |
1136 |.macro .ffunc, name
1137 |->ff_ .. name: // Entry label only: no argument count check.
1138 |.endmacro
1139 |
1140 |.macro .ffunc_1, name
1141 |->ff_ .. name:
1142 | cmp NARGS:RDd, 1+1; jb ->fff_fallback // RD = nargs+1: fall back with fewer than 1 argument. Flags stay live for the user.
1143 |.endmacro
1144 |
1145 |.macro .ffunc_2, name
1146 |->ff_ .. name:
1147 | cmp NARGS:RDd, 2+1; jb ->fff_fallback // Fall back with fewer than 2 arguments.
1148 |.endmacro
1149 |
1150 |.macro .ffunc_n, name, op
1151 | .ffunc_1 name
1152 | checknumtp [BASE], ->fff_fallback
1153 | op xmm0, qword [BASE] // Apply 'op' to the first argument, result in xmm0.
1154 |.endmacro
1155 |
1156 |.macro .ffunc_n, name
1157 | .ffunc_n name, movsd // Two-parameter form with a plain load as the op.
1158 |.endmacro
1159 |
1160 |.macro .ffunc_nn, name
1161 | .ffunc_2 name
1162 | checknumtp [BASE], ->fff_fallback
1163 | checknumtp [BASE+8], ->fff_fallback
1164 | movsd xmm0, qword [BASE] // First argument in xmm0, second in xmm1.
1165 | movsd xmm1, qword [BASE+8]
1166 |.endmacro
1167 |
1168 |// Inlined GC threshold check. Caveat: uses label 1.
1169 |.macro ffgccheck 1170 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] 1171 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] 1172 | jb >1 1173 | call ->fff_gcstep 1174 |1: 1175 |.endmacro 1176 | 1177 |//-- Base library: checks ----------------------------------------------- 1178 | 1179 |.ffunc_1 assert 1180 | mov ITYPE, [BASE] 1181 | mov RB, ITYPE 1182 | sar ITYPE, 47 1183 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback 1184 | mov PC, [BASE-8] 1185 | mov MULTRES, RDd 1186 | mov RB, [BASE] 1187 | mov [BASE-16], RB 1188 | sub RDd, 2 1189 | jz >2 1190 | mov RA, BASE 1191 |1: 1192 | add RA, 8 1193 | mov RB, [RA] 1194 | mov [RA-16], RB 1195 | sub RDd, 1 1196 | jnz <1 1197 |2: 1198 | mov RDd, MULTRES 1199 | jmp ->fff_res_ 1200 | 1201 |.ffunc_1 type 1202 | mov RC, [BASE] 1203 | sar RC, 47 1204 | mov RBd, LJ_TISNUM 1205 | cmp RCd, RBd 1206 | cmovb RCd, RBd 1207 | not RCd 1208 |2: 1209 | mov CFUNC:RB, [BASE-16] 1210 | cleartp CFUNC:RB 1211 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] 1212 | mov PC, [BASE-8] 1213 | settp STR:RC, LJ_TSTR 1214 | mov [BASE-16], STR:RC 1215 | jmp ->fff_res1 1216 | 1217 |//-- Base library: getters and setters --------------------------------- 1218 | 1219 |.ffunc_1 getmetatable 1220 | mov TAB:RB, [BASE] 1221 | mov PC, [BASE-8] 1222 | checktab TAB:RB, >6 1223 |1: // Field metatable must be at same offset for GCtab and GCudata! 1224 | mov TAB:RB, TAB:RB->metatable 1225 |2: 1226 | test TAB:RB, TAB:RB 1227 | mov aword [BASE-16], LJ_TNIL 1228 | jz ->fff_res1 1229 | settp TAB:RC, TAB:RB, LJ_TTAB 1230 | mov [BASE-16], TAB:RC // Store metatable as default result. 1231 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] 1232 | mov RAd, TAB:RB->hmask 1233 | and RAd, STR:RC->hash 1234 | settp STR:RC, LJ_TSTR 1235 | imul RAd, #NODE 1236 | add NODE:RA, TAB:RB->node 1237 |3: // Rearranged logic, because we expect _not_ to find the key. 
1238 | cmp NODE:RA->key, STR:RC 1239 | je >5 1240 |4: 1241 | mov NODE:RA, NODE:RA->next 1242 | test NODE:RA, NODE:RA 1243 | jnz <3 1244 | jmp ->fff_res1 // Not found, keep default result. 1245 |5: 1246 | mov RB, NODE:RA->val 1247 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. 1248 | mov [BASE-16], RB // Return value of mt.__metatable. 1249 | jmp ->fff_res1 1250 | 1251 |6: 1252 | cmp ITYPEd, LJ_TUDATA; je <1 1253 | cmp ITYPEd, LJ_TISNUM; ja >7 1254 | mov ITYPEd, LJ_TISNUM 1255 |7: 1256 | not ITYPEd 1257 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] 1258 | jmp <2 1259 | 1260 |.ffunc_2 setmetatable 1261 | mov TAB:RB, [BASE] 1262 | mov TAB:TMPR, TAB:RB 1263 | checktab TAB:RB, ->fff_fallback 1264 | // Fast path: no mt for table yet and not clearing the mt. 1265 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback 1266 | mov TAB:RA, [BASE+8] 1267 | checktab TAB:RA, ->fff_fallback 1268 | mov TAB:RB->metatable, TAB:RA 1269 | mov PC, [BASE-8] 1270 | mov [BASE-16], TAB:TMPR // Return original table. 1271 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 1272 | jz >1 1273 | // Possible write barrier. Table is black, but skip iswhite(mt) check. 1274 | barrierback TAB:RB, RC 1275 |1: 1276 | jmp ->fff_res1 1277 | 1278 |.ffunc_2 rawget 1279 |.if X64WIN 1280 | mov TAB:RA, [BASE] 1281 | checktab TAB:RA, ->fff_fallback 1282 | mov RB, BASE // Save BASE. 1283 | lea CARG3, [BASE+8] 1284 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE. 1285 | mov CARG1, SAVE_L 1286 |.else 1287 | mov TAB:CARG2, [BASE] 1288 | checktab TAB:CARG2, ->fff_fallback 1289 | mov RB, BASE // Save BASE. 1290 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. 1291 | mov CARG1, SAVE_L 1292 |.endif 1293 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1294 | // cTValue * returned in eax (RD). 1295 | mov BASE, RB // Restore BASE. 1296 | // Copy table slot. 
1297 | mov RB, [RD] 1298 | mov PC, [BASE-8] 1299 | mov [BASE-16], RB 1300 | jmp ->fff_res1 1301 | 1302 |//-- Base library: conversions ------------------------------------------ 1303 | 1304 |.ffunc tonumber 1305 | // Only handles the number case inline (without a base argument). 1306 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. 1307 | mov RB, [BASE] 1308 | checknumber RB, ->fff_fallback 1309 | mov PC, [BASE-8] 1310 | mov [BASE-16], RB 1311 | jmp ->fff_res1 1312 | 1313 |.ffunc_1 tostring 1314 | // Only handles the string or number case inline. 1315 | mov PC, [BASE-8] 1316 | mov STR:RB, [BASE] 1317 | checktp_nc STR:RB, LJ_TSTR, >3 1318 | // A __tostring method in the string base metatable is ignored. 1319 |2: 1320 | mov [BASE-16], STR:RB 1321 | jmp ->fff_res1 1322 |3: // Handle numbers inline, unless a number base metatable is present. 1323 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1 1324 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 1325 | jne ->fff_fallback 1326 | ffgccheck // Caveat: uses label 1. 1327 | mov L:RB, SAVE_L 1328 | mov L:RB->base, BASE // Add frame since C call can throw. 1329 | mov SAVE_PC, PC // Redundant (but a defined value). 1330 |.if not X64WIN 1331 | mov CARG2, BASE // Otherwise: CARG2 == BASE 1332 |.endif 1333 | mov L:CARG1, L:RB 1334 |.if DUALNUM 1335 | call extern lj_strfmt_number // (lua_State *L, cTValue *o) 1336 |.else 1337 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np) 1338 |.endif 1339 | // GCstr returned in eax (RD). 1340 | mov BASE, L:RB->base 1341 | settp STR:RB, RD, LJ_TSTR 1342 | jmp <2 1343 | 1344 |//-- Base library: iterators ------------------------------------------- 1345 | 1346 |.ffunc_1 next 1347 | je >2 // Missing 2nd arg? 
1348 |1: 1349 |.if X64WIN 1350 | mov RA, [BASE] 1351 | checktab RA, ->fff_fallback 1352 |.else 1353 | mov CARG2, [BASE] 1354 | checktab CARG2, ->fff_fallback 1355 |.endif 1356 | mov L:RB, SAVE_L 1357 | mov L:RB->base, BASE // Add frame since C call can throw. 1358 | mov L:RB->top, BASE // Dummy frame length is ok. 1359 | mov PC, [BASE-8] 1360 |.if X64WIN 1361 | lea CARG3, [BASE+8] 1362 | mov CARG2, RA // Caveat: CARG2 == BASE. 1363 | mov CARG1, L:RB 1364 |.else 1365 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. 1366 | mov CARG1, L:RB 1367 |.endif 1368 | mov SAVE_PC, PC // Needed for ITERN fallback. 1369 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1370 | // Flag returned in eax (RD). 1371 | mov BASE, L:RB->base 1372 | test RDd, RDd; jz >3 // End of traversal? 1373 | // Copy key and value to results. 1374 | mov RB, [BASE+8] 1375 | mov RD, [BASE+16] 1376 | mov [BASE-16], RB 1377 | mov [BASE-8], RD 1378 |->fff_res2: 1379 | mov RDd, 1+2 1380 | jmp ->fff_res 1381 |2: // Set missing 2nd arg to nil. 1382 | mov aword [BASE+8], LJ_TNIL 1383 | jmp <1 1384 |3: // End of traversal: return nil. 
1385 | mov aword [BASE-16], LJ_TNIL 1386 | jmp ->fff_res1 1387 | 1388 |.ffunc_1 pairs 1389 | mov TAB:RB, [BASE] 1390 | mov TMPR, TAB:RB 1391 | checktab TAB:RB, ->fff_fallback 1392 #if LJ_52 1393 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback 1394 #endif 1395 | mov CFUNC:RD, [BASE-16] 1396 | cleartp CFUNC:RD 1397 | mov CFUNC:RD, CFUNC:RD->upvalue[0] 1398 | settp CFUNC:RD, LJ_TFUNC 1399 | mov PC, [BASE-8] 1400 | mov [BASE-16], CFUNC:RD 1401 | mov [BASE-8], TMPR 1402 | mov aword [BASE], LJ_TNIL 1403 | mov RDd, 1+3 1404 | jmp ->fff_res 1405 | 1406 |.ffunc_2 ipairs_aux 1407 | mov TAB:RB, [BASE] 1408 | checktab TAB:RB, ->fff_fallback 1409 |.if DUALNUM 1410 | mov RA, [BASE+8] 1411 | checkint RA, ->fff_fallback 1412 |.else 1413 | checknumtp [BASE+8], ->fff_fallback 1414 | movsd xmm0, qword [BASE+8] 1415 |.endif 1416 | mov PC, [BASE-8] 1417 |.if DUALNUM 1418 | add RAd, 1 1419 | setint ITYPE, RA 1420 | mov [BASE-16], ITYPE 1421 |.else 1422 | sseconst_1 xmm1, TMPR 1423 | addsd xmm0, xmm1 1424 | cvttsd2si RAd, xmm0 1425 | movsd qword [BASE-16], xmm0 1426 |.endif 1427 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part? 1428 | mov RD, TAB:RB->array 1429 | lea RD, [RD+RA*8] 1430 |1: 1431 | cmp aword [RD], LJ_TNIL; je ->fff_res0 1432 | // Copy array slot. 1433 | mov RB, [RD] 1434 | mov [BASE-8], RB 1435 | jmp ->fff_res2 1436 |2: // Check for empty hash part first. Otherwise call C function. 1437 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 1438 |.if X64WIN 1439 | mov TMPR, BASE 1440 | mov CARG2d, RAd 1441 | mov CARG1, TAB:RB 1442 | mov RB, TMPR 1443 |.else 1444 | mov CARG1, TAB:RB 1445 | mov RB, BASE // Save BASE. 1446 | mov CARG2d, RAd // Caveat: CARG2 == BASE 1447 |.endif 1448 | call extern lj_tab_getinth // (GCtab *t, int32_t key) 1449 | // cTValue * or NULL returned in eax (RD). 
1450 | mov BASE, RB 1451 | test RD, RD 1452 | jnz <1 1453 |->fff_res0: 1454 | mov RDd, 1+0 1455 | jmp ->fff_res 1456 | 1457 |.ffunc_1 ipairs 1458 | mov TAB:RB, [BASE] 1459 | mov TMPR, TAB:RB 1460 | checktab TAB:RB, ->fff_fallback 1461 #if LJ_52 1462 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback 1463 #endif 1464 | mov CFUNC:RD, [BASE-16] 1465 | cleartp CFUNC:RD 1466 | mov CFUNC:RD, CFUNC:RD->upvalue[0] 1467 | settp CFUNC:RD, LJ_TFUNC 1468 | mov PC, [BASE-8] 1469 | mov [BASE-16], CFUNC:RD 1470 | mov [BASE-8], TMPR 1471 |.if DUALNUM 1472 | mov64 RD, ((int64_t)LJ_TISNUM<<47) 1473 | mov [BASE], RD 1474 |.else 1475 | mov qword [BASE], 0 1476 |.endif 1477 | mov RDd, 1+3 1478 | jmp ->fff_res 1479 | 1480 |//-- Base library: catch errors ---------------------------------------- 1481 | 1482 |.ffunc_1 pcall 1483 | lea RA, [BASE+16] 1484 | sub NARGS:RDd, 1 1485 | mov PCd, 16+FRAME_PCALL 1486 |1: 1487 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)] 1488 | shr RB, HOOK_ACTIVE_SHIFT 1489 | and RB, 1 1490 | add PC, RB // Remember active hook before pcall. 1491 | // Note: this does a (harmless) copy of the function to the PC slot, too. 1492 | mov KBASE, RD 1493 |2: 1494 | mov RB, [RA+KBASE*8-24] 1495 | mov [RA+KBASE*8-16], RB 1496 | sub KBASE, 1 1497 | ja <2 1498 | jmp ->vm_call_dispatch 1499 | 1500 |.ffunc_2 xpcall 1501 | mov LFUNC:RA, [BASE+8] 1502 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback 1503 | mov LFUNC:RB, [BASE] // Swap function and traceback. 
1504 | mov [BASE], LFUNC:RA 1505 | mov [BASE+8], LFUNC:RB 1506 | lea RA, [BASE+24] 1507 | sub NARGS:RDd, 2 1508 | mov PCd, 24+FRAME_PCALL 1509 | jmp <1 1510 | 1511 |//-- Coroutine library -------------------------------------------------- 1512 | 1513 |.macro coroutine_resume_wrap, resume 1514 |.if resume 1515 |.ffunc_1 coroutine_resume 1516 | mov L:RB, [BASE] 1517 | cleartp L:RB 1518 |.else 1519 |.ffunc coroutine_wrap_aux 1520 | mov CFUNC:RB, [BASE-16] 1521 | cleartp CFUNC:RB 1522 | mov L:RB, CFUNC:RB->upvalue[0].gcr 1523 | cleartp L:RB 1524 |.endif 1525 | mov PC, [BASE-8] 1526 | mov SAVE_PC, PC 1527 | mov TMP1, L:RB 1528 |.if resume 1529 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback 1530 |.endif 1531 | cmp aword L:RB->cframe, 0; jne ->fff_fallback 1532 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback 1533 | mov RA, L:RB->top 1534 | je >1 // Status != LUA_YIELD (i.e. 0)? 1535 | cmp RA, L:RB->base // Check for presence of initial func. 1536 | je ->fff_fallback 1537 | mov PC, [RA-8] // Move initial function up. 1538 | mov [RA], PC 1539 | add RA, 8 1540 |1: 1541 |.if resume 1542 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). 1543 |.else 1544 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). 1545 |.endif 1546 | cmp PC, L:RB->maxstack; ja ->fff_fallback 1547 | mov L:RB->top, PC 1548 | 1549 | mov L:RB, SAVE_L 1550 | mov L:RB->base, BASE 1551 |.if resume 1552 | add BASE, 8 // Keep resumed thread in stack for GC. 1553 |.endif 1554 | mov L:RB->top, BASE 1555 |.if resume 1556 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. 1557 |.else 1558 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. 1559 |.endif 1560 | sub RB, PC // Relative to PC. 1561 | 1562 | cmp PC, RA 1563 | je >3 1564 |2: // Move args to coroutine. 
1565 | mov RC, [PC+RB] 1566 | mov [PC-8], RC 1567 | sub PC, 8 1568 | cmp PC, RA 1569 | jne <2 1570 |3: 1571 | mov CARG2, RA 1572 | mov CARG1, TMP1 1573 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1574 | 1575 | mov L:RB, SAVE_L 1576 | mov L:PC, TMP1 1577 | mov BASE, L:RB->base 1578 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 1579 | set_vmstate INTERP 1580 | 1581 | cmp eax, LUA_YIELD 1582 | ja >8 1583 |4: 1584 | mov RA, L:PC->base 1585 | mov KBASE, L:PC->top 1586 | mov L:PC->top, RA // Clear coroutine stack. 1587 | mov PC, KBASE 1588 | sub PC, RA 1589 | je >6 // No results? 1590 | lea RD, [BASE+PC] 1591 | shr PCd, 3 1592 | cmp RD, L:RB->maxstack 1593 | ja >9 // Need to grow stack? 1594 | 1595 | mov RB, BASE 1596 | sub RB, RA 1597 |5: // Move results from coroutine. 1598 | mov RD, [RA] 1599 | mov [RA+RB], RD 1600 | add RA, 8 1601 | cmp RA, KBASE 1602 | jne <5 1603 |6: 1604 |.if resume 1605 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results. 1606 | mov_true ITYPE // Prepend true to results. 1607 | mov [BASE-8], ITYPE 1608 |.else 1609 | lea RDd, [PCd+1] // nresults+1 = 1 + results. 1610 |.endif 1611 |7: 1612 | mov PC, SAVE_PC 1613 | mov MULTRES, RDd 1614 |.if resume 1615 | mov RA, -8 1616 |.else 1617 | xor RAd, RAd 1618 |.endif 1619 | test PCd, FRAME_TYPE 1620 | jz ->BC_RET_Z 1621 | jmp ->vm_return 1622 | 1623 |8: // Coroutine returned with error (at co->top-1). 1624 |.if resume 1625 | mov_false ITYPE // Prepend false to results. 1626 | mov [BASE-8], ITYPE 1627 | mov RA, L:PC->top 1628 | sub RA, 8 1629 | mov L:PC->top, RA // Clear error from coroutine stack. 1630 | // Copy error message. 1631 | mov RD, [RA] 1632 | mov [BASE], RD 1633 | mov RDd, 1+2 // nresults+1 = 1 + false + error. 1634 | jmp <7 1635 |.else 1636 | mov CARG2, L:PC 1637 | mov CARG1, L:RB 1638 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) 1639 | // Error function does not return. 1640 |.endif 1641 | 1642 |9: // Handle stack expansion on return from yield. 
1643 | mov L:RA, TMP1 1644 | mov L:RA->top, KBASE // Undo coroutine stack clearing. 1645 | mov CARG2, PC 1646 | mov CARG1, L:RB 1647 | call extern lj_state_growstack // (lua_State *L, int n) 1648 | mov L:PC, TMP1 1649 | mov BASE, L:RB->base 1650 | jmp <4 // Retry the stack move. 1651 |.endmacro 1652 | 1653 | coroutine_resume_wrap 1 // coroutine.resume 1654 | coroutine_resume_wrap 0 // coroutine.wrap 1655 | 1656 |.ffunc coroutine_yield 1657 | mov L:RB, SAVE_L 1658 | test aword L:RB->cframe, CFRAME_RESUME 1659 | jz ->fff_fallback 1660 | mov L:RB->base, BASE 1661 | lea RD, [BASE+NARGS:RD*8-8] 1662 | mov L:RB->top, RD 1663 | xor RDd, RDd 1664 | mov aword L:RB->cframe, RD 1665 | mov al, LUA_YIELD 1666 | mov byte L:RB->status, al 1667 | jmp ->vm_leave_unw 1668 | 1669 |//-- Math library ------------------------------------------------------- 1670 | 1671 | .ffunc_1 math_abs 1672 | mov RB, [BASE] 1673 |.if DUALNUM 1674 | checkint RB, >3 1675 | cmp RBd, 0; jns ->fff_resi 1676 | neg RBd; js >2 1677 |->fff_resbit: 1678 |->fff_resi: 1679 | setint RB 1680 |->fff_resRB: 1681 | mov PC, [BASE-8] 1682 | mov [BASE-16], RB 1683 | jmp ->fff_res1 1684 |2: 1685 | mov64 RB, U64x(41e00000,00000000) // 2^31. 1686 | jmp ->fff_resRB 1687 |3: 1688 | ja ->fff_fallback 1689 |.else 1690 | checknum RB, ->fff_fallback 1691 |.endif 1692 | shl RB, 1 1693 | shr RB, 1 1694 | mov PC, [BASE-8] 1695 | mov [BASE-16], RB 1696 | jmp ->fff_res1 1697 | 1698 |.ffunc_n math_sqrt, sqrtsd 1699 |->fff_resxmm0: 1700 | mov PC, [BASE-8] 1701 | movsd qword [BASE-16], xmm0 1702 | // fallthrough 1703 | 1704 |->fff_res1: 1705 | mov RDd, 1+1 1706 |->fff_res: 1707 | mov MULTRES, RDd 1708 |->fff_res_: 1709 | test PCd, FRAME_TYPE 1710 | jnz >7 1711 |5: 1712 | cmp PC_RB, RDL // More results expected? 1713 | ja >6 1714 | // Adjust BASE. KBASE is assumed to be set for the calling frame. 
1715 | movzx RAd, PC_RA 1716 | neg RA 1717 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 1718 | ins_next 1719 | 1720 |6: // Fill up results with nil. 1721 | mov aword [BASE+RD*8-24], LJ_TNIL 1722 | add RD, 1 1723 | jmp <5 1724 | 1725 |7: // Non-standard return case. 1726 | mov RA, -16 // Results start at BASE+RA = BASE-16. 1727 | jmp ->vm_return 1728 | 1729 |.macro math_round, func 1730 | .ffunc math_ .. func 1731 |.if DUALNUM 1732 | mov RB, [BASE] 1733 | checknumx RB, ->fff_resRB, je 1734 | ja ->fff_fallback 1735 |.else 1736 | checknumtp [BASE], ->fff_fallback 1737 |.endif 1738 | movsd xmm0, qword [BASE] 1739 | call ->vm_ .. func .. _sse 1740 |.if DUALNUM 1741 | cvttsd2si RBd, xmm0 1742 | cmp RBd, 0x80000000 1743 | jne ->fff_resi 1744 | cvtsi2sd xmm1, RBd 1745 | ucomisd xmm0, xmm1 1746 | jp ->fff_resxmm0 1747 | je ->fff_resi 1748 |.endif 1749 | jmp ->fff_resxmm0 1750 |.endmacro 1751 | 1752 | math_round floor 1753 | math_round ceil 1754 | 1755 |.ffunc math_log 1756 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. 1757 | checknumtp [BASE], ->fff_fallback 1758 | movsd xmm0, qword [BASE] 1759 | mov RB, BASE 1760 | call extern log 1761 | mov BASE, RB 1762 | jmp ->fff_resxmm0 1763 | 1764 |.macro math_extern, func 1765 | .ffunc_n math_ .. func 1766 | mov RB, BASE 1767 | call extern func 1768 | mov BASE, RB 1769 | jmp ->fff_resxmm0 1770 |.endmacro 1771 | 1772 |.macro math_extern2, func 1773 | .ffunc_nn math_ .. 
func 1774 | mov RB, BASE 1775 | call extern func 1776 | mov BASE, RB 1777 | jmp ->fff_resxmm0 1778 |.endmacro 1779 | 1780 | math_extern log10 1781 | math_extern exp 1782 | math_extern sin 1783 | math_extern cos 1784 | math_extern tan 1785 | math_extern asin 1786 | math_extern acos 1787 | math_extern atan 1788 | math_extern sinh 1789 | math_extern cosh 1790 | math_extern tanh 1791 | math_extern2 pow 1792 | math_extern2 atan2 1793 | math_extern2 fmod 1794 | 1795 |.ffunc_2 math_ldexp 1796 | checknumtp [BASE], ->fff_fallback 1797 | checknumtp [BASE+8], ->fff_fallback 1798 | fld qword [BASE+8] 1799 | fld qword [BASE] 1800 | fscale 1801 | fpop1 1802 | mov PC, [BASE-8] 1803 | fstp qword [BASE-16] 1804 | jmp ->fff_res1 1805 | 1806 |.ffunc_n math_frexp 1807 | mov RB, BASE 1808 |.if X64WIN 1809 | lea CARG2, TMP1 // Caveat: CARG2 == BASE 1810 |.else 1811 | lea CARG1, TMP1 1812 |.endif 1813 | call extern frexp 1814 | mov BASE, RB 1815 | mov RBd, TMP1d 1816 | mov PC, [BASE-8] 1817 | movsd qword [BASE-16], xmm0 1818 |.if DUALNUM 1819 | setint RB 1820 | mov [BASE-8], RB 1821 |.else 1822 | cvtsi2sd xmm1, RBd 1823 | movsd qword [BASE-8], xmm1 1824 |.endif 1825 | mov RDd, 1+2 1826 | jmp ->fff_res 1827 | 1828 |.ffunc_n math_modf 1829 | mov RB, BASE 1830 |.if X64WIN 1831 | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE 1832 |.else 1833 | lea CARG1, [BASE-16] 1834 |.endif 1835 | call extern modf 1836 | mov BASE, RB 1837 | mov PC, [BASE-8] 1838 | movsd qword [BASE-8], xmm0 1839 | mov RDd, 1+2 1840 | jmp ->fff_res 1841 | 1842 |.macro math_minmax, name, cmovop, sseop 1843 | .ffunc name 1844 | mov RAd, 2 1845 |.if DUALNUM 1846 | mov RB, [BASE] 1847 | checkint RB, >4 1848 |1: // Handle integers. 1849 | cmp RAd, RDd; jae ->fff_resRB 1850 | mov TMPR, [BASE+RA*8-8] 1851 | checkint TMPR, >3 1852 | cmp RBd, TMPRd 1853 | cmovop RB, TMPR 1854 | add RAd, 1 1855 | jmp <1 1856 |3: 1857 | ja ->fff_fallback 1858 | // Convert intermediate result to number and continue below. 
1859 | cvtsi2sd xmm0, RBd 1860 | jmp >6 1861 |4: 1862 | ja ->fff_fallback 1863 |.else 1864 | checknumtp [BASE], ->fff_fallback 1865 |.endif 1866 | 1867 | movsd xmm0, qword [BASE] 1868 |5: // Handle numbers or integers. 1869 | cmp RAd, RDd; jae ->fff_resxmm0 1870 |.if DUALNUM 1871 | mov RB, [BASE+RA*8-8] 1872 | checknumx RB, >6, jb 1873 | ja ->fff_fallback 1874 | cvtsi2sd xmm1, RBd 1875 | jmp >7 1876 |.else 1877 | checknumtp [BASE+RA*8-8], ->fff_fallback 1878 |.endif 1879 |6: 1880 | movsd xmm1, qword [BASE+RA*8-8] 1881 |7: 1882 | sseop xmm0, xmm1 1883 | add RAd, 1 1884 | jmp <5 1885 |.endmacro 1886 | 1887 | math_minmax math_min, cmovg, minsd 1888 | math_minmax math_max, cmovl, maxsd 1889 | 1890 |//-- String library ----------------------------------------------------- 1891 | 1892 |.ffunc string_byte // Only handle the 1-arg case here. 1893 | cmp NARGS:RDd, 1+1; jne ->fff_fallback 1894 | mov STR:RB, [BASE] 1895 | checkstr STR:RB, ->fff_fallback 1896 | mov PC, [BASE-8] 1897 | cmp dword STR:RB->len, 1 1898 | jb ->fff_res0 // Return no results for empty string. 1899 | movzx RBd, byte STR:RB[1] 1900 |.if DUALNUM 1901 | jmp ->fff_resi 1902 |.else 1903 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0 1904 |.endif 1905 | 1906 |.ffunc string_char // Only handle the 1-arg case here. 1907 | ffgccheck 1908 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg. 1909 |.if DUALNUM 1910 | mov RB, [BASE] 1911 | checkint RB, ->fff_fallback 1912 |.else 1913 | checknumtp [BASE], ->fff_fallback 1914 | cvttsd2si RBd, qword [BASE] 1915 |.endif 1916 | cmp RBd, 255; ja ->fff_fallback 1917 | mov TMP1d, RBd 1918 | mov TMPRd, 1 1919 | lea RD, TMP1 // Points to stack. Little-endian. 1920 |->fff_newstr: 1921 | mov L:RB, SAVE_L 1922 | mov L:RB->base, BASE 1923 | mov CARG3d, TMPRd // Zero-extended to size_t. 1924 | mov CARG2, RD 1925 | mov CARG1, L:RB 1926 | mov SAVE_PC, PC 1927 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 1928 |->fff_resstr: 1929 | // GCstr * returned in eax (RD). 
1930 | mov BASE, L:RB->base 1931 | mov PC, [BASE-8] 1932 | settp STR:RD, LJ_TSTR 1933 | mov [BASE-16], STR:RD 1934 | jmp ->fff_res1 1935 | 1936 |.ffunc string_sub 1937 | ffgccheck 1938 | mov TMPRd, -1 1939 | cmp NARGS:RDd, 1+2; jb ->fff_fallback 1940 | jna >1 1941 |.if DUALNUM 1942 | mov TMPR, [BASE+16] 1943 | checkint TMPR, ->fff_fallback 1944 |.else 1945 | checknumtp [BASE+16], ->fff_fallback 1946 | cvttsd2si TMPRd, qword [BASE+16] 1947 |.endif 1948 |1: 1949 | mov STR:RB, [BASE] 1950 | checkstr STR:RB, ->fff_fallback 1951 |.if DUALNUM 1952 | mov ITYPE, [BASE+8] 1953 | mov RAd, ITYPEd // Must clear hiword for lea below. 1954 | sar ITYPE, 47 1955 | cmp ITYPEd, LJ_TISNUM 1956 | jne ->fff_fallback 1957 |.else 1958 | checknumtp [BASE+8], ->fff_fallback 1959 | cvttsd2si RAd, qword [BASE+8] 1960 |.endif 1961 | mov RCd, STR:RB->len 1962 | cmp RCd, TMPRd // len < end? (unsigned compare) 1963 | jb >5 1964 |2: 1965 | test RAd, RAd // start <= 0? 1966 | jle >7 1967 |3: 1968 | sub TMPRd, RAd // start > end? 1969 | jl ->fff_emptystr 1970 | lea RD, [STR:RB+RAd+#STR-1] 1971 | add TMPRd, 1 1972 |4: 1973 | jmp ->fff_newstr 1974 | 1975 |5: // Negative end or overflow. 1976 | jl >6 1977 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1) 1978 | jmp <2 1979 |6: // Overflow. 1980 | mov TMPRd, RCd // end = len 1981 | jmp <2 1982 | 1983 |7: // Negative start or underflow. 1984 | je >8 1985 | add RAd, RCd // start = start+(len+1) 1986 | add RAd, 1 1987 | jg <3 // start > 0? 1988 |8: // Underflow. 1989 | mov RAd, 1 // start = 1 1990 | jmp <3 1991 | 1992 |->fff_emptystr: // Range underflow. 1993 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok. 1994 | jmp <4 1995 | 1996 |.macro ffstring_op, name 1997 | .ffunc_1 string_ .. 
name 1998 | ffgccheck 1999 |.if X64WIN 2000 | mov STR:TMPR, [BASE] 2001 | checkstr STR:TMPR, ->fff_fallback 2002 |.else 2003 | mov STR:CARG2, [BASE] 2004 | checkstr STR:CARG2, ->fff_fallback 2005 |.endif 2006 | mov L:RB, SAVE_L 2007 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] 2008 | mov L:RB->base, BASE 2009 |.if X64WIN 2010 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE 2011 |.endif 2012 | mov RC, SBUF:CARG1->b 2013 | mov SBUF:CARG1->L, L:RB 2014 | mov SBUF:CARG1->p, RC 2015 | mov SAVE_PC, PC 2016 | call extern lj_buf_putstr_ .. name 2017 | mov CARG1, rax 2018 | call extern lj_buf_tostr 2019 | jmp ->fff_resstr 2020 |.endmacro 2021 | 2022 |ffstring_op reverse 2023 |ffstring_op lower 2024 |ffstring_op upper 2025 | 2026 |//-- Bit library -------------------------------------------------------- 2027 | 2028 |.macro .ffunc_bit, name, kind, fdef 2029 | fdef name 2030 |.if kind == 2 2031 | sseconst_tobit xmm1, RB 2032 |.endif 2033 |.if DUALNUM 2034 | mov RB, [BASE] 2035 | checkint RB, >1 2036 |.if kind > 0 2037 | jmp >2 2038 |.else 2039 | jmp ->fff_resbit 2040 |.endif 2041 |1: 2042 | ja ->fff_fallback 2043 | movd xmm0, RB 2044 |.else 2045 | checknumtp [BASE], ->fff_fallback 2046 | movsd xmm0, qword [BASE] 2047 |.endif 2048 |.if kind < 2 2049 | sseconst_tobit xmm1, RB 2050 |.endif 2051 | addsd xmm0, xmm1 2052 | movd RBd, xmm0 2053 |2: 2054 |.endmacro 2055 | 2056 |.macro .ffunc_bit, name, kind 2057 | .ffunc_bit name, kind, .ffunc_1 2058 |.endmacro 2059 | 2060 |.ffunc_bit bit_tobit, 0 2061 | jmp ->fff_resbit 2062 | 2063 |.macro .ffunc_bit_op, name, ins 2064 | .ffunc_bit name, 2 2065 | mov TMPRd, NARGS:RDd // Save for fallback. 
2066 | lea RD, [BASE+NARGS:RD*8-16] 2067 |1: 2068 | cmp RD, BASE 2069 | jbe ->fff_resbit 2070 |.if DUALNUM 2071 | mov RA, [RD] 2072 | checkint RA, >2 2073 | ins RBd, RAd 2074 | sub RD, 8 2075 | jmp <1 2076 |2: 2077 | ja ->fff_fallback_bit_op 2078 | movd xmm0, RA 2079 |.else 2080 | checknumtp [RD], ->fff_fallback_bit_op 2081 | movsd xmm0, qword [RD] 2082 |.endif 2083 | addsd xmm0, xmm1 2084 | movd RAd, xmm0 2085 | ins RBd, RAd 2086 | sub RD, 8 2087 | jmp <1 2088 |.endmacro 2089 | 2090 |.ffunc_bit_op bit_band, and 2091 |.ffunc_bit_op bit_bor, or 2092 |.ffunc_bit_op bit_bxor, xor 2093 | 2094 |.ffunc_bit bit_bswap, 1 2095 | bswap RBd 2096 | jmp ->fff_resbit 2097 | 2098 |.ffunc_bit bit_bnot, 1 2099 | not RBd 2100 |.if DUALNUM 2101 | jmp ->fff_resbit 2102 |.else 2103 |->fff_resbit: 2104 | cvtsi2sd xmm0, RBd 2105 | jmp ->fff_resxmm0 2106 |.endif 2107 | 2108 |->fff_fallback_bit_op: 2109 | mov NARGS:RDd, TMPRd // Restore for fallback 2110 | jmp ->fff_fallback 2111 | 2112 |.macro .ffunc_bit_sh, name, ins 2113 |.if DUALNUM 2114 | .ffunc_bit name, 1, .ffunc_2 2115 | // Note: no inline conversion from number for 2nd argument! 2116 | mov RA, [BASE+8] 2117 | checkint RA, ->fff_fallback 2118 |.else 2119 | .ffunc_nn name 2120 | sseconst_tobit xmm2, RB 2121 | addsd xmm0, xmm2 2122 | addsd xmm1, xmm2 2123 | movd RBd, xmm0 2124 | movd RAd, xmm1 2125 |.endif 2126 | ins RBd, cl // Assumes RA is ecx. 2127 | jmp ->fff_resbit 2128 |.endmacro 2129 | 2130 |.ffunc_bit_sh bit_lshift, shl 2131 |.ffunc_bit_sh bit_rshift, shr 2132 |.ffunc_bit_sh bit_arshift, sar 2133 |.ffunc_bit_sh bit_rol, rol 2134 |.ffunc_bit_sh bit_ror, ror 2135 | 2136 |//----------------------------------------------------------------------- 2137 | 2138 |->fff_fallback_2: 2139 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway. 2140 | jmp ->fff_fallback 2141 |->fff_fallback_1: 2142 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway. 2143 |->fff_fallback: // Call fast function fallback handler. 
2144 | // BASE = new base, RD = nargs+1 2145 | mov L:RB, SAVE_L 2146 | mov PC, [BASE-8] // Fallback may overwrite PC. 2147 | mov SAVE_PC, PC // Redundant (but a defined value). 2148 | mov L:RB->base, BASE 2149 | lea RD, [BASE+NARGS:RD*8-8] 2150 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. 2151 | mov L:RB->top, RD 2152 | mov CFUNC:RD, [BASE-16] 2153 | cleartp CFUNC:RD 2154 | cmp RA, L:RB->maxstack 2155 | ja >5 // Need to grow stack. 2156 | mov CARG1, L:RB 2157 | call aword CFUNC:RD->f // (lua_State *L) 2158 | mov BASE, L:RB->base 2159 | // Either throws an error, or recovers and returns -1, 0 or nresults+1. 2160 | test RDd, RDd; jg ->fff_res // Returned nresults+1? 2161 |1: 2162 | mov RA, L:RB->top 2163 | sub RA, BASE 2164 | shr RAd, 3 2165 | test RDd, RDd 2166 | lea NARGS:RDd, [RAd+1] 2167 | mov LFUNC:RB, [BASE-16] 2168 | jne ->vm_call_tail // Returned -1? 2169 | cleartp LFUNC:RB 2170 | ins_callt // Returned 0: retry fast path. 2171 | 2172 |// Reconstruct previous base for vmeta_call during tailcall. 2173 |->vm_call_tail: 2174 | mov RA, BASE 2175 | test PCd, FRAME_TYPE 2176 | jnz >3 2177 | movzx RBd, PC_RA 2178 | neg RB 2179 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8 2180 | jmp ->vm_call_dispatch // Resolve again for tailcall. 2181 |3: 2182 | mov RB, PC 2183 | and RB, -8 2184 | sub BASE, RB 2185 | jmp ->vm_call_dispatch // Resolve again for tailcall. 2186 | 2187 |5: // Grow stack for fallback handler. 2188 | mov CARG2d, LUA_MINSTACK 2189 | mov CARG1, L:RB 2190 | call extern lj_state_growstack // (lua_State *L, int n) 2191 | mov BASE, L:RB->base 2192 | xor RDd, RDd // Simulate a return 0. 2193 | jmp <1 // Dumb retry (goes through ff first). 2194 | 2195 |->fff_gcstep: // Call GC step function. 2196 | // BASE = new base, RD = nargs+1 2197 | pop RB // Must keep stack at same level. 2198 | mov TMP1, RB // Save return address 2199 | mov L:RB, SAVE_L 2200 | mov SAVE_PC, PC // Redundant (but a defined value). 
|  mov L:RB->base, BASE
|  lea RD, [BASE+NARGS:RD*8-8]
|  mov CARG1, L:RB
|  mov L:RB->top, RD
|  call extern lj_gc_step		// (lua_State *L)
|  // Reload BASE and recompute nargs+1 from the base/top stored in L.
|  mov BASE, L:RB->base
|  mov RD, L:RB->top
|  sub RD, BASE
|  shr RDd, 3
|  add NARGS:RDd, 1
|  mov RB, TMP1
|  push RB				// Restore return address.
|  ret
|
|//-----------------------------------------------------------------------
|//-- Special dispatch targets -------------------------------------------
|//-----------------------------------------------------------------------
|
|// The three dispatch targets below share the numbered labels that follow
|// ->vm_inshook: 1 calls lj_dispatch_ins, 5 only re-dispatches.
|->vm_record:				// Dispatch target for recording phase.
|.if JIT
|  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
|  test RDL, HOOK_VMEVENT		// No recording while in vmevent.
|  jnz >5
|  // Decrement the hookcount for consistency, but always do the call.
|  test RDL, HOOK_ACTIVE
|  jnz >1
|  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
|  jz >1
|  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
|  jmp >1
|.endif
|
|->vm_rethook:				// Dispatch target for return hooks.
|  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
|  test RDL, HOOK_ACTIVE		// Hook already active?
|  jnz >5
|  jmp >1
|
|->vm_inshook:				// Dispatch target for instr/line hooks.
|  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
|  test RDL, HOOK_ACTIVE		// Hook already active?
|  jnz >5
|
|  // The call is made when the hook count drops to zero or when
|  // LUA_MASKLINE is set; in all other cases only re-dispatch.
|  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
|  jz >5
|  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
|  jz >1
|  test RDL, LUA_MASKLINE
|  jz >5
|1:
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  // BASE is rdx and CARG2 is rdx under X64WIN, so BASE is stored to L
|  // first and reloaded after the call.
|  mov CARG2, PC			// Caveat: CARG2 == BASE
|  mov CARG1, L:RB
|  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
|  call extern lj_dispatch_ins		// (lua_State *L, const BCIns *pc)
|// Shared re-dispatch tail: 3 reloads BASE from L, 4 reloads RA, 5 reloads
|// OP and RD and jumps through the static dispatch table.
|3:
|  mov BASE, L:RB->base
|4:
|  movzx RAd, PC_RA
|5:
|  movzx OP, PC_OP
|  movzx RDd, PC_RD
|  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Re-dispatch to static ins.
|
|->cont_hook:				// Continue from hook yield.
|  add PC, 4
|  mov RA, [RB-40]
|  mov MULTRES, RAd			// Restore MULTRES for *M ins.
|  jmp <4
|
|->vm_hotloop:				// Hot loop counter underflow.
|.if JIT
|  // L->top is set to BASE + framesize*8 of the current prototype before
|  // lj_trace_hot is called.
|  mov LFUNC:RB, [BASE-16]		// Same as curr_topL(L).
|  cleartp LFUNC:RB
|  mov RB, LFUNC:RB->pc
|  movzx RDd, byte [RB+PC2PROTO(framesize)]
|  lea RD, [BASE+RD*8]
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov L:RB->top, RD
|  mov CARG2, PC
|  lea CARG1, [DISPATCH+GG_DISP2J]
|  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
|  mov SAVE_PC, PC
|  call extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
|  jmp <3
|.endif
|
|// ->vm_callhook and ->vm_hotcall share the code from label 1 on. With JIT,
|// ->vm_hotcall sets bit 0 of PC before the call; it is cleared again
|// afterwards by 'and PC, -2'.
|->vm_callhook:				// Dispatch target for call hooks.
|  mov SAVE_PC, PC
|.if JIT
|  jmp >1
|.endif
|
|->vm_hotcall:				// Hot call counter underflow.
|.if JIT
|  mov SAVE_PC, PC
|  or PC, 1				// Marker for hot call.
|1:
|.endif
|  lea RD, [BASE+NARGS:RD*8-8]
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov L:RB->top, RD
|  mov CARG2, PC
|  mov CARG1, L:RB
|  call extern lj_dispatch_call		// (lua_State *L, const BCIns *pc)
|  // ASMFunction returned in eax/rax (RD).
|  mov SAVE_PC, 0			// Invalidate for subsequent line hook.
|.if JIT
|  and PC, -2
|.endif
|  // The returned function pointer is moved via RA into RB, because RD is
|  // needed for recomputing nargs+1 and RA for operand A.
|  mov BASE, L:RB->base
|  mov RA, RD
|  mov RD, L:RB->top
|  sub RD, BASE
|  mov RB, RA
|  movzx RAd, PC_RA
|  shr RDd, 3
|  add NARGS:RDd, 1
|  jmp RB
|
|->cont_stitch:				// Trace stitching.
|.if JIT
|  // BASE = base, RC = result, RB = mbase
|  mov TRACE:ITYPE, [RB-40]		// Save previous trace.
|  cleartp TRACE:ITYPE
|  mov TMPRd, MULTRES
|  movzx RAd, PC_RA
|  lea RA, [BASE+RA*8]			// Call base.
|  // Copy MULTRES-1 slots from [RC] to the call base (none if MULTRES is 1).
|  sub TMPRd, 1
|  jz >2
|1:  // Move results down.
|  mov RB, [RC]
|  mov [RA], RB
|  add RC, 8
|  add RA, 8
|  sub TMPRd, 1
|  jnz <1
|2:
|  // RC = BASE + (A+B)*8 - 8, computed from operands A and B. Slots from
|  // RA up to below RC are filled with nil at label 9.
|  movzx RCd, PC_RA
|  movzx RBd, PC_RB
|  add RC, RB
|  lea RC, [BASE+RC*8-8]
|3:
|  cmp RC, RA
|  ja >9				// More results wanted?
|
|  // No previous trace, or link equal to traceno: just continue at
|  // ->cont_nop. A non-zero link is passed in RD to BC_JLOOP.
|  test TRACE:ITYPE, TRACE:ITYPE
|  jz ->cont_nop
|  movzx RBd, word TRACE:ITYPE->traceno
|  movzx RDd, word TRACE:ITYPE->link
|  cmp RDd, RBd
|  je ->cont_nop			// Blacklisted.
|  test RDd, RDd
|  jne =>BC_JLOOP			// Jump to stitched trace.
|
|  // Stitch a new trace to the previous trace.
|  mov [DISPATCH+DISPATCH_J(exitno)], RB
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov CARG2, PC
|  lea CARG1, [DISPATCH+GG_DISP2J]
|  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
|  call extern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
|  mov BASE, L:RB->base
|  jmp ->cont_nop
|
|9:  // Fill up results with nil.
|  mov aword [RA], LJ_TNIL
|  add RA, 8
|  jmp <3
|.endif
|
|// The body of this target is only compiled if LJ_HASPROFILE is set.
|->vm_profhook:			// Dispatch target for profiler hook.
#if LJ_HASPROFILE
|  mov L:RB, SAVE_L
|  mov L:RB->base, BASE
|  mov CARG2, PC			// Caveat: CARG2 == BASE
|  mov CARG1, L:RB
|  call extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
|  mov BASE, L:RB->base
|  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
|  sub PC, 4
|  jmp ->cont_nop
#endif
|
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Called from an exit stub with the exit number on the stack.
|// The 16 bit exit number is stored with two (sign-extended) push imm8.
|->vm_exit_handler:
|.if JIT
|  // Nine registers have been pushed when rbp is computed, so rsp+88 is
|  // 16 bytes above the value rsp had on entry. [rbp-8] and [rbp-16] are
|  // therefore the two topmost stack slots at entry. They are read as the
|  // two exit number bytes and then overwritten with r15 and r14.
|  push r13; push r12
|  push r11; push r10; push r9; push r8
|  push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
|  push rbx; push rdx; push rcx; push rax
|  movzx RCd, byte [rbp-8]		// Reconstruct exit number.
|  mov RCH, byte [rbp-16]
|  mov [rbp-8], r15; mov [rbp-16], r14
|  // DISPATCH is preserved on-trace in LJ_GC64 mode.
|  mov RAd, [DISPATCH+DISPATCH_GL(vmstate)]	// Get trace number.
|  set_vmstate EXIT
|  mov [DISPATCH+DISPATCH_J(exitno)], RCd
|  mov [DISPATCH+DISPATCH_J(parent)], RAd
|.if X64WIN
|  sub rsp, 16*8+4*8			// Room for SSE regs + save area.
|.else
|  sub rsp, 16*8			// Room for SSE regs.
|.endif
|  // After the adjustment rbp equals the value rsp had before the sub
|  // above, so xmm15..xmm0 are stored in the 128 bytes directly below the
|  // pushed general purpose registers.
|  add rbp, -128
|  movsd qword [rbp-8],   xmm15; movsd qword [rbp-16],  xmm14
|  movsd qword [rbp-24],  xmm13; movsd qword [rbp-32],  xmm12
|  movsd qword [rbp-40],  xmm11; movsd qword [rbp-48],  xmm10
|  movsd qword [rbp-56],  xmm9;  movsd qword [rbp-64],  xmm8
|  movsd qword [rbp-72],  xmm7;  movsd qword [rbp-80],  xmm6
|  movsd qword [rbp-88],  xmm5;  movsd qword [rbp-96],  xmm4
|  movsd qword [rbp-104], xmm3;  movsd qword [rbp-112], xmm2
|  movsd qword [rbp-120], xmm1;  movsd qword [rbp-128], xmm0
|  // Caveat: RB is rbp.
|  mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
|  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
|  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
|  mov L:RB->base, BASE
|  // CARG2 is the ExitState pointer: rsp, or rsp plus the 4*8 byte save
|  // area under X64WIN.
|.if X64WIN
|  lea CARG2, [rsp+4*8]
|.else
|  mov CARG2, rsp
|.endif
|  lea CARG1, [DISPATCH+GG_DISP2J]
|  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
|  call extern lj_trace_exit		// (jit_State *J, ExitState *ex)
|  // MULTRES or negated error code returned in eax (RD).
|  mov RA, L:RB->cframe
|  and RA, CFRAME_RAWMASK
|  mov [RA+CFRAME_OFS_L], L:RB		// Set SAVE_L (on-trace resume/yield).
|  mov BASE, L:RB->base
|  mov PC, [RA+CFRAME_OFS_PC]		// Get SAVE_PC.
|  // Enter ->vm_exit_interp at its label 1, i.e. after the lea that would
|  // otherwise derive RA from rsp. RA holds the masked L->cframe here.
|  jmp >1
|.endif
|->vm_exit_interp:
|  // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
|.if JIT
|  // Restore additional callee-save registers only used in compiled code.
|.if X64WIN
|  lea RA, [rsp+10*16+4*8]
|1:
|  movdqa xmm15, [RA-10*16]
|  movdqa xmm14, [RA-9*16]
|  movdqa xmm13, [RA-8*16]
|  movdqa xmm12, [RA-7*16]
|  movdqa xmm11, [RA-6*16]
|  movdqa xmm10, [RA-5*16]
|  movdqa xmm9, [RA-4*16]
|  movdqa xmm8, [RA-3*16]
|  movdqa xmm7, [RA-2*16]
|  mov rsp, RA				// Reposition stack to C frame.
|  movdqa xmm6, [RA-1*16]
|  mov r15, CSAVE_1
|  mov r14, CSAVE_2
|  mov r13, CSAVE_3
|  mov r12, CSAVE_4
|.else
|  lea RA, [rsp+16]
|1:
|  mov r13, [RA-8]
|  mov r12, [RA]
|  mov rsp, RA				// Reposition stack to C frame.
|.endif
|  // A negative RD is handled at label 9.
|  test RDd, RDd; js >9			// Check for error from exit.
|  mov L:RB, SAVE_L
|  mov MULTRES, RDd
|  // Load KBASE from the prototype of the function in slot [BASE-16].
|  mov LFUNC:KBASE, [BASE-16]
|  cleartp LFUNC:KBASE
|  mov KBASE, LFUNC:KBASE->pc
|  mov KBASE, [KBASE+PC2PROTO(k)]
|  mov L:RB->base, BASE
|  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
|  set_vmstate INTERP
|  // Modified copy of ins_next which handles function header dispatch, too.
|  mov RCd, [PC]
|  movzx RAd, RCH
|  movzx OP, RCL
|  add PC, 4
|  shr RCd, 16
|  // Opcodes below BC_FUNCF keep the decoded operand D in RC. All others
|  // get MULTRES in RC; opcodes from BC_FUNCC+2 on go through label 4 first.
|  cmp OP, BC_FUNCF			// Function header?
|  jb >3
|  cmp OP, BC_FUNCC+2			// Fast function?
|  jae >4
|2:
|  mov RCd, MULTRES			// RC/RD holds nres+1.
|3:
|  jmp aword [DISPATCH+OP*8]
|
|4:  // Check frame below fast function.
|  mov RC, [BASE-8]
|  test RCd, FRAME_TYPE
|  jnz <2				// Trace stitching continuation?
|  // Otherwise set KBASE for Lua function below fast function.
|  // The byte at [RC-3] is scaled by 8 and subtracted from BASE to find
|  // the function slot of that frame.
|  movzx RCd, byte [RC-3]
|  neg RC
|  mov LFUNC:KBASE, [BASE+RC*8-32]
|  cleartp LFUNC:KBASE
|  mov KBASE, LFUNC:KBASE->pc
|  mov KBASE, [KBASE+PC2PROTO(k)]
|  jmp <2
|
|9:  // Rethrow error from the right C frame.
|  // RD is negated here and passed as the errcode argument.
|  neg RD
|  mov CARG1, L:RB
|  mov CARG2, RD
|  call extern lj_err_throw		// (lua_State *L, int errcode)
|.endif
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
|// FP value rounding. Called by math.floor/math.ceil fast functions
|// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
|// Macro parameters: 'name' is the global label to define, 'mode' selects
|// floor (0), ceil (1) or trunc (2). 'cond' is not referenced in the body.
|.macro vm_round, name, mode, cond
|->name:
|->name .. _sse:
|  sseconst_abs xmm2, RD
|  sseconst_2p52 xmm3, RD
|  movaps xmm1, xmm0
|  andpd xmm1, xmm2			// |x|
|  ucomisd xmm3, xmm1			// No truncation if 2^52 <= |x|.
|  // jbe is also taken for an unordered compare, so that case returns the
|  // argument unchanged as well.
|  jbe >1
|  andnpd xmm2, xmm0			// Isolate sign bit.
|.if mode == 2		// trunc(x)?
|  movaps xmm0, xmm1
|  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
|  subsd xmm1, xmm3
|  sseconst_1 xmm3, RD
|  cmpsd xmm0, xmm1, 1			// |x| < result?
|  andpd xmm0, xmm3
|  subsd xmm1, xmm0			// If yes, subtract -1.
|  orpd xmm1, xmm2			// Merge sign bit back in.
|.else
|  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
|  subsd xmm1, xmm3
|  orpd xmm1, xmm2			// Merge sign bit back in.
|  .if mode == 1		// ceil(x)?
|  sseconst_m1 xmm2, RD			// Must subtract -1 to preserve -0.
|  cmpsd xmm0, xmm1, 6			// x > result?
|  .else			// floor(x)?
|  sseconst_1 xmm2, RD
|  cmpsd xmm0, xmm1, 1			// x < result?
|  .endif
|  // cmpsd leaves an all-ones or all-zero mask in xmm0; the and turns it
|  // into the constant from xmm2 or into 0.0.
|  andpd xmm0, xmm2
|  subsd xmm1, xmm0			// If yes, subtract +-1.
|.endif
|  movaps xmm0, xmm1
|1:
|  ret
|.endmacro
|
|  vm_round vm_floor, 0, 1
|  vm_round vm_ceil,  1, JIT
|  vm_round vm_trunc, 2, JIT
|
|// FP modulo x%y. Called by BC_MOD* and vm_arith.
|->vm_mod:
|// Args in xmm0/xmm1, return value in xmm0.
|// Caveat: xmm0-xmm5 and RC (eax) modified!
|// Computes x - floor(x/y)*y. xmm5 keeps x; xmm4 receives floor(x/y).
|  movaps xmm5, xmm0
|  divsd xmm0, xmm1
|  sseconst_abs xmm2, RD
|  sseconst_2p52 xmm3, RD
|  movaps xmm4, xmm0
|  andpd xmm4, xmm2			// |x/y|
|  ucomisd xmm3, xmm4			// No truncation if 2^52 <= |x/y|.
|  jbe >1
|  andnpd xmm2, xmm0			// Isolate sign bit.
|  addsd xmm4, xmm3			// (|x/y| + 2^52) - 2^52
|  subsd xmm4, xmm3
|  orpd xmm4, xmm2			// Merge sign bit back in.
|  sseconst_1 xmm2, RD
|  cmpsd xmm0, xmm4, 1			// x/y < result?
|  andpd xmm0, xmm2
|  subsd xmm4, xmm0			// If yes, subtract 1.0.
|  movaps xmm0, xmm5
|  mulsd xmm1, xmm4
|  subsd xmm0, xmm1
|  ret
|1:
|  // No rounding step on this path: the result is x - y*(x/y).
|  mulsd xmm1, xmm0
|  movaps xmm0, xmm5
|  subsd xmm0, xmm1
|  ret
|
|// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
|// Raises xmm0 to the integer power in eax by repeated squaring. xmm1
|// accumulates the product for the set bits below the lowest one.
|->vm_powi_sse:
|  cmp eax, 1; jle >6			// i<=1?
|  // Now 1 < (unsigned)i <= 0x80000000.
|1:  // Handle leading zeros.
|  test eax, 1; jnz >2
|  mulsd xmm0, xmm0
|  shr eax, 1
|  jmp <1
|2:
|  shr eax, 1; jz >5
|  movaps xmm1, xmm0
|3:  // Handle trailing bits.
|  mulsd xmm0, xmm0
|  shr eax, 1; jz >4
|  jnc <3
|  mulsd xmm1, xmm0
|  jmp <3
|4:
|  mulsd xmm0, xmm1
|5:
|  ret
|6:
|  // Flags are still those of 'cmp eax, 1'. The remaining case, a
|  // negative i, computes 1/x^(-i) by calling the loop at label 1.
|  je <5					// x^1 ==> x
|  jb >7					// x^0 ==> 1
|  neg eax
|  call <1
|  sseconst_1 xmm1, RD
|  divsd xmm1, xmm0
|  movaps xmm0, xmm1
|  ret
|7:
|  sseconst_1 xmm0, RD
|  ret
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
|// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
|// Runs cpuid with eax = f and ecx = 0 and stores eax, ebx, ecx, edx to
|// res[0..3]. rbx is saved around cpuid; under X64WIN rsi is saved too,
|// because it is loaded with CARG2 to hold the res pointer. eax is not
|// written again after cpuid.
|->vm_cpuid:
|  mov eax, CARG1d
|  .if X64WIN; push rsi; mov rsi, CARG2; .endif
|  push rbx
|  xor ecx, ecx
|  cpuid
|  mov [rsi], eax
|  mov [rsi+4], ebx
|  mov [rsi+8], ecx
|  mov [rsi+12], edx
|  pop rbx
|  .if X64WIN; pop rsi; .endif
|  ret
|
|//-----------------------------------------------------------------------
|//-- Assertions ---------------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Emits a breakpoint instruction; a second one precedes it if
|// LUA_USE_ASSERT is defined.
|->assert_bad_for_arg_type:
#ifdef LUA_USE_ASSERT
|  int3
#endif
|  int3
|
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
|
|// Handler for callback functions. Callback slot number in ah/al.
|->vm_ffi_callback:
|.if FFI
|.type CTSTATE, CTState, PC
|  saveregs_	// ebp/rbp already saved. ebp now holds global_State *.
|  // NOTE(review): the two address expressions below name ebp, not rbp.
|  // Confirm how DynASM encodes this in x64 mode.
|  lea DISPATCH, [ebp+GG_G2DISP]
|  mov CTSTATE, GL:ebp->ctype_state
|  movzx eax, ax
|  mov CTSTATE->cb.slot, eax
|  // Store the incoming argument registers into the callback state.
|  mov CTSTATE->cb.gpr[0], CARG1
|  mov CTSTATE->cb.gpr[1], CARG2
|  mov CTSTATE->cb.gpr[2], CARG3
|  mov CTSTATE->cb.gpr[3], CARG4
|  movsd qword CTSTATE->cb.fpr[0], xmm0
|  movsd qword CTSTATE->cb.fpr[1], xmm1
|  movsd qword CTSTATE->cb.fpr[2], xmm2
|  movsd qword CTSTATE->cb.fpr[3], xmm3
|.if X64WIN
|  lea rax, [rsp+CFRAME_SIZE+4*8]
|.else
|  lea rax, [rsp+CFRAME_SIZE]
|  mov CTSTATE->cb.gpr[4], CARG5
|  mov CTSTATE->cb.gpr[5], CARG6
|  movsd qword CTSTATE->cb.fpr[4], xmm4
|  movsd qword CTSTATE->cb.fpr[5], xmm5
|  movsd qword CTSTATE->cb.fpr[6], xmm6
|  movsd qword CTSTATE->cb.fpr[7], xmm7
|.endif
|  mov CTSTATE->cb.stack, rax
|  mov CARG2, rsp
|  mov SAVE_PC, CTSTATE			// Any value outside of bytecode is ok.
|  mov CARG1, CTSTATE
|  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
|  // lua_State * returned in eax (RD).
|  set_vmstate INTERP
|  // Compute nargs+1 from the returned state's base/top and call the
|  // function in slot [BASE-16].
|  mov BASE, L:RD->base
|  mov RD, L:RD->top
|  sub RD, BASE
|  mov LFUNC:RB, [BASE-16]
|  cleartp LFUNC:RB
|  shr RD, 3
|  add RD, 1
|  ins_callt
|.endif
|
|->cont_ffi_callback:			// Return from FFI callback.
|.if FFI
|  mov L:RA, SAVE_L
|  mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
|  mov aword CTSTATE->L, L:RA
|  mov L:RA->base, BASE
|  mov L:RA->top, RB
|  mov CARG1, CTSTATE
|  mov CARG2, RC
|  call extern lj_ccallback_leave	// (CTState *cts, TValue *o)
|  // rax and xmm0 are loaded from cb.gpr[0] and cb.fpr[0].
|  mov rax, CTSTATE->cb.gpr[0]
|  movsd xmm0, qword CTSTATE->cb.fpr[0]
|  jmp ->vm_leave_unw
|.endif
|
|->vm_ffi_call:			// Call C function via FFI.
|  // Caveat: needs special frame unwinding, see below.
|.if FFI
|  .type CCSTATE, CCallState, rbx
|  // rbx is saved at [rbp-8], then holds the CCallState pointer (CARG1).
|  push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
|
|  // Readjust stack.
|  mov eax, CCSTATE->spadj
|  sub rsp, rax
|
|  // Copy stack slots.
|  // ecx counts down from nsp-1 to 0; nothing is copied if nsp is 0.
|  movzx ecx, byte CCSTATE->nsp
|  sub ecx, 1
|  js >2
|1:
|  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
|  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
|  sub ecx, 1
|  jns <1
|2:
|
|  // eax = nfpr. The FP argument registers are only loaded if it is
|  // non-zero; xmm4-xmm7 only without X64WIN and if it is above 4.
|  movzx eax, byte CCSTATE->nfpr
|  mov CARG1, CCSTATE->gpr[0]
|  mov CARG2, CCSTATE->gpr[1]
|  mov CARG3, CCSTATE->gpr[2]
|  mov CARG4, CCSTATE->gpr[3]
|.if not X64WIN
|  mov CARG5, CCSTATE->gpr[4]
|  mov CARG6, CCSTATE->gpr[5]
|.endif
|  test eax, eax; jz >5
|  movaps xmm0, CCSTATE->fpr[0]
|  movaps xmm1, CCSTATE->fpr[1]
|  movaps xmm2, CCSTATE->fpr[2]
|  movaps xmm3, CCSTATE->fpr[3]
|.if not X64WIN
|  cmp eax, 4; jbe >5
|  movaps xmm4, CCSTATE->fpr[4]
|  movaps xmm5, CCSTATE->fpr[5]
|  movaps xmm6, CCSTATE->fpr[6]
|  movaps xmm7, CCSTATE->fpr[7]
|.endif
|5:
|
|  call aword CCSTATE->func
|
|  // Store the result registers back into the call state.
|  mov CCSTATE->gpr[0], rax
|  movaps CCSTATE->fpr[0], xmm0
|.if not X64WIN
|  mov CCSTATE->gpr[1], rdx
|  movaps CCSTATE->fpr[1], xmm1
|.endif
|
|  // Restore rbx from its save slot, then tear down the frame.
|  mov rbx, [rbp-8]; leave; ret
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
|
|//-----------------------------------------------------------------------
}

/* Generate the code for a single instruction. */
/* 'op' selects the case below; 'defop' is used for the =>defop label. */
static void build_ins(BuildCtx *ctx, BCOp op, int defop)
{
  int vk = 0;
|// Note: aligning all instructions does not pay off.
|=>defop:

  switch (op) {

  /* -- Comparison ops ---------------------------------------------------- */

  /* Remember: all ops branch for a true comparison, fall through otherwise. */

  /* jmp_comp emits exactly one of the four given jump instructions, */
  /* selected by 'op' when the instruction is built. */
|.macro jmp_comp, lt, ge, le, gt, target
||switch (op) {
||case BC_ISLT:
|   lt target
||break;
||case BC_ISGE:
|   ge target
||break;
||case BC_ISLE:
|   le target
||break;
||case BC_ISGT:
|   gt target
||break;
||default: break;  /* Shut up GCC. */
||}
|.endmacro

  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
|  // RA = src1, RD = src2, JMP with RD = target
|  ins_AD
|  // RA/RD keep the full values; ITYPE/RB keep the upper bits (sar 47).
|  mov ITYPE, [BASE+RA*8]
|  mov RB, [BASE+RD*8]
|  mov RA, ITYPE
|  mov RD, RB
|  sar ITYPE, 47
|  sar RB, 47
|.if DUALNUM
|  cmp ITYPEd, LJ_TISNUM; jne >7
|  cmp RBd, LJ_TISNUM; jne >8
|  add PC, 4
|  cmp RAd, RDd
|  // The jump emitted here skips the branch, so it uses the inverse of
|  // the opcode's condition.
|  jmp_comp jge, jl, jg, jle, >9
|6:
|  movzx RDd, PC_RD
|  branchPC RD
|9:
|  ins_next
|
|7:  // RA is not an integer.
|  ja ->vmeta_comp
|  // RA is a number.
|  cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
|  // RA is a number, RD is an integer.
|  cvtsi2sd xmm0, RDd
|  jmp >2
|
|8:  // RA is an integer, RD is not an integer.
|  ja ->vmeta_comp
|  // RA is an integer, RD is a number.
|  cvtsi2sd xmm1, RAd
|  movd xmm0, RD
|  jmp >3
|.else
|  cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
|  cmp RBd, LJ_TISNUM; jae ->vmeta_comp
|.endif
|  // FP compare: xmm0 = src2 (RD), xmm1 = src1 (RA).
|1:
|  movd xmm0, RD
|2:
|  movd xmm1, RA
|3:
|  add PC, 4
|  ucomisd xmm0, xmm1
|  // Unordered: all of ZF CF PF set, ordered: PF clear.
|  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
|.if DUALNUM
|  jmp_comp jbe, ja, jb, jae, <9
|  jmp <6
|.else
|  jmp_comp jbe, ja, jb, jae, >1
|  movzx RDd, PC_RD
|  branchPC RD
|1:
|  ins_next
|.endif
    break;

  case BC_ISEQV: case BC_ISNEV:
    /* vk is 1 for the 'equal' opcode and 0 for the 'not equal' opcode. */
    vk = op == BC_ISEQV;
|  ins_AD	// RA = src1, RD = src2, JMP with RD = target
|  // RD/RA keep the full values; RB/ITYPE keep the upper bits (sar 47).
|  mov RB, [BASE+RD*8]
|  mov ITYPE, [BASE+RA*8]
|  add PC, 4
|  mov RD, RB
|  mov RA, ITYPE
|  sar RB, 47
|  sar ITYPE, 47
|.if DUALNUM
|  cmp RBd, LJ_TISNUM; jne >7
|  cmp ITYPEd, LJ_TISNUM; jne >8
|  cmp RDd, RAd
    if (vk) {
|  jne >9
    } else {
|  je >9
    }
|  movzx RDd, PC_RD
|  branchPC RD
|9:
|  ins_next
|
|7:  // RD is not an integer.
|  ja >5
|  // RD is a number.
|  movd xmm1, RD
|  cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
|  // RD is a number, RA is an integer.
|  cvtsi2sd xmm0, RAd
|  jmp >2
|
|8:  // RD is an integer, RA is not an integer.
|  ja >5
|  // RD is an integer, RA is a number.
|  cvtsi2sd xmm1, RDd
|  jmp >1
|
|.else
|  cmp RBd, LJ_TISNUM; jae >5
|  cmp ITYPEd, LJ_TISNUM; jae >5
|  movd xmm1, RD
|.endif
|1:
|  movd xmm0, RA
|2:
|  ucomisd xmm0, xmm1
|4:
  /* Shared tail, also entered via goto from other cases in this switch. */
  /* Expects the flags of an FP compare. */
  iseqne_fp:
    if (vk) {
|  jp >2				// Unordered means not equal.
|  jne >2
    } else {
|  jp >2				// Unordered means not equal.
|  je >1
    }
  /* Shared tail: label 1 is the 'equal' outcome, label 2 the 'not equal' */
  /* outcome. Label 3 is only defined here if FFI is off. */
  iseqne_end:
    if (vk) {
|1:				// EQ: Branch to the target.
|  movzx RDd, PC_RD
|  branchPC RD
|2:				// NE: Fallthrough to next instruction.
|.if not FFI
|3:
|.endif
    } else {
|.if not FFI
|3:
|.endif
|2:				// NE: Branch to the target.
|  movzx RDd, PC_RD
|  branchPC RD
|1:				// EQ: Fallthrough to next instruction.
}
    /* With LJ_DUALNUM these four opcodes reuse the ins_next at label 9. */
    if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
		       op == BC_ISEQN || op == BC_ISNEN)) {
|  jmp <9
    } else {
|  ins_next
    }
|
    if (op == BC_ISEQV || op == BC_ISNEV) {
|5:  // Either or both types are not numbers.
|.if FFI
|  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
|  cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
|.endif
|  // Identical 64 bit values are equal. Otherwise only two values of the
|  // same type that is not above LJ_TISTABUD can reach the __eq check.
|  cmp RA, RD
|  je <1				// Same GCobjs or pvalues?
|  cmp RBd, ITYPEd
|  jne <2				// Not the same type?
|  cmp RBd, LJ_TISTABUD
|  ja <2				// Different objects and not table/ud?
|
|  // Different tables or userdatas. Need to check __eq metamethod.
|  // Field metatable must be at same offset for GCtab and GCudata!
|  cleartp TAB:RA
|  mov TAB:RB, TAB:RA->metatable
|  test TAB:RB, TAB:RB
|  jz <2				// No metatable?
|  test byte TAB:RB->nomm, 1<<MM_eq
|  jnz <2				// Or 'no __eq' flag set?
      /* RB tells ->vmeta_equal which opcode this is: 0 = EQ, 1 = NE. */
      if (vk) {
|  xor RBd, RBd			// ne = 0
      } else {
|  mov RBd, 1			// ne = 1
      }
|  jmp ->vmeta_equal		// Handle __eq metamethod.
} else {
|.if FFI
|3:
|  // With FFI, label 3 is defined here: a cdata operand goes to
|  // ->vmeta_equal_cd, anything else takes the 'not equal' outcome.
|  cmp ITYPEd, LJ_TCDATA
      if (LJ_DUALNUM && vk) {
|  jne <9
      } else {
|  jne <2
      }
|  jmp ->vmeta_equal_cd
|.endif
    }
    break;
  case BC_ISEQS: case BC_ISNES:
    vk = op == BC_ISEQS;
|  ins_AND	// RA = src, RD = str const, JMP with RD = target
|  mov RB, [BASE+RA*8]
|  add PC, 4
|  checkstr RB, >3
|  cmp RB, [KBASE+RD*8]
  /* Shared tail, also entered via goto: expects the flags of an integer */
  /* compare and continues at iseqne_end. */
  iseqne_test:
    if (vk) {
|  jne >2
    } else {
|  je >1
    }
    goto iseqne_end;
  case BC_ISEQN: case BC_ISNEN:
    vk = op == BC_ISEQN;
|  ins_AD	// RA = src, RD = num const, JMP with RD = target
|  mov RB, [BASE+RA*8]
|  add PC, 4
|.if DUALNUM
|  checkint RB, >7
|  mov RD, [KBASE+RD*8]
|  checkint RD, >8
|  cmp RBd, RDd
    if (vk) {
|  jne >9
    } else {
|  je >9
    }
|  movzx RDd, PC_RD
|  branchPC RD
|9:
|  ins_next
|
|7:  // RA is not an integer.
|  ja >3
|  // RA is a number.
|  mov RD, [KBASE+RD*8]
|  checkint RD, >1
|  // RA is a number, RD is an integer.
|  cvtsi2sd xmm0, RDd
|  jmp >2
|
|8:  // RA is an integer, RD is a number.
|  cvtsi2sd xmm0, RBd
|  movd xmm1, RD
|  ucomisd xmm0, xmm1
|  jmp >4
|1:
|  movd xmm0, RD
|.else
|  checknum RB, >3
|1:
|  movsd xmm0, qword [KBASE+RD*8]
|.endif
|2:
|  // Compare the constant in xmm0 against the slot value in memory.
|  ucomisd xmm0, qword [BASE+RA*8]
|4:
    goto iseqne_fp;
  case BC_ISEQP: case BC_ISNEP:
    vk = op == BC_ISEQP;
|  ins_AND	// RA = src, RD = primitive type (~), JMP with RD = target
|  mov RB, [BASE+RA*8]
|  sar RB, 47
|  add PC, 4
|  cmp RBd, RDd
    /* Without FFI the shared tail is used. With FFI a mismatching cdata */
    /* operand is routed to ->vmeta_equal_cd. */
    if (!LJ_HASFFI) goto iseqne_test;
    if (vk) {
|  jne >3
|  movzx RDd, PC_RD
|  branchPC RD
|2:
|  ins_next
|3:
|  cmp RBd, LJ_TCDATA; jne <2
|  jmp ->vmeta_equal_cd
    } else {
|  je >2
|  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
|  movzx RDd, PC_RD
|  branchPC RD
|2:
|  ins_next
    }
    break;

  /* -- Unary test and copy ops ------------------------------------------- */

  case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
|  ins_AD	// RA = dst or unused, RD = src, JMP with RD = target
|  mov ITYPE, [BASE+RD*8]
|  add PC, 4
    /* Only the copying variants keep the full value (in RB). */
    if (op == BC_ISTC || op == BC_ISFC) {
|  mov RB, ITYPE
    }
|  sar ITYPE, 47
|  cmp ITYPEd, LJ_TISTRUECOND
    /* The emitted jump skips the copy and the branch. */
    if (op == BC_IST || op == BC_ISTC) {
|  jae >1
    } else {
|  jb >1
    }
    if (op == BC_ISTC || op == BC_ISFC) {
|  mov [BASE+RA*8], RB
    }
|  movzx RDd, PC_RD
|  branchPC RD
|1:					// Fallthrough to the next instruction.
|  ins_next
    break;

  case BC_ISTYPE:
|  ins_AD	// RA = src, RD = -type
|  // RD holds the negated type, so the sum is zero only on a match.
|  mov RB, [BASE+RA*8]
|  sar RB, 47
|  add RBd, RDd
|  jne ->vmeta_istype
|  ins_next
    break;
  case BC_ISNUM:
|  ins_AD	// RA = src, RD = -(TISNUM-1)
|  checknumtp [BASE+RA*8], ->vmeta_istype
|  ins_next
    break;

  /* -- Unary ops --------------------------------------------------------- */

  case BC_MOV:
|  ins_AD	// RA = dst, RD = src
|  mov RB, [BASE+RD*8]
|  mov [BASE+RA*8], RB
|  ins_next_
    break;
  case BC_NOT:
|  ins_AD	// RA = dst, RD = src
|  // RC = 2 minus the carry of the compare, i.e. 1 if the shifted value
|  // is below LJ_TISTRUECOND, else 2. Shifted left by 47 and inverted,
|  // this is the same encoding BC_KPRI builds from a primitive type (~).
|  mov RB, [BASE+RD*8]
|  sar RB, 47
|  mov RCd, 2
|  cmp RB, LJ_TISTRUECOND
|  sbb RCd, 0
|  shl RC, 47
|  not RC
|  mov [BASE+RA*8], RC
|  ins_next
    break;
  case BC_UNM:
|  ins_AD	// RA = dst, RD = src
|  mov RB, [BASE+RD*8]
|.if DUALNUM
|  checkint RB, >5
|  neg RBd
|  jo >4
|  setint RB
|9:
|  mov [BASE+RA*8], RB
|  ins_next
|4:
|  // The integer negation overflowed: store the double 2^31 instead.
|  mov64 RB, U64x(41e00000,00000000)  // 2^31.
|  jmp <9
|5:
|  ja ->vmeta_unm
|.else
|  checknum RB, ->vmeta_unm
|.endif
|  // Negate a double by flipping bit 63.
|  mov64 RD, U64x(80000000,00000000)
|  xor RB, RD
|.if DUALNUM
|  jmp <9
|.else
|  mov [BASE+RA*8], RB
|  ins_next
|.endif
    break;
  case BC_LEN:
|  ins_AD	// RA = dst, RD = src
|  mov RD, [BASE+RD*8]
|  checkstr RD, >2
|.if DUALNUM
|  mov RDd, dword STR:RD->len
|1:
|  setint RD
|  mov [BASE+RA*8], RD
|.else
|  xorps xmm0, xmm0
|  cvtsi2sd xmm0, dword STR:RD->len
|1:
|  movsd qword [BASE+RA*8], xmm0
|.endif
|  ins_next
|2:
|  // Not a string: tables are measured with lj_tab_len, everything else
|  // goes to ->vmeta_len.
|  cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
|  mov TAB:CARG1, TAB:RD
#if LJ_52
|  mov TAB:RB, TAB:RD->metatable
|  cmp TAB:RB, 0
|  jnz >9
|3:
#endif
|->BC_LEN_Z:
|  mov RB, BASE			// Save BASE.
|  call extern lj_tab_len		// (GCtab *t)
|  // Length of table returned in eax (RD).
|.if DUALNUM
|  // Nothing to do.
|.else
|  cvtsi2sd xmm0, RDd
|.endif
|  mov BASE, RB			// Restore BASE.
|  movzx RAd, PC_RA
|  jmp <1
#if LJ_52
|9:  // Check for __len.
|  test byte TAB:RB->nomm, 1<<MM_len
|  jnz <3
|  jmp ->vmeta_len			// 'no __len' flag NOT set: check.
#endif
    break;

  /* -- Binary ops -------------------------------------------------------- */

  /* vk, computed from the opcode, selects the operand form: 0 = VN, */
  /* 1 = NV, anything else = VV. */

  /* ins_arithpre: type-check both operands, load the first into xmm0 and */
  /* apply 'sseins ssereg, <second operand>'. */
    |.macro ins_arithpre, sseins, ssereg
    |  ins_ABC
    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
    ||switch (vk) {
    ||case 0:
    |   checknumtp [BASE+RB*8], ->vmeta_arith_vn
    |   .if DUALNUM
    |     checknumtp [KBASE+RC*8], ->vmeta_arith_vn
    |   .endif
    |   movsd xmm0, qword [BASE+RB*8]
    |   sseins ssereg, qword [KBASE+RC*8]
    ||  break;
    ||case 1:
    |   checknumtp [BASE+RB*8], ->vmeta_arith_nv
    |   .if DUALNUM
    |     checknumtp [KBASE+RC*8], ->vmeta_arith_nv
    |   .endif
    |   movsd xmm0, qword [KBASE+RC*8]
    |   sseins ssereg, qword [BASE+RB*8]
    ||  break;
    ||default:
    |   checknumtp [BASE+RB*8], ->vmeta_arith_vv
    |   checknumtp [BASE+RC*8], ->vmeta_arith_vv
    |   movsd xmm0, qword [BASE+RB*8]
    |   sseins ssereg, qword [BASE+RC*8]
    ||  break;
    ||}
    |.endmacro
    |
  /* ins_arithdn: integer variant. Non-integer operands or an overflow */
  /* of 'intins' go to the ->vmeta_arith_*o labels. For vk == 1 the */
  /* result is produced in RC, otherwise in RB. */
    |.macro ins_arithdn, intins
    |  ins_ABC
    ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
    ||switch (vk) {
    ||case 0:
    |   mov RB, [BASE+RB*8]
    |   mov RC, [KBASE+RC*8]
    |   checkint RB, ->vmeta_arith_vno
    |   checkint RC, ->vmeta_arith_vno
    |   intins RBd, RCd; jo ->vmeta_arith_vno
    ||  break;
    ||case 1:
    |   mov RB, [BASE+RB*8]
    |   mov RC, [KBASE+RC*8]
    |   checkint RB, ->vmeta_arith_nvo
    |   checkint RC, ->vmeta_arith_nvo
    |   intins RCd, RBd; jo ->vmeta_arith_nvo
    ||  break;
    ||default:
    |   mov RB, [BASE+RB*8]
    |   mov RC, [BASE+RC*8]
    |   checkint RB, ->vmeta_arith_vvo
    |   checkint RC, ->vmeta_arith_vvo
    |   intins RBd, RCd; jo ->vmeta_arith_vvo
    ||  break;
    ||}
    ||if (vk == 1) {
    |   setint RC
    |   mov [BASE+RA*8], RC
    ||} else {
    |   setint RB
    |   mov [BASE+RA*8], RB
    ||}
    |  ins_next
    |.endmacro
    |
  /* ins_arithpost: store the FP result from xmm0 into the slot RA. */
    |.macro ins_arithpost
    |  movsd qword [BASE+RA*8], xmm0
    |.endmacro
    |
    |.macro ins_arith, sseins
    |  ins_arithpre sseins, xmm0
    |  ins_arithpost
    |  ins_next
    |.endmacro
    |
  /* Two-argument form: integer path with DUALNUM, FP path otherwise. */
    |.macro ins_arith, intins, sseins
    |.if DUALNUM
    |  ins_arithdn intins
    |.else
    |  ins_arith, sseins
    |.endif
    |.endmacro

|  // RA = dst, RB = src1 or num const, RC = src2 or num const
  case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
|  ins_arith add, addsd
    break;
  case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
|  ins_arith sub, subsd
    break;
  case BC_MULVN: case BC_MULNV: case BC_MULVV:
|  ins_arith imul, mulsd
    break;
  case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
|  ins_arith divsd
    break;
  /* The MOD and POW cases pass 'movsd, xmm1' to ins_arithpre, so the */
  /* second operand is only loaded into xmm1 before the helper is called. */
  case BC_MODVN:
|  ins_arithpre movsd, xmm1
|->BC_MODVN_Z:
|  call ->vm_mod
|  ins_arithpost
|  ins_next
    break;
  case BC_MODNV: case BC_MODVV:
|  ins_arithpre movsd, xmm1
|  jmp ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
    break;
  case BC_POW:
|  ins_arithpre movsd, xmm1
|  // BASE is copied to RB around the call and RA is reloaded from the
|  // instruction afterwards.
|  mov RB, BASE
|  call extern pow
|  movzx RAd, PC_RA
|  mov BASE, RB
|  ins_arithpost
|  ins_next
    break;

  case BC_CAT:
|  ins_ABC	// RA = dst, RB = src_start, RC = src_end
|  // CARG2 = address of slot RC, CARG3 = RC - RB.
|  mov L:CARG1, SAVE_L
|  mov L:CARG1->base, BASE
|  lea CARG2, [BASE+RC*8]
|  mov CARG3d, RCd
|  sub CARG3d, RBd
|->BC_CAT_Z:
|  mov L:RB, L:CARG1
|  mov SAVE_PC, PC
|  call extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
|  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3346 | mov BASE, L:RB->base 3347 | test RC, RC 3348 | jnz ->vmeta_binop 3349 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB]. 3350 | movzx RAd, PC_RA 3351 | mov RC, [BASE+RB*8] 3352 | mov [BASE+RA*8], RC 3353 | ins_next 3354 break; 3355 3356 /* -- Constant ops ------------------------------------------------------ */ 3357 3358 case BC_KSTR: 3359 | ins_AND // RA = dst, RD = str const (~) 3360 | mov RD, [KBASE+RD*8] 3361 | settp RD, LJ_TSTR 3362 | mov [BASE+RA*8], RD 3363 | ins_next 3364 break; 3365 case BC_KCDATA: 3366 |.if FFI 3367 | ins_AND // RA = dst, RD = cdata const (~) 3368 | mov RD, [KBASE+RD*8] 3369 | settp RD, LJ_TCDATA 3370 | mov [BASE+RA*8], RD 3371 | ins_next 3372 |.endif 3373 break; 3374 case BC_KSHORT: 3375 | ins_AD // RA = dst, RD = signed int16 literal 3376 |.if DUALNUM 3377 | movsx RDd, RDW 3378 | setint RD 3379 | mov [BASE+RA*8], RD 3380 |.else 3381 | movsx RDd, RDW // Sign-extend literal. 3382 | cvtsi2sd xmm0, RDd 3383 | movsd qword [BASE+RA*8], xmm0 3384 |.endif 3385 | ins_next 3386 break; 3387 case BC_KNUM: 3388 | ins_AD // RA = dst, RD = num const 3389 | movsd xmm0, qword [KBASE+RD*8] 3390 | movsd qword [BASE+RA*8], xmm0 3391 | ins_next 3392 break; 3393 case BC_KPRI: 3394 | ins_AD // RA = dst, RD = primitive type (~) 3395 | shl RD, 47 3396 | not RD 3397 | mov [BASE+RA*8], RD 3398 | ins_next 3399 break; 3400 case BC_KNIL: 3401 | ins_AD // RA = dst_start, RD = dst_end 3402 | lea RA, [BASE+RA*8+8] 3403 | lea RD, [BASE+RD*8] 3404 | mov RB, LJ_TNIL 3405 | mov [RA-8], RB // Sets minimum 2 slots. 
3406 |1: 3407 | mov [RA], RB 3408 | add RA, 8 3409 | cmp RA, RD 3410 | jbe <1 3411 | ins_next 3412 break; 3413 3414 /* -- Upvalue and function ops ------------------------------------------ */ 3415 3416 case BC_UGET: 3417 | ins_AD // RA = dst, RD = upvalue # 3418 | mov LFUNC:RB, [BASE-16] 3419 | cleartp LFUNC:RB 3420 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] 3421 | mov RB, UPVAL:RB->v 3422 | mov RD, [RB] 3423 | mov [BASE+RA*8], RD 3424 | ins_next 3425 break; 3426 case BC_USETV: 3427 #define TV2MARKOFS \ 3428 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) 3429 | ins_AD // RA = upvalue #, RD = src 3430 | mov LFUNC:RB, [BASE-16] 3431 | cleartp LFUNC:RB 3432 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3433 | cmp byte UPVAL:RB->closed, 0 3434 | mov RB, UPVAL:RB->v 3435 | mov RA, [BASE+RD*8] 3436 | mov [RB], RA 3437 | jz >1 3438 | // Check barrier for closed upvalue. 3439 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) 3440 | jnz >2 3441 |1: 3442 | ins_next 3443 | 3444 |2: // Upvalue is black. Check if new value is collectable and white. 3445 | mov RD, RA 3446 | sar RD, 47 3447 | sub RDd, LJ_TISGCV 3448 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) 3449 | jbe <1 3450 | cleartp GCOBJ:RA 3451 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) 3452 | jz <1 3453 | // Crossed a write barrier. Move the barrier forward. 3454 |.if not X64WIN 3455 | mov CARG2, RB 3456 | mov RB, BASE // Save BASE. 3457 |.else 3458 | xchg CARG2, RB // Save BASE (CARG2 == BASE). 3459 |.endif 3460 | lea GL:CARG1, [DISPATCH+GG_DISP2G] 3461 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3462 | mov BASE, RB // Restore BASE. 
3463 | jmp <1 3464 break; 3465 #undef TV2MARKOFS 3466 case BC_USETS: 3467 | ins_AND // RA = upvalue #, RD = str const (~) 3468 | mov LFUNC:RB, [BASE-16] 3469 | cleartp LFUNC:RB 3470 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3471 | mov STR:RA, [KBASE+RD*8] 3472 | mov RD, UPVAL:RB->v 3473 | settp STR:ITYPE, STR:RA, LJ_TSTR 3474 | mov [RD], STR:ITYPE 3475 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) 3476 | jnz >2 3477 |1: 3478 | ins_next 3479 | 3480 |2: // Check if string is white and ensure upvalue is closed. 3481 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) 3482 | jz <1 3483 | cmp byte UPVAL:RB->closed, 0 3484 | jz <1 3485 | // Crossed a write barrier. Move the barrier forward. 3486 | mov RB, BASE // Save BASE (CARG2 == BASE). 3487 | mov CARG2, RD 3488 | lea GL:CARG1, [DISPATCH+GG_DISP2G] 3489 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3490 | mov BASE, RB // Restore BASE. 3491 | jmp <1 3492 break; 3493 case BC_USETN: 3494 | ins_AD // RA = upvalue #, RD = num const 3495 | mov LFUNC:RB, [BASE-16] 3496 | cleartp LFUNC:RB 3497 | movsd xmm0, qword [KBASE+RD*8] 3498 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3499 | mov RA, UPVAL:RB->v 3500 | movsd qword [RA], xmm0 3501 | ins_next 3502 break; 3503 case BC_USETP: 3504 | ins_AD // RA = upvalue #, RD = primitive type (~) 3505 | mov LFUNC:RB, [BASE-16] 3506 | cleartp LFUNC:RB 3507 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] 3508 | shl RD, 47 3509 | not RD 3510 | mov RA, UPVAL:RB->v 3511 | mov [RA], RD 3512 | ins_next 3513 break; 3514 case BC_UCLO: 3515 | ins_AD // RA = level, RD = target 3516 | branchPC RD // Do this first to free RD. 
3517 | mov L:RB, SAVE_L 3518 | cmp aword L:RB->openupval, 0 3519 | je >1 3520 | mov L:RB->base, BASE 3521 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE 3522 | mov L:CARG1, L:RB // Caveat: CARG1 == RA 3523 | call extern lj_func_closeuv // (lua_State *L, TValue *level) 3524 | mov BASE, L:RB->base 3525 |1: 3526 | ins_next 3527 break; 3528 3529 case BC_FNEW: 3530 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) 3531 | mov L:RB, SAVE_L 3532 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. 3533 | mov CARG3, [BASE-16] 3534 | cleartp CARG3 3535 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *. 3536 | mov CARG1, L:RB 3537 | mov SAVE_PC, PC 3538 | // (lua_State *L, GCproto *pt, GCfuncL *parent) 3539 | call extern lj_func_newL_gc 3540 | // GCfuncL * returned in eax (RC). 3541 | mov BASE, L:RB->base 3542 | movzx RAd, PC_RA 3543 | settp LFUNC:RC, LJ_TFUNC 3544 | mov [BASE+RA*8], LFUNC:RC 3545 | ins_next 3546 break; 3547 3548 /* -- Table ops --------------------------------------------------------- */ 3549 3550 case BC_TNEW: 3551 | ins_AD // RA = dst, RD = hbits|asize 3552 | mov L:RB, SAVE_L 3553 | mov L:RB->base, BASE 3554 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] 3555 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] 3556 | mov SAVE_PC, PC 3557 | jae >5 3558 |1: 3559 | mov CARG3d, RDd 3560 | and RDd, 0x7ff 3561 | shr CARG3d, 11 3562 | cmp RDd, 0x7ff 3563 | je >3 3564 |2: 3565 | mov L:CARG1, L:RB 3566 | mov CARG2d, RDd 3567 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 3568 | // Table * returned in eax (RC). 3569 | mov BASE, L:RB->base 3570 | movzx RAd, PC_RA 3571 | settp TAB:RC, LJ_TTAB 3572 | mov [BASE+RA*8], TAB:RC 3573 | ins_next 3574 |3: // Turn 0x7ff into 0x801. 
3575 | mov RDd, 0x801 3576 | jmp <2 3577 |5: 3578 | mov L:CARG1, L:RB 3579 | call extern lj_gc_step_fixtop // (lua_State *L) 3580 | movzx RDd, PC_RD 3581 | jmp <1 3582 break; 3583 case BC_TDUP: 3584 | ins_AND // RA = dst, RD = table const (~) (holding template table) 3585 | mov L:RB, SAVE_L 3586 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] 3587 | mov SAVE_PC, PC 3588 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] 3589 | mov L:RB->base, BASE 3590 | jae >3 3591 |2: 3592 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE 3593 | mov L:CARG1, L:RB // Caveat: CARG1 == RA 3594 | call extern lj_tab_dup // (lua_State *L, Table *kt) 3595 | // Table * returned in eax (RC). 3596 | mov BASE, L:RB->base 3597 | movzx RAd, PC_RA 3598 | settp TAB:RC, LJ_TTAB 3599 | mov [BASE+RA*8], TAB:RC 3600 | ins_next 3601 |3: 3602 | mov L:CARG1, L:RB 3603 | call extern lj_gc_step_fixtop // (lua_State *L) 3604 | movzx RDd, PC_RD // Need to reload RD. 3605 | not RD 3606 | jmp <2 3607 break; 3608 3609 case BC_GGET: 3610 | ins_AND // RA = dst, RD = str const (~) 3611 | mov LFUNC:RB, [BASE-16] 3612 | cleartp LFUNC:RB 3613 | mov TAB:RB, LFUNC:RB->env 3614 | mov STR:RC, [KBASE+RD*8] 3615 | jmp ->BC_TGETS_Z 3616 break; 3617 case BC_GSET: 3618 | ins_AND // RA = src, RD = str const (~) 3619 | mov LFUNC:RB, [BASE-16] 3620 | cleartp LFUNC:RB 3621 | mov TAB:RB, LFUNC:RB->env 3622 | mov STR:RC, [KBASE+RD*8] 3623 | jmp ->BC_TSETS_Z 3624 break; 3625 3626 case BC_TGETV: 3627 | ins_ABC // RA = dst, RB = table, RC = key 3628 | mov TAB:RB, [BASE+RB*8] 3629 | mov RC, [BASE+RC*8] 3630 | checktab TAB:RB, ->vmeta_tgetv 3631 | 3632 | // Integer key? 3633 |.if DUALNUM 3634 | checkint RC, >5 3635 |.else 3636 | // Convert number to int and back and compare. 3637 | checknum RC, >5 3638 | movd xmm0, RC 3639 | cvttsd2si RCd, xmm0 3640 | cvtsi2sd xmm1, RCd 3641 | ucomisd xmm0, xmm1 3642 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 3643 |.endif 3644 | cmp RCd, TAB:RB->asize // Takes care of unordered, too. 
3645 | jae ->vmeta_tgetv // Not in array part? Use fallback. 3646 | shl RCd, 3 3647 | add RC, TAB:RB->array 3648 | // Get array slot. 3649 | mov ITYPE, [RC] 3650 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. 3651 | je >2 3652 |1: 3653 | mov [BASE+RA*8], ITYPE 3654 | ins_next 3655 | 3656 |2: // Check for __index if table value is nil. 3657 | mov TAB:TMPR, TAB:RB->metatable 3658 | test TAB:TMPR, TAB:TMPR 3659 | jz <1 3660 | test byte TAB:TMPR->nomm, 1<<MM_index 3661 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. 3662 | jmp <1 3663 | 3664 |5: // String key? 3665 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv 3666 | cleartp STR:RC 3667 | jmp ->BC_TGETS_Z 3668 break; 3669 case BC_TGETS: 3670 | ins_ABC // RA = dst, RB = table, RC = str const (~) 3671 | mov TAB:RB, [BASE+RB*8] 3672 | not RC 3673 | mov STR:RC, [KBASE+RC*8] 3674 | checktab TAB:RB, ->vmeta_tgets 3675 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * 3676 | mov TMPRd, TAB:RB->hmask 3677 | and TMPRd, STR:RC->hash 3678 | imul TMPRd, #NODE 3679 | add NODE:TMPR, TAB:RB->node 3680 | settp ITYPE, STR:RC, LJ_TSTR 3681 |1: 3682 | cmp NODE:TMPR->key, ITYPE 3683 | jne >4 3684 | // Get node value. 3685 | mov ITYPE, NODE:TMPR->val 3686 | cmp ITYPE, LJ_TNIL 3687 | je >5 // Key found, but nil value? 3688 |2: 3689 | mov [BASE+RA*8], ITYPE 3690 | ins_next 3691 | 3692 |4: // Follow hash chain. 3693 | mov NODE:TMPR, NODE:TMPR->next 3694 | test NODE:TMPR, NODE:TMPR 3695 | jnz <1 3696 | // End of hash chain: key not found, nil result. 3697 | mov ITYPE, LJ_TNIL 3698 | 3699 |5: // Check for __index if table value is nil. 3700 | mov TAB:TMPR, TAB:RB->metatable 3701 | test TAB:TMPR, TAB:TMPR 3702 | jz <2 // No metatable: done. 3703 | test byte TAB:TMPR->nomm, 1<<MM_index 3704 | jnz <2 // 'no __index' flag set: done. 3705 | jmp ->vmeta_tgets // Caveat: preserve STR:RC. 
break;
/*
** Table get with a byte-literal index: same array fast path and __index
** handling as BC_TGETV, without the key type check. Fallback: ->vmeta_tgetb.
*/
case BC_TGETB:
|  ins_ABC  // RA = dst, RB = table, RC = byte literal
|  mov TAB:RB, [BASE+RB*8]
|  checktab TAB:RB, ->vmeta_tgetb
|  cmp RCd, TAB:RB->asize
|  jae ->vmeta_tgetb
|  shl RCd, 3
|  add RC, TAB:RB->array
|  // Get array slot.
|  mov ITYPE, [RC]
|  cmp ITYPE, LJ_TNIL
|  je >2
|1:
|  mov [BASE+RA*8], ITYPE
|  ins_next
|
|2:  // Check for __index if table value is nil.
|  mov TAB:TMPR, TAB:RB->metatable
|  test TAB:TMPR, TAB:TMPR
|  jz <1
|  test byte TAB:TMPR->nomm, 1<<MM_index
|  jz ->vmeta_tgetb  // 'no __index' flag NOT set: check.
|  jmp <1
break;
/*
** Array get without table type check or nil/metatable check: the slot is
** copied as-is. Out-of-range indexes go to ->vmeta_tgetr. ->BC_TGETR_Z and
** ->BC_TGETR2_Z are global re-entry labels; their users are outside this
** section.
*/
case BC_TGETR:
|  ins_ABC  // RA = dst, RB = table, RC = key
|  mov TAB:RB, [BASE+RB*8]
|  cleartp TAB:RB
|.if DUALNUM
|  mov RCd, dword [BASE+RC*8]
|.else
|  cvttsd2si RCd, qword [BASE+RC*8]
|.endif
|  cmp RCd, TAB:RB->asize
|  jae ->vmeta_tgetr  // Not in array part? Use fallback.
|  shl RCd, 3
|  add RC, TAB:RB->array
|  // Get array slot.
|->BC_TGETR_Z:
|  mov ITYPE, [RC]
|->BC_TGETR2_Z:
|  mov [BASE+RA*8], ITYPE
|  ins_next
break;

/*
** Table set with a variable key. Fast path: integer index below asize.
** If the previous slot value is nil and the metatable's nomm byte lacks the
** MM_newindex bit, ->vmeta_tsetv takes over. A black table goes through
** barrierback before the store. Non-numeric keys go to label 5 (string
** check, then the shared ->BC_TSETS_Z path).
*/
case BC_TSETV:
|  ins_ABC  // RA = src, RB = table, RC = key
|  mov TAB:RB, [BASE+RB*8]
|  mov RC, [BASE+RC*8]
|  checktab TAB:RB, ->vmeta_tsetv
|
|  // Integer key?
|.if DUALNUM
|  checkint RC, >5
|.else
|  // Convert number to int and back and compare.
|  checknum RC, >5
|  movd xmm0, RC
|  cvttsd2si RCd, xmm0
|  cvtsi2sd xmm1, RCd
|  ucomisd xmm0, xmm1
|  jne ->vmeta_tsetv  // Generic numeric key? Use fallback.
|.endif
|  cmp RCd, TAB:RB->asize  // Takes care of unordered, too.
|  jae ->vmeta_tsetv
|  shl RCd, 3
|  add RC, TAB:RB->array
|  cmp aword [RC], LJ_TNIL
|  je >3  // Previous value is nil?
|1:
|  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
|  jnz >7
|2:  // Set array slot.
|  mov RB, [BASE+RA*8]
|  mov [RC], RB
|  ins_next
|
|3:  // Check for __newindex if previous value is nil.
|  mov TAB:TMPR, TAB:RB->metatable
|  test TAB:TMPR, TAB:TMPR
|  jz <1
|  test byte TAB:TMPR->nomm, 1<<MM_newindex
|  jz ->vmeta_tsetv  // 'no __newindex' flag NOT set: check.
|  jmp <1
|
|5:  // String key?
|  cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
|  cleartp STR:RC
|  jmp ->BC_TSETS_Z
|
|7:  // Possible table write barrier for the value. Skip valiswhite check.
|  barrierback TAB:RB, TMPR
|  jmp <2
break;
/*
** Table set with a constant string key. Walks the hash chain like
** BC_TGETS; the table's nomm byte is cleared on entry. If the key is absent
** (and no __newindex check is needed), the tagged key is spilled to TMP1 and
** lj_tab_newkey(L, t, &TMP1) returns the new value slot, which is then
** treated like a found node (label 2). ITYPE doubles as scratch at labels
** 4 and 7, after the key comparison is done.
*/
case BC_TSETS:
|  ins_ABC  // RA = src, RB = table, RC = str const (~)
|  mov TAB:RB, [BASE+RB*8]
|  not RC
|  mov STR:RC, [KBASE+RC*8]
|  checktab TAB:RB, ->vmeta_tsets
|->BC_TSETS_Z:  // RB = GCtab *, RC = GCstr *
|  mov TMPRd, TAB:RB->hmask
|  and TMPRd, STR:RC->hash
|  imul TMPRd, #NODE
|  mov byte TAB:RB->nomm, 0  // Clear metamethod cache.
|  add NODE:TMPR, TAB:RB->node
|  settp ITYPE, STR:RC, LJ_TSTR
|1:
|  cmp NODE:TMPR->key, ITYPE
|  jne >5
|  // Ok, key found. Assumes: offsetof(Node, val) == 0
|  cmp aword [TMPR], LJ_TNIL
|  je >4  // Previous value is nil?
|2:
|  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
|  jnz >7
|3:  // Set node value.
|  mov ITYPE, [BASE+RA*8]
|  mov [TMPR], ITYPE
|  ins_next
|
|4:  // Check for __newindex if previous value is nil.
|  mov TAB:ITYPE, TAB:RB->metatable
|  test TAB:ITYPE, TAB:ITYPE
|  jz <2
|  test byte TAB:ITYPE->nomm, 1<<MM_newindex
|  jz ->vmeta_tsets  // 'no __newindex' flag NOT set: check.
|  jmp <2
|
|5:  // Follow hash chain.
|  mov NODE:TMPR, NODE:TMPR->next
|  test NODE:TMPR, NODE:TMPR
|  jnz <1
|  // End of hash chain: key not found, add a new one.
|
|  // But check for __newindex first.
|  mov TAB:TMPR, TAB:RB->metatable
|  test TAB:TMPR, TAB:TMPR
|  jz >6  // No metatable: continue.
|  test byte TAB:TMPR->nomm, 1<<MM_newindex
|  jz ->vmeta_tsets  // 'no __newindex' flag NOT set: check.
|6:
|  mov TMP1, ITYPE
|  mov L:CARG1, SAVE_L
|  mov L:CARG1->base, BASE
|  lea CARG3, TMP1
|  mov CARG2, TAB:RB
|  mov SAVE_PC, PC
|  call extern lj_tab_newkey  // (lua_State *L, GCtab *t, TValue *k)
|  // Handles write barrier for the new key. TValue * returned in rax (RC).
|  mov L:CARG1, SAVE_L
|  mov BASE, L:CARG1->base
|  mov TMPR, rax
|  movzx RAd, PC_RA
|  jmp <2  // Must check write barrier for value.
|
|7:  // Possible table write barrier for the value. Skip valiswhite check.
|  barrierback TAB:RB, ITYPE
|  jmp <3
break;
/*
** Table set with a byte-literal index: array fast path as in BC_TSETV,
** without the key type check. Fallback: ->vmeta_tsetb.
*/
case BC_TSETB:
|  ins_ABC  // RA = src, RB = table, RC = byte literal
|  mov TAB:RB, [BASE+RB*8]
|  checktab TAB:RB, ->vmeta_tsetb
|  cmp RCd, TAB:RB->asize
|  jae ->vmeta_tsetb
|  shl RCd, 3
|  add RC, TAB:RB->array
|  cmp aword [RC], LJ_TNIL
|  je >3  // Previous value is nil?
|1:
|  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
|  jnz >7
|2:  // Set array slot.
|  mov ITYPE, [BASE+RA*8]
|  mov [RC], ITYPE
|  ins_next
|
|3:  // Check for __newindex if previous value is nil.
|  mov TAB:TMPR, TAB:RB->metatable
|  test TAB:TMPR, TAB:TMPR
|  jz <1
|  test byte TAB:TMPR->nomm, 1<<MM_newindex
|  jz ->vmeta_tsetb  // 'no __newindex' flag NOT set: check.
|  jmp <1
|
|7:  // Possible table write barrier for the value. Skip valiswhite check.
|  barrierback TAB:RB, TMPR
|  jmp <2
break;
/*
** Array set without table type check or __newindex check. The write barrier
** test runs before the bounds check; out-of-range indexes go to
** ->vmeta_tsetr. ->BC_TSETR_Z is a global re-entry label; its users are
** outside this section.
*/
case BC_TSETR:
|  ins_ABC  // RA = src, RB = table, RC = key
|  mov TAB:RB, [BASE+RB*8]
|  cleartp TAB:RB
|.if DUALNUM
|  // NOTE(review): BC_TGETR loads only the low dword here. This loads the
|  // whole slot, but within this case only RCd is read until `shl RCd, 3`.
|  mov RC, [BASE+RC*8]
|.else
|  cvttsd2si RCd, qword [BASE+RC*8]
|.endif
|  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
|  jnz >7
|2:
|  cmp RCd, TAB:RB->asize
|  jae ->vmeta_tsetr
|  shl RCd, 3
|  add RC, TAB:RB->array
|  // Set array slot.
|->BC_TSETR_Z:
|  mov ITYPE, [BASE+RA*8]
|  mov [RC], ITYPE
|  ins_next
|
|7:  // Possible table write barrier for the value. Skip valiswhite check.
|  barrierback TAB:RB, TMPR
|  jmp <2
break;

/*
** Copy MULTRES-1 slots starting at BASE[RA] into the array part of the
** table at BASE[RA-1], beginning at the index held in the low word of
** KBASE[RD]. If start index + count exceeds asize, lj_tab_reasize is called
** and the whole instruction is retried from label 1 (RA/RD are reloaded from
** the instruction, everything else is recomputed).
*/
case BC_TSETM:
|  ins_AD  // RA = base (table at base-1), RD = num const (start index)
|1:
|  mov TMPRd, dword [KBASE+RD*8]  // Integer constant is in lo-word.
|  lea RA, [BASE+RA*8]
|  mov TAB:RB, [RA-8]  // Guaranteed to be a table.
|  cleartp TAB:RB
|  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
|  jnz >7
|2:
|  mov RDd, MULTRES
|  sub RDd, 1
|  jz >4  // Nothing to copy?
|  add RDd, TMPRd  // Compute needed size.
|  cmp RDd, TAB:RB->asize
|  ja >5  // Doesn't fit into array part?
|  sub RDd, TMPRd
|  shl TMPRd, 3
|  add TMPR, TAB:RB->array
|3:  // Copy result slots to table.
|  mov RB, [RA]
|  add RA, 8
|  mov [TMPR], RB
|  add TMPR, 8
|  sub RDd, 1
|  jnz <3
|4:
|  ins_next
|
|5:  // Need to resize array part.
|  mov L:CARG1, SAVE_L
|  mov L:CARG1->base, BASE  // Caveat: CARG2/CARG3 may be BASE.
|  mov CARG2, TAB:RB
|  mov CARG3d, RDd
|  mov L:RB, L:CARG1
|  mov SAVE_PC, PC
|  call extern lj_tab_reasize  // (lua_State *L, GCtab *t, int nasize)
|  mov BASE, L:RB->base
|  movzx RAd, PC_RA  // Restore RA.
|  movzx RDd, PC_RD  // Restore RD.
|  jmp <1  // Retry.
|
|7:  // Possible table write barrier for any value. Skip valiswhite check.
|  barrierback TAB:RB, RD
|  jmp <2
break;

/* -- Calls and vararg handling ----------------------------------------- */

/*
** Call the function in BASE[RA]; BASE is advanced to &BASE[RA]+16 (the
** callee's first argument slot). BC_CALLM additionally adds MULTRES to the
** argument count. Non-function values go to ->vmeta_call_ra.
*/
case BC_CALL: case BC_CALLM:
|  ins_A_C  // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
  if (op == BC_CALLM) {
|  add NARGS:RDd, MULTRES
  }
|  mov LFUNC:RB, [BASE+RA*8]
|  checkfunc LFUNC:RB, ->vmeta_call_ra
|  lea BASE, [BASE+RA*8+16]
|  ins_call
break;

case BC_CALLMT:
|  ins_AD  // RA = base, RD = extra_nargs
|  add NARGS:RDd, MULTRES
|  // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
break;
/*
** Tail call: the function and its arguments are moved down over the current
** frame (func+tag to [BASE-16], args from RA to KBASE = BASE), then
** ins_callt dispatches. Vararg frames are handled at label 7, fast functions
** (ffid > 1) at label 5, where KBASE is re-derived when the frame below is a
** Lua frame.
*/
case BC_CALLT:
|  ins_AD  // RA = base, RD = nargs+1
|  lea RA, [BASE+RA*8+16]
|  mov KBASE, BASE  // Use KBASE for move + vmeta_call hint.
|  mov LFUNC:RB, [RA-16]
|  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
|->BC_CALLT_Z:
|  mov PC, [BASE-8]
|  test PCd, FRAME_TYPE
|  jnz >7
|1:
|  mov [BASE-16], LFUNC:RB  // Copy func+tag down, reloaded below.
|  mov MULTRES, NARGS:RDd
|  sub NARGS:RDd, 1
|  jz >3
|2:  // Move args down.
|  mov RB, [RA]
|  add RA, 8
|  mov [KBASE], RB
|  add KBASE, 8
|  sub NARGS:RDd, 1
|  jnz <2
|
|  mov LFUNC:RB, [BASE-16]
|3:
|  cleartp LFUNC:RB
|  mov NARGS:RDd, MULTRES
|  cmp byte LFUNC:RB->ffid, 1  // (> FF_C) Calling a fast function?
|  ja >5
|4:
|  ins_callt
|
|5:  // Tailcall to a fast function.
|  test PCd, FRAME_TYPE  // Lua frame below?
|  jnz <4
|  movzx RAd, PC_RA
|  neg RA
|  mov LFUNC:KBASE, [BASE+RA*8-32]  // Need to prepare KBASE.
|  cleartp LFUNC:KBASE
|  mov KBASE, LFUNC:KBASE->pc
|  mov KBASE, [KBASE+PC2PROTO(k)]
|  jmp <4
|
|7:  // Tailcall from a vararg function.
|  sub PC, FRAME_VARG
|  test PCd, FRAME_TYPEP
|  jnz >8  // Vararg frame below?
|  sub BASE, PC  // Need to relocate BASE/KBASE down.
|  mov KBASE, BASE
|  mov PC, [BASE-8]
|  jmp <1
|8:
|  add PCd, FRAME_VARG
|  jmp <1
break;

/*
** Generic iterator call: copies the callable, state and control variable
** from the three slots below RA into a fresh call frame at RA+2 and then
** performs a regular call with 2 arguments.
*/
case BC_ITERC:
|  ins_A  // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
|  lea RA, [BASE+RA*8+16]  // fb = base+2
|  mov RB, [RA-32]  // Copy state. fb[0] = fb[-4].
|  mov RC, [RA-24]  // Copy control var. fb[1] = fb[-3].
|  mov [RA], RB
|  mov [RA+8], RC
|  mov LFUNC:RB, [RA-40]  // Copy callable. fb[-2] = fb[-5]
|  mov [RA-16], LFUNC:RB
|  mov NARGS:RDd, 2+1  // Handle like a regular 2-arg call.
|  checkfunc LFUNC:RB, ->vmeta_call
|  mov BASE, RA
|  ins_call
break;

/*
** Specialized table traversal. The 32-bit index in the control variable
** slot (BASE[RA-1]) first walks the array part, then (offset by asize) the
** hash nodes; nil entries are skipped. On a hit, key and value are written
** to BASE[RA] / BASE[RA+1], the index is updated, and the branch target is
** read from the following instruction (PC was advanced by 4 beforehand).
** When the hash part is exhausted, execution continues at label 3 without
** branching.
*/
case BC_ITERN:
|  ins_A  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
|.if JIT
|  // NYI: add hotloop, record BC_ITERN.
|.endif
|  mov TAB:RB, [BASE+RA*8-16]
|  cleartp TAB:RB
|  mov RCd, [BASE+RA*8-8]  // Get index from control var.
|  mov TMPRd, TAB:RB->asize
|  add PC, 4
|  mov ITYPE, TAB:RB->array
|1:  // Traverse array part.
|  cmp RCd, TMPRd; jae >5  // Index points after array part?
|  cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
|.if not DUALNUM
|  cvtsi2sd xmm0, RCd
|.endif
|  // Copy array slot to returned value.
|  mov RB, [ITYPE+RC*8]
|  mov [BASE+RA*8+8], RB
|  // Return array index as a numeric key.
|.if DUALNUM
|  setint ITYPE, RC
|  mov [BASE+RA*8], ITYPE
|.else
|  movsd qword [BASE+RA*8], xmm0
|.endif
|  add RCd, 1
|  mov [BASE+RA*8-8], RCd  // Update control var.
|2:
|  movzx RDd, PC_RD  // Get target from ITERL.
|  branchPC RD
|3:
|  ins_next
|
|4:  // Skip holes in array part.
|  add RCd, 1
|  jmp <1
|
|5:  // Traverse hash part.
|  sub RCd, TMPRd
|6:
|  cmp RCd, TAB:RB->hmask; ja <3  // End of iteration? Branch to ITERL+1.
|  imul ITYPEd, RCd, #NODE
|  add NODE:ITYPE, TAB:RB->node
|  cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
|  lea TMPRd, [RCd+TMPRd+1]
|  // Copy key and value from hash slot.
|  mov RB, NODE:ITYPE->key
|  mov RC, NODE:ITYPE->val
|  mov [BASE+RA*8], RB
|  mov [BASE+RA*8+8], RC
|  mov [BASE+RA*8-8], TMPRd
|  jmp <2
|
|7:  // Skip holes in hash part.
|  add RCd, 1
|  jmp <6
break;

/*
** Guard for the specialized traversal: requires a function with
** ffid == FF_next_N in BASE[RA-3], a table in BASE[RA-2] and nil in
** BASE[RA-1]. On success the control variable is initialized and the branch
** is taken. Otherwise this instruction is rewritten to BC_JMP and the
** instruction at the branch target to BC_ITERC before branching.
*/
case BC_ISNEXT:
|  ins_AD  // RA = base, RD = target (points to ITERN)
|  mov CFUNC:RB, [BASE+RA*8-24]
|  checkfunc CFUNC:RB, >5
|  checktptp [BASE+RA*8-16], LJ_TTAB, >5
|  cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
|  cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
|  branchPC RD
|  mov64 TMPR, U64x(fffe7fff, 00000000)
|  mov [BASE+RA*8-8], TMPR  // Initialize control var.
|1:
|  ins_next
|5:  // Despecialize bytecode if any of the checks fail.
|  mov PC_OP, BC_JMP
|  branchPC RD
|  mov byte [PC], BC_ITERC
|  jmp <1
break;

/*
** Copy varargs to BASE[RA]... TMPR is positioned using the frame delta read
** from [BASE-8]; slots are read from [TMPR-16] while TMPR < BASE.
** RB != 0: fill exactly RB-1 destination slots, padding with nil.
** RB == 0: copy all varargs and set MULTRES = #varargs+1, growing the stack
** via lj_state_growstack if the destination would exceed L->maxstack. TMPR
** is kept as a 32-bit delta relative to BASE across that call.
*/
case BC_VARG:
|  ins_ABC  // RA = base, RB = nresults+1, RC = numparams
|  lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
|  lea RA, [BASE+RA*8]
|  sub TMPR, [BASE-8]
|  // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
|  test RB, RB
|  jz >5  // Copy all varargs?
|  lea RB, [RA+RB*8-8]
|  cmp TMPR, BASE  // No vararg slots?
|  jnb >2
|1:  // Copy vararg slots to destination slots.
|  mov RC, [TMPR-16]
|  add TMPR, 8
|  mov [RA], RC
|  add RA, 8
|  cmp RA, RB  // All destination slots filled?
|  jnb >3
|  cmp TMPR, BASE  // No more vararg slots?
|  jb <1
|2:  // Fill up remainder with nil.
|  mov aword [RA], LJ_TNIL
|  add RA, 8
|  cmp RA, RB
|  jb <2
|3:
|  ins_next
|
|5:  // Copy all varargs.
|  mov MULTRES, 1  // MULTRES = 0+1
|  mov RC, BASE
|  sub RC, TMPR
|  jbe <3  // No vararg slots?
|  mov RBd, RCd
|  shr RBd, 3
|  add RBd, 1
|  mov MULTRES, RBd  // MULTRES = #varargs+1
|  mov L:RB, SAVE_L
|  add RC, RA
|  cmp RC, L:RB->maxstack
|  ja >7  // Need to grow stack?
|6:  // Copy all vararg slots.
|  mov RC, [TMPR-16]
|  add TMPR, 8
|  mov [RA], RC
|  add RA, 8
|  cmp TMPR, BASE  // No more vararg slots?
|  jb <6
|  jmp <3
|
|7:  // Grow stack for varargs.
|  mov L:RB->base, BASE
|  mov L:RB->top, RA
|  mov SAVE_PC, PC
|  sub TMPR, BASE  // Need delta, because BASE may change.
|  mov TMP1hi, TMPRd
|  mov CARG2d, MULTRES
|  sub CARG2d, 1
|  mov CARG1, L:RB
|  call extern lj_state_growstack  // (lua_State *L, int n)
|  mov BASE, L:RB->base
|  movsxd TMPR, TMP1hi
|  mov RA, L:RB->top
|  add TMPR, BASE
|  jmp <6
break;

/* -- Returns ----------------------------------------------------------- */

case BC_RETM:
|  ins_AD  // RA = results, RD = extra_nresults
|  add RDd, MULTRES  // MULTRES >=1, so RD >=1.
|  // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
break;

/*
** Return. One C case emits three specializations selected by `op`:
** BC_RET moves RD-1 results down to [BASE-16].. using KBASE as the cursor,
** BC_RET1 moves exactly one, BC_RET0 none. If the caller expects more
** results (PC_RB > RD), label 6 pads with nil. Frames whose PC word has
** FRAME_TYPE bits set go to label 7: vararg frames are relocated and the
** return is retried, anything else goes to ->vm_return.
*/
case BC_RET: case BC_RET0: case BC_RET1:
|  ins_AD  // RA = results, RD = nresults+1
  if (op != BC_RET0) {
|  shl RAd, 3
  }
|1:
|  mov PC, [BASE-8]
|  mov MULTRES, RDd  // Save nresults+1.
|  test PCd, FRAME_TYPE  // Check frame type marker.
|  jnz >7  // Not returning to a fixarg Lua func?
  switch (op) {
  case BC_RET:
|->BC_RET_Z:
|  mov KBASE, BASE  // Use KBASE for result move.
|  sub RDd, 1
|  jz >3
|2:  // Move results down.
|  mov RB, [KBASE+RA]
|  mov [KBASE-16], RB
|  add KBASE, 8
|  sub RDd, 1
|  jnz <2
|3:
|  mov RDd, MULTRES  // Note: MULTRES may be >255.
|  movzx RBd, PC_RB  // So cannot compare with RDL!
|5:
|  cmp RBd, RDd  // More results expected?
|  ja >6
    break;
  case BC_RET1:
|  mov RB, [BASE+RA]
|  mov [BASE-16], RB
    /* fallthrough */
  case BC_RET0:
|5:
|  cmp PC_RB, RDL  // More results expected?
|  ja >6
  default:
    break;
  }
|  movzx RAd, PC_RA
|  neg RA
|  lea BASE, [BASE+RA*8-16]  // base = base - (RA+2)*8
|  mov LFUNC:KBASE, [BASE-16]
|  cleartp LFUNC:KBASE
|  mov KBASE, LFUNC:KBASE->pc
|  mov KBASE, [KBASE+PC2PROTO(k)]
|  ins_next
|
|6:  // Fill up results with nil.
  if (op == BC_RET) {
|  mov aword [KBASE-16], LJ_TNIL  // Note: relies on shifted base.
|  add KBASE, 8
  } else {
|  mov aword [BASE+RD*8-24], LJ_TNIL
  }
|  add RD, 1
|  jmp <5
|
|7:  // Non-standard return case.
|  lea RB, [PC-FRAME_VARG]
|  test RBd, FRAME_TYPEP
|  jnz ->vm_return
|  // Return from vararg function: relocate BASE down and RA up.
|  sub BASE, RB
  if (op != BC_RET0) {
|  add RA, RB
  }
|  jmp <1
break;

/* -- Loops and branches ------------------------------------------------ */

/* The four consecutive 8-byte loop slots, addressed relative to RA. */
|.define FOR_IDX,  [RA]
|.define FOR_STOP, [RA+8]
|.define FOR_STEP, [RA+16]
|.define FOR_EXT,  [RA+24]

case BC_FORL:
|.if JIT
|  hotloop RBd
|.endif
|  // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
break;

/*
** Numeric for loop. One C case emits four specializations:
**   BC_FORI  / BC_JFORI: loop entry (vk == 0); slots are type-checked and
**                        failures go to ->vmeta_for.
**   BC_IFORL / BC_JFORL: loop back-edge (vk == 1); the step is added to the
**                        index first.
** The J* forms jump to =>BC_JLOOP instead of continuing in the interpreter
** and are only emitted when LJ_HASJIT is set. The current index is copied to
** FOR_EXT. With LJ_DUALNUM an integer variant is emitted first; label 9 is
** its fallback to the floating-point variant, and label 5 / label 3 handle
** a negative step by inverting the comparison. Several conditional jumps
** below consume flags set by an earlier instruction or macro, so statement
** order matters throughout.
*/
case BC_JFORI:
case BC_JFORL:
#if !LJ_HASJIT
break;
#endif
case BC_FORI:
case BC_IFORL:
  vk = (op == BC_IFORL || op == BC_JFORL);
|  ins_AJ  // RA = base, RD = target (after end of loop or start of loop)
|  lea RA, [BASE+RA*8]
  if (LJ_DUALNUM) {
|  mov RB, FOR_IDX
|  checkint RB, >9
|  mov TMPR, FOR_STOP
    if (!vk) {
|  checkint TMPR, ->vmeta_for
|  mov ITYPE, FOR_STEP
|  test ITYPEd, ITYPEd; js >5
|  sar ITYPE, 47;
|  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
    } else {
#ifdef LUA_USE_ASSERT
|  checkinttp FOR_STOP, ->assert_bad_for_arg_type
|  checkinttp FOR_STEP, ->assert_bad_for_arg_type
#endif
|  mov ITYPE, FOR_STEP
|  test ITYPEd, ITYPEd; js >5
|  add RBd, ITYPEd; jo >1
|  setint RB
|  mov FOR_IDX, RB
    }
|  cmp RBd, TMPRd
|  mov FOR_EXT, RB
    if (op == BC_FORI) {
|  jle >7
|1:
|6:
|  branchPC RD
    } else if (op == BC_JFORI) {
|  branchPC RD
|  movzx RDd, PC_RD
|  jle =>BC_JLOOP
|1:
|6:
    } else if (op == BC_IFORL) {
|  jg >7
|6:
|  branchPC RD
|1:
    } else {
|  jle =>BC_JLOOP
|1:
|6:
    }
|7:
|  ins_next
|
|5:  // Invert check for negative step.
    if (!vk) {
|  sar ITYPE, 47;
|  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
    } else {
|  add RBd, ITYPEd; jo <1
|  setint RB
|  mov FOR_IDX, RB
    }
|  cmp RBd, TMPRd
|  mov FOR_EXT, RB
    if (op == BC_FORI) {
|  jge <7
    } else if (op == BC_JFORI) {
|  branchPC RD
|  movzx RDd, PC_RD
|  jge =>BC_JLOOP
    } else if (op == BC_IFORL) {
|  jl <7
    } else {
|  jge =>BC_JLOOP
    }
|  jmp <6
|9:  // Fallback to FP variant.
    if (!vk) {
|  jae ->vmeta_for
    }
  } else if (!vk) {
|  checknumtp FOR_IDX, ->vmeta_for
  }
  if (!vk) {
|  checknumtp FOR_STOP, ->vmeta_for
  } else {
#ifdef LUA_USE_ASSERT
|  checknumtp FOR_STOP, ->assert_bad_for_arg_type
|  checknumtp FOR_STEP, ->assert_bad_for_arg_type
#endif
  }
|  mov RB, FOR_STEP
  if (!vk) {
|  checknum RB, ->vmeta_for
  }
|  movsd xmm0, qword FOR_IDX
|  movsd xmm1, qword FOR_STOP
  if (vk) {
|  addsd xmm0, qword FOR_STEP
|  movsd qword FOR_IDX, xmm0
|  test RB, RB; js >3
  } else {
|  // NOTE(review): this jl presumably consumes flags left by checknum above
|  // (movsd does not alter flags); checknum is defined outside this section.
|  jl >3
  }
|  ucomisd xmm1, xmm0
|1:
|  movsd qword FOR_EXT, xmm0
  if (op == BC_FORI) {
|.if DUALNUM
|  jnb <7
|.else
|  jnb >2
|  branchPC RD
|.endif
  } else if (op == BC_JFORI) {
|  branchPC RD
|  movzx RDd, PC_RD
|  jnb =>BC_JLOOP
  } else if (op == BC_IFORL) {
|.if DUALNUM
|  jb <7
|.else
|  jb >2
|  branchPC RD
|.endif
  } else {
|  jnb =>BC_JLOOP
  }
|.if DUALNUM
|  jmp <6
|.else
|2:
|  ins_next
|.endif
|
|3:  // Invert comparison if step is negative.
|  ucomisd xmm0, xmm1
|  jmp <1
break;

case BC_ITERL:
|.if JIT
|  hotloop RBd
|.endif
|  // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
break;

/*
** Iterator loop back-edge: if BASE[RA] is nil the loop ends (fall through to
** the next instruction). Otherwise the value is copied to BASE[RA-1] and the
** branch is taken; BC_JITERL jumps to =>BC_JLOOP instead.
*/
case BC_JITERL:
#if !LJ_HASJIT
break;
#endif
case BC_IITERL:
|  ins_AJ  // RA = base, RD = target
|  lea RA, [BASE+RA*8]
|  mov RB, [RA]
|  cmp RB, LJ_TNIL; je >1  // Stop if iterator returned nil.
  if (op == BC_JITERL) {
|  mov [RA-8], RB
|  jmp =>BC_JLOOP
  } else {
|  branchPC RD  // Otherwise save control var + branch.
|  mov [RA-8], RB
  }
|1:
|  ins_next
break;

case BC_LOOP:
|  ins_A  // RA = base, RD = target (loop extent)
|  // Note: RA/RD is only used by trace recorder to determine scope/extent
|  // This opcode does NOT jump, its only purpose is to detect a hot loop.
|.if JIT
|  hotloop RBd
|.endif
|  // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
break;

case BC_ILOOP:
|  ins_A  // RA = base, RD = target (loop extent)
|  ins_next
break;

/*
** Enter compiled code (only emitted when JIT is set): looks up trace RD in
** the trace array at DISPATCH_J(trace), loads its mcode address, publishes
** BASE and L to jit_base / tmpbuf.L, saves extra registers and jumps to the
** machine code. On Windows r12-r15 go to the CSAVE_* slots and xmm6-xmm15
** into a freshly reserved stack area; elsewhere rsp is lowered by 16 and
** r12/r13 are stored at [rsp+16] and [rsp+8].
*/
case BC_JLOOP:
|.if JIT
|  ins_AD  // RA = base (ignored), RD = traceno
|  mov RA, [DISPATCH+DISPATCH_J(trace)]
|  mov TRACE:RD, [RA+RD*8]
|  mov RD, TRACE:RD->mcode
|  mov L:RB, SAVE_L
|  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
|  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
|  // Save additional callee-save registers only used in compiled code.
|.if X64WIN
|  mov CSAVE_4, r12
|  mov CSAVE_3, r13
|  mov CSAVE_2, r14
|  mov CSAVE_1, r15
|  mov RA, rsp
|  sub rsp, 10*16+4*8
|  movdqa [RA-1*16], xmm6
|  movdqa [RA-2*16], xmm7
|  movdqa [RA-3*16], xmm8
|  movdqa [RA-4*16], xmm9
|  movdqa [RA-5*16], xmm10
|  movdqa [RA-6*16], xmm11
|  movdqa [RA-7*16], xmm12
|  movdqa [RA-8*16], xmm13
|  movdqa [RA-9*16], xmm14
|  movdqa [RA-10*16], xmm15
|.else
|  sub rsp, 16
|  mov [rsp+16], r12
|  mov [rsp+8], r13
|.endif
|  jmp RD
|.endif
break;

case BC_JMP:
|  ins_AJ  // RA = unused, RD = target
|  branchPC RD
|  ins_next
break;

/* -- Function headers -------------------------------------------------- */

/*
** Reminder: A function may be called with func/args above L->maxstack,
** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
** too. This means all FUNC* ops (including fast functions) must check
** for stack overflow _before_ adding more slots!
*/

case BC_FUNCF:
|.if JIT
|  hotcall RBd
|.endif
case BC_FUNCV:  /* NYI: compiled vararg functions. */
|  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
break;

/*
** Fixed-arg Lua function header: loads KBASE from the prototype, checks
** that the frame top (BASE + framesize) does not exceed L->maxstack
** (->vm_growstack_f otherwise) and fills parameters the caller did not
** pass with nil. BC_JFUNCF then jumps to =>BC_JLOOP.
*/
case BC_JFUNCF:
#if !LJ_HASJIT
break;
#endif
case BC_IFUNCF:
|  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
|  mov KBASE, [PC-4+PC2PROTO(k)]
|  mov L:RB, SAVE_L
|  lea RA, [BASE+RA*8]  // Top of frame.
|  cmp RA, L:RB->maxstack
|  ja ->vm_growstack_f
|  movzx RAd, byte [PC-4+PC2PROTO(numparams)]
|  cmp NARGS:RDd, RAd  // Check for missing parameters.
|  jbe >3
|2:
  if (op == BC_JFUNCF) {
|  movzx RDd, PC_RD
|  jmp =>BC_JLOOP
  } else {
|  ins_next
  }
|
|3:  // Clear missing parameters.
|  mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
|  add NARGS:RDd, 1
|  cmp NARGS:RDd, RAd
|  jbe <3
|  jmp <2
break;

case BC_JFUNCV:
#if !LJ_HASJIT
break;
#endif
|  int3  // NYI: compiled vararg functions
break;  /* NYI: compiled vararg functions. */

/*
** Vararg Lua function header: builds a new frame above the passed arguments
** (frame delta + FRAME_VARG and a copy of the function are stored just below
** the new base), checks the stack limit (->vm_growstack_v), then moves up to
** numparams fixed arguments into the new frame, clearing the old slots, and
** fills any missing parameters with nil.
*/
case BC_IFUNCV:
|  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
|  lea RBd, [NARGS:RD*8+FRAME_VARG+8]
|  lea RD, [BASE+NARGS:RD*8+8]
|  mov LFUNC:KBASE, [BASE-16]
|  mov [RD-8], RB  // Store delta + FRAME_VARG.
|  mov [RD-16], LFUNC:KBASE  // Store copy of LFUNC.
|  mov L:RB, SAVE_L
|  lea RA, [RD+RA*8]
|  cmp RA, L:RB->maxstack
|  ja ->vm_growstack_v  // Need to grow stack.
|  mov RA, BASE
|  mov BASE, RD
|  movzx RBd, byte [PC-4+PC2PROTO(numparams)]
|  test RBd, RBd
|  jz >2
|  add RA, 8
|1:  // Copy fixarg slots up to new frame.
|  add RA, 8
|  cmp RA, BASE
|  jnb >3  // Less args than parameters?
|  mov KBASE, [RA-16]
|  mov [RD], KBASE
|  add RD, 8
|  mov aword [RA-16], LJ_TNIL  // Clear old fixarg slot (help the GC).
|  sub RBd, 1
|  jnz <1
|2:
  if (op == BC_JFUNCV) {
|  movzx RDd, PC_RD
|  jmp =>BC_JLOOP
  } else {
|  mov KBASE, [PC-4+PC2PROTO(k)]
|  ins_next
  }
|
|3:  // Clear missing parameters.
|  mov aword [RD], LJ_TNIL
|  add RD, 8
|  sub RBd, 1
|  jnz <3
|  jmp <2
break;

case BC_FUNCC:
case BC_FUNCCW:
|  ins_AD  // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
|  mov CFUNC:RB, [BASE-16]
|  cleartp CFUNC:RB
|  mov KBASE, CFUNC:RB->f
|  mov L:RB, SAVE_L
|  lea RD, [BASE+NARGS:RD*8-8]
|  mov L:RB->base, BASE
|  lea RA, [RD+8*LUA_MINSTACK]
|  cmp RA, L:RB->maxstack
|  mov L:RB->top, RD
  if (op == BC_FUNCC) {
|  mov CARG1, L:RB  // Caveat: CARG1 may be RA.
  } else {
|  mov CARG2, KBASE
|  mov CARG1, L:RB  // Caveat: CARG1 may be RA.
  }
|  ja ->vm_growstack_c  // Need to grow stack.
|  set_vmstate C
  if (op == BC_FUNCC) {
|  call KBASE  // (lua_State *L)
  } else {
|  // (lua_State *L, lua_CFunction f)
|  call aword [DISPATCH+DISPATCH_GL(wrapf)]
  }
|  // nresults returned in eax (RD).
|  mov BASE, L:RB->base
|  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
|  set_vmstate INTERP
|  lea RA, [BASE+RD*8]
|  neg RA
|  add RA, L:RB->top  // RA = (L->top-(L->base+nresults))*8
|  mov PC, [BASE-8]  // Fetch PC of caller.
4659     | jmp ->vm_returnc
4660     break;
4661
4662   /* ---------------------------------------------------------------------- */
4663
4664   default:
4665     fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4666     exit(2);
4667     break;
4668   }
4669 }
4670
     /*
     ** Top-level driver for emitting the VM backend.
     ** Calls dasm_growpc() with BC__MAX (DynASM PC-label capacity), runs
     ** build_subroutines() first, then selects the code_op section and calls
     ** build_ins() once per opcode value 0..BC__MAX-1 in ascending order.
     ** Always returns BC__MAX.
     */
4671 static int build_backend(BuildCtx *ctx)
4672 {
4673   int op;
4674   dasm_growpc(Dst, BC__MAX);
4675   build_subroutines(ctx);
4676   |.code_op
4677   for (op = 0; op < BC__MAX; op++)
4678     build_ins(ctx, (BCOp)op, op);
4679   return BC__MAX;
4680 }
4681
4682 /* Emit pseudo frame-info for all assembler functions. */
     /*
     ** Writes hand-encoded DWARF call frame information as assembler
     ** directives to ctx->fp. What is written depends on ctx->mode:
     **   BUILD_elfasm:  a .debug_frame CIE plus an FDE starting at .Lbegin
     **                  (and, with LJ_HASFFI, an FDE for lj_vm_ffi_call);
     **                  unless LJ_NO_UNWIND is set, the same information is
     **                  written again in .eh_frame form.
     **   BUILD_machasm: (only compiled in when !LJ_NO_UNWIND) one __eh_frame
     **                  FDE per non-empty entry of ctx->sym[], with
     **                  _lj_vm_ffi_call handled separately.
     **   other modes:   nothing is written.
     */
4683 static void emit_asm_debug(BuildCtx *ctx)
4684 {
       /* Byte offset of the vm_ffi_call global relative to ctx->code. */
4685   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
4686   switch (ctx->mode) {
4687   case BUILD_elfasm:
4688     fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
         /* Common CIE for the .debug_frame FDEs below. */
4689     fprintf(ctx->fp,
4690         ".Lframe0:\n"
4691         "\t.long .LECIE0-.LSCIE0\n" /* CIE length */
4692         ".LSCIE0:\n"
4693         "\t.long 0xffffffff\n" /* CIE id (.debug_frame) */
4694         "\t.byte 0x1\n" /* CIE version */
4695         "\t.string \"\"\n" /* empty augmentation string */
4696         "\t.uleb128 0x1\n" /* code alignment factor */
4697         "\t.sleb128 -8\n" /* data alignment factor */
4698         "\t.byte 0x10\n" /* return address register (DWARF reg 16) */
4699         "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" /* def_cfa rsp, 8 */
4700         "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" /* offset return address */
4701         "\t.align 8\n"
4702         ".LECIE0:\n\n");
         /* FDE for the fcofs bytes of code starting at .Lbegin. */
4703     fprintf(ctx->fp,
4704         ".LSFDE0:\n"
4705         "\t.long .LEFDE0-.LASFDE0\n" /* FDE length */
4706         ".LASFDE0:\n"
4707         "\t.long .Lframe0\n" /* CIE pointer */
4708         "\t.quad .Lbegin\n" /* initial location */
4709         "\t.quad %d\n" /* address range (fcofs) */
4710         "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4711         "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4712         "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4713         "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4714         "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4715 #if LJ_NO_UNWIND
4716         "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4717         "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4718 #endif
4719         "\t.align 8\n"
4720         ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4721 #if LJ_HASFFI
         /* Separate FDE for lj_vm_ffi_call (rbp-based CFA); its range is the
         ** remaining ctx->codesz - fcofs bytes.
         */
4722     fprintf(ctx->fp,
4723         ".LSFDE1:\n"
4724         "\t.long .LEFDE1-.LASFDE1\n"
4725         ".LASFDE1:\n"
4726         "\t.long .Lframe0\n"
4727         "\t.quad 
lj_vm_ffi_call\n"
4728         "\t.quad %d\n"
4729         "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4730         "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4731         "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4732         "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4733         "\t.align 8\n"
4734         ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4735 #endif
4736 #if !LJ_NO_UNWIND
         /* Same frame description again, this time in .eh_frame form. */
4737 #if (defined(__sun__) && defined(__svr4__))
4738     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4739 #else
4740     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4741 #endif
         /* CIE with personality routine lj_err_unwind_dwarf. */
4742     fprintf(ctx->fp,
4743         ".Lframe1:\n"
4744         "\t.long .LECIE1-.LSCIE1\n"
4745         ".LSCIE1:\n"
4746         "\t.long 0\n" /* CIE id (.eh_frame) */
4747         "\t.byte 0x1\n" /* CIE version */
4748         "\t.string \"zPR\"\n" /* augmentation: personality + FDE encoding */
4749         "\t.uleb128 0x1\n" /* code alignment factor */
4750         "\t.sleb128 -8\n" /* data alignment factor */
4751         "\t.byte 0x10\n" /* return address register (DWARF reg 16) */
4752         "\t.uleb128 6\n" /* augmentation length */
4753         "\t.byte 0x1b\n" /* pcrel|sdata4 */
4754         "\t.long lj_err_unwind_dwarf-.\n" /* personality routine */
4755         "\t.byte 0x1b\n" /* pcrel|sdata4 */
4756         "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" /* def_cfa rsp, 8 */
4757         "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" /* offset return address */
4758         "\t.align 8\n"
4759         ".LECIE1:\n\n");
         /* FDE for the fcofs bytes of code starting at .Lbegin. */
4760     fprintf(ctx->fp,
4761         ".LSFDE2:\n"
4762         "\t.long .LEFDE2-.LASFDE2\n"
4763         ".LASFDE2:\n"
4764         "\t.long .LASFDE2-.Lframe1\n" /* CIE pointer (relative) */
4765         "\t.long .Lbegin-.\n" /* initial location (pc-relative) */
4766         "\t.long %d\n" /* address range (fcofs) */
4767         "\t.uleb128 0\n" /* augmentation length */
4768         "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4769         "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4770         "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4771         "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4772         "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4773         "\t.align 8\n"
4774         ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4775 #if LJ_HASFFI
         /* lj_vm_ffi_call gets its own CIE without a personality routine. */
4776     fprintf(ctx->fp,
4777         ".Lframe2:\n"
4778         "\t.long .LECIE2-.LSCIE2\n"
4779         ".LSCIE2:\n"
4780         "\t.long 0\n" /* CIE id (.eh_frame) */
4781         "\t.byte 0x1\n" /* CIE version */
4782         "\t.string \"zR\"\n" /* augmentation: FDE encoding only */
4783         "\t.uleb128 0x1\n" /* code alignment factor */
4784         "\t.sleb128 -8\n" /* data alignment factor */
4785         "\t.byte 0x10\n" /* return address register (DWARF reg 16) */
4786         "\t.uleb128 1\n" /* augmentation length */
4787         "\t.byte 0x1b\n" /* pcrel|sdata4 */
4788         "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" /* def_cfa rsp, 8 */
4789         "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" /* offset return address */
4790         "\t.align 8\n"
4791         ".LECIE2:\n\n");
4792     fprintf(ctx->fp,
4793         ".LSFDE3:\n"
4794         "\t.long .LEFDE3-.LASFDE3\n"
4795         ".LASFDE3:\n"
4796         "\t.long .LASFDE3-.Lframe2\n" /* CIE pointer (relative) */
4797         "\t.long lj_vm_ffi_call-.\n" /* initial location (pc-relative) */
4798         "\t.long %d\n" /* address range (codesz - fcofs) */
4799         "\t.uleb128 0\n" /* augmentation length */
4800         "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4801         "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4802         "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4803         "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4804         "\t.align 8\n"
4805         ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4806 #endif
4807 #endif
4808     break;
4809 #if !LJ_NO_UNWIND
4810   /* Mental note: never let Apple design an assembler.
4811   ** Or a linker. Or a plastic case. But I digress.
4812   */
4813   case BUILD_machasm: {
4814 #if LJ_HASFFI
4815     int fcsize = 0; /* size of _lj_vm_ffi_call; stays 0 if it is not found */
4816 #endif
4817     int i;
4818     fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
         /* CIE shared by the per-symbol FDEs. All fields are written with
         ** plain .byte directives here instead of .uleb128/.sleb128.
         */
4819     fprintf(ctx->fp,
4820         "EH_frame1:\n"
4821         "\t.set L$set$x,LECIEX-LSCIEX\n"
4822         "\t.long L$set$x\n" /* CIE length */
4823         "LSCIEX:\n"
4824         "\t.long 0\n" /* CIE id (.eh_frame) */
4825         "\t.byte 0x1\n" /* CIE version */
4826         "\t.ascii \"zPR\\0\"\n" /* augmentation: personality + FDE encoding */
4827         "\t.byte 0x1\n" /* code alignment factor */
4828         "\t.byte 128-8\n" /* data alignment factor: -8 as one-byte sleb128 */
4829         "\t.byte 0x10\n" /* return address register (DWARF reg 16) */
4830         "\t.byte 6\n" /* augmentation length */
4831         "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4832         "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" /* personality routine */
4833         "\t.byte 0x1b\n" /* pcrel|sdata4 */
4834         "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" /* def_cfa rsp, 8 */
4835         "\t.byte 0x80+0x10\n\t.byte 0x1\n" /* offset return address */
4836         "\t.align 3\n"
4837         "LECIEX:\n\n");
         /* One FDE per symbol; labels are made unique with the symbol index. */
4838     for (i = 0; i < ctx->nsym; i++) {
4839       const char *name = ctx->sym[i].name;
           /* Size = distance to the next symbol's offset. Reads sym[i+1], so
           ** this presumably relies on an extra entry after the last symbol
           ** -- TODO confirm against where ctx->sym is filled in.
           */
4840       int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
4841       if (size == 0) continue; /* nothing to describe for empty symbols */
4842 #if LJ_HASFFI
           /* Described separately below (EH_frame2); just remember its size. */
4843       if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
4844 #endif
4845       fprintf(ctx->fp,
4846           "%s.eh:\n"
4847           "LSFDE%d:\n"
4848           "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4849           "\t.long 
L$set$%d\n"
4850           "LASFDE%d:\n"
4851           "\t.long LASFDE%d-EH_frame1\n" /* CIE pointer (relative) */
4852           "\t.long %s-.\n" /* initial location (pc-relative) */
4853           "\t.long %d\n" /* address range (size) */
4854           "\t.byte 0\n" /* augmentation length */
4855           "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset (one byte: assumes CFRAME_SIZE < 128 -- TODO confirm) */
4856           "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4857           "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4858           "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4859           "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4860           "\t.align 3\n"
4861           "LEFDE%d:\n\n",
4862           name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4863     }
4864 #if LJ_HASFFI
         /* Only written if the loop above actually saw _lj_vm_ffi_call. */
4865     if (fcsize) {
4866       fprintf(ctx->fp,
4867           "EH_frame2:\n"
4868           "\t.set L$set$y,LECIEY-LSCIEY\n"
4869           "\t.long L$set$y\n" /* CIE length */
4870           "LSCIEY:\n"
4871           "\t.long 0\n" /* CIE id (.eh_frame) */
4872           "\t.byte 0x1\n" /* CIE version */
4873           "\t.ascii \"zR\\0\"\n" /* augmentation: FDE encoding only */
4874           "\t.byte 0x1\n" /* code alignment factor */
4875           "\t.byte 128-8\n" /* data alignment factor: -8 as one-byte sleb128 */
4876           "\t.byte 0x10\n" /* return address register (DWARF reg 16) */
4877           "\t.byte 1\n" /* augmentation length */
4878           "\t.byte 0x1b\n" /* pcrel|sdata4 */
4879           "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" /* def_cfa rsp, 8 */
4880           "\t.byte 0x80+0x10\n\t.byte 0x1\n" /* offset return address */
4881           "\t.align 3\n"
4882           "LECIEY:\n\n");
4883       fprintf(ctx->fp,
4884           "_lj_vm_ffi_call.eh:\n"
4885           "LSFDEY:\n"
4886           "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4887           "\t.long L$set$yy\n"
4888           "LASFDEY:\n"
4889           "\t.long LASFDEY-EH_frame2\n" /* CIE pointer (relative) */
4890           "\t.long _lj_vm_ffi_call-.\n" /* initial location (pc-relative) */
4891           "\t.long %d\n" /* address range (fcsize) */
4892           "\t.byte 0\n" /* augmentation length */
4893           "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4894           "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4895           "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4896           "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4897           "\t.align 3\n"
4898           "LEFDEY:\n\n", fcsize);
4899     }
4900 #endif
4901     fprintf(ctx->fp, ".subsections_via_symbols\n");
4902     }
4903     break;
4904 #endif
4905   default: /* Difficult for other modes. */
4906     break;
4907   }
4908 }
4909 