vm_x86.dasc (157287B)
|// Low-level VM code for x86 CPUs.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
|
|// Lua 5.2 modifications: ESETV, setmetatable __gc.
|// Copyright (C) 2014 Karel Tuma. See Copyright Notice in luajit.h
|
|// Target selection: assemble for x64 when P64 is set, for x86 otherwise.
|.if P64
|.arch x64
|.else
|.arch x86
|.endif
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|//-----------------------------------------------------------------------
|
|// Derived build flags: X64 for every P64 build, X64WIN for P64 plus WIN.
|.if P64
|.define X64, 1
|.if WIN
|.define X64WIN, 1
|.endif
|.endif
|
|// Fixed register assignments for the interpreter.
|// This is very fragile and has many dependencies. Caveat emptor.
|// Three variants follow: x86 (not X64), X64WIN, and the remaining x64 builds.
|// Names with an 'a' suffix (KBASEa, PCa) alias the same register at
|// address width, i.e. the full 64 bit register on x64.
|.define BASE, edx  // Not C callee-save, refetched anyway.
|.if not X64
|.define KBASE, edi  // Must be C callee-save.
|.define KBASEa, KBASE
|.define PC, esi  // Must be C callee-save.
|.define PCa, PC
|.define DISPATCH, ebx  // Must be C callee-save.
|.elif X64WIN
|.define KBASE, edi  // Must be C callee-save.
|.define KBASEa, rdi
|.define PC, esi  // Must be C callee-save.
|.define PCa, rsi
|.define DISPATCH, ebx  // Must be C callee-save.
|.else
|.define KBASE, r15d  // Must be C callee-save.
|.define KBASEa, r15
|.define PC, ebx  // Must be C callee-save.
|.define PCa, rbx
|.define DISPATCH, r14d  // Must be C callee-save.
|.endif
|
|// Operand registers. RAH/RAL are the high/low byte views of RA (ecx).
|.define RA, ecx
|.define RAH, ch
|.define RAL, cl
|.define RB, ebp  // Must be ebp (C callee-save).
|.define RC, eax  // Must be eax.
|// 16 bit, high byte and low byte views of eax.
|.define RCW, ax
|.define RCH, ah
|.define RCL, al
|// OP shares its register with RB; RD (and its views) share with RC.
|.define OP, RB
|.define RD, RC
|.define RDW, RCW
|.define RDL, RCL
|// Address-width aliases of RA/RB/RC/RD: 64 bit registers on x64,
|// identical to the 32 bit names on x86.
|.if X64
|.define RAa, rcx
|.define RBa, rbp
|.define RCa, rax
|.define RDa, rax
|.else
|.define RAa, RA
|.define RBa, RB
|.define RCa, RC
|.define RDa, RD
|.endif
|
|// C call argument registers. CARGn are the 64 bit names, CARGnd their
|// 32 bit views. FCARG1/FCARG2 are defined for all three variants.
|.if not X64
|.define FCARG1, ecx  // x86 fastcall arguments.
|.define FCARG2, edx
|.elif X64WIN
|.define CARG1, rcx  // x64/WIN64 C call arguments.
|.define CARG2, rdx
|.define CARG3, r8
|.define CARG4, r9
|.define CARG1d, ecx
|.define CARG2d, edx
|.define CARG3d, r8d
|.define CARG4d, r9d
|.define FCARG1, CARG1d  // Upwards compatible to x86 fastcall.
|.define FCARG2, CARG2d
|.else
|.define CARG1, rdi  // x64/POSIX C call arguments.
|.define CARG2, rsi
|.define CARG3, rdx
|.define CARG4, rcx
|.define CARG5, r8
|.define CARG6, r9
|.define CARG1d, edi
|.define CARG2d, esi
|.define CARG3d, edx
|.define CARG4d, ecx
|.define CARG5d, r8d
|.define CARG6d, r9d
|.define FCARG1, CARG1d  // Simulate x86 fastcall.
|.define FCARG2, CARG2d
|.endif
|
|// Type definitions. Some of these are only used for documentation.
|// They enable the TYPE:reg->field operand syntax used throughout the file.
|.type L, lua_State
|.type GL, global_State
|.type TVALUE, TValue
|.type GCOBJ, GCobj
|.type STR, GCstr
|.type TAB, GCtab
|.type LFUNC, GCfuncL
|.type CFUNC, GCfuncC
|.type PROTO, GCproto
|.type UPVAL, GCupval
|.type NODE, Node
|.type NARGS, int
|.type TRACE, GCtrace
|.type SBUF, SBuf
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|//-----------------------------------------------------------------------
|.if not X64  // x86 stack layout.
|
|.if WIN
|
|.define CFRAME_SPACE, aword*9  // Delta for esp (see <--).
|// x86/WIN: saveregs_ pushes edi/esi/ebx, then pushes lj_err_unwind_win and
|// the old fs:[0] value and stores esp to fs:[0] (these two slots are named
|// SEH_FUNC/SEH_NEXT below), then reserves CFRAME_SPACE.
|.macro saveregs_
|  push edi; push esi; push ebx
|  push extern lj_err_unwind_win
|  fs; push dword [0]
|  fs; mov [0], esp
|  sub esp, CFRAME_SPACE
|.endmacro
|// Exact inverse of saveregs (ebp included): restores fs:[0], skips the
|// handler slot and pops the saved registers.
|.macro restoreregs
|  add esp, CFRAME_SPACE
|  fs; pop dword [0]
|  pop edi  // Short for esp += 4.
|  pop ebx; pop esi; pop edi; pop ebp
|.endmacro
|
|.else
|
|.define CFRAME_SPACE, aword*7  // Delta for esp (see <--).
|// x86 without WIN: only the register pushes and the frame reservation.
|.macro saveregs_
|  push edi; push esi; push ebx
|  sub esp, CFRAME_SPACE
|.endmacro
|.macro restoreregs
|  add esp, CFRAME_SPACE
|  pop ebx; pop esi; pop edi; pop ebp
|.endmacro
|
|.endif
|
|// Full prologue: ebp first, then the variant-specific saveregs_.
|.macro saveregs
|  push ebp; saveregs_
|.endmacro
|
|// Named stack slots, highest address first. All offsets are relative to
|// esp while in the interpreter (i.e. after saveregs).
|.if WIN
|.define SAVE_ERRF, aword [esp+aword*19]  // vm_pcall/vm_cpcall only.
|.define SAVE_NRES, aword [esp+aword*18]
|.define SAVE_CFRAME, aword [esp+aword*17]
|.define SAVE_L, aword [esp+aword*16]
|//----- 16 byte aligned, ^^^ arguments from C caller
|.define SAVE_RET, aword [esp+aword*15]  //<-- esp entering interpreter.
|.define SAVE_R4, aword [esp+aword*14]
|.define SAVE_R3, aword [esp+aword*13]
|.define SAVE_R2, aword [esp+aword*12]
|//----- 16 byte aligned
|.define SAVE_R1, aword [esp+aword*11]
|.define SEH_FUNC, aword [esp+aword*10]
|.define SEH_NEXT, aword [esp+aword*9]  //<-- esp after register saves.
|.define UNUSED2, aword [esp+aword*8]
|//----- 16 byte aligned
|.define UNUSED1, aword [esp+aword*7]
|.define SAVE_PC, aword [esp+aword*6]
|.define TMP2, aword [esp+aword*5]
|.define TMP1, aword [esp+aword*4]
|//----- 16 byte aligned
|.define ARG4, aword [esp+aword*3]
|.define ARG3, aword [esp+aword*2]
|.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp]  //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
|.else
|.define SAVE_ERRF, aword [esp+aword*15]  // vm_pcall/vm_cpcall only.
|.define SAVE_NRES, aword [esp+aword*14]
|.define SAVE_CFRAME, aword [esp+aword*13]
|.define SAVE_L, aword [esp+aword*12]
|//----- 16 byte aligned, ^^^ arguments from C caller
|.define SAVE_RET, aword [esp+aword*11]  //<-- esp entering interpreter.
|.define SAVE_R4, aword [esp+aword*10]
|.define SAVE_R3, aword [esp+aword*9]
|.define SAVE_R2, aword [esp+aword*8]
|//----- 16 byte aligned
|.define SAVE_R1, aword [esp+aword*7]  //<-- esp after register saves.
|.define SAVE_PC, aword [esp+aword*6]
|.define TMP2, aword [esp+aword*5]
|.define TMP1, aword [esp+aword*4]
|//----- 16 byte aligned
|.define ARG4, aword [esp+aword*3]
|.define ARG3, aword [esp+aword*2]
|.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp]  //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
|.endif
|
|// The aliases below reuse slots defined above; writing one clobbers the
|// slot(s) it overlaps.
|// FPARGx overlaps ARGx and ARG(x+1) on x86.
|.define FPARG3, qword [esp+qword*1]
|.define FPARG1, qword [esp]
|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
|.define TMPQ, qword [esp+aword*4]
|.define TMP3, ARG4
|.define ARG5, TMP1
|.define TMPa, TMP1
|.define MULTRES, TMP2
|
|// Incoming C arguments are named by the save slot occupying the same
|// location, so they must be read before that slot is written.
|// Arguments for vm_call and vm_pcall.
|.define INARG_BASE, SAVE_CFRAME  // Overwritten by SAVE_CFRAME!
|
|// Arguments for vm_cpcall.
|.define INARG_CP_CALL, SAVE_ERRF
|.define INARG_CP_UD, SAVE_NRES
|.define INARG_CP_FUNC, SAVE_CFRAME
|
|//-----------------------------------------------------------------------
|.elif X64WIN  // x64/Windows stack layout
|
|.define CFRAME_SPACE, aword*5  // Delta for rsp (see <--).
|// x64/Windows prologue: rbp (in saveregs), then rdi/rsi/rbx, then reserve
|// CFRAME_SPACE. restoreregs undoes all of it, including rbp.
|.macro saveregs_
|  push rdi; push rsi; push rbx
|  sub rsp, CFRAME_SPACE
|.endmacro
|.macro saveregs
|  push rbp; saveregs_
|.endmacro
|.macro restoreregs
|  add rsp, CFRAME_SPACE
|  pop rbx; pop rsi; pop rdi; pop rbp
|.endmacro
|
|// Named stack slots relative to rsp while in the interpreter. The slots
|// above SAVE_RET are addressed partly in aword and partly in dword units.
|.define SAVE_CFRAME, aword [rsp+aword*13]
|.define SAVE_PC, dword [rsp+dword*25]
|.define SAVE_L, dword [rsp+dword*24]
|.define SAVE_ERRF, dword [rsp+dword*23]
|.define SAVE_NRES, dword [rsp+dword*22]
|.define TMP2, dword [rsp+dword*21]
|.define TMP1, dword [rsp+dword*20]
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
|.define SAVE_RET, aword [rsp+aword*9]  //<-- rsp entering interpreter.
|.define SAVE_R4, aword [rsp+aword*8]
|.define SAVE_R3, aword [rsp+aword*7]
|.define SAVE_R2, aword [rsp+aword*6]
|.define SAVE_R1, aword [rsp+aword*5]  //<-- rsp after register saves.
|.define ARG5, aword [rsp+aword*4]
|.define CSAVE_4, aword [rsp+aword*3]
|.define CSAVE_3, aword [rsp+aword*2]
|.define CSAVE_2, aword [rsp+aword*1]
|.define CSAVE_1, aword [rsp]  //<-- rsp while in interpreter.
|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
|
|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
|// TMPa, ARG5d and TMP3 all name the ARG5 slot.
|.define TMPQ, qword [rsp+aword*10]
|.define MULTRES, TMP2
|.define TMPa, ARG5
|.define ARG5d, dword [rsp+aword*4]
|.define TMP3, ARG5d
|
|//-----------------------------------------------------------------------
|.else  // x64/POSIX stack layout
|
|.define CFRAME_SPACE, aword*5  // Delta for rsp (see <--).
|// x64/POSIX prologue: rbp (in saveregs), then rbx/r15/r14, then reserve
|// CFRAME_SPACE. With NO_UNWIND set, r13 and r12 are pushed as well, which
|// shifts the SAVE_R*/SAVE_RET slots up by two awords (see below).
|.macro saveregs_
|  push rbx; push r15; push r14
|.if NO_UNWIND
|  push r13; push r12
|.endif
|  sub rsp, CFRAME_SPACE
|.endmacro
|.macro saveregs
|  push rbp; saveregs_
|.endmacro
|.macro restoreregs
|  add rsp, CFRAME_SPACE
|.if NO_UNWIND
|  pop r12; pop r13
|.endif
|  pop r14; pop r15; pop rbx; pop rbp
|.endmacro
|
|// Named stack slots relative to rsp while in the interpreter.
|//----- 16 byte aligned,
|.if NO_UNWIND
|.define SAVE_RET, aword [rsp+aword*11]  //<-- rsp entering interpreter.
|.define SAVE_R4, aword [rsp+aword*10]
|.define SAVE_R3, aword [rsp+aword*9]
|.define SAVE_R2, aword [rsp+aword*8]
|.define SAVE_R1, aword [rsp+aword*7]
|.define SAVE_RU2, aword [rsp+aword*6]
|.define SAVE_RU1, aword [rsp+aword*5]  //<-- rsp after register saves.
|.else
|.define SAVE_RET, aword [rsp+aword*9]  //<-- rsp entering interpreter.
|.define SAVE_R4, aword [rsp+aword*8]
|.define SAVE_R3, aword [rsp+aword*7]
|.define SAVE_R2, aword [rsp+aword*6]
|.define SAVE_R1, aword [rsp+aword*5]  //<-- rsp after register saves.
|.endif
|// The slots below lie inside the CFRAME_SPACE area and are identical for
|// both NO_UNWIND settings.
|.define SAVE_CFRAME, aword [rsp+aword*4]
|.define SAVE_PC, dword [rsp+dword*7]
|.define SAVE_L, dword [rsp+dword*6]
|.define SAVE_ERRF, dword [rsp+dword*5]
|.define SAVE_NRES, dword [rsp+dword*4]
|.define TMPa, aword [rsp+aword*1]
|.define TMP2, dword [rsp+dword*1]
|.define TMP1, dword [rsp]  //<-- rsp while in interpreter.
|//----- 16 byte aligned
|
|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
|// TMP3 is the low dword of the TMPa slot.
|.define TMPQ, qword [rsp]
|.define TMP3, dword [rsp+aword*1]
|.define MULTRES, TMP2
|
|.endif
|
|//-----------------------------------------------------------------------
|
|// Instruction headers.
|// One header macro per operand format. They refine what the dispatch code
|// already decoded (RA, and the upper 16 instruction bits in RC/RD):
|// ins_ABC splits RC into RB (high byte) and RC (low byte), ins_AB_ and
|// ins_A_C extract only one of the two, ins_AND complements RD.
|.macro ins_A; .endmacro
|.macro ins_AD; .endmacro
|.macro ins_AJ; .endmacro
|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
|.macro ins_AB_; movzx RB, RCH; .endmacro
|.macro ins_A_C; movzx RC, RCL; .endmacro
|.macro ins_AND; not RDa; .endmacro
|
|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
|// Out: OP = instruction byte 0, RA = byte 1, RC/RD = upper 16 bits,
|// PC = address of the following instruction. Jumps through the table of
|// address-sized entries at DISPATCH, indexed by OP.
|.macro ins_NEXT
|  mov RC, [PC]
|  movzx RA, RCH
|  movzx OP, RCL
|  add PC, 4
|  shr RC, 16
|.if X64
|  jmp aword [DISPATCH+OP*8]
|.else
|  jmp aword [DISPATCH+OP*4]
|.endif
|.endmacro
|
|// Re-decode RA and RC/RD from the instruction just before PC.
|// Does not touch OP or PC and does not dispatch.
|.macro ins_refetch
|  mov RC, [PC-4]
|  movzx RA, RCH
|  shr RC, 16
|.endmacro
|
|// Instruction footer.
|.if 1
|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|  .define ins_next, ins_NEXT
|  .define ins_next_, ins_NEXT
|.else
|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|  // Affects only certain kinds of benchmarks (and only with -j off).
|  // Around 10%-30% slower on Core2, a lot slower on P4.
|  .macro ins_next
|    jmp ->ins_next
|  .endmacro
|  .macro ins_next_
|  ->ins_next:
|    ins_NEXT
|  .endmacro
|.endif
|
|// Call decode and dispatch.
|// Loads PC from the function in RB and dispatches on its first
|// instruction. Only OP and RA are decoded; RD is left untouched so it
|// still holds nargs+1 for the callee.
|.macro ins_callt
|  // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
|  mov PC, LFUNC:RB->pc
|  mov RA, [PC]
|  movzx OP, RAL
|  movzx RA, RAH
|  add PC, 4
|.if X64
|  jmp aword [DISPATCH+OP*8]
|.else
|  jmp aword [DISPATCH+OP*4]
|.endif
|.endmacro
|
|// Like ins_callt, but first stores the caller PC into the new frame.
|.macro ins_call
|  // BASE = new base, RB = LFUNC, RD = nargs+1
|  mov [BASE-4], PC
|  ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to test operand types.
|// checktp compares the dword at [BASE+reg*8+4] (upper half of the 8 byte
|// stack slot) against tp and leaves the result in the flags.
|// checknum branches to target when that dword is >= LJ_TISNUM (unsigned);
|// checkint/checkstr/checktab branch when it differs from the given tag.
|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro
|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro
|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
|
|// These operands must be used with movzx.
|// They address the fields of the instruction immediately before PC.
|.define PC_OP, byte [PC-4]
|.define PC_RA, byte [PC-3]
|.define PC_RB, byte [PC-1]
|.define PC_RC, byte [PC-2]
|.define PC_RD, word [PC-2]
|
|// PC += (reg - BCBIAS_J) * 4, i.e. a jump by a biased instruction count.
|.macro branchPC, reg
|  lea PC, [PC+reg*4-BCBIAS_J*4]
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
|// Offsets of global_State / jit_State fields relative to DISPATCH.
#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// Decrement hashed hotcount and trigger trace recorder if zero.
|// The counter offset is (PC >> 1) & HOTCOUNT_PCMASK, relative to
|// DISPATCH+GG_DISP2HOT. The branch is taken when the 16 bit subtraction
|// borrows. Overwrites reg.
|.macro hotloop, reg
|  mov reg, PC
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
|  jb ->vm_hotloop
|.endmacro
|
|// Same as hotloop, but subtracts HOTCOUNT_CALL and branches to vm_hotcall.
|.macro hotcall, reg
|  mov reg, PC
|  shr reg, 1
|  and reg, HOTCOUNT_PCMASK
|  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
|  jb ->vm_hotcall
|.endmacro
|
|// Set current VM state.
|// Stores the complement of LJ_VMST_<st> into the vmstate field.
|.macro set_vmstate, st
|  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
|.endmacro
|
|// x87 compares.
|.macro fcomparepp  // Compare and pop st0 >< st1.
|  fucomip st1
|  fpop
|.endmacro
|
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
|.macro sseconst_abs, reg, tmp  // Synthesize abs mask.
|// x64: load the 64 bit mask through tmp. x86: build it in place --
|// all ones (pcmpeqd), then shift each qword right by one bit; tmp is unused.
|.if X64
|  mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
|.else
|  pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1
|.endif
|.endmacro
|
|// val is given as 8 hex digits without prefix. On x86 the pshufd with
|// 0x51 moves the constant from dword 0 to dword 1 and fills the other
|// dwords from the zeroed upper lanes.
|.macro sseconst_hi, reg, tmp, val  // Synthesize hi-32 bit const.
|.if X64
|  mov64 tmp, U64x(val,00000000); movd reg, tmp
|.else
|  mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51
|.endif
|.endmacro
|
|.macro sseconst_sign, reg, tmp  // Synthesize sign mask.
|  sseconst_hi reg, tmp, 80000000
|.endmacro
|.macro sseconst_1, reg, tmp  // Synthesize 1.0.
|  sseconst_hi reg, tmp, 3ff00000
|.endmacro
|.macro sseconst_m1, reg, tmp  // Synthesize -1.0.
|  sseconst_hi reg, tmp, bff00000
|.endmacro
|.macro sseconst_2p52, reg, tmp  // Synthesize 2^52.
|  sseconst_hi reg, tmp, 43300000
|.endmacro
|.macro sseconst_tobit, reg, tmp  // Synthesize 2^52 + 2^51.
|  sseconst_hi reg, tmp, 43380000
|.endmacro
|
|// Move table write barrier back. Overwrites reg.
|// Clears the black bit of tab and pushes tab onto the front of the
|// gc.grayagain list (old head goes into tab->gclist).
|.macro barrierback, tab, reg
|  and byte tab->marked, (uint8_t)~LJ_GC_BLACK  // black2gray(tab)
|  mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
|  mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
|  mov tab->gclist, reg
|.endmacro
|
|//-----------------------------------------------------------------------

/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
  |.code_sub
  |
  |//-----------------------------------------------------------------------
  |//-- Return handling ----------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// In: BASE = base, RA = resultofs, RD = nresults+1, PC = frame link.
  |// Frames without the FRAME_P bit are handed to cont_dispatch.
  |->vm_returnp:
  |  test PC, FRAME_P
  |  jz ->cont_dispatch
  |
  |  // Return from pcall or xpcall fast func.
  |  and PC, -8
  |  sub BASE, PC  // Restore caller base.
  |  lea RAa, [RA+PC-8]  // Rebase RA and prepend one result.
  |  mov PC, [BASE-4]  // Fetch PC of previous frame.
  |  // Prepending may overwrite the pcall frame, so do it at the end.
  |  mov dword [BASE+RA+4], LJ_TTRUE  // Prepend true to results.
  |
  |// Entered with RD one less than nresults+1 (a result gets prepended).
  |// If the increment wraps to zero, control goes to vm_unwind_yield.
  |->vm_returnc:
  |  add RD, 1  // RD = nresults+1
  |  jz ->vm_unwind_yield
  |  mov MULTRES, RD
  |  test PC, FRAME_TYPE
  |  jz ->BC_RET_Z  // Handle regular return to Lua.
  |
  |->vm_return:
  |  // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
  |  // Flip the FRAME_C bits: any type bit still set afterwards means this
  |  // is not a plain C frame and is handled by vm_returnp.
  |  xor PC, FRAME_C
  |  test PC, FRAME_TYPE
  |  jnz ->vm_returnp
  |
  |  // Return to C.
  |  set_vmstate C
  |  and PC, -8
  |  sub PC, BASE
  |  neg PC  // Previous base = BASE - delta.
  |
  |  sub RD, 1
  |  jz >2
  |1:  // Move results down.
  |  // Copies one 8 byte slot per iteration from [BASE+RA] to [BASE-8].
  |.if X64
  |  mov RBa, [BASE+RA]
  |  mov [BASE-8], RBa
  |.else
  |  mov RB, [BASE+RA]
  |  mov [BASE-8], RB
  |  mov RB, [BASE+RA+4]
  |  mov [BASE-4], RB
  |.endif
  |  add BASE, 8
  |  sub RD, 1
  |  jnz <1
  |2:
  |  mov L:RB, SAVE_L
  |  mov L:RB->base, PC
  |3:
  |  mov RD, MULTRES
  |  mov RA, SAVE_NRES  // RA = wanted nresults+1
  |4:
  |  cmp RA, RD
  |  jne >6  // More/less results wanted?
  |5:
  |  sub BASE, 8
  |  mov L:RB->top, BASE
  |
  |// In: RB = L. Unlinks this C frame from L->cframe and returns 0 in eax.
  |->vm_leave_cp:
  |  mov RAa, SAVE_CFRAME  // Restore previous C frame.
  |  mov L:RB->cframe, RAa
  |  xor eax, eax  // Ok return status for vm_pcall.
  |
  |// Common epilogue: eax already holds the status to return to C.
  |->vm_leave_unw:
  |  restoreregs
  |  ret
  |
  |6:
  |  // Flags are still those of 'cmp RA, RD' at label 4.
  |  jb >7  // Less results wanted?
  |  // More results wanted. Check stack size and fill up results with nil.
  |  cmp BASE, L:RB->maxstack
  |  ja >8
  |  mov dword [BASE-4], LJ_TNIL
  |  add BASE, 8
  |  add RD, 1
  |  jmp <4
  |
  |7:  // Less results wanted.
  |  test RA, RA
  |  jz <5  // But check for LUA_MULTRET+1.
  |  sub RA, RD  // Negative result!
  |  lea BASE, [BASE+RA*8]  // Correct top.
  |  jmp <5
  |
  |8:  // Corner case: need to grow stack for filling up results.
  |  // This can happen if:
  |  // - A C function grows the stack (a lot).
  |  // - The GC shrinks the stack in between.
  |  // - A return back from a lua_call() with (high) nresults adjustment.
  |  mov L:RB->top, BASE  // Save current top held in BASE (yes).
  |  mov MULTRES, RD  // Need to fill only remainder with nil.
  |  mov FCARG2, RA
  |  mov FCARG1, L:RB
  |  call extern lj_state_growstack@8  // (lua_State *L, int n)
  |  mov BASE, L:RB->top  // Need the (realloced) L->top in BASE.
  |  jmp <3
  |
  |// Reached from vm_returnc. Only al is written here, then the common
  |// unwind landing pad takes over.
  |->vm_unwind_yield:
  |  mov al, LUA_YIELD
  |  jmp ->vm_unwind_c_eh
  |
  |->vm_unwind_c@8:  // Unwind C stack, return from vm_pcall.
  |  // (void *cframe, int errcode)
  |  // The stack pointer is reset to cframe; errcode becomes the return value.
  |.if X64
  |  mov eax, CARG2d  // Error return status for vm_pcall.
  |  mov rsp, CARG1
  |.else
  |  mov eax, FCARG2  // Error return status for vm_pcall.
  |  mov esp, FCARG1
  |.if WIN
  |  // Point fs:[0] at this frame's SEH_NEXT slot again.
  |  lea FCARG1, SEH_NEXT
  |  fs; mov [0], FCARG1
  |.endif
  |.endif
  |->vm_unwind_c_eh:  // Landing pad for external unwinder.
  |  // eax = status to return. Sets vmstate to ~LJ_VMST_C via L->glref.
  |  mov L:RB, SAVE_L
  |  mov GL:RB, L:RB->glref
  |  mov dword GL:RB->vmstate, ~LJ_VMST_C
  |  jmp ->vm_leave_unw
  |
  |// NOTE(review): the body is only assembled for x64 non-Windows builds.
  |// On other targets this label is directly followed by vm_unwind_ff@4.
  |->vm_unwind_rethrow:
  |.if X64 and not X64WIN
  |  mov FCARG1, SAVE_L
  |  mov FCARG2, eax
  |  restoreregs
  |  jmp extern lj_err_throw@8  // (lua_State *L, int errcode)
  |.endif
  |
  |->vm_unwind_ff@4:  // Unwind C stack, return from ff pcall.
  |  // (void *cframe)
  |  // cframe is masked with CFRAME_RAWMASK before it is used as stack pointer.
  |.if X64
  |  and CARG1, CFRAME_RAWMASK
  |  mov rsp, CARG1
  |.else
  |  and FCARG1, CFRAME_RAWMASK
  |  mov esp, FCARG1
  |.if WIN
  |  lea FCARG1, SEH_NEXT
  |  fs; mov [0], FCARG1
  |.endif
  |.endif
  |->vm_unwind_ff_eh:  // Landing pad for external unwinder.
  |  // Rebuilds BASE, DISPATCH and PC from L, then returns through
  |  // vm_returnc with 'false' stored in the slot type at [BASE-4].
  |  mov L:RB, SAVE_L
  |  mov RAa, -8  // Results start at BASE+RA = BASE-8.
  |  mov RD, 1+1  // Really 1+2 results, incr. later.
  |  mov BASE, L:RB->base
  |  mov DISPATCH, L:RB->glref  // Setup pointer to dispatch table.
  |  add DISPATCH, GG_G2DISP
  |  mov PC, [BASE-4]  // Fetch PC of previous frame.
  |  mov dword [BASE-4], LJ_TFALSE  // Prepend false to error message.
  |  set_vmstate INTERP
  |  jmp ->vm_returnc  // Increments RD/MULTRES and returns.
  |
  |.if WIN and not X64
  |->vm_rtlunwind@16:  // Thin layer around RtlUnwind.
  |  // (void *cframe, void *excptrec, void *unwinder, int errcode)
  |  // The return address slot at [esp] is overwritten with the first
  |  // argument; the final 'ret' consumes the value left at the top of stack.
  |  mov [esp], FCARG1  // Return value for RtlUnwind.
  |  push FCARG2  // Exception record for RtlUnwind.
  |  push 0  // Ignored by RtlUnwind.
  |  push dword [FCARG1+CFRAME_OFS_SEH]
  |  call extern RtlUnwind@16  // Violates ABI (clobbers too much).
  |  mov FCARG1, eax
  |  mov FCARG2, [esp+4]  // errcode (for vm_unwind_c).
  |  ret  // Jump to unwinder.
  |.endif
  |
  |//-----------------------------------------------------------------------
  |//-- Grow stack for calls -----------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// All three entry points end up at label 2 with FCARG2 = slots needed
  |// and RB = L, then retry the call after lj_state_growstack returns.
  |->vm_growstack_c:  // Grow stack for C function.
  |  mov FCARG2, LUA_MINSTACK
  |  jmp >2
  |
  |->vm_growstack_v:  // Grow stack for vararg Lua function.
  |  sub RD, 8
  |  jmp >1
  |
  |->vm_growstack_f:  // Grow stack for fixarg Lua function.
  |  // BASE = new base, RD = nargs+1, RB = L, PC = first PC
  |  lea RD, [BASE+NARGS:RD*8-8]
  |1:
  |  // RD = top pointer. RA = framesize byte of the prototype, which is
  |  // addressed relative to PC via PC2PROTO.
  |  movzx RA, byte [PC-4+PC2PROTO(framesize)]
  |  add PC, 4  // Must point after first instruction.
  |  mov L:RB->base, BASE
  |  mov L:RB->top, RD
  |  mov SAVE_PC, PC
  |  mov FCARG2, RA
  |2:
  |  // RB = L, L->base = new base, L->top = top
  |  mov FCARG1, L:RB
  |  call extern lj_state_growstack@8  // (lua_State *L, int n)
  |  // Reload everything from L and recompute nargs+1 = (top-base)/8 + 1.
  |  mov BASE, L:RB->base
  |  mov RD, L:RB->top
  |  mov LFUNC:RB, [BASE-8]
  |  sub RD, BASE
  |  shr RD, 3
  |  add NARGS:RD, 1
  |  // BASE = new base, RB = LFUNC, RD = nargs+1
  |  ins_callt  // Just retry the call.
  |
  |//-----------------------------------------------------------------------
  |//-- Entry points into the assembler VM ---------------------------------
  |//-----------------------------------------------------------------------
  |
  |->vm_resume:  // Setup C frame and resume thread.
  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
  |  saveregs
  |.if X64
  |  mov L:RB, CARG1d  // Caveat: CARG1d may be RA.
  |  mov SAVE_L, CARG1d
  |  mov RA, CARG2d
  |.else
  |  mov L:RB, SAVE_L
  |  mov RA, INARG_BASE  // Caveat: overlaps SAVE_CFRAME!
  |.endif
  |  mov PC, FRAME_CP
  |  xor RD, RD
  |  // L->cframe gets the stack pointer plus CFRAME_RESUME (stored below).
  |  lea KBASEa, [esp+CFRAME_RESUME]
  |  mov DISPATCH, L:RB->glref  // Setup pointer to dispatch table.
  |  add DISPATCH, GG_G2DISP
  |  mov SAVE_PC, RD  // Any value outside of bytecode is ok.
  |  mov SAVE_CFRAME, RDa
  |.if X64
  |  // Only done on x64: zero the nres/errf slots.
  |  mov SAVE_NRES, RD
  |  mov SAVE_ERRF, RD
  |.endif
  |  mov L:RB->cframe, KBASEa
  |  // RDL is zero here: a zero L->status takes the initial resume path.
  |  cmp byte L:RB->status, RDL
  |  je >2  // Initial resume (like a call).
  |
  |  // Resume after yield (like a return).
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |  set_vmstate INTERP
  |  mov byte L:RB->status, RDL
  |  mov BASE, L:RB->base
  |  mov RD, L:RB->top
  |  sub RD, RA
  |  shr RD, 3
  |  add RD, 1  // RD = nresults+1
  |  sub RA, BASE  // RA = resultofs
  |  mov PC, [BASE-4]
  |  mov MULTRES, RD
  |  test PC, FRAME_TYPE
  |  jz ->BC_RET_Z
  |  jmp ->vm_return
  |
  |->vm_pcall:  // Setup protected C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
  |  saveregs
  |  mov PC, FRAME_CP
  |.if X64
  |  // On x64 the error function argument must be copied to its slot.
  |  mov SAVE_ERRF, CARG4d
  |.endif
  |  jmp >1
  |
  |->vm_call:  // Setup C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1)
  |  saveregs
  |  mov PC, FRAME_C
  |
  |1:  // Entry point for vm_pcall above (PC = ftype).
  |.if X64
  |  mov SAVE_NRES, CARG3d
  |  mov L:RB, CARG1d  // Caveat: CARG1d may be RA.
  |  mov SAVE_L, CARG1d
  |  mov RA, CARG2d
  |.else
  |  mov L:RB, SAVE_L
  |  mov RA, INARG_BASE  // Caveat: overlaps SAVE_CFRAME!
  |.endif
  |
  |  mov DISPATCH, L:RB->glref  // Setup pointer to dispatch table.
  |  mov KBASEa, L:RB->cframe  // Add our C frame to cframe chain.
  |  mov SAVE_CFRAME, KBASEa
  |  mov SAVE_PC, L:RB  // Any value outside of bytecode is ok.
  |  add DISPATCH, GG_G2DISP
  |.if X64
  |  mov L:RB->cframe, rsp
  |.else
  |  mov L:RB->cframe, esp
  |.endif
  |
  |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |  set_vmstate INTERP
  |  mov BASE, L:RB->base  // BASE = old base (used in vmeta_call).
  |  add PC, RA
  |  sub PC, BASE  // PC = frame delta + frame type
  |
  |  mov RD, L:RB->top
  |  sub RD, RA
  |  shr NARGS:RD, 3
  |  add NARGS:RD, 1  // RD = nargs+1
  |
  |// In: RA = new base, RD = nargs+1, PC = frame link.
  |// Loads the callee from [RA-8]; anything whose type word at [RA-4] is
  |// not LJ_TFUNC is handed to vmeta_call.
  |->vm_call_dispatch:
  |  mov LFUNC:RB, [RA-8]
  |  cmp dword [RA-4], LJ_TFUNC
  |  jne ->vmeta_call  // Ensure KBASE defined and != BASE.
  |
  |// In: RA = new base, RB = function, RD = nargs+1, PC = frame link.
  |->vm_call_dispatch_f:
  |  mov BASE, RA
  |  ins_call
  |  // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
  |
  |->vm_cpcall:  // Setup protected C frame, call C.
  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
  |  saveregs
  |.if X64
  |  mov L:RB, CARG1d  // Caveat: CARG1d may be RA.
  |  mov SAVE_L, CARG1d
  |.else
  |  mov L:RB, SAVE_L
  |  // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
  |  mov RC, INARG_CP_UD  // Get args before they are overwritten.
  |  mov RA, INARG_CP_FUNC
  |  mov BASE, INARG_CP_CALL
  |.endif
  |  mov SAVE_PC, L:RB  // Any value outside of bytecode is ok.
  |
  |  mov KBASE, L:RB->stack  // Compute -savestack(L, L->top).
  |  sub KBASE, L:RB->top
  |  mov DISPATCH, L:RB->glref  // Setup pointer to dispatch table.
  |  mov SAVE_ERRF, 0  // No error function.
  |  mov SAVE_NRES, KBASE  // Neg. delta means cframe w/o frame.
  |  add DISPATCH, GG_G2DISP
  |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
  |
  |.if X64
  |  // The first three arguments are still in their incoming registers,
  |  // so cp (CARG4) can be called directly.
  |  mov KBASEa, L:RB->cframe  // Add our C frame to cframe chain.
  |  mov SAVE_CFRAME, KBASEa
  |  mov L:RB->cframe, rsp
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |
  |  call CARG4  // (lua_State *L, lua_CFunction func, void *ud)
  |.else
  |  mov ARG3, RC  // Have to copy args downwards.
  |  mov ARG2, RA
  |  mov ARG1, L:RB
  |
  |  mov KBASE, L:RB->cframe  // Add our C frame to cframe chain.
  |  mov SAVE_CFRAME, KBASE
  |  mov L:RB->cframe, esp
  |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
  |
  |  call BASE  // (lua_State *L, lua_CFunction func, void *ud)
  |.endif
  |  // TValue * (new base) or NULL returned in eax (RC).
  |  test RC, RC
  |  jz ->vm_leave_cp  // No base? Just remove C frame.
  |  mov RA, RC
  |  mov PC, FRAME_CP
  |  jmp <2  // Else continue with the call.
  |
  |//-----------------------------------------------------------------------
  |//-- Metamethod handling ------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |//-- Continuation dispatch ----------------------------------------------
  |
  |->cont_dispatch:
  |  // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
  |  add RA, BASE
  |  and PC, -8
  |  mov RB, BASE
  |  sub BASE, PC  // Restore caller BASE.
  |  mov dword [RA+RD*8-4], LJ_TNIL  // Ensure one valid arg.
  |  mov RC, RA  // ... in [RC]
  |  mov PC, [RB-12]  // Restore PC from [cont|PC].
  |.if X64
  |  // The continuation is stored as a 32 bit value that is sign-extended
  |  // and then added to the address loaded by the lea below.
  |  movsxd RAa, dword [RB-16]  // May be negative on WIN64 with debug.
  |.if FFI
  |  // Values 0 and 1 are special cases, handled at label 1 below.
  |  cmp RA, 1
  |  jbe >1
  |.endif
  |  lea KBASEa, qword [=>0]
  |  add RAa, KBASEa
  |.else
  |  mov RA, dword [RB-16]
  |.if FFI
  |  // Values 0 and 1 are special cases, handled at label 1 below.
  |  cmp RA, 1
  |  jbe >1
  |.endif
  |.endif
  |  // Reload KBASE via [BASE-8] -> pc -> PC2PROTO(k).
  |  mov LFUNC:KBASE, [BASE-8]
  |  mov KBASE, LFUNC:KBASE->pc
  |  mov KBASE, [KBASE+PC2PROTO(k)]
  |  // BASE = base, RC = result, RB = meta base
  |  jmp RAa  // Jump to continuation.
  |
  |.if FFI
  |1:
  |  // Flags are still those of 'cmp RA, 1' above.
  |  je ->cont_ffi_callback  // cont = 1: return from FFI callback.
  |  // cont = 0: Tail call from C function.
  |  sub RB, BASE
  |  shr RB, 3
  |  lea RD, [RB-1]
  |  jmp ->vm_call_tail
  |.endif
  |
  |->cont_cat:  // BASE = base, RC = result, RB = mbase
  |  // If the slot addressed by the instruction's B operand equals mbase-16,
  |  // the result is simply stored via cont_ra. Otherwise the result is
  |  // copied to [mbase-16] and BC_CAT_Z is entered with the slot
  |  // distance / 8 as third argument.
  |  movzx RA, PC_RB
  |  sub RB, 16
  |  lea RA, [BASE+RA*8]
  |  sub RA, RB
  |  je ->cont_ra
  |  neg RA
  |  shr RA, 3
  |.if X64WIN
  |  mov CARG3d, RA
  |  mov L:CARG1d, SAVE_L
  |  mov L:CARG1d->base, BASE
  |  mov RCa, [RC]
  |  mov [RB], RCa
  |  mov CARG2d, RB
  |.elif X64
  |  mov L:CARG1d, SAVE_L
  |  mov L:CARG1d->base, BASE
  |  mov CARG3d, RA
  |  mov RAa, [RC]
  |  mov [RB], RAa
  |  mov CARG2d, RB
  |.else
  |  mov ARG3, RA
  |  mov RA, [RC+4]
  |  mov RC, [RC]
  |  mov [RB+4], RA
  |  mov [RB], RC
  |  mov ARG2, RB
  |.endif
  |  jmp ->BC_CAT_Z
  |
  |//-- Table indexing metamethods -----------------------------------------
  |
  |// The three vmeta_tget* entry points differ only in how the key TValue
  |// is produced; all of them continue at the shared labels 1 and 2 below.
  |->vmeta_tgets:
  |  mov TMP1, RC  // RC = GCstr *
  |  mov TMP2, LJ_TSTR
  |  lea RCa, TMP1  // Store temp. TValue in TMP1/TMP2.
  |  cmp PC_OP, BC_GGET
  |  jne >1
  |  lea RA, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
  |  mov [RA], TAB:RB  // RB = GCtab *
  |  mov dword [RA+4], LJ_TTAB
  |  mov RB, RA
  |  jmp >2
  |
  |->vmeta_tgetb:
  |  movzx RC, PC_RC
  |.if DUALNUM
  |  mov TMP2, LJ_TISNUM
  |  mov TMP1, RC
  |.else
  |  cvtsi2sd xmm0, RC
  |  movsd TMPQ, xmm0
  |.endif
  |  lea RCa, TMPQ  // Store temp. TValue in TMPQ.
  |  jmp >1
  |
  |->vmeta_tgetv:
  |  movzx RC, PC_RC  // Reload TValue *k from RC.
  |  lea RC, [BASE+RC*8]
  |1:
  |  movzx RB, PC_RB  // Reload TValue *t from RB.
  |  lea RB, [BASE+RB*8]
  |2:
  |  // RB = TValue *t, RC = TValue *k. Set up the call to lj_meta_tget.
  |.if X64
  |  mov L:CARG1d, SAVE_L
  |  mov L:CARG1d->base, BASE  // Caveat: CARG2d/CARG3d may be BASE.
  |  mov CARG2d, RB
  |  mov CARG3, RCa  // May be 64 bit ptr to stack.
  |  mov L:RB, L:CARG1d
  |.else
  |  mov ARG2, RB
  |  mov L:RB, SAVE_L
  |  mov ARG3, RC
  |  mov ARG1, L:RB
  |  mov L:RB->base, BASE
  |.endif
  |  mov SAVE_PC, PC
  |  call extern lj_meta_tget  // (lua_State *L, TValue *o, TValue *k)
  |  // TValue * (finished) or NULL (metamethod) returned in eax (RC).
  |  mov BASE, L:RB->base
  |  test RC, RC
  |  jz >3
  |// Copies the 8 byte TValue at [RC] into the slot selected by the A
  |// operand of the instruction before PC, then dispatches.
  |->cont_ra:  // BASE = base, RC = result
  |  movzx RA, PC_RA
  |.if X64
  |  mov RBa, [RC]
  |  mov [BASE+RA*8], RBa
  |.else
  |  mov RB, [RC+4]
  |  mov RC, [RC]
  |  mov [BASE+RA*8+4], RB
  |  mov [BASE+RA*8], RC
  |.endif
  |  ins_next
  |
  |3:  // Call __index metamethod.
  |  // BASE = base, L->top = new base, stack = cont/func/t/k/origt
  |  mov RA, L:RB->top
  |  mov [RA-12], PC  // [cont|PC]
  |  lea PC, [RA+FRAME_CONT]
  |  sub PC, BASE
  |  mov LFUNC:RB, [RA-8]  // Guaranteed to be a function here.
  |  mov NARGS:RD, 3+1  // 3 args for func(t, k, origt), per the stack layout above.
  |  jmp ->vm_call_dispatch_f
  |
  |// In: RB = GCtab *, RC = integer key. BASE is parked in RB across the
  |// call because FCARG2 aliases it.
  |->vmeta_tgetr:
  |  mov FCARG1, TAB:RB
  |  mov RB, BASE  // Save BASE.
  |  mov FCARG2, RC  // Caveat: FCARG2 == BASE
  |  call extern lj_tab_getinth@8  // (GCtab *t, int32_t key)
  |  // cTValue * or NULL returned in eax (RC).
  |  movzx RA, PC_RA
  |  mov BASE, RB  // Restore BASE.
  |  test RC, RC
  |  jnz ->BC_TGETR_Z
  |  // NULL result: store nil into the destination slot's type word.
  |  mov dword [BASE+RA*8+4], LJ_TNIL
  |  jmp ->BC_TGETR2_Z
  |
  |//-----------------------------------------------------------------------
  |
  |// The three vmeta_tset* entry points differ only in how the key TValue
  |// is produced; they share the local labels 1 and 2 further down.
  |->vmeta_tsets:
  |  mov TMP1, RC  // RC = GCstr *
  |  mov TMP2, LJ_TSTR
  |  lea RCa, TMP1  // Store temp. TValue in TMP1/TMP2.
  |  cmp PC_OP, BC_GSET
  |  jne >1
  |  lea RA, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
1036 |  mov [RA], TAB:RB  // RB = GCtab *
1037 |  mov dword [RA+4], LJ_TTAB
1038 |  mov RB, RA  // RB = &g->tmptv is passed as the object.
1039 |  jmp >2
1040 |
1041 |->vmeta_tsetb:  // Store with the byte key from operand RC. Joins at 1:.
1042 |  movzx RC, PC_RC
1043 |.if DUALNUM
1044 |  mov TMP2, LJ_TISNUM
1045 |  mov TMP1, RC
1046 |.else
1047 |  cvtsi2sd xmm0, RC
1048 |  movsd TMPQ, xmm0
1049 |.endif
1050 |  lea RCa, TMPQ  // Store temp. TValue in TMPQ.
1051 |  jmp >1
1052 |
1053 |->vmeta_tsetv:  // Store with the key held in a stack slot.
1054 |  movzx RC, PC_RC  // Reload TValue *k from RC.
1055 |  lea RC, [BASE+RC*8]
1056 |1:
1057 |  movzx RB, PC_RB  // Reload TValue *t from RB.
1058 |  lea RB, [BASE+RB*8]
1059 |2:  // RB = TValue *o, RC = TValue *k.
1060 |.if X64
1061 |  mov L:CARG1d, SAVE_L
1062 |  mov L:CARG1d->base, BASE  // Caveat: CARG2d/CARG3d may be BASE.
1063 |  mov CARG2d, RB
1064 |  mov CARG3, RCa  // May be 64 bit ptr to stack.
1065 |  mov L:RB, L:CARG1d  // Keep L in RB across the call.
1066 |.else
1067 |  mov ARG2, RB
1068 |  mov L:RB, SAVE_L
1069 |  mov ARG3, RC
1070 |  mov ARG1, L:RB
1071 |  mov L:RB->base, BASE
1072 |.endif
1073 |  mov SAVE_PC, PC
1074 |  call extern lj_meta_tset  // (lua_State *L, TValue *o, TValue *k)
1075 |  // TValue * (finished) or NULL (metamethod) returned in eax (RC).
1076 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1077 |  test RC, RC
1078 |  jz >3  // NULL: call the metamethod.
1079 |  // NOBARRIER: lj_meta_tset ensures the table is not black.
1080 |  movzx RA, PC_RA
1081 |.if X64
1082 |  mov RBa, [BASE+RA*8]  // Copy the value from BASE[RA] to the returned slot.
1083 |  mov [RC], RBa
1084 |.else
1085 |  mov RB, [BASE+RA*8+4]  // Copy the value from BASE[RA] in two halves.
1086 |  mov RA, [BASE+RA*8]
1087 |  mov [RC+4], RB
1088 |  mov [RC], RA
1089 |.endif
1090 |->cont_nop:  // BASE = base, (RC = result)
1091 |  ins_next
1092 |
1093 |3:  // Call __newindex metamethod.
1094 |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
1095 |  mov RA, L:RB->top  // RA = new base.
1096 |  mov [RA-12], PC  // [cont|PC]
1097 |  movzx RC, PC_RA
1098 |  // Copy value to third argument.
1099 |.if X64
1100 |  mov RBa, [BASE+RC*8]
1101 |  mov [RA+16], RBa
1102 |.else
1103 |  mov RB, [BASE+RC*8+4]
1104 |  mov RC, [BASE+RC*8]
1105 |  mov [RA+20], RB
1106 |  mov [RA+16], RC
1107 |.endif
1108 |  lea PC, [RA+FRAME_CONT]
1109 |  sub PC, BASE  // PC = (new base - base) + FRAME_CONT.
1110 |  mov LFUNC:RB, [RA-8]  // Guaranteed to be a function here.
1111 |  mov NARGS:RD, 3+1  // 3 args for func(t, k, v).
1112 |  jmp ->vm_call_dispatch_f
1113 |
1114 |->vmeta_tsetr:  // Table in RB (GCtab *), integer key in RC.
1115 |.if X64WIN
1116 |  mov L:CARG1d, SAVE_L
1117 |  mov CARG3d, RC
1118 |  mov L:CARG1d->base, BASE
1119 |  xchg CARG2d, TAB:RB  // Caveat: CARG2d == BASE.
1120 |.elif X64
1121 |  mov L:CARG1d, SAVE_L
1122 |  mov CARG2d, TAB:RB
1123 |  mov L:CARG1d->base, BASE
1124 |  mov RB, BASE  // Save BASE.
1125 |  mov CARG3d, RC  // Caveat: CARG3d == BASE.
1126 |.else
1127 |  mov L:RA, SAVE_L
1128 |  mov ARG2, TAB:RB
1129 |  mov RB, BASE  // Save BASE.
1130 |  mov ARG3, RC
1131 |  mov ARG1, L:RA
1132 |  mov L:RA->base, BASE
1133 |.endif
1134 |  mov SAVE_PC, PC
1135 |  call extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
1136 |  // TValue * returned in eax (RC).
1137 |  movzx RA, PC_RA
1138 |  mov BASE, RB  // Restore BASE.
1139 |  jmp ->BC_TSETR_Z
1140 |
1141 |//-- Comparison metamethods ---------------------------------------------
1142 |
1143 |->vmeta_comp:  // Compare BASE[RA] with BASE[RD] via lj_meta_comp.
1144 |.if X64
1145 |  mov L:RB, SAVE_L
1146 |  mov L:RB->base, BASE  // Caveat: CARG2d/CARG3d == BASE.
1147 |.if X64WIN
1148 |  lea CARG3d, [BASE+RD*8]
1149 |  lea CARG2d, [BASE+RA*8]
1150 |.else
1151 |  lea CARG2d, [BASE+RA*8]
1152 |  lea CARG3d, [BASE+RD*8]
1153 |.endif
1154 |  mov CARG1d, L:RB  // Caveat: CARG1d/CARG4d == RA.
1155 |  movzx CARG4d, PC_OP  // Pass the opcode as the op argument.
1156 |.else
1157 |  movzx RB, PC_OP
1158 |  lea RD, [BASE+RD*8]
1159 |  lea RA, [BASE+RA*8]
1160 |  mov ARG4, RB
1161 |  mov L:RB, SAVE_L
1162 |  mov ARG3, RD
1163 |  mov ARG2, RA
1164 |  mov ARG1, L:RB
1165 |  mov L:RB->base, BASE
1166 |.endif
1167 |  mov SAVE_PC, PC
1168 |  call extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
1169 |  // 0/1 or TValue * (metamethod) returned in eax (RC).
1170 |3:  // Shared tail for the comparison helpers (entered via jmp <3).
1171 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1172 |  cmp RC, 1
1173 |  ja ->vmeta_binop  // Above 1: TValue * result, call the metamethod.
1174 |4:
1175 |  lea PC, [PC+4]  // lea leaves the flags of the preceding cmp intact.
1176 |  jb >6  // Below: skip the branch.
1177 |5:
1178 |  movzx RD, PC_RD
1179 |  branchPC RD
1180 |6:
1181 |  ins_next
1182 |
1183 |->cont_condt:  // BASE = base, RC = result
1184 |  add PC, 4
1185 |  cmp dword [RC+4], LJ_TISTRUECOND  // Branch if result is true.
1186 |  jb <5
1187 |  jmp <6
1188 |
1189 |->cont_condf:  // BASE = base, RC = result
1190 |  cmp dword [RC+4], LJ_TISTRUECOND  // Branch if result is false.
1191 |  jmp <4  // The flags are consumed by the jb at 4:.
1192 |
1193 |->vmeta_equal:  // Compare RA with RD via lj_meta_equal; RB is passed as 'ne'.
1194 |  sub PC, 4
1195 |.if X64WIN
1196 |  mov CARG3d, RD
1197 |  mov CARG4d, RB
1198 |  mov L:RB, SAVE_L
1199 |  mov L:RB->base, BASE  // Caveat: CARG2d == BASE.
1200 |  mov CARG2d, RA
1201 |  mov CARG1d, L:RB  // Caveat: CARG1d == RA.
1202 |.elif X64
1203 |  mov CARG2d, RA
1204 |  mov CARG4d, RB  // Caveat: CARG4d == RA.
1205 |  mov L:RB, SAVE_L
1206 |  mov L:RB->base, BASE  // Caveat: CARG3d == BASE.
1207 |  mov CARG3d, RD
1208 |  mov CARG1d, L:RB
1209 |.else
1210 |  mov ARG4, RB
1211 |  mov L:RB, SAVE_L
1212 |  mov ARG3, RD
1213 |  mov ARG2, RA
1214 |  mov ARG1, L:RB
1215 |  mov L:RB->base, BASE
1216 |.endif
1217 |  mov SAVE_PC, PC
1218 |  call extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
1219 |  // 0/1 or TValue * (metamethod) returned in eax (RC).
1220 |  jmp <3  // Evaluate the result in the shared comparison tail.
1221 |
1222 |->vmeta_equal_cd:  // Only has a body in FFI builds.
1223 |.if FFI
1224 |  sub PC, 4
1225 |  mov L:RB, SAVE_L
1226 |  mov L:RB->base, BASE
1227 |  mov FCARG1, L:RB
1228 |  mov FCARG2, dword [PC-4]  // Pass the instruction word before PC.
1229 |  mov SAVE_PC, PC
1230 |  call extern lj_meta_equal_cd@8  // (lua_State *L, BCIns ins)
1231 |  // 0/1 or TValue * (metamethod) returned in eax (RC).
1232 |  jmp <3  // Evaluate the result in the shared comparison tail.
1233 |.endif
1234 |
1235 |->vmeta_istype:  // RA is passed as 'ra', operand RD as 'tp'.
1236 |.if X64
1237 |  mov L:RB, SAVE_L
1238 |  mov L:RB->base, BASE  // Caveat: CARG2d/CARG3d may be BASE.
1239 |  mov CARG2d, RA
1240 |  movzx CARG3d, PC_RD
1241 |  mov L:CARG1d, L:RB
1242 |.else
1243 |  movzx RD, PC_RD
1244 |  mov ARG2, RA
1245 |  mov L:RB, SAVE_L
1246 |  mov ARG3, RD
1247 |  mov ARG1, L:RB
1248 |  mov L:RB->base, BASE
1249 |.endif
1250 |  mov SAVE_PC, PC
1251 |  call extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
1252 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1253 |  jmp <6  // Continue with the next instruction.
1254 |
1255 |//-- Arithmetic metamethods ---------------------------------------------
1256 |
1257 |->vmeta_arith_vno:
1258 |.if DUALNUM
1259 |  movzx RB, PC_RB  // Reload operand RB.
1260 |.endif
1261 |->vmeta_arith_vn:  // RC indexes KBASE, RB indexes BASE.
1262 |  lea RC, [KBASE+RC*8]
1263 |  jmp >1
1264 |
1265 |->vmeta_arith_nvo:
1266 |.if DUALNUM
1267 |  movzx RC, PC_RC  // Reload operand RC.
1268 |.endif
1269 |->vmeta_arith_nv:  // As vmeta_arith_vn, but with the two operands swapped.
1270 |  lea RC, [KBASE+RC*8]
1271 |  lea RB, [BASE+RB*8]
1272 |  xchg RB, RC
1273 |  jmp >2
1274 |
1275 |->vmeta_unm:  // Unary minus: the same slot is passed as both operands.
1276 |  lea RC, [BASE+RD*8]
1277 |  mov RB, RC
1278 |  jmp >2
1279 |
1280 |->vmeta_arith_vvo:
1281 |.if DUALNUM
1282 |  movzx RB, PC_RB  // Reload operand RB.
1283 |.endif
1284 |->vmeta_arith_vv:  // Both RB and RC index BASE.
1285 |  lea RC, [BASE+RC*8]
1286 |1:
1287 |  lea RB, [BASE+RB*8]
1288 |2:  // RB = TValue *rb, RC = TValue *rc.
1289 |  lea RA, [BASE+RA*8]  // RA = TValue *ra.
1290 |.if X64WIN
1291 |  mov CARG3d, RB
1292 |  mov CARG4d, RC
1293 |  movzx RC, PC_OP
1294 |  mov ARG5d, RC
1295 |  mov L:RB, SAVE_L
1296 |  mov L:RB->base, BASE  // Caveat: CARG2d == BASE.
1297 |  mov CARG2d, RA
1298 |  mov CARG1d, L:RB  // Caveat: CARG1d == RA.
1299 |.elif X64
1300 |  movzx CARG5d, PC_OP
1301 |  mov CARG2d, RA
1302 |  mov CARG4d, RC  // Caveat: CARG4d == RA.
1303 |  mov L:CARG1d, SAVE_L
1304 |  mov L:CARG1d->base, BASE  // Caveat: CARG3d == BASE.
1305 |  mov CARG3d, RB
1306 |  mov L:RB, L:CARG1d  // Keep L in RB across the call.
1307 |.else
1308 |  mov ARG3, RB
1309 |  mov L:RB, SAVE_L
1310 |  mov ARG4, RC
1311 |  movzx RC, PC_OP
1312 |  mov ARG2, RA
1313 |  mov ARG5, RC
1314 |  mov ARG1, L:RB
1315 |  mov L:RB->base, BASE
1316 |.endif
1317 |  mov SAVE_PC, PC
1318 |  call extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1319 |  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1320 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1321 |  test RC, RC
1322 |  jz ->cont_nop  // NULL: finished, continue with the next instruction.
1323 |
1324 |  // Call metamethod for binary op.
1325 |->vmeta_binop:
1326 |  // BASE = base, RC = new base, stack = cont/func/o1/o2
1327 |  mov RA, RC  // RA = new base.
1328 |  sub RC, BASE
1329 |  mov [RA-12], PC  // [cont|PC]
1330 |  lea PC, [RC+FRAME_CONT]  // PC = (new base - base) + FRAME_CONT.
1331 |  mov NARGS:RD, 2+1  // 2 args for func(o1, o2).
1332 |  jmp ->vm_call_dispatch
1333 |
1334 |->vmeta_len:  // Length of BASE[RD] via lj_meta_len.
1335 |  mov L:RB, SAVE_L
1336 |  mov L:RB->base, BASE
1337 |  lea FCARG2, [BASE+RD*8]  // Caveat: FCARG2 == BASE
1338 |  mov L:FCARG1, L:RB
1339 |  mov SAVE_PC, PC
1340 |  call extern lj_meta_len@8  // (lua_State *L, TValue *o)
1341 |  // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1342 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1343 |  test RC, RC
1344 |  jne ->vmeta_binop  // Binop call for compatibility.
1345 |  movzx RD, PC_RD
1346 |  mov TAB:FCARG1, [BASE+RD*8]  // NULL: reload the table and retry in BC_LEN.
1347 |  jmp ->BC_LEN_Z
1348 |
1349 |//-- Call metamethod ----------------------------------------------------
1350 |
1351 |->vmeta_call_ra:
1352 |  lea RA, [BASE+RA*8+8]  // Compute the new base from operand RA.
1353 |->vmeta_call:  // Resolve and call __call metamethod.
1354 |  // BASE = old base, RA = new base, RC = nargs+1, PC = return
1355 |  mov TMP2, RA  // Save RA, RC for us.
1356 |  mov TMP1, NARGS:RD
1357 |  sub RA, 8  // RA = new base - 8 is passed as 'func'.
1358 |.if X64
1359 |  mov L:RB, SAVE_L
1360 |  mov L:RB->base, BASE  // Caveat: CARG2d/CARG3d may be BASE.
1361 |  mov CARG2d, RA
1362 |  lea CARG3d, [RA+NARGS:RD*8]
1363 |  mov CARG1d, L:RB  // Caveat: CARG1d may be RA.
1364 |.else
1365 |  lea RC, [RA+NARGS:RD*8]
1366 |  mov L:RB, SAVE_L
1367 |  mov ARG2, RA
1368 |  mov ARG3, RC
1369 |  mov ARG1, L:RB
1370 |  mov L:RB->base, BASE  // This is the caller's base!
1371 |.endif
1372 |  mov SAVE_PC, PC
1373 |  call extern lj_meta_call  // (lua_State *L, TValue *func, TValue *top)
1374 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1375 |  mov RA, TMP2  // Restore the saved new base and nargs+1.
1376 |  mov NARGS:RD, TMP1
1377 |  mov LFUNC:RB, [RA-8]
1378 |  add NARGS:RD, 1  // One more argument than before the call.
1379 |  // This is fragile. L->base must not move, KBASE must always be defined.
1380 |  cmp KBASE, BASE  // Continue with CALLT if flag set.
1381 |  je ->BC_CALLT_Z
1382 |  mov BASE, RA
1383 |  ins_call  // Otherwise call resolved metamethod.
1384 |
1385 |//-- Argument coercion for 'for' statement ------------------------------
1386 |
1387 |->vmeta_for:  // RA is passed as 'base' to lj_meta_for, then the op is retried.
1388 |  mov L:RB, SAVE_L
1389 |  mov L:RB->base, BASE
1390 |  mov FCARG2, RA  // Caveat: FCARG2 == BASE
1391 |  mov L:FCARG1, L:RB  // Caveat: FCARG1 == RA
1392 |  mov SAVE_PC, PC
1393 |  call extern lj_meta_for@8  // (lua_State *L, TValue *base)
1394 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1395 |  mov RC, [PC-4]  // Reload the instruction word before PC and decode it:
1396 |  movzx RA, RCH  //   RA = second byte,
1397 |  movzx OP, RCL  //   OP = low byte,
1398 |  shr RC, 16  //   RC = upper 16 bits.
1399 |.if X64
1400 |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]  // Retry FORI or JFORI.
1401 |.else
1402 |  jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]  // Retry FORI or JFORI.
1403 |.endif
1404 |
1405 |//-----------------------------------------------------------------------
1406 |//-- Fast functions -----------------------------------------------------
1407 |//-----------------------------------------------------------------------
1408 |
1409 |.macro .ffunc, name  // Defines the label ff_<name> without argument checks.
1410 |->ff_ .. name:
1411 |.endmacro
1412 |
1413 |.macro .ffunc_1, name  // Label ff_<name>; fallback if NARGS:RD < 1+1.
1414 |->ff_ .. name:
1415 |  cmp NARGS:RD, 1+1; jb ->fff_fallback
1416 |.endmacro
1417 |
1418 |.macro .ffunc_2, name  // Label ff_<name>; fallback if NARGS:RD < 2+1.
1419 |->ff_ .. name:
1420 |  cmp NARGS:RD, 2+1; jb ->fff_fallback
1421 |.endmacro
1422 |
1423 |.macro .ffunc_nsse, name, op  // .ffunc_1 plus: 1st arg must be a number; op loads it into xmm0.
1424 |  .ffunc_1 name
1425 |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1426 |  op xmm0, qword [BASE]
1427 |.endmacro
1428 |
1429 |.macro .ffunc_nsse, name  // Two-argument form: uses movsd as the op.
1430 |  .ffunc_nsse name, movsd
1431 |.endmacro
1432 |
1433 |.macro .ffunc_nnsse, name  // .ffunc_2 plus: both args must be numbers; loads xmm0 and xmm1.
1434 |  .ffunc_2 name
1435 |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1436 |  cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1437 |  movsd xmm0, qword [BASE]
1438 |  movsd xmm1, qword [BASE+8]
1439 |.endmacro
1440 |
1441 |.macro .ffunc_nnr, name  // As .ffunc_nnsse, but loads both args onto the x87 stack.
1442 |  .ffunc_2 name
1443 |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1444 |  cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1445 |  fld qword [BASE+8]  // 2nd arg is pushed first, so the 1st arg ends up on top.
1446 |  fld qword [BASE]
1447 |.endmacro
1448 |
1449 |// Inlined GC threshold check. Caveat: uses label 1.
1450 |.macro ffgccheck
1451 |  mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1452 |  cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1453 |  jb >1  // Below the threshold: skip the GC step.
1454 |  call ->fff_gcstep
1455 |1:
1456 |.endmacro
1457 |
1458 |//-- Base library: checks -----------------------------------------------
1459 |
1460 |.ffunc_1 assert
1461 |  mov RB, [BASE+4]
1462 |  cmp RB, LJ_TISTRUECOND; jae ->fff_fallback  // Fallback unless the 1st arg is a true condition.
1463 |  mov PC, [BASE-4]
1464 |  mov MULTRES, RD
1465 |  mov [BASE-4], RB  // Copy the 1st arg to the result slot at BASE-8.
1466 |  mov RB, [BASE]
1467 |  mov [BASE-8], RB
1468 |  sub RD, 2
1469 |  jz >2  // Only one argument: nothing more to move.
1470 |  mov RA, BASE
1471 |1:  // Move each remaining argument down by one slot.
1472 |  add RA, 8
1473 |.if X64
1474 |  mov RBa, [RA]
1475 |  mov [RA-8], RBa
1476 |.else
1477 |  mov RB, [RA+4]
1478 |  mov [RA-4], RB
1479 |  mov RB, [RA]
1480 |  mov [RA-8], RB
1481 |.endif
1482 |  sub RD, 1
1483 |  jnz <1
1484 |2:
1485 |  mov RD, MULTRES  // Result count+1 equals the saved NARGS:RD.
1486 |  jmp ->fff_res_
1487 |
1488 |.ffunc_1 type
1489 |  mov RB, [BASE+4]
1490 |.if X64
1491 |  mov RA, RB
1492 |  sar RA, 15
1493 |  cmp RA, -2
1494 |  je >3  // Handled as light userdata at 3:.
1495 |.endif
1496 |  mov RC, ~LJ_TNUMX
1497 |  not RB
1498 |  cmp RC, RB
1499 |  cmova RC, RB  // RC = unsigned min(~LJ_TNUMX, ~tag).
1500 |2:
1501 |  mov CFUNC:RB, [BASE-8]
1502 |  mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]  // Load upvalue[RC] of the function.
1503 |  mov PC, [BASE-4]
1504 |  mov dword [BASE-4], LJ_TSTR  // Return it as a string.
1505 |  mov [BASE-8], STR:RC
1506 |  jmp ->fff_res1
1507 |.if X64
1508 |3:
1509 |  mov RC, ~LJ_TLIGHTUD
1510 |  jmp <2
1511 |.endif
1512 |
1513 |//-- Base library: getters and setters ---------------------------------
1514 |
1515 |.ffunc_1 getmetatable
1516 |  mov RB, [BASE+4]
1517 |  mov PC, [BASE-4]
1518 |  cmp RB, LJ_TTAB; jne >6
1519 |1:  // Field metatable must be at same offset for GCtab and GCudata!
1520 |  mov TAB:RB, [BASE]
1521 |  mov TAB:RB, TAB:RB->metatable
1522 |2:  // RB = metatable or NULL.
1523 |  test TAB:RB, TAB:RB
1524 |  mov dword [BASE-4], LJ_TNIL  // mov leaves the flags of the test intact.
1525 |  jz ->fff_res1  // No metatable: return nil.
1526 |  mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
1527 |  mov dword [BASE-4], LJ_TTAB  // Store metatable as default result.
1528 |  mov [BASE-8], TAB:RB
1529 |  mov RA, TAB:RB->hmask
1530 |  and RA, STR:RC->hash
1531 |  imul RA, #NODE
1532 |  add NODE:RA, TAB:RB->node  // RA = &node[hash & hmask].
1533 |3:  // Rearranged logic, because we expect _not_ to find the key.
1534 |  cmp dword NODE:RA->key.it, LJ_TSTR
1535 |  jne >4
1536 |  cmp dword NODE:RA->key.gcr, STR:RC
1537 |  je >5
1538 |4:
1539 |  mov NODE:RA, NODE:RA->next  // Follow the node chain.
1540 |  test NODE:RA, NODE:RA
1541 |  jnz <3
1542 |  jmp ->fff_res1  // Not found, keep default result.
1543 |5:
1544 |  mov RB, [RA+4]
1545 |  cmp RB, LJ_TNIL; je ->fff_res1  // Ditto for nil value.
1546 |  mov RC, [RA]
1547 |  mov [BASE-4], RB  // Return value of mt.__metatable.
1548 |  mov [BASE-8], RC
1549 |  jmp ->fff_res1
1550 |
1551 |6:  // Not a table: userdata or a base metatable.
1552 |  cmp RB, LJ_TUDATA; je <1
1553 |.if X64
1554 |  cmp RB, LJ_TNUMX; ja >8
1555 |  cmp RB, LJ_TISNUM; jbe >7
1556 |  mov RB, LJ_TLIGHTUD
1557 |  jmp >8
1558 |7:
1559 |.else
1560 |  cmp RB, LJ_TISNUM; ja >8
1561 |.endif
1562 |  mov RB, LJ_TNUMX
1563 |8:
1564 |  not RB  // ~tag is the index relative to GCROOT_BASEMT.
1565 |  mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1566 |  jmp <2
1567 |
1568 |.ffunc_2 setmetatable
1569 |  cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1570 |  // Fast path: no mt for table yet and not clearing the mt.
1571 |  mov TAB:RB, [BASE]
1572 |  cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1573 |  cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
1574 |  mov TAB:RA, [BASE+8]
1575 |  // Fallback if metatable may contain __gc (MM_gc bit in nomm is clear).
1576 |  test byte TAB:RA->nomm, 1<<MM_gc; jz ->fff_fallback
1577 |  mov TAB:RB->metatable, TAB:RA
1578 |  mov PC, [BASE-4]
1579 |  mov dword [BASE-4], LJ_TTAB  // Return original table.
1580 |  mov [BASE-8], TAB:RB
1581 |  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
1582 |  jz >1
1583 |  // Possible write barrier. Table is black, but skip iswhite(mt) check.
1584 |  barrierback TAB:RB, RA
1585 |1:
1586 |  jmp ->fff_res1
1587 |
1588 |.ffunc_2 rawget
1589 |  cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1590 |.if X64WIN
1591 |  mov RB, BASE  // Save BASE.
1592 |  lea CARG3d, [BASE+8]
1593 |  mov CARG2d, [BASE]  // Caveat: CARG2d == BASE.
1594 |  mov CARG1d, SAVE_L
1595 |.elif X64
1596 |  mov RB, BASE  // Save BASE.
1597 |  mov CARG2d, [BASE]
1598 |  lea CARG3d, [BASE+8]  // Caveat: CARG3d == BASE.
1599 |  mov CARG1d, SAVE_L
1600 |.else
1601 |  mov TAB:RD, [BASE]
1602 |  mov L:RB, SAVE_L
1603 |  mov ARG2, TAB:RD
1604 |  mov ARG1, L:RB
1605 |  mov RB, BASE  // Save BASE.
1606 |  add BASE, 8
1607 |  mov ARG3, BASE
1608 |.endif
1609 |  call extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
1610 |  // cTValue * returned in eax (RD).
1611 |  mov BASE, RB  // Restore BASE.
1612 |  // Copy table slot.
1613 |.if X64
1614 |  mov RBa, [RD]
1615 |  mov PC, [BASE-4]
1616 |  mov [BASE-8], RBa
1617 |.else
1618 |  mov RB, [RD]
1619 |  mov RD, [RD+4]
1620 |  mov PC, [BASE-4]  // Load PC before the store to [BASE-4] below.
1621 |  mov [BASE-8], RB
1622 |  mov [BASE-4], RD
1623 |.endif
1624 |  jmp ->fff_res1
1625 |
1626 |//-- Base library: conversions ------------------------------------------
1627 |
1628 |.ffunc tonumber
1629 |  // Only handles the number case inline (without a base argument).
1630 |  cmp NARGS:RD, 1+1; jne ->fff_fallback  // Exactly one argument.
1631 |  cmp dword [BASE+4], LJ_TISNUM
1632 |.if DUALNUM
1633 |  jne >1
1634 |  mov RB, dword [BASE]; jmp ->fff_resi  // Integer: return it unchanged.
1635 |1:
1636 |  ja ->fff_fallback  // Flags are still those of the cmp above.
1637 |.else
1638 |  jae ->fff_fallback
1639 |.endif
1640 |  movsd xmm0, qword [BASE]; jmp ->fff_resxmm0  // Number: return it unchanged.
1641 |
1642 |.ffunc_1 tostring
1643 |  // Only handles the string or number case inline.
1644 |  mov PC, [BASE-4]
1645 |  cmp dword [BASE+4], LJ_TSTR; jne >3
1646 |  // A __tostring method in the string base metatable is ignored.
1647 |  mov STR:RD, [BASE]
1648 |2:  // Return the string in RD.
1649 |  mov dword [BASE-4], LJ_TSTR
1650 |  mov [BASE-8], STR:RD
1651 |  jmp ->fff_res1
1652 |3:  // Handle numbers inline, unless a number base metatable is present.
1653 |  cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1654 |  cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1655 |  jne ->fff_fallback
1656 |  ffgccheck  // Caveat: uses label 1.
1657 |  mov L:RB, SAVE_L
1658 |  mov L:RB->base, BASE  // Add frame since C call can throw.
1659 |  mov SAVE_PC, PC  // Redundant (but a defined value).
1660 |.if X64 and not X64WIN
1661 |  mov FCARG2, BASE  // Otherwise: FCARG2 == BASE
1662 |.endif
1663 |  mov L:FCARG1, L:RB
1664 |.if DUALNUM
1665 |  call extern lj_strfmt_number@8  // (lua_State *L, cTValue *o)
1666 |.else
1667 |  call extern lj_strfmt_num@8  // (lua_State *L, lua_Number *np)
1668 |.endif
1669 |  // GCstr returned in eax (RD).
1670 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1671 |  jmp <2
1672 |
1673 |//-- Base library: iterators -------------------------------------------
1674 |
1675 |.ffunc_1 next
1676 |  je >2  // Missing 2nd arg? (Flags from the cmp in .ffunc_1.)
1677 |1:
1678 |  cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1679 |  mov L:RB, SAVE_L
1680 |  mov L:RB->base, BASE  // Add frame since C call can throw.
1681 |  mov L:RB->top, BASE  // Dummy frame length is ok.
1682 |  mov PC, [BASE-4]
1683 |.if X64WIN
1684 |  lea CARG3d, [BASE+8]
1685 |  mov CARG2d, [BASE]  // Caveat: CARG2d == BASE.
1686 |  mov CARG1d, L:RB
1687 |.elif X64
1688 |  mov CARG2d, [BASE]
1689 |  lea CARG3d, [BASE+8]  // Caveat: CARG3d == BASE.
1690 |  mov CARG1d, L:RB
1691 |.else
1692 |  mov TAB:RD, [BASE]
1693 |  mov ARG2, TAB:RD
1694 |  mov ARG1, L:RB
1695 |  add BASE, 8
1696 |  mov ARG3, BASE
1697 |.endif
1698 |  mov SAVE_PC, PC  // Needed for ITERN fallback.
1699 |  call extern lj_tab_next  // (lua_State *L, GCtab *t, TValue *key)
1700 |  // Flag returned in eax (RD).
1701 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
1702 |  test RD, RD; jz >3  // End of traversal?
1703 |  // Copy key and value to results.
1704 |.if X64
1705 |  mov RBa, [BASE+8]
1706 |  mov RDa, [BASE+16]
1707 |  mov [BASE-8], RBa
1708 |  mov [BASE], RDa
1709 |.else
1710 |  mov RB, [BASE+8]
1711 |  mov RD, [BASE+12]
1712 |  mov [BASE-8], RB
1713 |  mov [BASE-4], RD
1714 |  mov RB, [BASE+16]
1715 |  mov RD, [BASE+20]
1716 |  mov [BASE], RB
1717 |  mov [BASE+4], RD
1718 |.endif
1719 |->fff_res2:  // Return two results.
1720 |  mov RD, 1+2
1721 |  jmp ->fff_res
1722 |2:  // Set missing 2nd arg to nil.
1723 |  mov dword [BASE+12], LJ_TNIL
1724 |  jmp <1
1725 |3:  // End of traversal: return nil.
1726 |  mov dword [BASE-4], LJ_TNIL
1727 |  jmp ->fff_res1
1728 |
1729 |.ffunc_1 pairs
1730 |  mov TAB:RB, [BASE]
1731 |  cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1732 |  cmp dword TAB:RB->metatable, 0; jne ->fff_fallback  // Only tables without a metatable are handled here.
1733 |  mov CFUNC:RB, [BASE-8]
1734 |  mov CFUNC:RD, CFUNC:RB->upvalue[0]
1735 |  mov PC, [BASE-4]
1736 |  mov dword [BASE-4], LJ_TFUNC  // 1st result: the function from upvalue[0].
1737 |  mov [BASE-8], CFUNC:RD
1738 |  mov dword [BASE+12], LJ_TNIL  // 3rd result: nil. The table stays in place as the 2nd.
1739 |  mov RD, 1+3
1740 |  jmp ->fff_res
1741 |
1742 |.ffunc_2 ipairs_aux
1743 |  cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1744 |  cmp dword [BASE+12], LJ_TISNUM
1745 |.if DUALNUM
1746 |  jne ->fff_fallback
1747 |.else
1748 |  jae ->fff_fallback
1749 |.endif
1750 |  mov PC, [BASE-4]
1751 |.if DUALNUM
1752 |  mov RD, dword [BASE+8]
1753 |  add RD, 1  // RD = index + 1.
1754 |  mov dword [BASE-4], LJ_TISNUM
1755 |  mov dword [BASE-8], RD  // 1st result: the incremented index.
1756 |.else
1757 |  movsd xmm0, qword [BASE+8]
1758 |  sseconst_1 xmm1, RBa
1759 |  addsd xmm0, xmm1
1760 |  cvttsd2si RD, xmm0  // RD = (int)(index + 1).
1761 |  movsd qword [BASE-8], xmm0  // 1st result: the incremented index.
1762 |.endif
1763 |  mov TAB:RB, [BASE]
1764 |  cmp RD, TAB:RB->asize; jae >2  // Not in array part?
1765 |  shl RD, 3
1766 |  add RD, TAB:RB->array  // RD = &array[index].
1767 |1:
1768 |  cmp dword [RD+4], LJ_TNIL; je ->fff_res0  // nil slot: return no results.
1769 |  // Copy array slot.
1770 |.if X64
1771 |  mov RBa, [RD]
1772 |  mov [BASE], RBa
1773 |.else
1774 |  mov RB, [RD]
1775 |  mov RD, [RD+4]
1776 |  mov [BASE], RB
1777 |  mov [BASE+4], RD
1778 |.endif
1779 |  jmp ->fff_res2
1780 |2:  // Check for empty hash part first. Otherwise call C function.
1781 |  cmp dword TAB:RB->hmask, 0; je ->fff_res0
1782 |  mov FCARG1, TAB:RB
1783 |  mov RB, BASE  // Save BASE.
1784 |  mov FCARG2, RD  // Caveat: FCARG2 == BASE
1785 |  call extern lj_tab_getinth@8  // (GCtab *t, int32_t key)
1786 |  // cTValue * or NULL returned in eax (RD).
1787 |  mov BASE, RB  // Restore BASE.
1788 |  test RD, RD
1789 |  jnz <1  // Slot found: check and copy it at 1:.
1790 |->fff_res0:  // Return no results.
1791 |  mov RD, 1+0
1792 |  jmp ->fff_res
1793 |
1794 |.ffunc_1 ipairs
1795 |  mov TAB:RB, [BASE]
1796 |  cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1797 |  cmp dword TAB:RB->metatable, 0; jne ->fff_fallback  // Only tables without a metatable are handled here.
1798 |  mov CFUNC:RB, [BASE-8]
1799 |  mov CFUNC:RD, CFUNC:RB->upvalue[0]
1800 |  mov PC, [BASE-4]
1801 |  mov dword [BASE-4], LJ_TFUNC  // 1st result: the function from upvalue[0].
1802 |  mov [BASE-8], CFUNC:RD
1803 |.if DUALNUM
1804 |  mov dword [BASE+12], LJ_TISNUM  // 3rd result: integer 0.
1805 |  mov dword [BASE+8], 0
1806 |.else
1807 |  xorps xmm0, xmm0
1808 |  movsd qword [BASE+8], xmm0  // 3rd result: number 0.
1809 |.endif
1810 |  mov RD, 1+3
1811 |  jmp ->fff_res
1812 |
1813 |//-- Base library: catch errors ----------------------------------------
1814 |
1815 |.ffunc_1 pcall
1816 |  lea RA, [BASE+8]  // RA = new base.
1817 |  sub NARGS:RD, 1
1818 |  mov PC, 8+FRAME_PCALL
1819 |1:  // Shared with xpcall.
1820 |  movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
1821 |  shr RB, HOOK_ACTIVE_SHIFT
1822 |  and RB, 1
1823 |  add PC, RB  // Remember active hook before pcall.
1824 |  jmp ->vm_call_dispatch
1825 |
1826 |.ffunc_2 xpcall
1827 |  cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
1828 |  mov RB, [BASE+4]  // Swap function and traceback.
1829 |  mov [BASE+12], RB
1830 |  mov dword [BASE+4], LJ_TFUNC
1831 |  mov LFUNC:RB, [BASE]
1832 |  mov PC, [BASE+8]  // PC is only a scratch register for the swap here.
1833 |  mov [BASE+8], LFUNC:RB
1834 |  mov [BASE], PC
1835 |  lea RA, [BASE+16]  // RA = new base.
1836 |  sub NARGS:RD, 2
1837 |  mov PC, 16+FRAME_PCALL
1838 |  jmp <1  // Continue in the shared pcall tail.
1839 |
1840 |//-- Coroutine library --------------------------------------------------
1841 |
1842 |.macro coroutine_resume_wrap, resume  // resume = 1: coroutine_resume, 0: coroutine_wrap_aux.
1843 |.if resume
1844 |.ffunc_1 coroutine_resume
1845 |  mov L:RB, [BASE]  // Coroutine is the 1st argument.
1846 |.else
1847 |.ffunc coroutine_wrap_aux
1848 |  mov CFUNC:RB, [BASE-8]
1849 |  mov L:RB, CFUNC:RB->upvalue[0].gcr  // Coroutine is upvalue[0] of the function.
1850 |.endif
1851 |  mov PC, [BASE-4]
1852 |  mov SAVE_PC, PC
1853 |.if X64
1854 |  mov TMP1, L:RB  // Keep the coroutine in TMP1 (x64) or ARG1 (x86).
1855 |.else
1856 |  mov ARG1, L:RB
1857 |.endif
1858 |.if resume
1859 |  cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
1860 |.endif
1861 |  cmp aword L:RB->cframe, 0; jne ->fff_fallback
1862 |  cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1863 |  mov RA, L:RB->top  // mov leaves the flags of the cmp above intact.
1864 |  je >1  // Status == LUA_YIELD: skip the check below (else status is 0).
1865 |  cmp RA, L:RB->base  // Check for presence of initial func.
1866 |  je ->fff_fallback
1867 |1:
1868 |.if resume
1869 |  lea PC, [RA+NARGS:RD*8-16]  // Check stack space (-1-thread).
1870 |.else
1871 |  lea PC, [RA+NARGS:RD*8-8]  // Check stack space (-1).
1872 |.endif
1873 |  cmp PC, L:RB->maxstack; ja ->fff_fallback
1874 |  mov L:RB->top, PC  // New top of the coroutine stack.
1875 |
1876 |  mov L:RB, SAVE_L
1877 |  mov L:RB->base, BASE
1878 |.if resume
1879 |  add BASE, 8  // Keep resumed thread in stack for GC.
1880 |.endif
1881 |  mov L:RB->top, BASE
1882 |.if resume
1883 |  lea RB, [BASE+NARGS:RD*8-24]  // RB = end of source for stack move.
1884 |.else
1885 |  lea RB, [BASE+NARGS:RD*8-16]  // RB = end of source for stack move.
1886 |.endif
1887 |  sub RBa, PCa  // Relative to PC.
1888 |
1889 |  cmp PC, RA
1890 |  je >3  // No arguments to move.
1891 |2:  // Move args to coroutine.
1892 |.if X64
1893 |  mov RCa, [PC+RB]
1894 |  mov [PC-8], RCa
1895 |.else
1896 |  mov RC, [PC+RB+4]
1897 |  mov [PC-4], RC
1898 |  mov RC, [PC+RB]
1899 |  mov [PC-8], RC
1900 |.endif
1901 |  sub PC, 8  // Copy downwards, one slot per iteration, until PC reaches RA.
1902 |  cmp PC, RA
1903 |  jne <2
1904 |3:
1905 |.if X64
1906 |  mov CARG2d, RA
1907 |  mov CARG1d, TMP1
1908 |.else
1909 |  mov ARG2, RA
1910 |  xor RA, RA
1911 |  mov ARG4, RA
1912 |  mov ARG3, RA
1913 |.endif
1914 |  call ->vm_resume  // (lua_State *L, TValue *base, 0, 0)
1915 |
1916 |  mov L:RB, SAVE_L
1917 |.if X64
1918 |  mov L:PC, TMP1  // PC = coroutine.
1919 |.else
1920 |  mov L:PC, ARG1  // The callee doesn't modify SAVE_L.
1921 |.endif
1922 |  mov BASE, L:RB->base
1923 |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1924 |  set_vmstate INTERP
1925 |
1926 |  cmp eax, LUA_YIELD
1927 |  ja >8  // Status above LUA_YIELD: error.
1928 |4:
1929 |  mov RA, L:PC->base
1930 |  mov KBASE, L:PC->top
1931 |  mov L:PC->top, RA  // Clear coroutine stack.
1932 |  mov PC, KBASE
1933 |  sub PC, RA  // PC = size of the results in bytes.
1934 |  je >6  // No results?
1935 |  lea RD, [BASE+PC]
1936 |  shr PC, 3  // PC = number of results.
1937 |  cmp RD, L:RB->maxstack
1938 |  ja >9  // Need to grow stack?
1939 |
1940 |  mov RB, BASE
1941 |  sub RBa, RAa  // RB = destination offset relative to RA.
1942 |5:  // Move results from coroutine.
1943 |.if X64
1944 |  mov RDa, [RA]
1945 |  mov [RA+RB], RDa
1946 |.else
1947 |  mov RD, [RA]
1948 |  mov [RA+RB], RD
1949 |  mov RD, [RA+4]
1950 |  mov [RA+RB+4], RD
1951 |.endif
1952 |  add RA, 8
1953 |  cmp RA, KBASE
1954 |  jne <5
1955 |6:
1956 |.if resume
1957 |  lea RD, [PC+2]  // nresults+1 = 1 + true + results.
1958 |  mov dword [BASE-4], LJ_TTRUE  // Prepend true to results.
1959 |.else
1960 |  lea RD, [PC+1]  // nresults+1 = 1 + results.
1961 |.endif
1962 |7:
1963 |  mov PC, SAVE_PC
1964 |  mov MULTRES, RD
1965 |.if resume
1966 |  mov RAa, -8  // Results start at BASE-8 (includes the prepended value).
1967 |.else
1968 |  xor RA, RA  // Results start at BASE.
1969 |.endif
1970 |  test PC, FRAME_TYPE
1971 |  jz ->BC_RET_Z
1972 |  jmp ->vm_return
1973 |
1974 |8:  // Coroutine returned with error (at co->top-1).
1975 |.if resume
1976 |  mov dword [BASE-4], LJ_TFALSE  // Prepend false to results.
1977 |  mov RA, L:PC->top
1978 |  sub RA, 8
1979 |  mov L:PC->top, RA  // Clear error from coroutine stack.
1980 |  // Copy error message.
1981 |.if X64
1982 |  mov RDa, [RA]
1983 |  mov [BASE], RDa
1984 |.else
1985 |  mov RD, [RA]
1986 |  mov [BASE], RD
1987 |  mov RD, [RA+4]
1988 |  mov [BASE+4], RD
1989 |.endif
1990 |  mov RD, 1+2  // nresults+1 = 1 + false + error.
1991 |  jmp <7
1992 |.else
1993 |  mov FCARG2, L:PC
1994 |  mov FCARG1, L:RB
1995 |  call extern lj_ffh_coroutine_wrap_err@8  // (lua_State *L, lua_State *co)
1996 |  // Error function does not return.
1997 |.endif
1998 |
1999 |9:  // Handle stack expansion on return from yield.
2000 |.if X64
2001 |  mov L:RA, TMP1
2002 |.else
2003 |  mov L:RA, ARG1  // The callee doesn't modify SAVE_L.
2004 |.endif
2005 |  mov L:RA->top, KBASE  // Undo coroutine stack clearing.
2006 |  mov FCARG2, PC  // PC = number of results, passed as n.
2007 |  mov FCARG1, L:RB
2008 |  call extern lj_state_growstack@8  // (lua_State *L, int n)
2009 |.if X64
2010 |  mov L:PC, TMP1  // Reload the coroutine into PC.
2011 |.else
2012 |  mov L:PC, ARG1
2013 |.endif
2014 |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
2015 |  jmp <4  // Retry the stack move.
2016 |.endmacro
2017 |
2018 |  coroutine_resume_wrap 1  // coroutine.resume
2019 |  coroutine_resume_wrap 0  // coroutine.wrap
2020 |
2021 |.ffunc coroutine_yield
2022 |  mov L:RB, SAVE_L
2023 |  test aword L:RB->cframe, CFRAME_RESUME
2024 |  jz ->fff_fallback  // CFRAME_RESUME not set in L->cframe: fallback.
2025 |  mov L:RB->base, BASE
2026 |  lea RD, [BASE+NARGS:RD*8-8]
2027 |  mov L:RB->top, RD  // L->top = BASE + nargs*8.
2028 |  xor RD, RD
2029 |  mov aword L:RB->cframe, RDa  // L->cframe = 0.
2030 |  mov al, LUA_YIELD  // al is the low byte of RD, so RD = LUA_YIELD.
2031 |  mov byte L:RB->status, al
2032 |  jmp ->vm_leave_unw
2033 |
2034 |//-- Math library -------------------------------------------------------
2035 |
2036 |.if not DUALNUM
2037 |->fff_resi:  // Dummy.
2038 |.endif
2039 |
2040 |->fff_resn:  // Return the number on top of the x87 stack.
2041 |  mov PC, [BASE-4]
2042 |  fstp qword [BASE-8]
2043 |  jmp ->fff_res1
2044 |
2045 |  .ffunc_1 math_abs
2046 |.if DUALNUM
2047 |  cmp dword [BASE+4], LJ_TISNUM; jne >2
2048 |  mov RB, dword [BASE]
2049 |  cmp RB, 0; jns ->fff_resi  // Non-negative integer: return it unchanged.
2050 |  neg RB; js >1  // Still negative after neg: result is 2^31, returned as a double at 1:.
2051 |->fff_resbit:
2052 |->fff_resi:  // Return the integer in RB.
2053 |  mov PC, [BASE-4]
2054 |  mov dword [BASE-4], LJ_TISNUM
2055 |  mov dword [BASE-8], RB
2056 |  jmp ->fff_res1
2057 |1:
2058 |  mov PC, [BASE-4]
2059 |  mov dword [BASE-4], 0x41e00000  // 2^31.
2060 |  mov dword [BASE-8], 0
2061 |  jmp ->fff_res1
2062 |2:
2063 |  ja ->fff_fallback  // Flags are still those of the cmp against LJ_TISNUM.
2064 |.else
2065 |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2066 |.endif
2067 |  movsd xmm0, qword [BASE]
2068 |  sseconst_abs xmm1, RDa
2069 |  andps xmm0, xmm1
2070 |->fff_resxmm0:  // Return the number in xmm0.
2071 |  mov PC, [BASE-4]
2072 |  movsd qword [BASE-8], xmm0
2073 |  // fallthrough
2074 |
2075 |->fff_res1:  // Return one result (already stored at BASE-8).
2076 |  mov RD, 1+1
2077 |->fff_res:  // RD = nresults+1.
2078 |  mov MULTRES, RD
2079 |->fff_res_:
2080 |  test PC, FRAME_TYPE
2081 |  jnz >7
2082 |5:
2083 |  cmp PC_RB, RDL  // More results expected?
2084 |  ja >6
2085 |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
2086 |  movzx RA, PC_RA
2087 |  not RAa  // Note: ~RA = -(RA+1)
2088 |  lea BASE, [BASE+RA*8]  // base = base - (RA+1)*8
2089 |  ins_next
2090 |
2091 |6:  // Fill up results with nil.
2092 |  mov dword [BASE+RD*8-12], LJ_TNIL
2093 |  add RD, 1
2094 |  jmp <5
2095 |
2096 |7:  // Non-standard return case.
2097 |  mov RAa, -8  // Results start at BASE+RA = BASE-8.
2098 |  jmp ->vm_return
2099 |
2100 |.if X64
2101 |.define fff_resfp, fff_resxmm0
2102 |.else
2103 |.define fff_resfp, fff_resn
2104 |.endif
2105 |
2106 |.macro math_round, func  // Defines ff_math_<func>, which calls vm_<func>_sse.
2107 |  .ffunc math_ .. func
2108 |.if DUALNUM
2109 |  cmp dword [BASE+4], LJ_TISNUM; jne >1
2110 |  mov RB, dword [BASE]; jmp ->fff_resi  // Integer: return it unchanged.
2111 |1:
2112 |  ja ->fff_fallback
2113 |.else
2114 |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2115 |.endif
2116 |  movsd xmm0, qword [BASE]
2117 |  call ->vm_ .. func .. _sse
2118 |.if DUALNUM
2119 |  cvttsd2si RB, xmm0
2120 |  cmp RB, 0x80000000  // 0x80000000 is also cvttsd2si's out-of-range result (see Intel SDM).
2121 |  jne ->fff_resi
2122 |  cvtsi2sd xmm1, RB  // Convert back and compare to tell a real -2^31 apart.
2123 |  ucomisd xmm0, xmm1
2124 |  jp ->fff_resxmm0  // Unordered: return as a double.
2125 |  je ->fff_resi
2126 |.endif
2127 |  jmp ->fff_resxmm0
2128 |.endmacro
2129 |
2130 |  math_round floor
2131 |  math_round ceil
2132 |
2133 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2134 |
2135 |.ffunc math_log
2136 |  cmp NARGS:RD, 1+1; jne ->fff_fallback  // Exactly one argument.
2137 |  cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2138 |  movsd xmm0, qword [BASE]
2139 |.if not X64
2140 |  movsd FPARG1, xmm0
2141 |.endif
2142 |  mov RB, BASE  // Save BASE across the C call.
2143 |  call extern log
2144 |  mov BASE, RB  // Restore BASE.
2145 |  jmp ->fff_resfp
2146 |
2147 |.macro math_extern, func  // ff_math_<func>: calls the external func with one number argument.
2148 |  .ffunc_nsse math_ .. func
2149 |.if not X64
2150 |  movsd FPARG1, xmm0
2151 |.endif
2152 |  mov RB, BASE  // Save BASE across the C call.
2153 |  call extern func
2154 |  mov BASE, RB  // Restore BASE.
2155 |  jmp ->fff_resfp
2156 |.endmacro
2157 |
2158 |.macro math_extern2, func  // ff_math_<func>: calls the external func with two number arguments.
2159 |  .ffunc_nnsse math_ .. func
2160 |.if not X64
2161 |  movsd FPARG1, xmm0
2162 |  movsd FPARG3, xmm1
2163 |.endif
2164 |  mov RB, BASE  // Save BASE across the C call.
2165 |  call extern func
2166 |  mov BASE, RB  // Restore BASE.
2167 |  jmp ->fff_resfp
2168 |.endmacro
2169 |
2170 |  math_extern log10
2171 |  math_extern exp
2172 |  math_extern sin
2173 |  math_extern cos
2174 |  math_extern tan
2175 |  math_extern asin
2176 |  math_extern acos
2177 |  math_extern atan
2178 |  math_extern sinh
2179 |  math_extern cosh
2180 |  math_extern tanh
2181 |  math_extern2 pow
2182 |  math_extern2 atan2
2183 |  math_extern2 fmod
2184 |
2185 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2186 |
2187 |.ffunc_1 math_frexp
2188 |  mov RB, [BASE+4]
2189 |  cmp RB, LJ_TISNUM; jae ->fff_fallback
2190 |  mov PC, [BASE-4]
2191 |  mov RC, [BASE]
2192 |  mov [BASE-4], RB; mov [BASE-8], RC  // Copy the argument to the 1st result slot.
2193 |  shl RB, 1; cmp RB, 0xffe00000; jae >3  // Shift out the sign bit. Exponent all ones: handled at 3:.
2194 |  or RC, RB; jz >3  // All remaining bits zero: handled at 3:.
2195 |  mov RC, 1022
2196 |  cmp RB, 0x00200000; jb >4  // Exponent field is zero: denormal, see 4:.
2197 |1:
2198 |  shr RB, 21; sub RB, RC  // Extract and unbias exponent.
2199 | cvtsi2sd xmm0, RB 2200 | mov RB, [BASE-4] 2201 | and RB, 0x800fffff // Mask off exponent. 2202 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2203 | mov [BASE-4], RB 2204 |2: 2205 | movsd qword [BASE], xmm0 2206 | mov RD, 1+2 2207 | jmp ->fff_res 2208 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2209 | xorps xmm0, xmm0; jmp <2 2210 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2211 | movsd xmm0, qword [BASE] 2212 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2213 | mulsd xmm0, xmm1 2214 | movsd qword [BASE-8], xmm0 2215 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2216 | 2217 |.ffunc_nsse math_modf 2218 | mov RB, [BASE+4] 2219 | mov PC, [BASE-4] 2220 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2221 | movaps xmm4, xmm0 2222 | call ->vm_trunc_sse 2223 | subsd xmm4, xmm0 2224 |1: 2225 | movsd qword [BASE-8], xmm0 2226 | movsd qword [BASE], xmm4 2227 | mov RC, [BASE-4]; mov RB, [BASE+4] 2228 | xor RC, RB; js >3 // Need to adjust sign? 2229 |2: 2230 | mov RD, 1+2 2231 | jmp ->fff_res 2232 |3: 2233 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2234 | jmp <2 2235 |4: 2236 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2237 | 2238 |.macro math_minmax, name, cmovop, sseop 2239 | .ffunc name 2240 | mov RA, 2 2241 | cmp dword [BASE+4], LJ_TISNUM 2242 |.if DUALNUM 2243 | jne >4 2244 | mov RB, dword [BASE] 2245 |1: // Handle integers. 2246 | cmp RA, RD; jae ->fff_resi 2247 | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 2248 | cmp RB, dword [BASE+RA*8-8] 2249 | cmovop RB, dword [BASE+RA*8-8] 2250 | add RA, 1 2251 | jmp <1 2252 |3: 2253 | ja ->fff_fallback 2254 | // Convert intermediate result to number and continue below. 2255 | cvtsi2sd xmm0, RB 2256 | jmp >6 2257 |4: 2258 | ja ->fff_fallback 2259 |.else 2260 | jae ->fff_fallback 2261 |.endif 2262 | 2263 | movsd xmm0, qword [BASE] 2264 |5: // Handle numbers or integers. 
2265 | cmp RA, RD; jae ->fff_resxmm0 2266 | cmp dword [BASE+RA*8-4], LJ_TISNUM 2267 |.if DUALNUM 2268 | jb >6 2269 | ja ->fff_fallback 2270 | cvtsi2sd xmm1, dword [BASE+RA*8-8] 2271 | jmp >7 2272 |.else 2273 | jae ->fff_fallback 2274 |.endif 2275 |6: 2276 | movsd xmm1, qword [BASE+RA*8-8] 2277 |7: 2278 | sseop xmm0, xmm1 2279 | add RA, 1 2280 | jmp <5 2281 |.endmacro 2282 | 2283 | math_minmax math_min, cmovg, minsd 2284 | math_minmax math_max, cmovl, maxsd 2285 | 2286 |//-- String library ----------------------------------------------------- 2287 | 2288 |.ffunc string_byte // Only handle the 1-arg case here. 2289 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2290 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2291 | mov STR:RB, [BASE] 2292 | mov PC, [BASE-4] 2293 | cmp dword STR:RB->len, 1 2294 | jb ->fff_res0 // Return no results for empty string. 2295 | movzx RB, byte STR:RB[1] 2296 |.if DUALNUM 2297 | jmp ->fff_resi 2298 |.else 2299 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 2300 |.endif 2301 | 2302 |.ffunc string_char // Only handle the 1-arg case here. 2303 | ffgccheck 2304 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. 2305 | cmp dword [BASE+4], LJ_TISNUM 2306 |.if DUALNUM 2307 | jne ->fff_fallback 2308 | mov RB, dword [BASE] 2309 | cmp RB, 255; ja ->fff_fallback 2310 | mov TMP2, RB 2311 |.else 2312 | jae ->fff_fallback 2313 | cvttsd2si RB, qword [BASE] 2314 | cmp RB, 255; ja ->fff_fallback 2315 | mov TMP2, RB 2316 |.endif 2317 |.if X64 2318 | mov TMP3, 1 2319 |.else 2320 | mov ARG3, 1 2321 |.endif 2322 | lea RDa, TMP2 // Points to stack. Little-endian. 2323 |->fff_newstr: 2324 | mov L:RB, SAVE_L 2325 | mov L:RB->base, BASE 2326 |.if X64 2327 | mov CARG3d, TMP3 // Zero-extended to size_t. 2328 | mov CARG2, RDa // May be 64 bit ptr to stack. 
2329 | mov CARG1d, L:RB 2330 |.else 2331 | mov ARG2, RD 2332 | mov ARG1, L:RB 2333 |.endif 2334 | mov SAVE_PC, PC 2335 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2336 |->fff_resstr: 2337 | // GCstr * returned in eax (RD). 2338 | mov BASE, L:RB->base 2339 | mov PC, [BASE-4] 2340 | mov dword [BASE-4], LJ_TSTR 2341 | mov [BASE-8], STR:RD 2342 | jmp ->fff_res1 2343 | 2344 |.ffunc string_sub 2345 | ffgccheck 2346 | mov TMP2, -1 2347 | cmp NARGS:RD, 1+2; jb ->fff_fallback 2348 | jna >1 2349 | cmp dword [BASE+20], LJ_TISNUM 2350 |.if DUALNUM 2351 | jne ->fff_fallback 2352 | mov RB, dword [BASE+16] 2353 | mov TMP2, RB 2354 |.else 2355 | jae ->fff_fallback 2356 | cvttsd2si RB, qword [BASE+16] 2357 | mov TMP2, RB 2358 |.endif 2359 |1: 2360 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2361 | cmp dword [BASE+12], LJ_TISNUM 2362 |.if DUALNUM 2363 | jne ->fff_fallback 2364 |.else 2365 | jae ->fff_fallback 2366 |.endif 2367 | mov STR:RB, [BASE] 2368 | mov TMP3, STR:RB 2369 | mov RB, STR:RB->len 2370 |.if DUALNUM 2371 | mov RA, dword [BASE+8] 2372 |.else 2373 | cvttsd2si RA, qword [BASE+8] 2374 |.endif 2375 | mov RC, TMP2 2376 | cmp RB, RC // len < end? (unsigned compare) 2377 | jb >5 2378 |2: 2379 | test RA, RA // start <= 0? 2380 | jle >7 2381 |3: 2382 | mov STR:RB, TMP3 2383 | sub RC, RA // start > end? 2384 | jl ->fff_emptystr 2385 | lea RB, [STR:RB+RA+#STR-1] 2386 | add RC, 1 2387 |4: 2388 |.if X64 2389 | mov TMP3, RC 2390 |.else 2391 | mov ARG3, RC 2392 |.endif 2393 | mov RD, RB 2394 | jmp ->fff_newstr 2395 | 2396 |5: // Negative end or overflow. 2397 | jl >6 2398 | lea RC, [RC+RB+1] // end = end+(len+1) 2399 | jmp <2 2400 |6: // Overflow. 2401 | mov RC, RB // end = len 2402 | jmp <2 2403 | 2404 |7: // Negative start or underflow. 2405 | je >8 2406 | add RA, RB // start = start+(len+1) 2407 | add RA, 1 2408 | jg <3 // start > 0? 2409 |8: // Underflow. 2410 | mov RA, 1 // start = 1 2411 | jmp <3 2412 | 2413 |->fff_emptystr: // Range underflow. 
2414 | xor RC, RC // Zero length. Any ptr in RB is ok. 2415 | jmp <4 2416 | 2417 |.macro ffstring_op, name 2418 | .ffunc_1 string_ .. name 2419 | ffgccheck 2420 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2421 | mov L:RB, SAVE_L 2422 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] 2423 | mov L:RB->base, BASE 2424 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE 2425 | mov RC, SBUF:FCARG1->b 2426 | mov SBUF:FCARG1->L, L:RB 2427 | mov SBUF:FCARG1->p, RC 2428 | mov SAVE_PC, PC 2429 | call extern lj_buf_putstr_ .. name .. @8 2430 | mov FCARG1, eax 2431 | call extern lj_buf_tostr@4 2432 | jmp ->fff_resstr 2433 |.endmacro 2434 | 2435 |ffstring_op reverse 2436 |ffstring_op lower 2437 |ffstring_op upper 2438 | 2439 |//-- Bit library -------------------------------------------------------- 2440 | 2441 |.macro .ffunc_bit, name, kind, fdef 2442 | fdef name 2443 |.if kind == 2 2444 | sseconst_tobit xmm1, RBa 2445 |.endif 2446 | cmp dword [BASE+4], LJ_TISNUM 2447 |.if DUALNUM 2448 | jne >1 2449 | mov RB, dword [BASE] 2450 |.if kind > 0 2451 | jmp >2 2452 |.else 2453 | jmp ->fff_resbit 2454 |.endif 2455 |1: 2456 | ja ->fff_fallback 2457 |.else 2458 | jae ->fff_fallback 2459 |.endif 2460 | movsd xmm0, qword [BASE] 2461 |.if kind < 2 2462 | sseconst_tobit xmm1, RBa 2463 |.endif 2464 | addsd xmm0, xmm1 2465 | movd RB, xmm0 2466 |2: 2467 |.endmacro 2468 | 2469 |.macro .ffunc_bit, name, kind 2470 | .ffunc_bit name, kind, .ffunc_1 2471 |.endmacro 2472 | 2473 |.ffunc_bit bit_tobit, 0 2474 | jmp ->fff_resbit 2475 | 2476 |.macro .ffunc_bit_op, name, ins 2477 | .ffunc_bit name, 2 2478 | mov TMP2, NARGS:RD // Save for fallback. 
2479 | lea RD, [BASE+NARGS:RD*8-16] 2480 |1: 2481 | cmp RD, BASE 2482 | jbe ->fff_resbit 2483 | cmp dword [RD+4], LJ_TISNUM 2484 |.if DUALNUM 2485 | jne >2 2486 | ins RB, dword [RD] 2487 | sub RD, 8 2488 | jmp <1 2489 |2: 2490 | ja ->fff_fallback_bit_op 2491 |.else 2492 | jae ->fff_fallback_bit_op 2493 |.endif 2494 | movsd xmm0, qword [RD] 2495 | addsd xmm0, xmm1 2496 | movd RA, xmm0 2497 | ins RB, RA 2498 | sub RD, 8 2499 | jmp <1 2500 |.endmacro 2501 | 2502 |.ffunc_bit_op bit_band, and 2503 |.ffunc_bit_op bit_bor, or 2504 |.ffunc_bit_op bit_bxor, xor 2505 | 2506 |.ffunc_bit bit_bswap, 1 2507 | bswap RB 2508 | jmp ->fff_resbit 2509 | 2510 |.ffunc_bit bit_bnot, 1 2511 | not RB 2512 |.if DUALNUM 2513 | jmp ->fff_resbit 2514 |.else 2515 |->fff_resbit: 2516 | cvtsi2sd xmm0, RB 2517 | jmp ->fff_resxmm0 2518 |.endif 2519 | 2520 |->fff_fallback_bit_op: 2521 | mov NARGS:RD, TMP2 // Restore for fallback 2522 | jmp ->fff_fallback 2523 | 2524 |.macro .ffunc_bit_sh, name, ins 2525 |.if DUALNUM 2526 | .ffunc_bit name, 1, .ffunc_2 2527 | // Note: no inline conversion from number for 2nd argument! 2528 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2529 | mov RA, dword [BASE+8] 2530 |.else 2531 | .ffunc_nnsse name 2532 | sseconst_tobit xmm2, RBa 2533 | addsd xmm0, xmm2 2534 | addsd xmm1, xmm2 2535 | movd RB, xmm0 2536 | movd RA, xmm1 2537 |.endif 2538 | ins RB, cl // Assumes RA is ecx. 2539 | jmp ->fff_resbit 2540 |.endmacro 2541 | 2542 |.ffunc_bit_sh bit_lshift, shl 2543 |.ffunc_bit_sh bit_rshift, shr 2544 |.ffunc_bit_sh bit_arshift, sar 2545 |.ffunc_bit_sh bit_rol, rol 2546 |.ffunc_bit_sh bit_ror, ror 2547 | 2548 |//----------------------------------------------------------------------- 2549 | 2550 |->fff_fallback_2: 2551 | mov NARGS:RD, 1+2 // Other args are ignored, anyway. 2552 | jmp ->fff_fallback 2553 |->fff_fallback_1: 2554 | mov NARGS:RD, 1+1 // Other args are ignored, anyway. 2555 |->fff_fallback: // Call fast function fallback handler. 
2556 | // BASE = new base, RD = nargs+1 2557 | mov L:RB, SAVE_L 2558 | mov PC, [BASE-4] // Fallback may overwrite PC. 2559 | mov SAVE_PC, PC // Redundant (but a defined value). 2560 | mov L:RB->base, BASE 2561 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, stored as L->top below. 2562 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. 2563 | mov L:RB->top, RD 2564 | mov CFUNC:RD, [BASE-8] // Function object from the slot at BASE-8; its ->f is called below. 2565 | cmp RA, L:RB->maxstack 2566 | ja >5 // Need to grow stack. 2567 |.if X64 2568 | mov CARG1d, L:RB 2569 |.else 2570 | mov ARG1, L:RB 2571 |.endif 2572 | call aword CFUNC:RD->f // (lua_State *L) 2573 | mov BASE, L:RB->base 2574 | // Either throws an error, or recovers and returns -1, 0 or nresults+1. 2575 | test RD, RD; jg ->fff_res // Returned nresults+1? 2576 |1: 2577 | mov RA, L:RB->top 2578 | sub RA, BASE 2579 | shr RA, 3 // RA = (top - base) / 8 = number of slots. 2580 | test RD, RD 2581 | lea NARGS:RD, [RA+1] // lea keeps the flags of the test above for the jne below. 2582 | mov LFUNC:RB, [BASE-8] 2583 | jne ->vm_call_tail // Returned -1? 2584 | ins_callt // Returned 0: retry fast path. 2585 | 2586 |// Reconstruct previous base for vmeta_call during tailcall. 2587 |->vm_call_tail: 2588 | mov RA, BASE 2589 | test PC, FRAME_TYPE 2590 | jnz >3 // Frame type bits set: use the delta held in PC instead of PC_RA. 2591 | movzx RB, PC_RA 2592 | not RBa // Note: ~RB = -(RB+1) 2593 | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8 2594 | jmp ->vm_call_dispatch // Resolve again for tailcall. 2595 |3: 2596 | mov RB, PC 2597 | and RB, -8 // Mask off the low 3 bits of PC. 2598 | sub BASE, RB // base = base - (PC & -8) 2599 | jmp ->vm_call_dispatch // Resolve again for tailcall. 2600 | 2601 |5: // Grow stack for fallback handler. 2602 | mov FCARG2, LUA_MINSTACK 2603 | mov FCARG1, L:RB 2604 | call extern lj_state_growstack@8 // (lua_State *L, int n) 2605 | mov BASE, L:RB->base 2606 | xor RD, RD // Simulate a return 0. 2607 | jmp <1 // Dumb retry (goes through ff first). 2608 | 2609 |->fff_gcstep: // Call GC step function. 2610 | // BASE = new base, RD = nargs+1 2611 | pop RBa // Must keep stack at same level. 2612 | mov TMPa, RBa // Save return address. 2613 | mov L:RB, SAVE_L 2614 | mov SAVE_PC, PC // Redundant (but a defined value). 
2615 | mov L:RB->base, BASE 2616 | lea RD, [BASE+NARGS:RD*8-8] 2617 | mov FCARG1, L:RB 2618 | mov L:RB->top, RD 2619 | call extern lj_gc_step@4 // (lua_State *L) 2620 | mov BASE, L:RB->base 2621 | mov RD, L:RB->top 2622 | sub RD, BASE 2623 | shr RD, 3 2624 | add NARGS:RD, 1 // RD = (top - base)/8 + 1, i.e. nargs+1 again. 2625 | mov RBa, TMPa 2626 | push RBa // Restore return address. 2627 | ret 2628 | 2629 |//----------------------------------------------------------------------- 2630 |//-- Special dispatch targets ------------------------------------------- 2631 |//----------------------------------------------------------------------- 2632 | 2633 |->vm_record: // Dispatch target for recording phase. 2634 |.if JIT 2635 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2636 | test RDL, HOOK_VMEVENT // No recording while in vmevent. 2637 | jnz >5 // Skip the call and only re-dispatch. 2638 | // Decrement the hookcount for consistency, but always do the call. 2639 | test RDL, HOOK_ACTIVE 2640 | jnz >1 2641 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT 2642 | jz >1 2643 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] 2644 | jmp >1 2645 |.endif 2646 | 2647 |->vm_rethook: // Dispatch target for return hooks. 2648 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2649 | test RDL, HOOK_ACTIVE // Hook already active? 2650 | jnz >5 2651 | jmp >1 2652 | 2653 |->vm_inshook: // Dispatch target for instr/line hooks. 2654 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] 2655 | test RDL, HOOK_ACTIVE // Hook already active? 2656 | jnz >5 2657 | 2658 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT 2659 | jz >5 // Neither line nor count hook set: only re-dispatch. 2660 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] 2661 | jz >1 // Counter reached zero: call lj_dispatch_ins. 2662 | test RDL, LUA_MASKLINE 2663 | jz >5 // No line hook either: only re-dispatch. 2664 |1: 2665 | mov L:RB, SAVE_L 2666 | mov L:RB->base, BASE 2667 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2668 | mov FCARG1, L:RB 2669 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 
2670 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) 2671 |3: 2672 | mov BASE, L:RB->base // Reload BASE from L->base after the C call. 2673 |4: 2674 | movzx RA, PC_RA 2675 |5: 2676 | movzx OP, PC_OP 2677 | movzx RD, PC_RD 2678 |.if X64 2679 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. 2680 |.else 2681 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins. 2682 |.endif 2683 | 2684 |->cont_hook: // Continue from hook yield. 2685 | add PC, 4 2686 | mov RA, [RB-24] 2687 | mov MULTRES, RA // Restore MULTRES for *M ins. 2688 | jmp <4 2689 | 2690 |->vm_hotloop: // Hot loop counter underflow. 2691 |.if JIT 2692 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). 2693 | mov RB, LFUNC:RB->pc 2694 | movzx RD, byte [RB+PC2PROTO(framesize)] 2695 | lea RD, [BASE+RD*8] // top = base + framesize*8 2696 | mov L:RB, SAVE_L 2697 | mov L:RB->base, BASE 2698 | mov L:RB->top, RD 2699 | mov FCARG2, PC 2700 | lea FCARG1, [DISPATCH+GG_DISP2J] 2701 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2702 | mov SAVE_PC, PC 2703 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) 2704 | jmp <3 // Shares the reload/re-dispatch tail of the hook handlers. 2705 |.endif 2706 | 2707 |->vm_callhook: // Dispatch target for call hooks. 2708 | mov SAVE_PC, PC 2709 |.if JIT 2710 | jmp >1 // Skip the hot call marker below. 2711 |.endif 2712 | 2713 |->vm_hotcall: // Hot call counter underflow. 2714 |.if JIT 2715 | mov SAVE_PC, PC 2716 | or PC, 1 // Marker for hot call. 2717 |1: 2718 |.endif 2719 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, stored as L->top below. 2720 | mov L:RB, SAVE_L 2721 | mov L:RB->base, BASE 2722 | mov L:RB->top, RD 2723 | mov FCARG2, PC 2724 | mov FCARG1, L:RB 2725 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) 2726 | // ASMFunction returned in eax/rax (RDa). 2727 | mov SAVE_PC, 0 // Invalidate for subsequent line hook. 2728 |.if JIT 2729 | and PC, -2 // Clear the hot call marker bit again. 2730 |.endif 2731 | mov BASE, L:RB->base 2732 | mov RAa, RDa // Park the returned ASMFunction while RD is recomputed. 2733 | mov RD, L:RB->top 2734 | sub RD, BASE 2735 | mov RBa, RAa 2736 | movzx RA, PC_RA 2737 | shr RD, 3 2738 | add NARGS:RD, 1 // RD = (top - base)/8 + 1 = nargs+1. 2739 | jmp RBa // Continue at the ASMFunction returned by lj_dispatch_call. 2740 | 2741 |->cont_stitch: // Trace stitching. 
2742 |.if JIT 2743 | // BASE = base, RC = result, RB = mbase 2744 | mov TRACE:RA, [RB-24] // Save previous trace. 2745 | mov TMP1, TRACE:RA 2746 | mov TMP3, DISPATCH // Need one more register. 2747 | mov DISPATCH, MULTRES 2748 | movzx RA, PC_RA 2749 | lea RA, [BASE+RA*8] // Call base. 2750 | sub DISPATCH, 1 2751 | jz >2 2752 |1: // Move results down. 2753 |.if X64 2754 | mov RBa, [RC] 2755 | mov [RA], RBa 2756 |.else 2757 | mov RB, [RC] 2758 | mov [RA], RB 2759 | mov RB, [RC+4] 2760 | mov [RA+4], RB 2761 |.endif 2762 | add RC, 8 2763 | add RA, 8 2764 | sub DISPATCH, 1 2765 | jnz <1 2766 |2: 2767 | movzx RC, PC_RA 2768 | movzx RB, PC_RB 2769 | add RC, RB 2770 | lea RC, [BASE+RC*8-8] 2771 |3: 2772 | cmp RC, RA 2773 | ja >9 // More results wanted? 2774 | 2775 | mov DISPATCH, TMP3 2776 | mov TRACE:RD, TMP1 // Get previous trace. 2777 | movzx RB, word TRACE:RD->traceno 2778 | movzx RD, word TRACE:RD->link 2779 | cmp RD, RB 2780 | je ->cont_nop // Blacklisted. 2781 | test RD, RD 2782 | jne =>BC_JLOOP // Jump to stitched trace. 2783 | 2784 | // Stitch a new trace to the previous trace. 2785 | mov [DISPATCH+DISPATCH_J(exitno)], RB 2786 | mov L:RB, SAVE_L 2787 | mov L:RB->base, BASE 2788 | mov FCARG2, PC 2789 | lea FCARG1, [DISPATCH+GG_DISP2J] 2790 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2791 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) 2792 | mov BASE, L:RB->base 2793 | jmp ->cont_nop 2794 | 2795 |9: // Fill up results with nil. 2796 | mov dword [RA+4], LJ_TNIL 2797 | add RA, 8 2798 | jmp <3 2799 |.endif 2800 | 2801 |->vm_profhook: // Dispatch target for profiler hook. 2802 #if LJ_HASPROFILE 2803 | mov L:RB, SAVE_L 2804 | mov L:RB->base, BASE 2805 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2806 | mov FCARG1, L:RB 2807 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) 2808 | mov BASE, L:RB->base 2809 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
2810 | sub PC, 4 2811 | jmp ->cont_nop 2812 #endif 2813 | 2814 |//----------------------------------------------------------------------- 2815 |//-- Trace exit handler ------------------------------------------------- 2816 |//----------------------------------------------------------------------- 2817 | 2818 |// Called from an exit stub with the exit number on the stack. 2819 |// The 16 bit exit number is stored with two (sign-extended) push imm8. 2820 |->vm_exit_handler: 2821 |.if JIT 2822 |.if X64 2823 | push r13; push r12 2824 | push r11; push r10; push r9; push r8 2825 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp 2826 | push rbx; push rdx; push rcx; push rax 2827 | movzx RC, byte [rbp-8] // Reconstruct exit number. 2828 | mov RCH, byte [rbp-16] 2829 | mov [rbp-8], r15; mov [rbp-16], r14 2830 |.else 2831 | push ebp; lea ebp, [esp+12]; push ebp 2832 | push ebx; push edx; push ecx; push eax 2833 | movzx RC, byte [ebp-4] // Reconstruct exit number. 2834 | mov RCH, byte [ebp-8] 2835 | mov [ebp-4], edi; mov [ebp-8], esi 2836 |.endif 2837 | // Caveat: DISPATCH is ebx. 2838 | mov DISPATCH, [ebp] 2839 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. 2840 | set_vmstate EXIT 2841 | mov [DISPATCH+DISPATCH_J(exitno)], RC 2842 | mov [DISPATCH+DISPATCH_J(parent)], RA 2843 |.if X64 2844 |.if X64WIN 2845 | sub rsp, 16*8+4*8 // Room for SSE regs + save area. 2846 |.else 2847 | sub rsp, 16*8 // Room for SSE regs. 
2848 |.endif 2849 | add rbp, -128 2850 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 2851 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 2852 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 2853 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 2854 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 2855 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 2856 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 2857 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 2858 |.else 2859 | sub esp, 8*8+16 // Room for SSE regs + args. 2860 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 2861 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 2862 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 2863 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2864 |.endif 2865 | // Caveat: RB is ebp. 2866 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] 2867 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2868 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2869 | mov L:RB->base, BASE 2870 |.if X64WIN 2871 | lea CARG2, [rsp+4*8] 2872 |.elif X64 2873 | mov CARG2, rsp 2874 |.else 2875 | lea FCARG2, [esp+16] 2876 |.endif 2877 | lea FCARG1, [DISPATCH+GG_DISP2J] 2878 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 2879 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2880 | // MULTRES or negated error code returned in eax (RD). 2881 | mov RAa, L:RB->cframe 2882 | and RAa, CFRAME_RAWMASK 2883 |.if X64WIN 2884 | // Reposition stack later. 2885 |.elif X64 2886 | mov rsp, RAa // Reposition stack to C frame. 2887 |.else 2888 | mov esp, RAa // Reposition stack to C frame. 2889 |.endif 2890 | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). 2891 | mov BASE, L:RB->base 2892 | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC. 2893 |.if X64 2894 | jmp >1 2895 |.endif 2896 |.endif 2897 |->vm_exit_interp: 2898 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. 
2899 |.if JIT 2900 |.if X64 2901 | // Restore additional callee-save registers only used in compiled code. 2902 |.if X64WIN 2903 | lea RAa, [rsp+9*16+4*8] 2904 |1: 2905 | movdqa xmm15, [RAa-9*16] 2906 | movdqa xmm14, [RAa-8*16] 2907 | movdqa xmm13, [RAa-7*16] 2908 | movdqa xmm12, [RAa-6*16] 2909 | movdqa xmm11, [RAa-5*16] 2910 | movdqa xmm10, [RAa-4*16] 2911 | movdqa xmm9, [RAa-3*16] 2912 | movdqa xmm8, [RAa-2*16] 2913 | movdqa xmm7, [RAa-1*16] 2914 | mov rsp, RAa // Reposition stack to C frame. 2915 | movdqa xmm6, [RAa] 2916 | mov r15, CSAVE_3 2917 | mov r14, CSAVE_4 2918 |.else 2919 | add rsp, 16 // Reposition stack to C frame. 2920 |1: 2921 |.endif 2922 | mov r13, TMPa 2923 | mov r12, TMPQ 2924 |.endif 2925 | test RD, RD; js >9 // Check for error from exit. 2926 | mov L:RB, SAVE_L 2927 | mov MULTRES, RD 2928 | mov LFUNC:KBASE, [BASE-8] 2929 | mov KBASE, LFUNC:KBASE->pc 2930 | mov KBASE, [KBASE+PC2PROTO(k)] 2931 | mov L:RB->base, BASE 2932 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 2933 | set_vmstate INTERP 2934 | // Modified copy of ins_next which handles function header dispatch, too. 2935 | mov RC, [PC] 2936 | movzx RA, RCH 2937 | movzx OP, RCL 2938 | add PC, 4 2939 | shr RC, 16 2940 | cmp OP, BC_FUNCF // Function header? 2941 | jb >3 2942 | cmp OP, BC_FUNCC+2 // Fast function? 2943 | jae >4 2944 |2: 2945 | mov RC, MULTRES // RC/RD holds nres+1. 2946 |3: 2947 |.if X64 2948 | jmp aword [DISPATCH+OP*8] 2949 |.else 2950 | jmp aword [DISPATCH+OP*4] 2951 |.endif 2952 | 2953 |4: // Check frame below fast function. 2954 | mov RC, [BASE-4] 2955 | test RC, FRAME_TYPE 2956 | jnz <2 // Trace stitching continuation? 2957 | // Otherwise set KBASE for Lua function below fast function. 2958 | movzx RC, byte [RC-3] 2959 | not RCa 2960 | mov LFUNC:KBASE, [BASE+RC*8-8] 2961 | mov KBASE, LFUNC:KBASE->pc 2962 | mov KBASE, [KBASE+PC2PROTO(k)] 2963 | jmp <2 2964 | 2965 |9: // Rethrow error from the right C frame. 
2966 | neg RD 2967 | mov FCARG1, L:RB 2968 | mov FCARG2, RD 2969 | call extern lj_err_throw@8 // (lua_State *L, int errcode) 2970 |.endif 2971 | 2972 |//----------------------------------------------------------------------- 2973 |//-- Math helper functions ---------------------------------------------- 2974 |//----------------------------------------------------------------------- 2975 | 2976 |// FP value rounding. Called by math.floor/math.ceil fast functions 2977 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2978 |// Macro args: name = label to define, mode = 0 floor / 1 ceil / 2 trunc, 2979 |// cond = whether the non-X64 stack-argument entry point is emitted. 2978 |.macro vm_round, name, mode, cond 2979 |->name: 2980 |.if not X64 and cond 2981 | movsd xmm0, qword [esp+4] 2982 | call ->name .. _sse 2983 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. 2984 | fld qword [esp+4] // Return the result on the x87 stack. 2985 | ret 2986 |.endif 2987 | 2988 |->name .. _sse: 2989 | sseconst_abs xmm2, RDa 2990 | sseconst_2p52 xmm3, RDa 2991 | movaps xmm1, xmm0 2992 | andpd xmm1, xmm2 // |x| 2993 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. 2994 | jbe >1 // Also taken for NaN: an unordered compare sets both CF and ZF. 2995 | andnpd xmm2, xmm0 // Isolate sign bit. 2996 |.if mode == 2 // trunc(x)? 2997 | movaps xmm0, xmm1 2998 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 2999 | subsd xmm1, xmm3 3000 | sseconst_1 xmm3, RDa 3001 | cmpsd xmm0, xmm1, 1 // |x| < result? 3002 | andpd xmm0, xmm3 // The compare mask selects 1.0 or +0. 3003 | subsd xmm1, xmm0 // If yes, subtract 1.0 (rounded up in magnitude). 3004 | orpd xmm1, xmm2 // Merge sign bit back in. 3005 |.else 3006 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 3007 | subsd xmm1, xmm3 3008 | orpd xmm1, xmm2 // Merge sign bit back in. 3009 | .if mode == 1 // ceil(x)? 3010 | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0. 3011 | cmpsd xmm0, xmm1, 6 // x > result? 3012 | .else // floor(x)? 3013 | sseconst_1 xmm2, RDa 3014 | cmpsd xmm0, xmm1, 1 // x < result? 3015 | .endif 3016 | andpd xmm0, xmm2 // The compare mask selects the +-1 constant or +0. 3017 | subsd xmm1, xmm0 // If yes, subtract +-1. 
3018 |.endif 3019 | movaps xmm0, xmm1 3020 |1: 3021 | ret 3022 |.endmacro 3023 | 3024 | vm_round vm_floor, 0, 1 3025 | vm_round vm_ceil, 1, JIT 3026 | vm_round vm_trunc, 2, JIT 3027 | 3028 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3029 |->vm_mod: 3030 |// Args in xmm0/xmm1, return value in xmm0. 3031 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3032 | movaps xmm5, xmm0 // Keep x for the final subtraction. 3033 | divsd xmm0, xmm1 // xmm0 = x/y 3034 | sseconst_abs xmm2, RDa 3035 | sseconst_2p52 xmm3, RDa 3036 | movaps xmm4, xmm0 3037 | andpd xmm4, xmm2 // |x/y| 3038 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. 3039 | jbe >1 // Also taken for NaN: an unordered compare sets both CF and ZF. 3040 | andnpd xmm2, xmm0 // Isolate sign bit. 3041 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 3042 | subsd xmm4, xmm3 3043 | orpd xmm4, xmm2 // Merge sign bit back in. 3044 | sseconst_1 xmm2, RDa 3045 | cmpsd xmm0, xmm4, 1 // x/y < result? 3046 | andpd xmm0, xmm2 // The compare mask selects 1.0 or +0. 3047 | subsd xmm4, xmm0 // If yes, subtract 1.0. 3048 | movaps xmm0, xmm5 3049 | mulsd xmm1, xmm4 // y * floor(x/y) 3050 | subsd xmm0, xmm1 // x - y*floor(x/y) 3051 | ret 3052 |1: 3053 | mulsd xmm1, xmm0 // y * (x/y): the quotient is used unrounded here. 3054 | movaps xmm0, xmm5 3055 | subsd xmm0, xmm1 // x - y*(x/y) 3056 | ret 3057 | 3058 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3059 |->vm_powi_sse: 3060 | cmp eax, 1; jle >6 // i<=1? 3061 | // Now 1 < (unsigned)i <= 0x80000000. 3062 |1: // Handle leading zeros, i.e. zero bits at the low end of i. 3063 | test eax, 1; jnz >2 3064 | mulsd xmm0, xmm0 // Square x once per low zero bit. 3065 | shr eax, 1 3066 | jmp <1 3067 |2: 3068 | shr eax, 1; jz >5 // Only one bit was set: xmm0 already holds the result. 3069 | movaps xmm1, xmm0 // xmm1 accumulates the product of the selected powers. 3070 |3: // Handle trailing bits. 
3071 | mulsd xmm0, xmm0 3072 | shr eax, 1; jz >4 3073 | jnc <3 3074 | mulsd xmm1, xmm0 3075 | jmp <3 3076 |4: 3077 | mulsd xmm0, xmm1 3078 |5: 3079 | ret 3080 |6: 3081 | je <5 // x^1 ==> x 3082 | jb >7 // x^0 ==> 1 3083 | neg eax // Negative i: compute x^(-i) first. 3084 | call <1 3085 | sseconst_1 xmm1, RDa 3086 | divsd xmm1, xmm0 // 1 / x^(-i) 3087 | movaps xmm0, xmm1 3088 | ret 3089 |7: 3090 | sseconst_1 xmm0, RDa 3091 | ret 3092 | 3093 |//----------------------------------------------------------------------- 3094 |//-- Miscellaneous functions -------------------------------------------- 3095 |//----------------------------------------------------------------------- 3096 | 3097 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) 3098 |->vm_cpuid: 3099 |.if X64 3100 | mov eax, CARG1d // Function number. 3101 | .if X64WIN; push rsi; mov rsi, CARG2; .endif // WIN64: save rsi and use it for the result pointer. 3102 | push rbx // rbx is saved around cpuid, which returns a value in ebx. 3103 | xor ecx, ecx 3104 | cpuid 3105 | mov [rsi], eax // res[0..3] = eax, ebx, ecx, edx. 3106 | mov [rsi+4], ebx 3107 | mov [rsi+8], ecx 3108 | mov [rsi+12], edx 3109 | pop rbx 3110 | .if X64WIN; pop rsi; .endif 3111 | ret 3112 |.else 3113 | pushfd 3114 | pop edx 3115 | mov ecx, edx // Keep the original flags for the comparison below. 3116 | xor edx, 0x00200000 // Toggle ID bit in flags. 3117 | push edx 3118 | popfd 3119 | pushfd 3120 | pop edx // Read the flags back to see whether the toggle stuck. 3121 | xor eax, eax // Zero means no features supported. 3122 | cmp ecx, edx 3123 | jz >1 // No ID toggle means no CPUID support. 3124 | mov eax, [esp+4] // Argument 1 is function number. 3125 | push edi 3126 | push ebx 3127 | xor ecx, ecx 3128 | cpuid 3129 | mov edi, [esp+16] // Argument 2 is result area (offset includes the two pushes above). 
3130 | mov [edi], eax 3131 | mov [edi+4], ebx 3132 | mov [edi+8], ecx 3133 | mov [edi+12], edx 3134 | pop ebx 3135 | pop edi 3136 |1: 3137 | ret 3138 |.endif 3139 | 3140 |//----------------------------------------------------------------------- 3141 |//-- Assertions --------------------------------------------------------- 3142 |//----------------------------------------------------------------------- 3143 | 3144 |->assert_bad_for_arg_type: 3145 #ifdef LUA_USE_ASSERT 3146 | int3 3147 #endif 3148 | int3 3149 | 3150 |//----------------------------------------------------------------------- 3151 |//-- FFI helper functions ----------------------------------------------- 3152 |//----------------------------------------------------------------------- 3153 | 3154 |// Handler for callback functions. Callback slot number in ah/al. 3155 |->vm_ffi_callback: 3156 |.if FFI 3157 |.type CTSTATE, CTState, PC 3158 |.if not X64 3159 | sub esp, 16 // Leave room for SAVE_ERRF etc. 3160 |.endif 3161 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. 
3162 | lea DISPATCH, [ebp+GG_G2DISP] 3163 | mov CTSTATE, GL:ebp->ctype_state 3164 | movzx eax, ax 3165 | mov CTSTATE->cb.slot, eax 3166 |.if X64 3167 | mov CTSTATE->cb.gpr[0], CARG1 3168 | mov CTSTATE->cb.gpr[1], CARG2 3169 | mov CTSTATE->cb.gpr[2], CARG3 3170 | mov CTSTATE->cb.gpr[3], CARG4 3171 | movsd qword CTSTATE->cb.fpr[0], xmm0 3172 | movsd qword CTSTATE->cb.fpr[1], xmm1 3173 | movsd qword CTSTATE->cb.fpr[2], xmm2 3174 | movsd qword CTSTATE->cb.fpr[3], xmm3 3175 |.if X64WIN 3176 | lea rax, [rsp+CFRAME_SIZE+4*8] 3177 |.else 3178 | lea rax, [rsp+CFRAME_SIZE] 3179 | mov CTSTATE->cb.gpr[4], CARG5 3180 | mov CTSTATE->cb.gpr[5], CARG6 3181 | movsd qword CTSTATE->cb.fpr[4], xmm4 3182 | movsd qword CTSTATE->cb.fpr[5], xmm5 3183 | movsd qword CTSTATE->cb.fpr[6], xmm6 3184 | movsd qword CTSTATE->cb.fpr[7], xmm7 3185 |.endif 3186 | mov CTSTATE->cb.stack, rax 3187 | mov CARG2, rsp 3188 |.else 3189 | lea eax, [esp+CFRAME_SIZE+16] 3190 | mov CTSTATE->cb.gpr[0], FCARG1 3191 | mov CTSTATE->cb.gpr[1], FCARG2 3192 | mov CTSTATE->cb.stack, eax 3193 | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp. 3194 | mov FCARG2, [esp+CFRAME_SIZE+8] 3195 | mov SAVE_RET, FCARG1 3196 | mov SAVE_R4, FCARG2 3197 | mov FCARG2, esp 3198 |.endif 3199 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. 3200 | mov FCARG1, CTSTATE 3201 | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) 3202 | // lua_State * returned in eax (RD). 3203 | set_vmstate INTERP 3204 | mov BASE, L:RD->base 3205 | mov RD, L:RD->top 3206 | sub RD, BASE 3207 | mov LFUNC:RB, [BASE-8] 3208 | shr RD, 3 3209 | add RD, 1 3210 | ins_callt 3211 |.endif 3212 | 3213 |->cont_ffi_callback: // Return from FFI callback. 
3214 |.if FFI 3215 | mov L:RA, SAVE_L 3216 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] 3217 | mov aword CTSTATE->L, L:RAa 3218 | mov L:RA->base, BASE 3219 | mov L:RA->top, RB 3220 | mov FCARG1, CTSTATE 3221 | mov FCARG2, RC 3222 | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o) 3223 |.if X64 3224 | mov rax, CTSTATE->cb.gpr[0] 3225 | movsd xmm0, qword CTSTATE->cb.fpr[0] 3226 | jmp ->vm_leave_unw 3227 |.else 3228 | mov L:RB, SAVE_L 3229 | mov eax, CTSTATE->cb.gpr[0] 3230 | mov edx, CTSTATE->cb.gpr[1] 3231 | cmp dword CTSTATE->cb.gpr[2], 1 3232 | jb >7 3233 | je >6 3234 | fld qword CTSTATE->cb.fpr[0].d 3235 | jmp >7 3236 |6: 3237 | fld dword CTSTATE->cb.fpr[0].f 3238 |7: 3239 | mov ecx, L:RB->top 3240 | movzx ecx, word [ecx+6] // Get stack adjustment and copy up. 3241 | mov SAVE_L, ecx // Must be one slot above SAVE_RET 3242 | restoreregs 3243 | pop ecx // Move return addr from SAVE_RET. 3244 | add esp, [esp] // Adjust stack. 3245 | add esp, 16 3246 | push ecx 3247 | ret 3248 |.endif 3249 |.endif 3250 | 3251 |->vm_ffi_call@4: // Call C function via FFI. 3252 | // Caveat: needs special frame unwinding, see below. 3253 |.if FFI 3254 |.if X64 3255 | .type CCSTATE, CCallState, rbx 3256 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 3257 |.else 3258 | .type CCSTATE, CCallState, ebx 3259 | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 3260 |.endif 3261 | 3262 | // Readjust stack. 3263 |.if X64 3264 | mov eax, CCSTATE->spadj 3265 | sub rsp, rax 3266 |.else 3267 | sub esp, CCSTATE->spadj 3268 |.if WIN 3269 | mov CCSTATE->spadj, esp 3270 |.endif 3271 |.endif 3272 | 3273 | // Copy stack slots. 
3274 | movzx ecx, byte CCSTATE->nsp 3275 | sub ecx, 1 3276 | js >2 3277 |1: 3278 |.if X64 3279 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] 3280 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax 3281 |.else 3282 | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] 3283 | mov [esp+ecx*4], eax 3284 |.endif 3285 | sub ecx, 1 3286 | jns <1 3287 |2: 3288 | 3289 |.if X64 3290 | movzx eax, byte CCSTATE->nfpr 3291 | mov CARG1, CCSTATE->gpr[0] 3292 | mov CARG2, CCSTATE->gpr[1] 3293 | mov CARG3, CCSTATE->gpr[2] 3294 | mov CARG4, CCSTATE->gpr[3] 3295 |.if not X64WIN 3296 | mov CARG5, CCSTATE->gpr[4] 3297 | mov CARG6, CCSTATE->gpr[5] 3298 |.endif 3299 | test eax, eax; jz >5 3300 | movaps xmm0, CCSTATE->fpr[0] 3301 | movaps xmm1, CCSTATE->fpr[1] 3302 | movaps xmm2, CCSTATE->fpr[2] 3303 | movaps xmm3, CCSTATE->fpr[3] 3304 |.if not X64WIN 3305 | cmp eax, 4; jbe >5 3306 | movaps xmm4, CCSTATE->fpr[4] 3307 | movaps xmm5, CCSTATE->fpr[5] 3308 | movaps xmm6, CCSTATE->fpr[6] 3309 | movaps xmm7, CCSTATE->fpr[7] 3310 |.endif 3311 |5: 3312 |.else 3313 | mov FCARG1, CCSTATE->gpr[0] 3314 | mov FCARG2, CCSTATE->gpr[1] 3315 |.endif 3316 | 3317 | call aword CCSTATE->func 3318 | 3319 |.if X64 3320 | mov CCSTATE->gpr[0], rax 3321 | movaps CCSTATE->fpr[0], xmm0 3322 |.if not X64WIN 3323 | mov CCSTATE->gpr[1], rdx 3324 | movaps CCSTATE->fpr[1], xmm1 3325 |.endif 3326 |.else 3327 | mov CCSTATE->gpr[0], eax 3328 | mov CCSTATE->gpr[1], edx 3329 | cmp byte CCSTATE->resx87, 1 3330 | jb >7 3331 | je >6 3332 | fstp qword CCSTATE->fpr[0].d[0] 3333 | jmp >7 3334 |6: 3335 | fstp dword CCSTATE->fpr[0].f[0] 3336 |7: 3337 |.if WIN 3338 | sub CCSTATE->spadj, esp 3339 |.endif 3340 |.endif 3341 | 3342 |.if X64 3343 | mov rbx, [rbp-8]; leave; ret 3344 |.else 3345 | mov ebx, [ebp-4]; leave; ret 3346 |.endif 3347 |.endif 3348 |// Note: vm_ffi_call must be the last function in this object file! 
3349 |
3350 |//-----------------------------------------------------------------------
3351 }
3352
3353 /* Generate the code for a single instruction. */
/* op selects the bytecode case below; defop is the label number emitted as =>defop. */
3354 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3355 {
3356 int vk = 0;
3357 |// Note: aligning all instructions does not pay off.
3358 |=>defop:
3359
3360 switch (op) {
3361
3362 /* -- Comparison ops ---------------------------------------------------- */
3363
3364 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3365
/* jmp_comp emits exactly one of the four given jump instructions to target,
** chosen at build time by op (ISLT/ISGE/ISLE/ISGT in argument order).
*/
3366 |.macro jmp_comp, lt, ge, le, gt, target
3367 ||switch (op) {
3368 ||case BC_ISLT:
3369 | lt target
3370 ||break;
3371 ||case BC_ISGE:
3372 | ge target
3373 ||break;
3374 ||case BC_ISLE:
3375 | le target
3376 ||break;
3377 ||case BC_ISGT:
3378 | gt target
3379 ||break;
3380 ||default: break; /* Shut up GCC. */
3381 ||}
3382 |.endmacro
3383
/* Ordered comparison of slots RA and RD. The jumps passed to jmp_comp are the
** inverted conditions: they skip the branch code, which is the fall-through.
** Operands that are not numbers are handled by ->vmeta_comp.
*/
3384 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3385 | // RA = src1, RD = src2, JMP with RD = target
3386 | ins_AD
3387 |.if DUALNUM
3388 | checkint RA, >7
3389 | checkint RD, >8
3390 | mov RB, dword [BASE+RA*8]
3391 | add PC, 4
3392 | cmp RB, dword [BASE+RD*8]
3393 | jmp_comp jge, jl, jg, jle, >9
3394 |6:
3395 | movzx RD, PC_RD
3396 | branchPC RD
3397 |9:
3398 | ins_next
3399 |
3400 |7: // RA is not an integer.
3401 | ja ->vmeta_comp
3402 | // RA is a number.
3403 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3404 | // RA is a number, RD is an integer.
3405 | cvtsi2sd xmm0, dword [BASE+RD*8]
3406 | jmp >2
3407 |
3408 |8: // RA is an integer, RD is not an integer.
3409 | ja ->vmeta_comp
3410 | // RA is an integer, RD is a number.
3411 | cvtsi2sd xmm1, dword [BASE+RA*8]
3412 | movsd xmm0, qword [BASE+RD*8]
3413 | add PC, 4
3414 | ucomisd xmm0, xmm1
3415 | jmp_comp jbe, ja, jb, jae, <9
3416 | jmp <6
3417 |.else
3418 | checknum RA, ->vmeta_comp
3419 | checknum RD, ->vmeta_comp
3420 |.endif
|// Common FP path: xmm0 holds the RD operand and is compared against slot RA.
3421 |1:
3422 | movsd xmm0, qword [BASE+RD*8]
3423 |2:
3424 | add PC, 4
3425 | ucomisd xmm0, qword [BASE+RA*8]
3426 |3:
3427 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3428 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3429 |.if DUALNUM
3430 | jmp_comp jbe, ja, jb, jae, <9
3431 | jmp <6
3432 |.else
3433 | jmp_comp jbe, ja, jb, jae, >1
3434 | movzx RD, PC_RD
3435 | branchPC RD
3436 |1:
3437 | ins_next
3438 |.endif
3439 break;
3440
/* Equality test of slots RA and RD. vk is 1 for the EQ variant and 0 for the
** NE variant; it selects which outcome takes the branch. RB holds RD's type tag.
*/
3441 case BC_ISEQV: case BC_ISNEV:
3442 vk = op == BC_ISEQV;
3443 | ins_AD // RA = src1, RD = src2, JMP with RD = target
3444 | mov RB, [BASE+RD*8+4]
3445 | add PC, 4
3446 |.if DUALNUM
3447 | cmp RB, LJ_TISNUM; jne >7
3448 | checkint RA, >8
3449 | mov RB, dword [BASE+RD*8]
3450 | cmp RB, dword [BASE+RA*8]
3451 if (vk) {
3452 | jne >9
3453 } else {
3454 | je >9
3455 }
3456 | movzx RD, PC_RD
3457 | branchPC RD
3458 |9:
3459 | ins_next
3460 |
3461 |7: // RD is not an integer.
3462 | ja >5
3463 | // RD is a number.
3464 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
3465 | // RD is a number, RA is an integer.
3466 | cvtsi2sd xmm0, dword [BASE+RA*8]
3467 | jmp >2
3468 |
3469 |8: // RD is an integer, RA is not an integer.
3470 | ja >5
3471 | // RD is an integer, RA is a number.
3472 | cvtsi2sd xmm0, dword [BASE+RD*8]
3473 | ucomisd xmm0, qword [BASE+RA*8]
3474 | jmp >4
3475 |
3476 |.else
3477 | cmp RB, LJ_TISNUM; jae >5
3478 | checknum RA, >5
3479 |.endif
3480 |1:
3481 | movsd xmm0, qword [BASE+RA*8]
3482 |2:
3483 | ucomisd xmm0, qword [BASE+RD*8]
3484 |4:
/* iseqne_fp is also entered via goto from the ISEQN/ISNEN case. It expects the
** flags of a preceding ucomisd.
*/
3485 iseqne_fp:
3486 if (vk) {
3487 | jp >2 // Unordered means not equal.
3488 | jne >2
3489 } else {
3490 | jp >2 // Unordered means not equal.
3491 | je >1
3492 }
/* iseqne_end is also entered via goto from the ISEQS/ISNES case (and from
** ISEQP/ISNEP without FFI). It defines labels 1 (equal) and 2 (not equal);
** which of them holds the branch code depends on vk.
*/
3493 iseqne_end:
3494 if (vk) {
3495 |1: // EQ: Branch to the target.
3496 | movzx RD, PC_RD
3497 | branchPC RD
3498 |2: // NE: Fallthrough to next instruction.
3499 |.if not FFI
3500 |3:
3501 |.endif
3502 } else {
3503 |.if not FFI
3504 |3:
3505 |.endif
3506 |2: // NE: Branch to the target.
3507 | movzx RD, PC_RD
3508 | branchPC RD
3509 |1: // EQ: Fallthrough to next instruction.
3510 }
/* With DUALNUM the ISEQV/ISNEV/ISEQN/ISNEN cases already emitted ins_next at
** their label 9, so that copy is reused instead of emitting another one.
*/
3511 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
3512 op == BC_ISEQN || op == BC_ISNEN)) {
3513 | jmp <9
3514 } else {
3515 | ins_next
3516 }
3517 |
3518 if (op == BC_ISEQV || op == BC_ISNEV) {
3519 |5: // Either or both types are not numbers.
3520 |.if FFI
3521 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
3522 | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
3523 |.endif
3524 | checktp RA, RB // Compare types.
3525 | jne <2 // Not the same type?
3526 | cmp RB, LJ_TISPRI
3527 | jae <1 // Same type and primitive type?
3528 |
3529 | // Same types and not a primitive type. Compare GCobj or pvalue.
3530 | mov RA, [BASE+RA*8]
3531 | mov RD, [BASE+RD*8]
3532 | cmp RA, RD
3533 | je <1 // Same GCobjs or pvalues?
3534 | cmp RB, LJ_TISTABUD
3535 | ja <2 // Different objects and not table/ud?
3536 |.if X64
3537 | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
3538 | jb <2
3539 |.endif
3540 |
3541 | // Different tables or userdatas. Need to check __eq metamethod.
3542 | // Field metatable must be at same offset for GCtab and GCudata!
3543 | mov TAB:RB, TAB:RA->metatable
3544 | test TAB:RB, TAB:RB
3545 | jz <2 // No metatable?
3546 | test byte TAB:RB->nomm, 1<<MM_eq
3547 | jnz <2 // Or 'no __eq' flag set?
/* RB tells ->vmeta_equal which variant is running: 0 for EQ, 1 for NE. */
3548 if (vk) {
3549 | xor RB, RB // ne = 0
3550 } else {
3551 | mov RB, 1 // ne = 1
3552 }
3553 | jmp ->vmeta_equal // Handle __eq metamethod.
3554 } else {
/* Label 3 is the type-mismatch exit of the constant comparisons. With FFI only
** a cdata operand goes to ->vmeta_equal_cd; anything else counts as not equal.
*/
3555 |.if FFI
3556 |3:
3557 | cmp RB, LJ_TCDATA
3558 if (LJ_DUALNUM && vk) {
3559 | jne <9
3560 } else {
3561 | jne <2
3562 }
3563 | jmp ->vmeta_equal_cd
3564 |.endif
3565 }
3566 break;
/* Compare slot RA against a string constant: first the type tag, then the
** object pointer. vk is 1 for the EQ variant.
*/
3567 case BC_ISEQS: case BC_ISNES:
3568 vk = op == BC_ISEQS;
3569 | ins_AND // RA = src, RD = str const, JMP with RD = target
3570 | mov RB, [BASE+RA*8+4]
3571 | add PC, 4
3572 | cmp RB, LJ_TSTR; jne >3
3573 | mov RA, [BASE+RA*8]
3574 | cmp RA, [KBASE+RD*4]
/* iseqne_test expects the flags of an equality cmp; it is shared with ISEQP/ISNEP. */
3575 iseqne_test:
3576 if (vk) {
3577 | jne >2
3578 } else {
3579 | je >1
3580 }
3581 goto iseqne_end;
/* Compare slot RA against a number constant in the KBASE table. */
3582 case BC_ISEQN: case BC_ISNEN:
3583 vk = op == BC_ISEQN;
3584 | ins_AD // RA = src, RD = num const, JMP with RD = target
3585 | mov RB, [BASE+RA*8+4]
3586 | add PC, 4
3587 |.if DUALNUM
3588 | cmp RB, LJ_TISNUM; jne >7
3589 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
3590 | mov RB, dword [KBASE+RD*8]
3591 | cmp RB, dword [BASE+RA*8]
3592 if (vk) {
3593 | jne >9
3594 } else {
3595 | je >9
3596 }
3597 | movzx RD, PC_RD
3598 | branchPC RD
3599 |9:
3600 | ins_next
3601 |
3602 |7: // RA is not an integer.
3603 | ja >3
3604 | // RA is a number.
3605 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
3606 | // RA is a number, RD is an integer.
3607 | cvtsi2sd xmm0, dword [KBASE+RD*8]
3608 | jmp >2
3609 |
3610 |8: // RA is an integer, RD is a number.
3611 | cvtsi2sd xmm0, dword [BASE+RA*8]
3612 | ucomisd xmm0, qword [KBASE+RD*8]
3613 | jmp >4
3614 |.else
3615 | cmp RB, LJ_TISNUM; jae >3
3616 |.endif
3617 |1:
3618 | movsd xmm0, qword [KBASE+RD*8]
3619 |2:
3620 | ucomisd xmm0, qword [BASE+RA*8]
3621 |4:
/* The ucomisd flags are evaluated by the shared tail of the ISEQV/ISNEV case. */
3622 goto iseqne_fp;
/* Compare the type tag of slot RA with RD. Without FFI the shared test/branch
** tail is reused. With FFI a mismatching cdata tag goes to ->vmeta_equal_cd.
*/
3623 case BC_ISEQP: case BC_ISNEP:
3624 vk = op == BC_ISEQP;
3625 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3626 | mov RB, [BASE+RA*8+4]
3627 | add PC, 4
3628 | cmp RB, RD
3629 if (!LJ_HASFFI) goto iseqne_test;
3630 if (vk) {
3631 | jne >3
3632 | movzx RD, PC_RD
3633 | branchPC RD
3634 |2:
3635 | ins_next
3636 |3:
3637 | cmp RB, LJ_TCDATA; jne <2
3638 | jmp ->vmeta_equal_cd
3639 } else {
3640 | je >2
3641 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
3642 | movzx RD, PC_RD
3643 | branchPC RD
3644 |2:
3645 | ins_next
3646 }
3647 break;
3648
3649 /* -- Unary test and copy ops ------------------------------------------- */
3650
/* Test the type tag of slot RD against LJ_TISTRUECOND. IST/ISTC skip the branch
** on jae, ISF/ISFC on jb. The *C variants copy slot RD to slot RA before
** branching.
*/
3651 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3652 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3653 | mov RB, [BASE+RD*8+4]
3654 | add PC, 4
3655 | cmp RB, LJ_TISTRUECOND
3656 if (op == BC_IST || op == BC_ISTC) {
3657 | jae >1
3658 } else {
3659 | jb >1
3660 }
3661 if (op == BC_ISTC || op == BC_ISFC) {
3662 | mov [BASE+RA*8+4], RB
3663 | mov RB, [BASE+RD*8]
3664 | mov [BASE+RA*8], RB
3665 }
3666 | movzx RD, PC_RD
3667 | branchPC RD
3668 |1: // Fallthrough to the next instruction.
3669 | ins_next
3670 break;
3671
/* Type assertion: RD holds the negated type, so adding the tag of slot RA gives
** zero exactly when the types match. Anything else goes to ->vmeta_istype.
*/
3672 case BC_ISTYPE:
3673 | ins_AD // RA = src, RD = -type
3674 | add RD, [BASE+RA*8+4]
3675 | jne ->vmeta_istype
3676 | ins_next
3677 break;
/* Number assertion on slot RA; failure goes to ->vmeta_istype. */
3678 case BC_ISNUM:
3679 | ins_AD // RA = src, RD = -(TISNUM-1)
3680 | checknum RA, ->vmeta_istype
3681 | ins_next
3682 break;
3683
3684 /* -- Unary ops --------------------------------------------------------- */
3685
/* Copy slot RD to slot RA: one 64 bit move on x64, two 32 bit moves on x86. */
3686 case BC_MOV:
3687 | ins_AD // RA = dst, RD = src
3688 |.if X64
3689 | mov RBa, [BASE+RD*8]
3690 | mov [BASE+RA*8], RBa
3691 |.else
3692 | mov RB, [BASE+RD*8+4]
3693 | mov RD, [BASE+RD*8]
3694 | mov [BASE+RA*8+4], RB
3695 | mov [BASE+RA*8], RD
3696 |.endif
3697 | ins_next_
3698 break;
/* Logical not: only the type tag of slot RA is written. It is LJ_TTRUE plus the
** carry flag left by comparing RD's tag with LJ_TISTRUECOND.
*/
3699 case BC_NOT:
3700 | ins_AD // RA = dst, RD = src
3701 | xor RB, RB
3702 | checktp RD, LJ_TISTRUECOND
3703 | adc RB, LJ_TTRUE
3704 | mov [BASE+RA*8+4], RB
3705 | ins_next
3706 break;
/* Unary minus. With DUALNUM an integer is negated in place; if neg overflows
** the result is stored as the double constant at label 4 instead. Numbers
** have their sign flipped with xorps; other types go to ->vmeta_unm.
*/
3707 case BC_UNM:
3708 | ins_AD // RA = dst, RD = src
3709 |.if DUALNUM
3710 | checkint RD, >5
3711 | mov RB, [BASE+RD*8]
3712 | neg RB
3713 | jo >4
3714 | mov dword [BASE+RA*8+4], LJ_TISNUM
3715 | mov dword [BASE+RA*8], RB
3716 |9:
3717 | ins_next
3718 |4:
3719 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
3720 | mov dword [BASE+RA*8], 0
3721 | jmp <9
3722 |5:
3723 | ja ->vmeta_unm
3724 |.else
3725 | checknum RD, ->vmeta_unm
3726 |.endif
3727 | movsd xmm0, qword [BASE+RD*8]
3728 | sseconst_sign xmm1, RDa // NOTE(review): presumably loads the sign-bit mask -- confirm in the macro.
3729 | xorps xmm0, xmm1
3730 | movsd qword [BASE+RA*8], xmm0
3731 |.if DUALNUM
3732 | jmp <9
3733 |.else
3734 | ins_next
3735 |.endif
3736 break;
/* Length operator. Strings read STR->len directly. Tables call lj_tab_len,
** unless they have a metatable whose 'no __len' cache bit is clear, in which
** case ->vmeta_len takes over. All other types go to ->vmeta_len as well.
*/
3737 case BC_LEN:
3738 | ins_AD // RA = dst, RD = src
3739 | checkstr RD, >2
3740 | mov STR:RD, [BASE+RD*8]
3741 |.if DUALNUM
3742 | mov RD, dword STR:RD->len
3743 |1:
3744 | mov dword [BASE+RA*8+4], LJ_TISNUM
3745 | mov dword [BASE+RA*8], RD
3746 |.else
3747 | xorps xmm0, xmm0
3748 | cvtsi2sd xmm0, dword STR:RD->len
3749 |1:
3750 | movsd qword [BASE+RA*8], xmm0
3751 |.endif
3752 | ins_next
3753 |2:
3754 | checktab RD, ->vmeta_len
3755 | mov TAB:FCARG1, [BASE+RD*8]
3756 | mov TAB:RB, TAB:FCARG1->metatable
3757 | cmp TAB:RB, 0
3758 | jnz >9
3759 |3:
3760 |->BC_LEN_Z:
3761 | mov RB, BASE // Save BASE.
3762 | call extern lj_tab_len@4 // (GCtab *t)
3763 | // Length of table returned in eax (RD).
3764 |.if DUALNUM
3765 | // Nothing to do.
3766 |.else
3767 | cvtsi2sd xmm0, RD
3768 |.endif
3769 | mov BASE, RB // Restore BASE.
3770 | movzx RA, PC_RA
3771 | jmp <1
3772 |9: // Check for __len.
3773 | test byte TAB:RB->nomm, 1<<MM_len
3774 | jnz <3
3775 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3776 break;
3777 #if LJ_53
/* No inline fast path: BNOT always goes to ->vmeta_unm. */
3778 case BC_BNOT:
3779 | ins_AD // RA = dst, RD = src
3780 | jmp ->vmeta_unm
3781 break;
3782
3783 /* -- Binary ops -------------------------------------------------------- */
/* No inline fast path: these always go to ->vmeta_arith_vv. */
3784 case BC_IDIV:
3785 case BC_BAND:
3786 case BC_BOR:
3787 case BC_BXOR:
3788 case BC_SHL:
3789 case BC_SHR:
3790 | ins_ABC
3791 | jmp ->vmeta_arith_vv
3792 break;
3793 #endif
/* ins_arithpre: check both operands are numbers and apply sseins to ssereg.
** vk is derived from op: 0 = slot op constant (VN), 1 = constant op slot (NV),
** otherwise slot op slot (VV). xmm0 receives the left operand; the right
** operand is the memory source of sseins. Type failures go to the matching
** ->vmeta_arith_* handler.
*/
3794 |.macro ins_arithpre, sseins, ssereg
3795 | ins_ABC
3796 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3797 ||switch (vk) {
3798 ||case 0:
3799 | checknum RB, ->vmeta_arith_vn
3800 | .if DUALNUM
3801 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
3802 | .endif
3803 | movsd xmm0, qword [BASE+RB*8]
3804 | sseins ssereg, qword [KBASE+RC*8]
3805 || break;
3806 ||case 1:
3807 | checknum RB, ->vmeta_arith_nv
3808 | .if DUALNUM
3809 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
3810 | .endif
3811 | movsd xmm0, qword [KBASE+RC*8]
3812 | sseins ssereg, qword [BASE+RB*8]
3813 || break;
3814 ||default:
3815 | checknum RB, ->vmeta_arith_vv
3816 | checknum RC, ->vmeta_arith_vv
3817 | movsd xmm0, qword [BASE+RB*8]
3818 | sseins ssereg, qword [BASE+RC*8]
3819 || break;
3820 ||}
3821 |.endmacro
3822 |
/* ins_arithdn: integer variant of the above for DUALNUM builds. Both operands
** must be integers; intins overflow (jo) goes to the ->vmeta_arith_*o handlers.
** The result is left in RC for the NV form and in RB for the other two forms.
*/
3823 |.macro ins_arithdn, intins
3824 | ins_ABC
3825 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3826 ||switch (vk) {
3827 ||case 0:
3828 | checkint RB, ->vmeta_arith_vn
3829 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
3830 | mov RB, [BASE+RB*8]
3831 | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
3832 || break;
3833 ||case 1:
3834 | checkint RB, ->vmeta_arith_nv
3835 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
3836 | mov RC, [KBASE+RC*8]
3837 | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
3838 || break;
3839 ||default:
3840 | checkint RB, ->vmeta_arith_vv
3841 | checkint RC, ->vmeta_arith_vv
3842 | mov RB, [BASE+RB*8]
3843 | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
3844 || break;
3845 ||}
3846 | mov dword [BASE+RA*8+4], LJ_TISNUM
3847 ||if (vk == 1) {
3848 | mov dword [BASE+RA*8], RC
3849 ||} else {
3850 | mov dword [BASE+RA*8], RB
3851 ||}
3852 | ins_next
3853 |.endmacro
3854 |
/* ins_arithpost stores the FP result in xmm0 to slot RA. */
3855 |.macro ins_arithpost
3856 | movsd qword [BASE+RA*8], xmm0
3857 |.endmacro
3858 |
/* ins_arith with one argument: FP-only operation (operands checked, result stored). */
3859 |.macro ins_arith, sseins
3860 | ins_arithpre sseins, xmm0
3861 | ins_arithpost
3862 | ins_next
3863 |.endmacro
3864 |
/* ins_arith with two arguments: integer code for DUALNUM builds, FP code otherwise. */
3865 |.macro ins_arith, intins, sseins
3866 |.if DUALNUM
3867 | ins_arithdn intins
3868 |.else
3869 | ins_arith, sseins
3870 |.endif
3871 |.endmacro
3872
3873 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3874 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3875 | ins_arith add, addsd
3876 break;
3877 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3878 | ins_arith sub, subsd
3879 break;
3880 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3881 | ins_arith imul, mulsd
3882 break;
/* Division has no integer form, so the one-argument ins_arith is used. */
3883 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3884 | ins_arith divsd
3885 break;
/* Modulo: operands are loaded into xmm0 (left) and xmm1 (right), then ->vm_mod
** is called and xmm0 is stored as the result. The code after ->BC_MODVN_Z is
** shared by all three MOD forms.
*/
3886 case BC_MODVN:
3887 | ins_arithpre movsd, xmm1
3888 |->BC_MODVN_Z:
3889 | call ->vm_mod
3890 | ins_arithpost
3891 | ins_next
3892 break;
3893 case BC_MODNV: case BC_MODVV:
3894 | ins_arithpre movsd, xmm1
3895 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3896 break;
/* Power: operands in xmm0/xmm1, computed by the external pow function. BASE is
** kept in RB across the call and RA is refetched afterwards. On x86 the
** arguments are passed in FPARG1/FPARG3 and the result is popped from the x87
** stack.
*/
3897 case BC_POW:
3898 | ins_arithpre movsd, xmm1
3899 | mov RB, BASE
3900 |.if not X64
3901 | movsd FPARG1, xmm0
3902 | movsd FPARG3, xmm1
3903 |.endif
3904 | call extern pow
3905 | movzx RA, PC_RA
3906 | mov BASE, RB
3907 |.if X64
3908 | ins_arithpost
3909 |.else
3910 | fstp qword [BASE+RA*8]
3911 |.endif
3912 | ins_next
3913 break;
3914
/* Concatenation: calls lj_meta_cat with top = &Stk[RC] and left = RC - RB.
** A non-NULL return value is passed on to ->vmeta_binop; otherwise the result
** is copied from Stk[RB] to Stk[RA].
*/
3915 case BC_CAT:
3916 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3917 |.if X64
3918 | mov L:CARG1d, SAVE_L
3919 | mov L:CARG1d->base, BASE
3920 | lea CARG2d, [BASE+RC*8]
3921 | mov CARG3d, RC
3922 | sub CARG3d, RB
3923 |->BC_CAT_Z:
3924 | mov L:RB, L:CARG1d
3925 |.else
3926 | lea RA, [BASE+RC*8]
3927 | sub RC, RB
3928 | mov ARG2, RA
3929 | mov ARG3, RC
3930 |->BC_CAT_Z:
3931 | mov L:RB, SAVE_L
3932 | mov ARG1, L:RB
3933 | mov L:RB->base, BASE
3934 |.endif
3935 | mov SAVE_PC, PC
3936 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3937 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3938 | mov BASE, L:RB->base
3939 | test RC, RC
3940 | jnz ->vmeta_binop
3941 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
3942 | movzx RA, PC_RA
3943 |.if X64
3944 | mov RCa, [BASE+RB*8]
3945 | mov [BASE+RA*8], RCa
3946 |.else
3947 | mov RC, [BASE+RB*8+4]
3948 | mov RB, [BASE+RB*8]
3949 | mov [BASE+RA*8+4], RC
3950 | mov [BASE+RA*8], RB
3951 |.endif
3952 | ins_next
3953 break;
3954
3955 /* -- Constant ops ------------------------------------------------------ */
3956
/* Load a string constant (32 bit entries in the KBASE table) into slot RA. */
3957 case BC_KSTR:
3958 | ins_AND // RA = dst, RD = str const (~)
3959 | mov RD, [KBASE+RD*4]
3960 | mov dword [BASE+RA*8+4], LJ_TSTR
3961 | mov [BASE+RA*8], RD
3962 | ins_next
3963 break;
/* Same as KSTR but tagged LJ_TCDATA; no code is emitted without FFI. */
3964 case BC_KCDATA:
3965 |.if FFI
3966 | ins_AND // RA = dst, RD = cdata const (~)
3967 | mov RD, [KBASE+RD*4]
3968 | mov dword [BASE+RA*8+4], LJ_TCDATA
3969 | mov [BASE+RA*8], RD
3970 | ins_next
3971 |.endif
3972 break;
/* Load a sign-extended 16 bit literal: as an integer with DUALNUM, else as a double. */
3973 case BC_KSHORT:
3974 | ins_AD // RA = dst, RD = signed int16 literal
3975 |.if DUALNUM
3976 | movsx RD, RDW
3977 | mov dword [BASE+RA*8+4], LJ_TISNUM
3978 | mov dword [BASE+RA*8], RD
3979 |.else
3980 | movsx RD, RDW // Sign-extend literal.
3981 | cvtsi2sd xmm0, RD
3982 | movsd qword [BASE+RA*8], xmm0
3983 |.endif
3984 | ins_next
3985 break;
/* Copy an 8 byte number constant from the KBASE table to slot RA. */
3986 case BC_KNUM:
3987 | ins_AD // RA = dst, RD = num const
3988 | movsd xmm0, qword [KBASE+RD*8]
3989 | movsd qword [BASE+RA*8], xmm0
3990 | ins_next
3991 break;
/* Store a primitive type: only the type tag of slot RA is written. */
3992 case BC_KPRI:
3993 | ins_AND // RA = dst, RD = primitive type (~)
3994 | mov [BASE+RA*8+4], RD
3995 | ins_next
3996 break;
/* Set the type tags of slots RA..RD to LJ_TNIL. RA is turned into a pointer to
** the tag of the second slot and RD into a pointer to the tag of the last one;
** the first slot is written through [RA-8] before the loop.
*/
3997 case BC_KNIL:
3998 | ins_AD // RA = dst_start, RD = dst_end
3999 | lea RA, [BASE+RA*8+12]
4000 | lea RD, [BASE+RD*8+4]
4001 | mov RB, LJ_TNIL
4002 | mov [RA-8], RB // Sets minimum 2 slots.
4003 |1:
4004 | mov [RA], RB
4005 | add RA, 8
4006 | cmp RA, RD
4007 | jbe <1
4008 | ins_next
4009 break;
4010
4011 /* -- Upvalue and function ops ------------------------------------------ */
4012
/* Read an upvalue: the function object is taken from [BASE-8], its uvptr[RD]
** gives the GCupval, and the TValue at UPVAL->v is copied to slot RA.
*/
4013 case BC_UGET:
4014 | ins_AD // RA = dst, RD = upvalue #
4015 | mov LFUNC:RB, [BASE-8]
4016 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
4017 | mov RB, UPVAL:RB->v
4018 |.if X64
4019 | mov RDa, [RB]
4020 | mov [BASE+RA*8], RDa
4021 |.else
4022 | mov RD, [RB+4]
4023 | mov RB, [RB]
4024 | mov [BASE+RA*8+4], RD
4025 | mov [BASE+RA*8], RB
4026 |.endif
4027 | ins_next
4028 break;
/* Set the env field of the current function and of every function reached by
** following next_ENV until the walk returns to the current function. Nothing
** is stored if slot RD is not a table. The flags of checktp stay valid up to
** the jne because only mov instructions are in between.
** NOTE(review): no GC write barrier is issued for the env store (see the TBD
** on the case line) -- confirm this is safe.
*/
4029 case BC_ESETV: // TBD: NOBARRIER?
4030 | ins_AD
4031 | checktp RD, LJ_TTAB
4032 | mov RD, [BASE+RD*8] // val to assign
4033 | mov LFUNC:RB, [BASE-8] // lfunc
4034 | jne >2 // silently skip if not table
4035 |1:
4036 | mov LFUNC:RB->env, RD // set env
4037 | mov LFUNC:RB, LFUNC:RB->next_ENV
4038 | cmp LFUNC:RB, [BASE-8] // set for all members sharing this _ENV uv
4039 | jnz <1
4040 |2:
4041 | ins_refetch
4042 break;
/* Write slot RD to an upvalue. The flags of the 'closed' compare are consumed
** by the jz after the stores (mov does not modify flags). TV2MARKOFS is the
** offset from the upvalue's tv field to its marked field; it is applied to RB
** on the closed-upvalue path.
*/
4043 case BC_USETV:
4044 #define TV2MARKOFS \
4045 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
4046 | ins_AD // RA = upvalue #, RD = src
4047 | mov LFUNC:RB, [BASE-8]
4048 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4049 | cmp byte UPVAL:RB->closed, 0
4050 | mov RB, UPVAL:RB->v
4051 | mov RA, [BASE+RD*8]
4052 | mov RD, [BASE+RD*8+4]
4053 | mov [RB], RA
4054 | mov [RB+4], RD
4055 | jz >1
4056 | // Check barrier for closed upvalue.
4057 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
4058 | jnz >2
4059 |1:
4060 | ins_next
4061 |
4062 |2: // Upvalue is black. Check if new value is collectable and white.
4063 | sub RD, LJ_TISGCV
4064 | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
4065 | jbe <1
4066 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
4067 | jz <1
4068 | // Crossed a write barrier. Move the barrier forward.
4069 |.if X64 and not X64WIN
4070 | mov FCARG2, RB
4071 | mov RB, BASE // Save BASE.
4072 |.else
4073 | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
4074 |.endif
4075 | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
4076 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
4077 | mov BASE, RB // Restore BASE.
4078 | jmp <1
4079 break;
4080 #undef TV2MARKOFS
/* Write a string constant to an upvalue. The barrier is only called when the
** upvalue is black, the string is white and the upvalue is closed.
*/
4081 case BC_USETS:
4082 | ins_AND // RA = upvalue #, RD = str const (~)
4083 | mov LFUNC:RB, [BASE-8]
4084 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4085 | mov GCOBJ:RA, [KBASE+RD*4]
4086 | mov RD, UPVAL:RB->v
4087 | mov [RD], GCOBJ:RA
4088 | mov dword [RD+4], LJ_TSTR
4089 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
4090 | jnz >2
4091 |1:
4092 | ins_next
4093 |
4094 |2: // Check if string is white and ensure upvalue is closed.
4095 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
4096 | jz <1
4097 | cmp byte UPVAL:RB->closed, 0
4098 | jz <1
4099 | // Crossed a write barrier. Move the barrier forward.
4100 | mov RB, BASE // Save BASE (FCARG2 == BASE).
4101 | mov FCARG2, RD
4102 | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
4103 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
4104 | mov BASE, RB // Restore BASE.
4105 | jmp <1
4106 break;
/* Write a number constant to an upvalue. No barrier code is emitted here. */
4107 case BC_USETN:
4108 | ins_AD // RA = upvalue #, RD = num const
4109 | mov LFUNC:RB, [BASE-8]
4110 | movsd xmm0, qword [KBASE+RD*8]
4111 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4112 | mov RA, UPVAL:RB->v
4113 | movsd qword [RA], xmm0
4114 | ins_next
4115 break;
/* Write a primitive type to an upvalue: only the type tag is stored. */
4116 case BC_USETP:
4117 | ins_AND // RA = upvalue #, RD = primitive type (~)
4118 | mov LFUNC:RB, [BASE-8]
4119 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4120 | mov RA, UPVAL:RB->v
4121 | mov [RA+4], RD
4122 | ins_next
4123 break;
/* Close upvalues and jump: the jump part is emitted first. */
4124 case BC_UCLO:
4125 | ins_AD // RA = level, RD = target
4126 | branchPC RD // Do this first to free RD.
|// lj_func_closeuv is only called if L->openupval is non-zero. BASE is
|// reloaded from L->base after the call.
4127 | mov L:RB, SAVE_L
4128 | cmp dword L:RB->openupval, 0
4129 | je >1
4130 | mov L:RB->base, BASE
4131 | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
4132 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
4133 | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
4134 | mov BASE, L:RB->base
4135 |1:
4136 | ins_next
4137 break;
4138
/* Create a closure: calls lj_func_newL_gc with the prototype constant and the
** current function from [BASE-8] as parent. L->base and SAVE_PC are set before
** the call.
*/
4139 case BC_FNEW:
4140 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
4141 |.if X64
4142 | mov L:RB, SAVE_L
4143 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
4144 | mov CARG3d, [BASE-8]
4145 | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
4146 | mov CARG1d, L:RB
4147 |.else
4148 | mov LFUNC:RA, [BASE-8]
4149 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
4150 | mov L:RB, SAVE_L
4151 | mov ARG3, LFUNC:RA
4152 | mov ARG2, PROTO:RD
4153 | mov ARG1, L:RB
4154 | mov L:RB->base, BASE
4155 |.endif
4156 | mov SAVE_PC, PC
4157 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
4158 | call extern lj_func_newL_gc
4159 | // GCfuncL * returned in eax (RC).
|// Reload BASE and RA after the call, then store the new function in slot RA.
4160 | mov BASE, L:RB->base
4161 | movzx RA, PC_RA
4162 | mov [BASE+RA*8], LFUNC:RC
4163 | mov dword [BASE+RA*8+4], LJ_TFUNC
4164 | ins_next
4165 break;
4166
4167 /* -- Table ops --------------------------------------------------------- */
4168
/* Create a table. A GC step runs first (label 5) if gc.total has reached
** gc.threshold. RD is split into asize (low 11 bits) and hbits (RD >> 11); an
** asize of 0x7ff is replaced by 0x801 at label 3.
*/
4169 case BC_TNEW:
4170 | ins_AD // RA = dst, RD = hbits|asize
4171 | mov L:RB, SAVE_L
4172 | mov L:RB->base, BASE
4173 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4174 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
4175 | mov SAVE_PC, PC
4176 | jae >5
4177 |1:
4178 |.if X64
4179 | mov CARG3d, RD
4180 | and RD, 0x7ff
4181 | shr CARG3d, 11
4182 |.else
4183 | mov RA, RD
4184 | and RD, 0x7ff
4185 | shr RA, 11
4186 | mov ARG3, RA
4187 |.endif
4188 | cmp RD, 0x7ff
4189 | je >3
4190 |2:
4191 |.if X64
4192 | mov L:CARG1d, L:RB
4193 | mov CARG2d, RD
4194 |.else
4195 | mov ARG1, L:RB
4196 | mov ARG2, RD
4197 |.endif
4198 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
4199 | // Table * returned in eax (RC).
4200 | mov BASE, L:RB->base
4201 | movzx RA, PC_RA
4202 | mov [BASE+RA*8], TAB:RC
4203 | mov dword [BASE+RA*8+4], LJ_TTAB
4204 | ins_next
4205 |3: // Turn 0x7ff into 0x801.
4206 | mov RD, 0x801
4207 | jmp <2
4208 |5:
4209 | mov L:FCARG1, L:RB
4210 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
4211 | movzx RD, PC_RD
4212 | jmp <1
4213 break;
/* Duplicate a template table constant with lj_tab_dup, running a GC step
** first if gc.total has reached gc.threshold.
*/
4214 case BC_TDUP:
4215 | ins_AND // RA = dst, RD = table const (~) (holding template table)
4216 | mov L:RB, SAVE_L
4217 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4218 | mov SAVE_PC, PC
4219 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
4220 | mov L:RB->base, BASE
4221 | jae >3
4222 |2:
4223 | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
4224 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
4225 | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
4226 | // Table * returned in eax (RC).
4227 | mov BASE, L:RB->base
4228 | movzx RA, PC_RA
4229 | mov [BASE+RA*8], TAB:RC
4230 | mov dword [BASE+RA*8+4], LJ_TTAB
4231 | ins_next
4232 |3:
4233 | mov L:FCARG1, L:RB
4234 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
4235 | movzx RD, PC_RD // Need to reload RD.
4236 | not RDa
4237 | jmp <2
4238 break;
4239
/* Global get: look up a string constant in the env table of the current
** function, using the shared string-key lookup at ->BC_TGETS_Z.
*/
4240 case BC_GGET:
4241 | ins_AND // RA = dst, RD = str const (~)
4242 | mov LFUNC:RB, [BASE-8]
4243 | mov TAB:RB, LFUNC:RB->env
4244 | mov STR:RC, [KBASE+RD*4]
4245 | jmp ->BC_TGETS_Z
4246 break;
/* Global set: same as GGET, but continues at the shared store at ->BC_TSETS_Z. */
4247 case BC_GSET:
4248 | ins_AND // RA = src, RD = str const (~)
4249 | mov LFUNC:RB, [BASE-8]
4250 | mov TAB:RB, LFUNC:RB->env
4251 | mov STR:RC, [KBASE+RD*4]
4252 | jmp ->BC_TSETS_Z
4253 break;
4254
/* Table get with a variable key. Integer keys inside the array part are served
** inline and string keys are forwarded to ->BC_TGETS_Z. A nil slot is only
** returned as nil if there is no metatable or its 'no __index' cache bit is
** set. All other cases go to ->vmeta_tgetv.
*/
4255 case BC_TGETV:
4256 | ins_ABC // RA = dst, RB = table, RC = key
4257 | checktab RB, ->vmeta_tgetv
4258 | mov TAB:RB, [BASE+RB*8]
4259 |
4260 | // Integer key?
4261 |.if DUALNUM
4262 | checkint RC, >5
4263 | mov RC, dword [BASE+RC*8]
4264 |.else
4265 | // Convert number to int and back and compare.
4266 | checknum RC, >5
4267 | movsd xmm0, qword [BASE+RC*8]
4268 | cvttsd2si RC, xmm0
4269 | cvtsi2sd xmm1, RC
4270 | ucomisd xmm0, xmm1
4271 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4272 |.endif
4273 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4274 | jae ->vmeta_tgetv // Not in array part? Use fallback.
4275 | shl RC, 3
4276 | add RC, TAB:RB->array
4277 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4278 | je >2
4279 | // Get array slot.
4280 |.if X64
4281 | mov RBa, [RC]
4282 | mov [BASE+RA*8], RBa
4283 |.else
4284 | mov RB, [RC]
4285 | mov RC, [RC+4]
4286 | mov [BASE+RA*8], RB
4287 | mov [BASE+RA*8+4], RC
4288 |.endif
4289 |1:
4290 | ins_next
4291 |
4292 |2: // Check for __index if table value is nil.
4293 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4294 | jz >3
4295 | mov TAB:RA, TAB:RB->metatable
4296 | test byte TAB:RA->nomm, 1<<MM_index
4297 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
4298 | movzx RA, PC_RA // Restore RA.
4299 |3:
4300 | mov dword [BASE+RA*8+4], LJ_TNIL
4301 | jmp <1
4302 |
4303 |5: // String key?
4304 | checkstr RC, ->vmeta_tgetv
4305 | mov STR:RC, [BASE+RC*8]
4306 | jmp ->BC_TGETS_Z
4307 break;
/* Table get with a string constant key. The hash chain starts at
** node[hmask & hash] and is followed through NODE->next. A node matches if its
** key is tagged LJ_TSTR and holds the same string pointer. A missing key or a
** nil value yields nil unless the metatable's 'no __index' cache bit is clear,
** in which case ->vmeta_tgets takes over.
** ->BC_TGETS_Z is also the entry point for GGET and for string keys of TGETV.
*/
4308 case BC_TGETS:
4309 | ins_ABC // RA = dst, RB = table, RC = str const (~)
4310 | not RCa
4311 | mov STR:RC, [KBASE+RC*4]
4312 | checktab RB, ->vmeta_tgets
4313 | mov TAB:RB, [BASE+RB*8]
4314 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4315 | mov RA, TAB:RB->hmask
4316 | and RA, STR:RC->hash
4317 | imul RA, #NODE
4318 | add NODE:RA, TAB:RB->node
4319 |1:
4320 | cmp dword NODE:RA->key.it, LJ_TSTR
4321 | jne >4
4322 | cmp dword NODE:RA->key.gcr, STR:RC
4323 | jne >4
4324 | // Ok, key found. Assumes: offsetof(Node, val) == 0
4325 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4326 | je >5 // Key found, but nil value?
4327 | movzx RC, PC_RA
4328 | // Get node value.
4329 |.if X64
4330 | mov RBa, [RA]
4331 | mov [BASE+RC*8], RBa
4332 |.else
4333 | mov RB, [RA]
4334 | mov RA, [RA+4]
4335 | mov [BASE+RC*8], RB
4336 | mov [BASE+RC*8+4], RA
4337 |.endif
4338 |2:
4339 | ins_next
4340 |
4341 |3:
4342 | movzx RC, PC_RA
4343 | mov dword [BASE+RC*8+4], LJ_TNIL
4344 | jmp <2
4345 |
4346 |4: // Follow hash chain.
4347 | mov NODE:RA, NODE:RA->next
4348 | test NODE:RA, NODE:RA
4349 | jnz <1
4350 | // End of hash chain: key not found, nil result.
4351 |
4352 |5: // Check for __index if table value is nil.
4353 | mov TAB:RA, TAB:RB->metatable
4354 | test TAB:RA, TAB:RA
4355 | jz <3 // No metatable: done.
4356 | test byte TAB:RA->nomm, 1<<MM_index
4357 | jnz <3 // 'no __index' flag set: done.
4358 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
4359 break;
/* Table get with a byte literal key: same array fast path as TGETV, without
** the key type checks. Fallback is ->vmeta_tgetb.
*/
4360 case BC_TGETB:
4361 | ins_ABC // RA = dst, RB = table, RC = byte literal
4362 | checktab RB, ->vmeta_tgetb
4363 | mov TAB:RB, [BASE+RB*8]
4364 | cmp RC, TAB:RB->asize
4365 | jae ->vmeta_tgetb
4366 | shl RC, 3
4367 | add RC, TAB:RB->array
4368 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4369 | je >2
4370 | // Get array slot.
4371 |.if X64
4372 | mov RBa, [RC]
4373 | mov [BASE+RA*8], RBa
4374 |.else
4375 | mov RB, [RC]
4376 | mov RC, [RC+4]
4377 | mov [BASE+RA*8], RB
4378 | mov [BASE+RA*8+4], RC
4379 |.endif
4380 |1:
4381 | ins_next
4382 |
4383 |2: // Check for __index if table value is nil.
4384 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4385 | jz >3
4386 | mov TAB:RA, TAB:RB->metatable
4387 | test byte TAB:RA->nomm, 1<<MM_index
4388 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
4389 | movzx RA, PC_RA // Restore RA.
4390 |3:
4391 | mov dword [BASE+RA*8+4], LJ_TNIL
4392 | jmp <1
4393 break;
/* Table get without type checks, nil checks or metatable checks: the operands
** are used as table and key directly. Keys outside the array part go to
** ->vmeta_tgetr.
*/
4394 case BC_TGETR:
4395 | ins_ABC // RA = dst, RB = table, RC = key
4396 | mov TAB:RB, [BASE+RB*8]
4397 |.if DUALNUM
4398 | mov RC, dword [BASE+RC*8]
4399 |.else
4400 | cvttsd2si RC, qword [BASE+RC*8]
4401 |.endif
4402 | cmp RC, TAB:RB->asize
4403 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4404 | shl RC, 3
4405 | add RC, TAB:RB->array
4406 | // Get array slot.
4407 |->BC_TGETR_Z:
4408 |.if X64
4409 | mov RBa, [RC]
4410 | mov [BASE+RA*8], RBa
4411 |.else
4412 | mov RB, [RC]
4413 | mov RC, [RC+4]
4414 | mov [BASE+RA*8], RB
4415 | mov [BASE+RA*8+4], RC
4416 |.endif
4417 |->BC_TGETR2_Z:
4418 | ins_next
4419 break;
4420
/* Table set with a variable key. Integer keys inside the array part are stored
** inline and string keys are forwarded to ->BC_TSETS_Z. All other cases go to
** ->vmeta_tsetv.
*/
4421 case BC_TSETV:
4422 | ins_ABC // RA = src, RB = table, RC = key
4423 | checktab RB, ->vmeta_tsetv
4424 | mov TAB:RB, [BASE+RB*8]
4425 |
4426 | // Integer key?
4427 |.if DUALNUM
4428 | checkint RC, >5
4429 | mov RC, dword [BASE+RC*8]
4430 |.else
4431 | // Convert number to int and back and compare.
4432 | checknum RC, >5
4433 | movsd xmm0, qword [BASE+RC*8]
4434 | cvttsd2si RC, xmm0
4435 | cvtsi2sd xmm1, RC
4436 | ucomisd xmm0, xmm1
4437 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
4438 |.endif
4439 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4440 | jae ->vmeta_tsetv
4441 | shl RC, 3
4442 | add RC, TAB:RB->array
4443 | cmp dword [RC+4], LJ_TNIL
4444 | je >3 // Previous value is nil?
4445 |1:
4446 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4447 | jnz >7
4448 |2: // Set array slot.
4449 |.if X64
4450 | mov RBa, [BASE+RA*8]
4451 | mov [RC], RBa
4452 |.else
4453 | mov RB, [BASE+RA*8+4]
4454 | mov RA, [BASE+RA*8]
4455 | mov [RC+4], RB
4456 | mov [RC], RA
4457 |.endif
4458 | ins_next
4459 |
4460 |3: // Check for __newindex if previous value is nil.
4461 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4462 | jz <1
4463 | mov TAB:RA, TAB:RB->metatable
4464 | test byte TAB:RA->nomm, 1<<MM_newindex
4465 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
4466 | movzx RA, PC_RA // Restore RA.
4467 | jmp <1
4468 |
4469 |5: // String key?
4470 | checkstr RC, ->vmeta_tsetv
4471 | mov STR:RC, [BASE+RC*8]
4472 | jmp ->BC_TSETS_Z
4473 |
4474 |7: // Possible table write barrier for the value. Skip valiswhite check.
4475 | barrierback TAB:RB, RA
4476 | movzx RA, PC_RA // Restore RA.
4477 | jmp <2
4478 break;
/* Table set with a string constant key. The hash chain is searched as in
** TGETS. If the key is missing and no __newindex check is needed, a new key is
** created with lj_tab_newkey; the key TValue is built in TMP1/TMP2 and the
** table pointer is kept in TMP3 across the call. The table's nomm byte is
** cleared on entry.
** ->BC_TSETS_Z is also the entry point for GSET and for string keys of TSETV.
*/
4479 case BC_TSETS:
4480 | ins_ABC // RA = src, RB = table, RC = str const (~)
4481 | not RCa
4482 | mov STR:RC, [KBASE+RC*4]
4483 | checktab RB, ->vmeta_tsets
4484 | mov TAB:RB, [BASE+RB*8]
4485 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4486 | mov RA, TAB:RB->hmask
4487 | and RA, STR:RC->hash
4488 | imul RA, #NODE
4489 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
4490 | add NODE:RA, TAB:RB->node
4491 |1:
4492 | cmp dword NODE:RA->key.it, LJ_TSTR
4493 | jne >5
4494 | cmp dword NODE:RA->key.gcr, STR:RC
4495 | jne >5
4496 | // Ok, key found. Assumes: offsetof(Node, val) == 0
4497 | cmp dword [RA+4], LJ_TNIL
4498 | je >4 // Previous value is nil?
4499 |2:
4500 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4501 | jnz >7
4502 |3: // Set node value.
4503 | movzx RC, PC_RA
4504 |.if X64
4505 | mov RBa, [BASE+RC*8]
4506 | mov [RA], RBa
4507 |.else
4508 | mov RB, [BASE+RC*8+4]
4509 | mov RC, [BASE+RC*8]
4510 | mov [RA+4], RB
4511 | mov [RA], RC
4512 |.endif
4513 | ins_next
4514 |
4515 |4: // Check for __newindex if previous value is nil.
4516 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4517 | jz <2
4518 | mov TMP1, RA // Save RA.
4519 | mov TAB:RA, TAB:RB->metatable
4520 | test byte TAB:RA->nomm, 1<<MM_newindex
4521 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4522 | mov RA, TMP1 // Restore RA.
4523 | jmp <2
4524 |
4525 |5: // Follow hash chain.
4526 | mov NODE:RA, NODE:RA->next
4527 | test NODE:RA, NODE:RA
4528 | jnz <1
4529 | // End of hash chain: key not found, add a new one.
4530 |
4531 | // But check for __newindex first.
4532 | mov TAB:RA, TAB:RB->metatable
4533 | test TAB:RA, TAB:RA
4534 | jz >6 // No metatable: continue.
4535 | test byte TAB:RA->nomm, 1<<MM_newindex
4536 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4537 |6:
4538 | mov TMP1, STR:RC
4539 | mov TMP2, LJ_TSTR
4540 | mov TMP3, TAB:RB // Save TAB:RB for us.
4541 |.if X64
4542 | mov L:CARG1d, SAVE_L
4543 | mov L:CARG1d->base, BASE
4544 | lea CARG3, TMP1
4545 | mov CARG2d, TAB:RB
4546 | mov L:RB, L:CARG1d
4547 |.else
4548 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
4549 | mov ARG2, TAB:RB
4550 | mov L:RB, SAVE_L
4551 | mov ARG3, RC
4552 | mov ARG1, L:RB
4553 | mov L:RB->base, BASE
4554 |.endif
4555 | mov SAVE_PC, PC
4556 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4557 | // Handles write barrier for the new key. TValue * returned in eax (RC).
4558 | mov BASE, L:RB->base
4559 | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
4560 | mov RA, eax 4561 | jmp <2 // Must check write barrier for value. 4562 | 4563 |7: // Possible table write barrier for the value. Skip valiswhite check. 4564 | barrierback TAB:RB, RC // Destroys STR:RC. 4565 | jmp <3 4566 break; 4567 case BC_TSETB: 4568 | ins_ABC // RA = src, RB = table, RC = byte literal 4569 | checktab RB, ->vmeta_tsetb 4570 | mov TAB:RB, [BASE+RB*8] 4571 | cmp RC, TAB:RB->asize 4572 | jae ->vmeta_tsetb 4573 | shl RC, 3 4574 | add RC, TAB:RB->array 4575 | cmp dword [RC+4], LJ_TNIL 4576 | je >3 // Previous value is nil? 4577 |1: 4578 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4579 | jnz >7 4580 |2: // Set array slot. 4581 |.if X64 4582 | mov RAa, [BASE+RA*8] 4583 | mov [RC], RAa 4584 |.else 4585 | mov RB, [BASE+RA*8+4] 4586 | mov RA, [BASE+RA*8] 4587 | mov [RC+4], RB 4588 | mov [RC], RA 4589 |.endif 4590 | ins_next 4591 | 4592 |3: // Check for __newindex if previous value is nil. 4593 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. 4594 | jz <1 4595 | mov TAB:RA, TAB:RB->metatable 4596 | test byte TAB:RA->nomm, 1<<MM_newindex 4597 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. 4598 | movzx RA, PC_RA // Restore RA. 4599 | jmp <1 4600 | 4601 |7: // Possible table write barrier for the value. Skip valiswhite check. 4602 | barrierback TAB:RB, RA 4603 | movzx RA, PC_RA // Restore RA. 4604 | jmp <2 4605 break; 4606 case BC_TSETR: 4607 | ins_ABC // RA = src, RB = table, RC = key 4608 | mov TAB:RB, [BASE+RB*8] 4609 |.if DUALNUM 4610 | mov RC, dword [BASE+RC*8] 4611 |.else 4612 | cvttsd2si RC, qword [BASE+RC*8] 4613 |.endif 4614 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4615 | jnz >7 4616 |2: 4617 | cmp RC, TAB:RB->asize 4618 | jae ->vmeta_tsetr 4619 | shl RC, 3 4620 | add RC, TAB:RB->array 4621 | // Set array slot. 
4622 |->BC_TSETR_Z: 4623 |.if X64 4624 | mov RBa, [BASE+RA*8] 4625 | mov [RC], RBa 4626 |.else 4627 | mov RB, [BASE+RA*8+4] 4628 | mov RA, [BASE+RA*8] 4629 | mov [RC+4], RB 4630 | mov [RC], RA 4631 |.endif 4632 | ins_next 4633 | 4634 |7: // Possible table write barrier for the value. Skip valiswhite check. 4635 | barrierback TAB:RB, RA 4636 | movzx RA, PC_RA // Restore RA. 4637 | jmp <2 4638 break; 4639 4640 case BC_TSETM: 4641 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4642 | mov TMP1, KBASE // Need one more free register. 4643 | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word. 4644 |1: 4645 | lea RA, [BASE+RA*8] 4646 | mov TAB:RB, [RA-8] // Guaranteed to be a table. 4647 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 4648 | jnz >7 4649 |2: 4650 | mov RD, MULTRES 4651 | sub RD, 1 4652 | jz >4 // Nothing to copy? 4653 | add RD, KBASE // Compute needed size. 4654 | cmp RD, TAB:RB->asize 4655 | ja >5 // Doesn't fit into array part? 4656 | sub RD, KBASE 4657 | shl KBASE, 3 4658 | add KBASE, TAB:RB->array 4659 |3: // Copy result slots to table. 4660 |.if X64 4661 | mov RBa, [RA] 4662 | add RA, 8 4663 | mov [KBASE], RBa 4664 |.else 4665 | mov RB, [RA] 4666 | mov [KBASE], RB 4667 | mov RB, [RA+4] 4668 | add RA, 8 4669 | mov [KBASE+4], RB 4670 |.endif 4671 | add KBASE, 8 4672 | sub RD, 1 4673 | jnz <3 4674 |4: 4675 | mov KBASE, TMP1 4676 | ins_next 4677 | 4678 |5: // Need to resize array part. 4679 |.if X64 4680 | mov L:CARG1d, SAVE_L 4681 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 4682 | mov CARG2d, TAB:RB 4683 | mov CARG3d, RD 4684 | mov L:RB, L:CARG1d 4685 |.else 4686 | mov ARG2, TAB:RB 4687 | mov L:RB, SAVE_L 4688 | mov L:RB->base, BASE 4689 | mov ARG3, RD 4690 | mov ARG1, L:RB 4691 |.endif 4692 | mov SAVE_PC, PC 4693 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) 4694 | mov BASE, L:RB->base 4695 | movzx RA, PC_RA // Restore RA. 4696 | jmp <1 // Retry. 
4697 | 4698 |7: // Possible table write barrier for any value. Skip valiswhite check. 4699 | barrierback TAB:RB, RD 4700 | jmp <2 4701 break; 4702 4703 /* -- Calls and vararg handling ----------------------------------------- */ 4704 4705 case BC_CALL: case BC_CALLM: 4706 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs 4707 if (op == BC_CALLM) { 4708 | add NARGS:RD, MULTRES 4709 } 4710 | cmp dword [BASE+RA*8+4], LJ_TFUNC 4711 | mov LFUNC:RB, [BASE+RA*8] 4712 | jne ->vmeta_call_ra 4713 | lea BASE, [BASE+RA*8+8] 4714 | ins_call 4715 break; 4716 4717 case BC_CALLMT: 4718 | ins_AD // RA = base, RD = extra_nargs 4719 | add NARGS:RD, MULTRES 4720 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. 4721 break; 4722 case BC_CALLT: 4723 | ins_AD // RA = base, RD = nargs+1 4724 | lea RA, [BASE+RA*8+8] 4725 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. 4726 | mov LFUNC:RB, [RA-8] 4727 | cmp dword [RA-4], LJ_TFUNC 4728 | jne ->vmeta_call 4729 |->BC_CALLT_Z: 4730 | mov PC, [BASE-4] 4731 | test PC, FRAME_TYPE 4732 | jnz >7 4733 |1: 4734 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. 4735 | mov MULTRES, NARGS:RD 4736 | sub NARGS:RD, 1 4737 | jz >3 4738 |2: // Move args down. 4739 |.if X64 4740 | mov RBa, [RA] 4741 | add RA, 8 4742 | mov [KBASE], RBa 4743 |.else 4744 | mov RB, [RA] 4745 | mov [KBASE], RB 4746 | mov RB, [RA+4] 4747 | add RA, 8 4748 | mov [KBASE+4], RB 4749 |.endif 4750 | add KBASE, 8 4751 | sub NARGS:RD, 1 4752 | jnz <2 4753 | 4754 | mov LFUNC:RB, [BASE-8] 4755 |3: 4756 | mov NARGS:RD, MULTRES 4757 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? 4758 | ja >5 4759 |4: 4760 | ins_callt 4761 | 4762 |5: // Tailcall to a fast function. 4763 | test PC, FRAME_TYPE // Lua frame below? 4764 | jnz <4 4765 | movzx RA, PC_RA 4766 | not RAa 4767 | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE. 
4768 | mov KBASE, LFUNC:KBASE->pc 4769 | mov KBASE, [KBASE+PC2PROTO(k)] 4770 | jmp <4 4771 | 4772 |7: // Tailcall from a vararg function. 4773 | sub PC, FRAME_VARG 4774 | test PC, FRAME_TYPEP 4775 | jnz >8 // Vararg frame below? 4776 | sub BASE, PC // Need to relocate BASE/KBASE down. 4777 | mov KBASE, BASE 4778 | mov PC, [BASE-4] 4779 | jmp <1 4780 |8: 4781 | add PC, FRAME_VARG 4782 | jmp <1 4783 break; 4784 4785 case BC_ITERC: 4786 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) 4787 | lea RA, [BASE+RA*8+8] // fb = base+1 4788 |.if X64 4789 | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3]. 4790 | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2]. 4791 | mov [RA], RBa 4792 | mov [RA+8], RCa 4793 |.else 4794 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. 4795 | mov RC, [RA-20] 4796 | mov [RA], RB 4797 | mov [RA+4], RC 4798 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. 4799 | mov RC, [RA-12] 4800 | mov [RA+8], RB 4801 | mov [RA+12], RC 4802 |.endif 4803 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] 4804 | mov RC, [RA-28] 4805 | mov [RA-8], LFUNC:RB 4806 | mov [RA-4], RC 4807 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. 4808 | mov NARGS:RD, 2+1 4809 | jne ->vmeta_call 4810 | mov BASE, RA 4811 | ins_call 4812 break; 4813 4814 case BC_ITERN: 4815 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) 4816 |.if JIT 4817 | // NYI: add hotloop, record BC_ITERN. 4818 |.endif 4819 | mov TMP1, KBASE // Need two more free registers. 4820 | mov TMP2, DISPATCH 4821 | mov TAB:RB, [BASE+RA*8-16] 4822 | mov RC, [BASE+RA*8-8] // Get index from control var. 4823 | mov DISPATCH, TAB:RB->asize 4824 | add PC, 4 4825 | mov KBASE, TAB:RB->array 4826 |1: // Traverse array part. 4827 | cmp RC, DISPATCH; jae >5 // Index points after array part? 
4828 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 4829 |.if DUALNUM 4830 | mov dword [BASE+RA*8+4], LJ_TISNUM 4831 | mov dword [BASE+RA*8], RC 4832 |.else 4833 | cvtsi2sd xmm0, RC 4834 |.endif 4835 | // Copy array slot to returned value. 4836 |.if X64 4837 | mov RBa, [KBASE+RC*8] 4838 | mov [BASE+RA*8+8], RBa 4839 |.else 4840 | mov RB, [KBASE+RC*8+4] 4841 | mov [BASE+RA*8+12], RB 4842 | mov RB, [KBASE+RC*8] 4843 | mov [BASE+RA*8+8], RB 4844 |.endif 4845 | add RC, 1 4846 | // Return array index as a numeric key. 4847 |.if DUALNUM 4848 | // See above. 4849 |.else 4850 | movsd qword [BASE+RA*8], xmm0 4851 |.endif 4852 | mov [BASE+RA*8-8], RC // Update control var. 4853 |2: 4854 | movzx RD, PC_RD // Get target from ITERL. 4855 | branchPC RD 4856 |3: 4857 | mov DISPATCH, TMP2 4858 | mov KBASE, TMP1 4859 | ins_next 4860 | 4861 |4: // Skip holes in array part. 4862 | add RC, 1 4863 | jmp <1 4864 | 4865 |5: // Traverse hash part. 4866 | sub RC, DISPATCH 4867 |6: 4868 | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. 4869 | imul KBASE, RC, #NODE 4870 | add NODE:KBASE, TAB:RB->node 4871 | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7 4872 | lea DISPATCH, [RC+DISPATCH+1] 4873 | // Copy key and value from hash slot. 4874 |.if X64 4875 | mov RBa, NODE:KBASE->key 4876 | mov RCa, NODE:KBASE->val 4877 | mov [BASE+RA*8], RBa 4878 | mov [BASE+RA*8+8], RCa 4879 |.else 4880 | mov RB, NODE:KBASE->key.gcr 4881 | mov RC, NODE:KBASE->key.it 4882 | mov [BASE+RA*8], RB 4883 | mov [BASE+RA*8+4], RC 4884 | mov RB, NODE:KBASE->val.gcr 4885 | mov RC, NODE:KBASE->val.it 4886 | mov [BASE+RA*8+8], RB 4887 | mov [BASE+RA*8+12], RC 4888 |.endif 4889 | mov [BASE+RA*8-8], DISPATCH 4890 | jmp <2 4891 | 4892 |7: // Skip holes in hash part. 
4893 | add RC, 1 4894 | jmp <6 4895 break; 4896 4897 case BC_ISNEXT: 4898 | ins_AD // RA = base, RD = target (points to ITERN) 4899 | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 4900 | mov CFUNC:RB, [BASE+RA*8-24] 4901 | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 4902 | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 4903 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 4904 | branchPC RD 4905 | mov dword [BASE+RA*8-8], 0 // Initialize control var. 4906 | mov dword [BASE+RA*8-4], 0xfffe7fff 4907 |1: 4908 | ins_next 4909 |5: // Despecialize bytecode if any of the checks fail. 4910 | mov PC_OP, BC_JMP 4911 | branchPC RD 4912 | mov byte [PC], BC_ITERC 4913 | jmp <1 4914 break; 4915 4916 case BC_VARG: 4917 | ins_ABC // RA = base, RB = nresults+1, RC = numparams 4918 | mov TMP1, KBASE // Need one more free register. 4919 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] 4920 | lea RA, [BASE+RA*8] 4921 | sub KBASE, [BASE-4] 4922 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. 4923 | test RB, RB 4924 | jz >5 // Copy all varargs? 4925 | lea RB, [RA+RB*8-8] 4926 | cmp KBASE, BASE // No vararg slots? 4927 | jnb >2 4928 |1: // Copy vararg slots to destination slots. 4929 |.if X64 4930 | mov RCa, [KBASE-8] 4931 | add KBASE, 8 4932 | mov [RA], RCa 4933 |.else 4934 | mov RC, [KBASE-8] 4935 | mov [RA], RC 4936 | mov RC, [KBASE-4] 4937 | add KBASE, 8 4938 | mov [RA+4], RC 4939 |.endif 4940 | add RA, 8 4941 | cmp RA, RB // All destination slots filled? 4942 | jnb >3 4943 | cmp KBASE, BASE // No more vararg slots? 4944 | jb <1 4945 |2: // Fill up remainder with nil. 4946 | mov dword [RA+4], LJ_TNIL 4947 | add RA, 8 4948 | cmp RA, RB 4949 | jb <2 4950 |3: 4951 | mov KBASE, TMP1 4952 | ins_next 4953 | 4954 |5: // Copy all varargs. 4955 | mov MULTRES, 1 // MULTRES = 0+1 4956 | mov RC, BASE 4957 | sub RC, KBASE 4958 | jbe <3 // No vararg slots? 
4959 | mov RB, RC 4960 | shr RB, 3 4961 | add RB, 1 4962 | mov MULTRES, RB // MULTRES = #varargs+1 4963 | mov L:RB, SAVE_L 4964 | add RC, RA 4965 | cmp RC, L:RB->maxstack 4966 | ja >7 // Need to grow stack? 4967 |6: // Copy all vararg slots. 4968 |.if X64 4969 | mov RCa, [KBASE-8] 4970 | add KBASE, 8 4971 | mov [RA], RCa 4972 |.else 4973 | mov RC, [KBASE-8] 4974 | mov [RA], RC 4975 | mov RC, [KBASE-4] 4976 | add KBASE, 8 4977 | mov [RA+4], RC 4978 |.endif 4979 | add RA, 8 4980 | cmp KBASE, BASE // No more vararg slots? 4981 | jb <6 4982 | jmp <3 4983 | 4984 |7: // Grow stack for varargs. 4985 | mov L:RB->base, BASE 4986 | mov L:RB->top, RA 4987 | mov SAVE_PC, PC 4988 | sub KBASE, BASE // Need delta, because BASE may change. 4989 | mov FCARG2, MULTRES 4990 | sub FCARG2, 1 4991 | mov FCARG1, L:RB 4992 | call extern lj_state_growstack@8 // (lua_State *L, int n) 4993 | mov BASE, L:RB->base 4994 | mov RA, L:RB->top 4995 | add KBASE, BASE 4996 | jmp <6 4997 break; 4998 4999 /* -- Returns ----------------------------------------------------------- */ 5000 5001 case BC_RETM: 5002 | ins_AD // RA = results, RD = extra_nresults 5003 | add RD, MULTRES // MULTRES >=1, so RD >=1. 5004 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. 5005 break; 5006 5007 case BC_RET: case BC_RET0: case BC_RET1: 5008 | ins_AD // RA = results, RD = nresults+1 5009 if (op != BC_RET0) { 5010 | shl RA, 3 5011 } 5012 |1: 5013 | mov PC, [BASE-4] 5014 | mov MULTRES, RD // Save nresults+1. 5015 | test PC, FRAME_TYPE // Check frame type marker. 5016 | jnz >7 // Not returning to a fixarg Lua func? 5017 switch (op) { 5018 case BC_RET: 5019 |->BC_RET_Z: 5020 | mov KBASE, BASE // Use KBASE for result move. 5021 | sub RD, 1 5022 | jz >3 5023 |2: // Move results down. 
5024 |.if X64 5025 | mov RBa, [KBASE+RA] 5026 | mov [KBASE-8], RBa 5027 |.else 5028 | mov RB, [KBASE+RA] 5029 | mov [KBASE-8], RB 5030 | mov RB, [KBASE+RA+4] 5031 | mov [KBASE-4], RB 5032 |.endif 5033 | add KBASE, 8 5034 | sub RD, 1 5035 | jnz <2 5036 |3: 5037 | mov RD, MULTRES // Note: MULTRES may be >255. 5038 | movzx RB, PC_RB // So cannot compare with RDL! 5039 |5: 5040 | cmp RB, RD // More results expected? 5041 | ja >6 5042 break; 5043 case BC_RET1: 5044 |.if X64 5045 | mov RBa, [BASE+RA] 5046 | mov [BASE-8], RBa 5047 |.else 5048 | mov RB, [BASE+RA+4] 5049 | mov [BASE-4], RB 5050 | mov RB, [BASE+RA] 5051 | mov [BASE-8], RB 5052 |.endif 5053 /* fallthrough */ 5054 case BC_RET0: 5055 |5: 5056 | cmp PC_RB, RDL // More results expected? 5057 | ja >6 5058 default: 5059 break; 5060 } 5061 | movzx RA, PC_RA 5062 | not RAa // Note: ~RA = -(RA+1) 5063 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 5064 | mov LFUNC:KBASE, [BASE-8] 5065 | mov KBASE, LFUNC:KBASE->pc 5066 | mov KBASE, [KBASE+PC2PROTO(k)] 5067 | ins_next 5068 | 5069 |6: // Fill up results with nil. 5070 if (op == BC_RET) { 5071 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. 5072 | add KBASE, 8 5073 } else { 5074 | mov dword [BASE+RD*8-12], LJ_TNIL 5075 } 5076 | add RD, 1 5077 | jmp <5 5078 | 5079 |7: // Non-standard return case. 5080 | lea RB, [PC-FRAME_VARG] 5081 | test RB, FRAME_TYPEP 5082 | jnz ->vm_return 5083 | // Return from vararg function: relocate BASE down and RA up. 
5084 | sub BASE, RB 5085 if (op != BC_RET0) { 5086 | add RA, RB 5087 } 5088 | jmp <1 5089 break; 5090 5091 /* -- Loops and branches ------------------------------------------------ */ 5092 5093 |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] 5094 |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] 5095 |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] 5096 |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] 5097 5098 case BC_FORL: 5099 |.if JIT 5100 | hotloop RB 5101 |.endif 5102 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. 5103 break; 5104 5105 case BC_JFORI: 5106 case BC_JFORL: 5107 #if !LJ_HASJIT 5108 break; 5109 #endif 5110 case BC_FORI: 5111 case BC_IFORL: 5112 vk = (op == BC_IFORL || op == BC_JFORL); 5113 | ins_AJ // RA = base, RD = target (after end of loop or start of loop) 5114 | lea RA, [BASE+RA*8] 5115 if (LJ_DUALNUM) { 5116 | cmp FOR_TIDX, LJ_TISNUM; jne >9 5117 if (!vk) { 5118 | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for 5119 | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for 5120 | mov RB, dword FOR_IDX 5121 | cmp dword FOR_STEP, 0; jl >5 5122 } else { 5123 #ifdef LUA_USE_ASSERT 5124 | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type 5125 | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type 5126 #endif 5127 | mov RB, dword FOR_STEP 5128 | test RB, RB; js >5 5129 | add RB, dword FOR_IDX; jo >1 5130 | mov dword FOR_IDX, RB 5131 } 5132 | cmp RB, dword FOR_STOP 5133 | mov FOR_TEXT, LJ_TISNUM 5134 | mov dword FOR_EXT, RB 5135 if (op == BC_FORI) { 5136 | jle >7 5137 |1: 5138 |6: 5139 | branchPC RD 5140 } else if (op == BC_JFORI) { 5141 | branchPC RD 5142 | movzx RD, PC_RD 5143 | jle =>BC_JLOOP 5144 |1: 5145 |6: 5146 } else if (op == BC_IFORL) { 5147 | jg >7 5148 |6: 5149 | branchPC RD 5150 |1: 5151 } else { 5152 | jle =>BC_JLOOP 5153 |1: 5154 |6: 5155 } 5156 |7: 5157 | ins_next 5158 | 5159 |5: // Invert check for negative step. 
5160 if (vk) { 5161 | add RB, dword FOR_IDX; jo <1 5162 | mov dword FOR_IDX, RB 5163 } 5164 | cmp RB, dword FOR_STOP 5165 | mov FOR_TEXT, LJ_TISNUM 5166 | mov dword FOR_EXT, RB 5167 if (op == BC_FORI) { 5168 | jge <7 5169 } else if (op == BC_JFORI) { 5170 | branchPC RD 5171 | movzx RD, PC_RD 5172 | jge =>BC_JLOOP 5173 } else if (op == BC_IFORL) { 5174 | jl <7 5175 } else { 5176 | jge =>BC_JLOOP 5177 } 5178 | jmp <6 5179 |9: // Fallback to FP variant. 5180 } else if (!vk) { 5181 | cmp FOR_TIDX, LJ_TISNUM 5182 } 5183 if (!vk) { 5184 | jae ->vmeta_for 5185 | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for 5186 } else { 5187 #ifdef LUA_USE_ASSERT 5188 | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type 5189 | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type 5190 #endif 5191 } 5192 | mov RB, FOR_TSTEP // Load type/hiword of for step. 5193 if (!vk) { 5194 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5195 } 5196 | movsd xmm0, qword FOR_IDX 5197 | movsd xmm1, qword FOR_STOP 5198 if (vk) { 5199 | addsd xmm0, qword FOR_STEP 5200 | movsd qword FOR_IDX, xmm0 5201 | test RB, RB; js >3 5202 } else { 5203 | jl >3 5204 } 5205 | ucomisd xmm1, xmm0 5206 |1: 5207 | movsd qword FOR_EXT, xmm0 5208 if (op == BC_FORI) { 5209 |.if DUALNUM 5210 | jnb <7 5211 |.else 5212 | jnb >2 5213 | branchPC RD 5214 |.endif 5215 } else if (op == BC_JFORI) { 5216 | branchPC RD 5217 | movzx RD, PC_RD 5218 | jnb =>BC_JLOOP 5219 } else if (op == BC_IFORL) { 5220 |.if DUALNUM 5221 | jb <7 5222 |.else 5223 | jb >2 5224 | branchPC RD 5225 |.endif 5226 } else { 5227 | jnb =>BC_JLOOP 5228 } 5229 |.if DUALNUM 5230 | jmp <6 5231 |.else 5232 |2: 5233 | ins_next 5234 |.endif 5235 | 5236 |3: // Invert comparison if step is negative. 5237 | ucomisd xmm0, xmm1 5238 | jmp <1 5239 break; 5240 5241 case BC_ITERL: 5242 |.if JIT 5243 | hotloop RB 5244 |.endif 5245 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. 
5246 break; 5247 5248 case BC_JITERL: 5249 #if !LJ_HASJIT 5250 break; 5251 #endif 5252 case BC_IITERL: 5253 | ins_AJ // RA = base, RD = target 5254 | lea RA, [BASE+RA*8] 5255 | mov RB, [RA+4] 5256 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. 5257 if (op == BC_JITERL) { 5258 | mov [RA-4], RB 5259 | mov RB, [RA] 5260 | mov [RA-8], RB 5261 | jmp =>BC_JLOOP 5262 } else { 5263 | branchPC RD // Otherwise save control var + branch. 5264 | mov RD, [RA] 5265 | mov [RA-4], RB 5266 | mov [RA-8], RD 5267 } 5268 |1: 5269 | ins_next 5270 break; 5271 5272 case BC_LOOP: 5273 | ins_A // RA = base, RD = target (loop extent) 5274 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5275 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 5276 |.if JIT 5277 | hotloop RB 5278 |.endif 5279 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5280 break; 5281 5282 case BC_ILOOP: 5283 | ins_A // RA = base, RD = target (loop extent) 5284 | ins_next 5285 break; 5286 5287 case BC_JLOOP: 5288 |.if JIT 5289 | ins_AD // RA = base (ignored), RD = traceno 5290 | mov RA, [DISPATCH+DISPATCH_J(trace)] 5291 | mov TRACE:RD, [RA+RD*4] 5292 | mov RDa, TRACE:RD->mcode 5293 | mov L:RB, SAVE_L 5294 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5295 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB 5296 | // Save additional callee-save registers only used in compiled code. 
5297 |.if X64WIN 5298 | mov TMPQ, r12 5299 | mov TMPa, r13 5300 | mov CSAVE_4, r14 5301 | mov CSAVE_3, r15 5302 | mov RAa, rsp 5303 | sub rsp, 9*16+4*8 5304 | movdqa [RAa], xmm6 5305 | movdqa [RAa-1*16], xmm7 5306 | movdqa [RAa-2*16], xmm8 5307 | movdqa [RAa-3*16], xmm9 5308 | movdqa [RAa-4*16], xmm10 5309 | movdqa [RAa-5*16], xmm11 5310 | movdqa [RAa-6*16], xmm12 5311 | movdqa [RAa-7*16], xmm13 5312 | movdqa [RAa-8*16], xmm14 5313 | movdqa [RAa-9*16], xmm15 5314 |.elif X64 5315 | mov TMPQ, r12 5316 | mov TMPa, r13 5317 | sub rsp, 16 5318 |.endif 5319 | jmp RDa 5320 |.endif 5321 break; 5322 5323 case BC_JMP: 5324 | ins_AJ // RA = unused, RD = target 5325 | branchPC RD 5326 | ins_next 5327 break; 5328 5329 /* -- Function headers -------------------------------------------------- */ 5330 5331 /* 5332 ** Reminder: A function may be called with func/args above L->maxstack, 5333 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, 5334 ** too. This means all FUNC* ops (including fast functions) must check 5335 ** for stack overflow _before_ adding more slots! 5336 */ 5337 5338 case BC_FUNCF: 5339 |.if JIT 5340 | hotcall RB 5341 |.endif 5342 case BC_FUNCV: /* NYI: compiled vararg functions. */ 5343 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. 5344 break; 5345 5346 case BC_JFUNCF: 5347 #if !LJ_HASJIT 5348 break; 5349 #endif 5350 case BC_IFUNCF: 5351 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 5352 | mov KBASE, [PC-4+PC2PROTO(k)] 5353 | mov L:RB, SAVE_L 5354 | lea RA, [BASE+RA*8] // Top of frame. 5355 | cmp RA, L:RB->maxstack 5356 | ja ->vm_growstack_f 5357 | movzx RA, byte [PC-4+PC2PROTO(numparams)] 5358 | cmp NARGS:RD, RA // Check for missing parameters. 5359 | jbe >3 5360 |2: 5361 if (op == BC_JFUNCF) { 5362 | movzx RD, PC_RD 5363 | jmp =>BC_JLOOP 5364 } else { 5365 | ins_next 5366 } 5367 | 5368 |3: // Clear missing parameters. 
5369 | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL 5370 | add NARGS:RD, 1 5371 | cmp NARGS:RD, RA 5372 | jbe <3 5373 | jmp <2 5374 break; 5375 5376 case BC_JFUNCV: 5377 #if !LJ_HASJIT 5378 break; 5379 #endif 5380 | int3 // NYI: compiled vararg functions 5381 break; /* NYI: compiled vararg functions. */ 5382 5383 case BC_IFUNCV: 5384 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 5385 | lea RB, [NARGS:RD*8+FRAME_VARG] 5386 | lea RD, [BASE+NARGS:RD*8] 5387 | mov LFUNC:KBASE, [BASE-8] 5388 | mov [RD-4], RB // Store delta + FRAME_VARG. 5389 | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. 5390 | mov L:RB, SAVE_L 5391 | lea RA, [RD+RA*8] 5392 | cmp RA, L:RB->maxstack 5393 | ja ->vm_growstack_v // Need to grow stack. 5394 | mov RA, BASE 5395 | mov BASE, RD 5396 | movzx RB, byte [PC-4+PC2PROTO(numparams)] 5397 | test RB, RB 5398 | jz >2 5399 |1: // Copy fixarg slots up to new frame. 5400 | add RA, 8 5401 | cmp RA, BASE 5402 | jnb >3 // Less args than parameters? 5403 | mov KBASE, [RA-8] 5404 | mov [RD], KBASE 5405 | mov KBASE, [RA-4] 5406 | mov [RD+4], KBASE 5407 | add RD, 8 5408 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). 5409 | sub RB, 1 5410 | jnz <1 5411 |2: 5412 if (op == BC_JFUNCV) { 5413 | movzx RD, PC_RD 5414 | jmp =>BC_JLOOP 5415 } else { 5416 | mov KBASE, [PC-4+PC2PROTO(k)] 5417 | ins_next 5418 } 5419 | 5420 |3: // Clear missing parameters. 5421 | mov dword [RD+4], LJ_TNIL 5422 | add RD, 8 5423 | sub RB, 1 5424 | jnz <3 5425 | jmp <2 5426 break; 5427 5428 case BC_FUNCC: 5429 case BC_FUNCCW: 5430 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 5431 | mov CFUNC:RB, [BASE-8] 5432 | mov KBASEa, CFUNC:RB->f 5433 | mov L:RB, SAVE_L 5434 | lea RD, [BASE+NARGS:RD*8-8] 5435 | mov L:RB->base, BASE 5436 | lea RA, [RD+8*LUA_MINSTACK] 5437 | cmp RA, L:RB->maxstack 5438 | mov L:RB->top, RD 5439 if (op == BC_FUNCC) { 5440 |.if X64 5441 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. 
5442 |.else 5443 | mov ARG1, L:RB 5444 |.endif 5445 } else { 5446 |.if X64 5447 | mov CARG2, KBASEa 5448 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. 5449 |.else 5450 | mov ARG2, KBASEa 5451 | mov ARG1, L:RB 5452 |.endif 5453 } 5454 | ja ->vm_growstack_c // Need to grow stack. 5455 | set_vmstate C 5456 if (op == BC_FUNCC) { 5457 | call KBASEa // (lua_State *L) 5458 } else { 5459 | // (lua_State *L, lua_CFunction f) 5460 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5461 } 5462 | // nresults returned in eax (RD). 5463 | mov BASE, L:RB->base 5464 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB 5465 | set_vmstate INTERP 5466 | lea RA, [BASE+RD*8] 5467 | neg RA 5468 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5469 | mov PC, [BASE-4] // Fetch PC of caller. 5470 | jmp ->vm_returnc 5471 break; 5472 5473 /* ---------------------------------------------------------------------- */ 5474 5475 default: 5476 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); 5477 exit(2); 5478 break; 5479 } 5480 } 5481 5482 static int build_backend(BuildCtx *ctx) 5483 { 5484 int op; 5485 dasm_growpc(Dst, BC__MAX); 5486 build_subroutines(ctx); 5487 |.code_op 5488 for (op = 0; op < BC__MAX; op++) 5489 build_ins(ctx, (BCOp)op, op); 5490 return BC__MAX; 5491 } 5492 5493 /* Emit pseudo frame-info for all assembler functions. 
*/
/*
** Writes hand-assembled call-frame information (CIE/FDE records) for the
** generated VM code to ctx->fp, as assembler source text.
**
** ctx->mode selects the flavour:
**   BUILD_elfasm   - a .debug_frame section, plus an .eh_frame section
**                    unless LJ_NO_UNWIND is set.
**   BUILD_machasm  - a Mach-O __eh_frame section (only compiled in when
**                    LJ_NO_UNWIND is not set).
**   anything else  - nothing is emitted.
**
** The emitted text is consumed by the assembler/unwinder, so every string
** literal below is significant byte for byte.
*/
static void emit_asm_debug(BuildCtx *ctx)
{
  /* Byte offset of the lj_vm_ffi_call global from the start of the code.
  ** Used as the length of the FDE that starts at .Lbegin; the FDE for
  ** lj_vm_ffi_call then covers the remaining ctx->codesz - fcofs bytes.
  */
  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
  /* String fragments spliced into the assembler text below:
  **   SZPTR   pointer size in bytes (data alignment factor, ELF .align).
  **   BSZPTR  log2 of SZPTR, used as the .align operand in the Mach-O output.
  **   REG_SP  register number used in the initial def_cfa rule.
  **   REG_RA  register number of the return-address column.
  */
#if LJ_64
#define SZPTR "8"
#define BSZPTR "3"
#define REG_SP "0x7"
#define REG_RA "0x10"
#else
#define SZPTR "4"
#define BSZPTR "2"
#define REG_SP "0x4"
#define REG_RA "0x8"
#endif
  switch (ctx->mode) {
  case BUILD_elfasm:
    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
    /* CIE 0 (.debug_frame): empty augmentation string. */
    fprintf(ctx->fp,
      ".Lframe0:\n"
      "\t.long .LECIE0-.LSCIE0\n"
      ".LSCIE0:\n"
      "\t.long 0xffffffff\n"
      "\t.byte 0x1\n"
      "\t.string \"\"\n"
      "\t.uleb128 0x1\n"
      "\t.sleb128 -" SZPTR "\n"
      "\t.byte " REG_RA "\n"
      "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"  /* def_cfa */
      "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"  /* offset return address */
      "\t.align " SZPTR "\n"
      ".LECIE0:\n\n");
    /* FDE 0: covers fcofs bytes starting at .Lbegin, with the CFA offset
    ** set to CFRAME_SIZE and the save slots of the callee-saved registers.
    */
    fprintf(ctx->fp,
      ".LSFDE0:\n"
      "\t.long .LEFDE0-.LASFDE0\n"
      ".LASFDE0:\n"
      "\t.long .Lframe0\n"
#if LJ_64
      "\t.quad .Lbegin\n"
      "\t.quad %d\n"
      "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
      "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
      "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
      "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
#if LJ_NO_UNWIND
      "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
      "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
#endif
#else
      "\t.long .Lbegin\n"
      "\t.long %d\n"
      "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
      "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
      "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
      "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
      "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
#endif
      "\t.align " SZPTR "\n"
      ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
#if LJ_HASFFI
    /* FDE 1: lj_vm_ffi_call, which switches the CFA register to rbp/ebp. */
    fprintf(ctx->fp,
      ".LSFDE1:\n"
      "\t.long .LEFDE1-.LASFDE1\n"
      ".LASFDE1:\n"
      "\t.long .Lframe0\n"
#if LJ_64
      "\t.quad lj_vm_ffi_call\n"
      "\t.quad %d\n"
      "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
      "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
      "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
#else
      "\t.long lj_vm_ffi_call\n"
      "\t.long %d\n"
      "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
      "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
      "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
      "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
#endif
      "\t.align " SZPTR "\n"
      ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
#endif
#if !LJ_NO_UNWIND
    /* .eh_frame section header; the flags/type differ for Solaris targets. */
#if (defined(__sun__) && defined(__svr4__))
#if LJ_64
    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
#else
    fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
#endif
#else
    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
#endif
    /* CIE 1 (.eh_frame): "zPR" augmentation; the augmentation data carries
    ** a pc-relative reference to lj_err_unwind_dwarf.
    */
    fprintf(ctx->fp,
      ".Lframe1:\n"
      "\t.long .LECIE1-.LSCIE1\n"
      ".LSCIE1:\n"
      "\t.long 0\n"
      "\t.byte 0x1\n"
      "\t.string \"zPR\"\n"
      "\t.uleb128 0x1\n"
      "\t.sleb128 -" SZPTR "\n"
      "\t.byte " REG_RA "\n"
      "\t.uleb128 6\n" /* augmentation length */
      "\t.byte 0x1b\n" /* pcrel|sdata4 */
      "\t.long lj_err_unwind_dwarf-.\n"
      "\t.byte 0x1b\n" /* pcrel|sdata4 */
      "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
      "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
      "\t.align " SZPTR "\n"
      ".LECIE1:\n\n");
    /* FDE 2: same range and register rules as FDE 0, but with pc-relative
    ** addressing and without the LJ_NO_UNWIND-only r13/r12 entries.
    */
    fprintf(ctx->fp,
      ".LSFDE2:\n"
      "\t.long .LEFDE2-.LASFDE2\n"
      ".LASFDE2:\n"
      "\t.long .LASFDE2-.Lframe1\n"
      "\t.long .Lbegin-.\n"
      "\t.long %d\n"
      "\t.uleb128 0\n" /* augmentation length */
      "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
#if LJ_64
      "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
      "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
      "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
#else
      "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
      "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
      "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
      "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
#endif
      "\t.align " SZPTR "\n"
      ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
#if LJ_HASFFI
    /* CIE 2: "zR" augmentation only, i.e. no lj_err_unwind_dwarf reference. */
    fprintf(ctx->fp,
      ".Lframe2:\n"
      "\t.long .LECIE2-.LSCIE2\n"
      ".LSCIE2:\n"
      "\t.long 0\n"
      "\t.byte 0x1\n"
      "\t.string \"zR\"\n"
      "\t.uleb128 0x1\n"
      "\t.sleb128 -" SZPTR "\n"
      "\t.byte " REG_RA "\n"
      "\t.uleb128 1\n" /* augmentation length */
      "\t.byte 0x1b\n" /* pcrel|sdata4 */
      "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
      "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
      "\t.align " SZPTR "\n"
      ".LECIE2:\n\n");
    /* FDE 3: lj_vm_ffi_call under CIE 2; same rules as FDE 1. */
    fprintf(ctx->fp,
      ".LSFDE3:\n"
      "\t.long .LEFDE3-.LASFDE3\n"
      ".LASFDE3:\n"
      "\t.long .LASFDE3-.Lframe2\n"
      "\t.long lj_vm_ffi_call-.\n"
      "\t.long %d\n"
      "\t.uleb128 0\n" /* augmentation length */
#if LJ_64
      "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
      "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
      "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
      "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
#else
      "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
      "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
      "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
      "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
#endif
      "\t.align " SZPTR "\n"
      ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
#endif
#endif
    break;
#if !LJ_NO_UNWIND
  /* Mental note: never let Apple design an assembler.
  ** Or a linker. Or a plastic case. But I digress.
  */
  case BUILD_machasm: {
#if LJ_HASFFI
    int fcsize = 0;  /* Size of _lj_vm_ffi_call; stays 0 if the symbol is not seen. */
#endif
    int i;
    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
    /* Shared CIE ("zPR") referenced by every per-symbol FDE emitted below. */
    fprintf(ctx->fp,
      "EH_frame1:\n"
      "\t.set L$set$x,LECIEX-LSCIEX\n"
      "\t.long L$set$x\n"
      "LSCIEX:\n"
      "\t.long 0\n"
      "\t.byte 0x1\n"
      "\t.ascii \"zPR\\0\"\n"
      "\t.byte 0x1\n"
      "\t.byte 128-" SZPTR "\n"
      "\t.byte " REG_RA "\n"
      "\t.byte 6\n" /* augmentation length */
      "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
#if LJ_64
      "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
      "\t.byte 0x1b\n" /* pcrel|sdata4 */
      "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
#else
      "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
      "\t.byte 0x1b\n" /* pcrel|sdata4 */
      "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
#endif
      "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
      "\t.align " BSZPTR "\n"
      "LECIEX:\n\n");
    /* One "<name>.eh" FDE per symbol with a non-zero size. The labels are
    ** made unique by embedding the symbol index i.
    ** NOTE(review): the size computation reads ctx->sym[i+1] even for
    ** i == ctx->nsym-1; presumably the symbol table has a closing entry
    ** past nsym -- confirm against the code that builds ctx->sym.
    */
    for (i = 0; i < ctx->nsym; i++) {
      const char *name = ctx->sym[i].name;
      int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
      if (size == 0) continue;
#if LJ_HASFFI
      /* _lj_vm_ffi_call gets its own CIE/FDE after the loop; just record its size. */
      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
#endif
      fprintf(ctx->fp,
        "%s.eh:\n"
        "LSFDE%d:\n"
        "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
        "\t.long L$set$%d\n"
        "LASFDE%d:\n"
        "\t.long LASFDE%d-EH_frame1\n"
        "\t.long %s-.\n"
        "\t.long %d\n"
        "\t.byte 0\n" /* augmentation length */
        "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
#if LJ_64
        "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
        "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
        "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
        "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
#else
        "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
        "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
        "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
        "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
#endif
        "\t.align " BSZPTR "\n"
        "LEFDE%d:\n\n",
        name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
    }
#if LJ_HASFFI
    if (fcsize) {
      /* Separate "zR" CIE for _lj_vm_ffi_call (no lj_err_unwind_dwarf reference). */
      fprintf(ctx->fp,
        "EH_frame2:\n"
        "\t.set L$set$y,LECIEY-LSCIEY\n"
        "\t.long L$set$y\n"
        "LSCIEY:\n"
        "\t.long 0\n"
        "\t.byte 0x1\n"
        "\t.ascii \"zR\\0\"\n"
        "\t.byte 0x1\n"
        "\t.byte 128-" SZPTR "\n"
        "\t.byte " REG_RA "\n"
        "\t.byte 1\n" /* augmentation length */
#if LJ_64
        "\t.byte 0x1b\n" /* pcrel|sdata4 */
        "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
#else
        "\t.byte 0x1b\n" /* pcrel|sdata4 */
        "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
#endif
        "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
        "\t.align " BSZPTR "\n"
        "LECIEY:\n\n");
      /* FDE for _lj_vm_ffi_call, covering the fcsize bytes recorded in the loop. */
      fprintf(ctx->fp,
        "_lj_vm_ffi_call.eh:\n"
        "LSFDEY:\n"
        "\t.set L$set$yy,LEFDEY-LASFDEY\n"
        "\t.long L$set$yy\n"
        "LASFDEY:\n"
        "\t.long LASFDEY-EH_frame2\n"
        "\t.long _lj_vm_ffi_call-.\n"
        "\t.long %d\n"
        "\t.byte 0\n" /* augmentation length */
#if LJ_64
        "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
        "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
        "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
        "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
#else
        "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
        "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
        "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */
        "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
#endif
        "\t.align " BSZPTR "\n"
        "LEFDEY:\n\n", fcsize);
    }
#endif
#if !LJ_64
    /* 32 bit only: the non-lazy pointer that the CIE above references. */
    fprintf(ctx->fp,
      "\t.non_lazy_symbol_pointer\n"
      "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
      ".indirect_symbol _lj_err_unwind_dwarf\n"
      ".long 0\n\n");
    fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
    {
      /* One 5-byte stub (five 0xf4 bytes) per external name that does not
      ** start with LABEL_PREFIX.
      ** NOTE(review): presumably the stub bytes are patched at load time,
      ** given the self_modifying_code section attribute -- confirm.
      */
      const char *const *xn;
      for (xn = ctx->extnames; *xn; xn++)
        if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
          fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
    }
#endif
    fprintf(ctx->fp, ".subsections_via_symbols\n");
    }
    break;
#endif
  default: /* Difficult for other modes. */
    break;
  }
}