ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

vm_x64.dasc (133620B)


      1 |// Low-level VM code for x64 CPUs in LJ_GC64 mode.
      2 |// Bytecode interpreter, fast functions and helper functions.
      3 |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      4 |
      5 |.arch x64				// DynASM target architecture.
      6 |.section code_op, code_sub		// Output sections; build_subroutines() below emits into code_sub.
      7 |
      8 |.actionlist build_actionlist		// Names for the DynASM-generated tables follow:
      9 |.globals GLOB_				//   prefix for the global label enum,
     10 |.globalnames globnames			//   array of global label names,
     11 |.externnames extnames			//   array of names referenced via "extern".
     12 |
     13 |//-----------------------------------------------------------------------
     14 |
     15 |.if WIN				// WIN is not defined in this part of the file; presumably set by the build.
     16 |.define X64WIN, 1			// Windows/x64 calling conventions.
     17 |.endif
     18 |
     19 |// Fixed register assignments for the interpreter. A 'd' suffix names the 32 bit view of a register.
     20 |// This is very fragile and has many dependencies. Caveat emptor.
     21 |.define BASE,		rdx		// Not C callee-save, refetched anyway. Aliases CARG2 (WIN64) / CARG3 (POSIX).
     22 |.if X64WIN
     23 |.define KBASE,		rdi		// Must be C callee-save.
     24 |.define PC,		rsi		// Must be C callee-save.
     25 |.define DISPATCH,	rbx		// Must be C callee-save.
     26 |.define KBASEd,	edi
     27 |.define PCd,		esi
     28 |.define DISPATCHd,	ebx
     29 |.else
     30 |.define KBASE,		r15		// Must be C callee-save.
     31 |.define PC,		rbx		// Must be C callee-save.
     32 |.define DISPATCH,	r14		// Must be C callee-save.
     33 |.define KBASEd,	r15d
     34 |.define PCd,		ebx
     35 |.define DISPATCHd,	r14d
     36 |.endif
     37 |
     38 |.define RA,		rcx		// Operand A (set by ins_NEXT). Aliases CARG1 (WIN64) / CARG4 (POSIX).
     39 |.define RAd,		ecx
     40 |.define RAH,		ch		// Bits 8-15 of RA; read by ins_callt.
     41 |.define RAL,		cl		// Bits 0-7 of RA; read by ins_callt.
     42 |.define RB,		rbp		// Must be rbp (C callee-save).
     43 |.define RBd,		ebp
     44 |.define RC,		rax		// Must be rax.
     45 |.define RCd,		eax
     46 |.define RCW,		ax
     47 |.define RCH,		ah		// Bits 8-15 of RC; ins_NEXT/ins_ABC extract operands from it.
     48 |.define RCL,		al		// Bits 0-7 of RC.
     49 |.define OP,		RBd		// The opcode is decoded into the 32 bit view of RB.
     50 |.define RD,		RC		// Operand D shares the register of operand C.
     51 |.define RDd,		RCd
     52 |.define RDW,		RCW
     53 |.define RDL,		RCL
     54 |.define TMPR,		r10		// Not used in this part of the file; presumably a scratch register.
     55 |.define TMPRd,		r10d
     56 |.define ITYPE,		r11		// Clobbered by the settp/checktp*/checknumx macros below.
     57 |.define ITYPEd,	r11d
     58 |
     59 |.if X64WIN				// Four argument registers are named for WIN64 ...
     60 |.define CARG1,		rcx		// x64/WIN64 C call arguments.
     61 |.define CARG2,		rdx
     62 |.define CARG3,		r8
     63 |.define CARG4,		r9
     64 |.define CARG1d,	ecx
     65 |.define CARG2d,	edx
     66 |.define CARG3d,	r8d
     67 |.define CARG4d,	r9d
     68 |.else					// ... and six for POSIX.
     69 |.define CARG1,		rdi		// x64/POSIX C call arguments.
     70 |.define CARG2,		rsi
     71 |.define CARG3,		rdx
     72 |.define CARG4,		rcx
     73 |.define CARG5,		r8
     74 |.define CARG6,		r9
     75 |.define CARG1d,	edi
     76 |.define CARG2d,	esi
     77 |.define CARG3d,	edx
     78 |.define CARG4d,	ecx
     79 |.define CARG5d,	r8d
     80 |.define CARG6d,	r9d
     81 |.endif
     82 |
     83 |// Type definitions. Some of these are only used for documentation.
     84 |.type L,		lua_State	// Used as L:reg->field, e.g. L:RB->base.
     85 |.type GL,		global_State
     86 |.type TVALUE,		TValue
     87 |.type GCOBJ,		GCobj
     88 |.type STR,		GCstr
     89 |.type TAB,		GCtab
     90 |.type LFUNC,		GCfuncL
     91 |.type CFUNC,		GCfuncC
     92 |.type PROTO,		GCproto
     93 |.type UPVAL,		GCupval
     94 |.type NODE,		Node
     95 |.type NARGS,		int		// Annotates registers that hold nargs+1 (e.g. NARGS:RDd).
     96 |.type TRACE,		GCtrace
     97 |.type SBUF,		SBuf
     98 |
     99 |// Stack layout while in interpreter. Must match with lj_frame.h.
    100 |//-----------------------------------------------------------------------
    101 |.if X64WIN		// x64/Windows stack layout
    102 |
    103 |.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
    104 |.macro saveregs_				// Push callee-saved regs (all but rbp), then reserve CFRAME_SPACE.
    105 |  push rdi; push rsi; push rbx
    106 |  sub rsp, CFRAME_SPACE
    107 |.endmacro
    108 |.macro saveregs					// Full prologue: rbp first, then saveregs_.
    109 |  push rbp; saveregs_
    110 |.endmacro
    111 |.macro restoreregs				// Exact inverse of saveregs.
    112 |  add rsp, CFRAME_SPACE
    113 |  pop rbx; pop rsi; pop rdi; pop rbp
    114 |.endmacro
    115 |
    116 |.define SAVE_CFRAME,	aword [rsp+aword*13]	// Slots above the return address (see separator below).
    117 |.define SAVE_PC,	aword [rsp+aword*12]
    118 |.define SAVE_L,	aword [rsp+aword*11]
    119 |.define SAVE_ERRF,	dword [rsp+dword*21]	// Upper dword of slot aword*10.
    120 |.define SAVE_NRES,	dword [rsp+dword*20]	// Lower dword of slot aword*10.
    121 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
    122 |.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
    123 |.define SAVE_R4,	aword [rsp+aword*8]	// rbp (pushed first by saveregs).
    124 |.define SAVE_R3,	aword [rsp+aword*7]	// rdi
    125 |.define SAVE_R2,	aword [rsp+aword*6]	// rsi
    126 |.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves. Holds rbx.
    127 |.define ARG5,		aword [rsp+aword*4]
    128 |.define CSAVE_4,	aword [rsp+aword*3]
    129 |.define CSAVE_3,	aword [rsp+aword*2]
    130 |.define CSAVE_2,	aword [rsp+aword*1]
    131 |.define CSAVE_1,	aword [rsp]		//<-- rsp while in interpreter.
    132 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
    133 |
    134 |.define ARG5d,		dword [rsp+dword*8]	// Low dword of ARG5 (same address as aword*4).
    135 |.define TMP1,		ARG5			// TMP1 overlaps ARG5
    136 |.define TMP1d,		ARG5d
    137 |.define TMP1hi,	dword [rsp+dword*9]	// High dword of TMP1.
    138 |.define MULTRES,	TMP1d			// MULTRES overlaps TMP1d.
    139 |
    140 |//-----------------------------------------------------------------------
    141 |.else			// x64/POSIX stack layout
    142 |
    143 |.define CFRAME_SPACE,	aword*5			// Delta for rsp (see <--).
    144 |.macro saveregs_				// Push callee-saved regs (all but rbp), then reserve CFRAME_SPACE.
    145 |  push rbx; push r15; push r14
    146 |.if NO_UNWIND					// r12/r13 are saved only in NO_UNWIND builds.
    147 |  push r13; push r12
    148 |.endif
    149 |  sub rsp, CFRAME_SPACE
    150 |.endmacro
    151 |.macro saveregs					// Full prologue: rbp first, then saveregs_.
    152 |  push rbp; saveregs_
    153 |.endmacro
    154 |.macro restoreregs				// Exact inverse of saveregs.
    155 |  add rsp, CFRAME_SPACE
    156 |.if NO_UNWIND
    157 |  pop r12; pop r13
    158 |.endif
    159 |  pop r14; pop r15; pop rbx; pop rbp
    160 |.endmacro
    161 |
    162 |//----- 16 byte aligned,
    163 |.if NO_UNWIND					// Two extra pushes shift the saved-register slots up by two.
    164 |.define SAVE_RET,	aword [rsp+aword*11]	//<-- rsp entering interpreter.
    165 |.define SAVE_R4,	aword [rsp+aword*10]	// rbp (pushed first by saveregs).
    166 |.define SAVE_R3,	aword [rsp+aword*9]	// rbx
    167 |.define SAVE_R2,	aword [rsp+aword*8]	// r15
    168 |.define SAVE_R1,	aword [rsp+aword*7]	// r14
    169 |.define SAVE_RU2,	aword [rsp+aword*6]	// r13
    170 |.define SAVE_RU1,	aword [rsp+aword*5]	//<-- rsp after register saves. Holds r12.
    171 |.else
    172 |.define SAVE_RET,	aword [rsp+aword*9]	//<-- rsp entering interpreter.
    173 |.define SAVE_R4,	aword [rsp+aword*8]	// rbp (pushed first by saveregs).
    174 |.define SAVE_R3,	aword [rsp+aword*7]	// rbx
    175 |.define SAVE_R2,	aword [rsp+aword*6]	// r15
    176 |.define SAVE_R1,	aword [rsp+aword*5]	//<-- rsp after register saves. Holds r14.
    177 |.endif
    178 |.define SAVE_CFRAME,	aword [rsp+aword*4]	// The remaining slots live inside CFRAME_SPACE.
    179 |.define SAVE_PC,	aword [rsp+aword*3]
    180 |.define SAVE_L,	aword [rsp+aword*2]
    181 |.define SAVE_ERRF,	dword [rsp+dword*3]	// Upper dword of slot aword*1.
    182 |.define SAVE_NRES,	dword [rsp+dword*2]	// Lower dword of slot aword*1.
    183 |.define TMP1,		aword [rsp]		//<-- rsp while in interpreter.
    184 |//----- 16 byte aligned
    185 |
    186 |.define TMP1d,		dword [rsp]		// Low dword of TMP1.
    187 |.define TMP1hi,	dword [rsp+dword*1]	// High dword of TMP1.
    188 |.define MULTRES,	TMP1d			// MULTRES overlaps TMP1d.
    189 |
    190 |.endif
    191 |
    192 |//-----------------------------------------------------------------------
    193 |
    194 |// Instruction headers. They finish operand decoding from RC, which ins_NEXT left holding bits 16-31.
    195 |.macro ins_A; .endmacro					// Nothing left to decode.
    196 |.macro ins_AD; .endmacro					// Nothing left to decode.
    197 |.macro ins_AJ; .endmacro					// Nothing left to decode.
    198 |.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro	// RB = high byte of RC, RC = low byte of RC.
    199 |.macro ins_AB_; movzx RBd, RCH; .endmacro			// RB = high byte of RC; RC left as is.
    200 |.macro ins_A_C; movzx RCd, RCL; .endmacro			// RC = low byte of RC.
    201 |.macro ins_AND; not RD; .endmacro				// RD = bitwise complement of RD.
    202 |
    203 |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
    204 |.macro ins_NEXT
    205 |  mov RCd, [PC]			// Fetch the 32 bit instruction word.
    206 |  movzx RAd, RCH		// RA = bits 8-15.
    207 |  movzx OP, RCL			// OP = bits 0-7 (this overwrites RB).
    208 |  add PC, 4			// PC now points past the fetched instruction.
    209 |  shr RCd, 16			// RC/RD = bits 16-31.
    210 |  jmp aword [DISPATCH+OP*8]	// Jump through the table of 8 byte entries at DISPATCH.
    211 |.endmacro
    212 |
    213 |// Instruction footer.
    214 |.if 1				// Hard-wired: the replicated variant is always selected.
    215 |  // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
    216 |  .define ins_next, ins_NEXT
    217 |  .define ins_next_, ins_NEXT
    218 |.else
    219 |  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
    220 |  // Affects only certain kinds of benchmarks (and only with -j off).
    221 |  // Around 10%-30% slower on Core2, much slower on P4.
    222 |  .macro ins_next		// Every handler jumps to the single shared dispatch.
    223 |    jmp ->ins_next
    224 |  .endmacro
    225 |  .macro ins_next_		// Defines the shared dispatch label and its code.
    226 |  ->ins_next:
    227 |    ins_NEXT
    228 |  .endmacro
    229 |.endif
    230 |
    231 |// Call decode and dispatch. Unlike ins_NEXT, RA is used as the fetch temporary and RD is left untouched.
    232 |.macro ins_callt
    233 |  // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
    234 |  mov PC, LFUNC:RB->pc		// PC = first instruction of the callee.
    235 |  mov RAd, [PC]			// Fetch it into RA.
    236 |  movzx OP, RAL			// OP = bits 0-7 (this overwrites RB).
    237 |  movzx RAd, RAH		// RA = bits 8-15.
    238 |  add PC, 4			// PC now points past the first instruction.
    239 |  jmp aword [DISPATCH+OP*8]
    240 |.endmacro
    241 |
    242 |.macro ins_call
    243 |  // BASE = new base, RB = LFUNC, RD = nargs+1
    244 |  mov [BASE-8], PC		// Store the current PC value into the new frame, then dispatch.
    245 |  ins_callt
    246 |.endmacro
    247 |
    248 |//-----------------------------------------------------------------------
    249 |
    250 |// Macros to clear or set tags. The tag occupies the bits from bit 47 upwards.
    251 |.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro	// Zero the top 17 bits, keep the low 47.
    252 |.macro settp, reg, tp			// reg |= tp<<47. Clobbers ITYPE.
    253 |  mov64 ITYPE, ((uint64_t)tp<<47)
    254 |  or reg, ITYPE
    255 |.endmacro
    256 |.macro settp, dst, reg, tp		// dst = reg | tp<<47. reg and ITYPE are left unchanged.
    257 |  mov64 dst, ((uint64_t)tp<<47)
    258 |  or dst, reg
    259 |.endmacro
    260 |.macro setint, reg			// settp with LJ_TISNUM. Clobbers ITYPE.
    261 |  settp reg, LJ_TISNUM
    262 |.endmacro
    263 |.macro setint, dst, reg			// Three-operand settp with LJ_TISNUM.
    264 |  settp dst, reg, LJ_TISNUM
    265 |.endmacro
    266 |
    267 |// Macros to test operand types. All of them clobber ITYPE and the flags.
    268 |.macro checktp_nc, reg, tp, target	// Jump to target if tag(reg) != tp. reg is left unchanged.
    269 |  mov ITYPE, reg
    270 |  sar ITYPE, 47			// ITYPE = sign-extended tag.
    271 |  cmp ITYPEd, tp
    272 |  jne target
    273 |.endmacro
    274 |.macro checktp, reg, tp, target		// Same test, but the tag is also stripped from reg.
    275 |  mov ITYPE, reg
    276 |  cleartp reg				// reg keeps only its low 47 bits, on both outcomes.
    277 |  sar ITYPE, 47
    278 |  cmp ITYPEd, tp
    279 |  jne target
    280 |.endmacro
    281 |.macro checktptp, src, tp, target	// Same instruction sequence as checktp_nc.
    282 |  mov ITYPE, src
    283 |  sar ITYPE, 47
    284 |  cmp ITYPEd, tp
    285 |  jne target
    286 |.endmacro
    287 |.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro	// The three below strip the tag from reg.
    288 |.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
    289 |.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
    290 |
    291 |.macro checknumx, reg, target, jump	// Compare tag(reg) with LJ_TISNUM, then branch with 'jump'. Clobbers ITYPE.
    292 |  mov ITYPE, reg
    293 |  sar ITYPE, 47
    294 |  cmp ITYPEd, LJ_TISNUM
    295 |  jump target				// 'jump' is the conditional jump mnemonic passed by the caller.
    296 |.endmacro
    297 |.macro checkint, reg, target; checknumx reg, target, jne; .endmacro	// Jump if tag != LJ_TISNUM.
    298 |.macro checkinttp, src, target; checknumx src, target, jne; .endmacro	// Same expansion as checkint.
    299 |.macro checknum, reg, target; checknumx reg, target, jae; .endmacro	// Jump if tag >= LJ_TISNUM (unsigned).
    300 |.macro checknumtp, src, target; checknumx src, target, jae; .endmacro	// Same expansion as checknum.
    301 |.macro checknumber, src, target; checknumx src, target, ja; .endmacro	// Jump if tag > LJ_TISNUM (unsigned).
    302 |
    303 |.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro	// All bits set except bit 47.
    304 |.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro	// All bits set except bit 48.
    305 |
    306 |// These operands must be used with movzx. They index the instruction just before PC (PC was advanced by 4).
    307 |.define PC_OP, byte [PC-4]	// Byte 0 of that instruction.
    308 |.define PC_RA, byte [PC-3]	// Byte 1.
    309 |.define PC_RB, byte [PC-1]	// Byte 3.
    310 |.define PC_RC, byte [PC-2]	// Byte 2.
    311 |.define PC_RD, word [PC-2]	// Bytes 2-3 as one 16 bit value.
    312 |
    313 |.macro branchPC, reg		// PC += (reg - BCBIAS_J) * 4. lea leaves the flags unchanged.
    314 |  lea PC, [PC+reg*4-BCBIAS_J*4]
    315 |.endmacro
    316 |
    317 |// Assumes DISPATCH is relative to GL. Both macros yield an int displacement to add to DISPATCH.
    318 #define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
    319 #define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
    320 |// PC2PROTO: offset of a GCproto field minus sizeof(GCproto); used in addresses such as [PC-4+PC2PROTO(framesize)].
    321 #define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
    322 |
    323 |// Decrement hashed hotcount and trigger trace recorder when the subtraction borrows. Clobbers reg.
    324 |.macro hotloop, reg
    325 |  mov reg, PCd				// Hash input: low 32 bits of PC.
    326 |  shr reg, 1
    327 |  and reg, HOTCOUNT_PCMASK		// reg = (PC >> 1) & HOTCOUNT_PCMASK, used as a byte offset.
    328 |  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP	// 16 bit counter at DISPATCH+GG_DISP2HOT+reg.
    329 |  jb ->vm_hotloop			// Taken when the counter was below HOTCOUNT_LOOP (unsigned).
    330 |.endmacro
    331 |
    332 |.macro hotcall, reg			// Same scheme as hotloop, with HOTCOUNT_CALL and ->vm_hotcall.
    333 |  mov reg, PCd
    334 |  shr reg, 1
    335 |  and reg, HOTCOUNT_PCMASK
    336 |  sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
    337 |  jb ->vm_hotcall
    338 |.endmacro
    339 |
    340 |// Set current VM state. 'st' is pasted onto LJ_VMST_ and the complemented value is stored in g->vmstate.
    341 |.macro set_vmstate, st
    342 |  mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
    343 |.endmacro
    344 |
    345 |.macro fpop1; fstp st1; .endmacro	// Store st0 into st1 and pop the x87 stack.
    346 |
    347 |// Synthesize SSE FP constants. Each macro builds the 64 bit pattern in tmp and moves it to reg.
    348 |.macro sseconst_abs, reg, tmp		// Synthesize abs mask.
    349 |  mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp	// Every bit set except bit 63.
    350 |.endmacro
    351 |
    352 |.macro sseconst_hi, reg, tmp, val	// Synthesize hi-32 bit const.
    353 |  mov64 tmp, U64x(val,00000000); movd reg, tmp		// val is given as hex digits; low 32 bits are zero.
    354 |.endmacro
    355 |
    356 |.macro sseconst_sign, reg, tmp		// Synthesize sign mask.
    357 |  sseconst_hi reg, tmp, 80000000
    358 |.endmacro
    359 |.macro sseconst_1, reg, tmp		// Synthesize 1.0.
    360 |  sseconst_hi reg, tmp, 3ff00000
    361 |.endmacro
    362 |.macro sseconst_m1, reg, tmp		// Synthesize -1.0.
    363 |  sseconst_hi reg, tmp, bff00000
    364 |.endmacro
    365 |.macro sseconst_2p52, reg, tmp		// Synthesize 2^52.
    366 |  sseconst_hi reg, tmp, 43300000
    367 |.endmacro
    368 |.macro sseconst_tobit, reg, tmp	// Synthesize 2^52 + 2^51.
    369 |  sseconst_hi reg, tmp, 43380000
    370 |.endmacro
    371 |
    372 |// Move table write barrier back. Overwrites reg. The table is pushed onto the head of g->gc.grayagain.
    373 |.macro barrierback, tab, reg
    374 |  and byte tab->marked, (uint8_t)~LJ_GC_BLACK	// black2gray(tab)
    375 |  mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]	// reg = old list head.
    376 |  mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab	// tab becomes the new head.
    377 |  mov tab->gclist, reg				// Link the old head behind tab.
    378 |.endmacro
    379 |
    380 |//-----------------------------------------------------------------------
    381 
    382 /* Generate subroutines used by opcodes and other parts of the VM. */
    383 /* The .code_sub section should be last to help static branch prediction. */
    384 static void build_subroutines(BuildCtx *ctx)
    385 {
    386   |.code_sub
    387   |
    388   |//-----------------------------------------------------------------------
    389   |//-- Return handling ----------------------------------------------------
    390   |//-----------------------------------------------------------------------
    391   |
    392   |->vm_returnp:			// Reached from vm_return with PC already xor'ed with FRAME_C.
    393   |  test PCd, FRAME_P
    394   |  jz ->cont_dispatch			// FRAME_P bit clear: handled by cont_dispatch.
    395   |
    396   |  // Return from pcall or xpcall fast func.
    397   |  and PC, -8				// Drop the low 3 bits, leaving the frame delta.
    398   |  sub BASE, PC			// Restore caller base.
    399   |  lea RA, [RA+PC-8]			// Rebase RA and prepend one result.
    400   |  mov PC, [BASE-8]			// Fetch PC of previous frame.
    401   |  // Prepending may overwrite the pcall frame, so do it at the end.
    402   |  mov_true ITYPE
    403   |  mov aword [BASE+RA], ITYPE		// Prepend true to results. Falls through to vm_returnc.
    404   |
    405   |->vm_returnc:			// In: RD = nresults, RA = resultofs, PC = frame link.
    406   |  add RDd, 1				// RD = nresults+1
    407   |  jz ->vm_unwind_yield		// RDd was -1 on entry (sum is zero): take the yield path.
    408   |  mov MULTRES, RDd
    409   |  test PC, FRAME_TYPE
    410   |  jz ->BC_RET_Z			// Handle regular return to Lua. Otherwise fall through to vm_return.
    411   |
    412   |->vm_return:
    413   |  // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
    414   |  xor PC, FRAME_C			// Flip the FRAME_C bits so the test below is zero for a C frame.
    415   |  test PCd, FRAME_TYPE
    416   |  jnz ->vm_returnp			// Not a C frame.
    417   |
    418   |  // Return to C.
    419   |  set_vmstate C
    420   |  and PC, -8				// PC = frame delta.
    421   |  sub PC, BASE
    422   |  neg PC				// Previous base = BASE - delta.
    423   |
    424   |  sub RDd, 1				// RD = nresults.
    425   |  jz >2				// No results to move.
    426   |1:  // Move results down.
    427   |  mov RB, [BASE+RA]
    428   |  mov [BASE-16], RB
    429   |  add BASE, 8			// BASE advances one slot per result copied.
    430   |  sub RDd, 1
    431   |  jnz <1
    432   |2:
    433   |  mov L:RB, SAVE_L
    434   |  mov L:RB->base, PC			// L->base = previous base.
    435   |3:
    436   |  mov RDd, MULTRES			// RD = nresults+1 again.
    437   |  mov RAd, SAVE_NRES			// RA = wanted nresults+1
    438   |4:
    439   |  cmp RAd, RDd
    440   |  jne >6				// More/less results wanted?
    441   |5:
    442   |  sub BASE, 16			// Undo the -16 bias of the copy loop: BASE = slot after the last result.
    443   |  mov L:RB->top, BASE
    444   |
    445   |->vm_leave_cp:			// In: RB = L. Unlink the C frame and return status 0.
    446   |  mov RA, SAVE_CFRAME		// Restore previous C frame.
    447   |  mov L:RB->cframe, RA
    448   |  xor eax, eax			// Ok return status for vm_pcall.
    449   |
    450   |->vm_leave_unw:			// In: eax = return status.
    451   |  restoreregs
    452   |  ret
    453   |
    454   |6:					// Flags are still those of the cmp at 4:.
    455   |  jb >7				// Fewer results wanted? (RA < RD, unsigned)
    456   |  // More results wanted. Check stack size and fill up results with nil.
    457   |  cmp BASE, L:RB->maxstack
    458   |  ja >8
    459   |  mov aword [BASE-16], LJ_TNIL
    460   |  add BASE, 8
    461   |  add RDd, 1
    462   |  jmp <4				// Re-compare until RD == RA.
    463   |
    464   |7:  // Fewer results wanted.
    465   |  test RAd, RAd
    466   |  jz <5				// But check for LUA_MULTRET+1.
    467   |  sub RA, RD				// Negative result!
    468   |  lea BASE, [BASE+RA*8]		// Correct top.
    469   |  jmp <5
    470   |
    471   |8:  // Corner case: need to grow stack for filling up results.
    472   |  // This can happen if:
    473   |  // - A C function grows the stack (a lot).
    474   |  // - The GC shrinks the stack in between.
    475   |  // - A return back from a lua_call() with (high) nresults adjustment.
    476   |  mov L:RB->top, BASE		// Save current top held in BASE (yes).
    477   |  mov MULTRES, RDd			// Need to fill only remainder with nil.
    478   |  mov CARG2d, RAd			// Must precede the CARG1 write: CARG1 is RA on WIN64.
    479   |  mov CARG1, L:RB
    480   |  call extern lj_state_growstack	// (lua_State *L, int n)
    481   |  mov BASE, L:RB->top		// Need the (realloced) L->top in BASE.
    482   |  jmp <3				// Reload RD/RA and continue filling.
    483   |
    484   |->vm_unwind_yield:			// From vm_returnc, where RDd (= eax) is zero.
    485   |  mov al, LUA_YIELD			// So eax = LUA_YIELD after this byte write.
    486   |  jmp ->vm_unwind_c_eh
    487   |
    488   |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
    489   |  // (void *cframe, int errcode)
    490   |  mov eax, CARG2d			// Error return status for vm_pcall.
    491   |  mov rsp, CARG1			// Switch to the given C frame; the SAVE_* slots are relative to it.
    492   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
    493   |  mov L:RB, SAVE_L
    494   |  mov GL:RB, L:RB->glref		// DISPATCH is not used here; reach the global state via L.
    495   |  mov dword GL:RB->vmstate, ~LJ_VMST_C
    496   |  jmp ->vm_leave_unw			// Restore registers and return eax.
    497   |
    498   |->vm_unwind_rethrow:			// In: eax = error code. Tail-calls lj_err_throw.
    499   |.if not X64WIN			// NOTE(review): on WIN64 the body is empty, so the label falls through to vm_unwind_ff.
    500   |  mov CARG1, SAVE_L
    501   |  mov CARG2d, eax
    502   |  restoreregs			// Tear down the C frame before the tail call.
    503   |  jmp extern lj_err_throw		// (lua_State *L, int errcode)
    504   |.endif
    505   |
    506   |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
    507   |  // (void *cframe)
    508   |  and CARG1, CFRAME_RAWMASK		// Mask the cframe argument before using it as rsp.
    509   |  mov rsp, CARG1
    510   |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
    511   |  mov L:RB, SAVE_L
    512   |  mov RDd, 1+1			// Really 1+2 results, incr. later.
    513   |  mov BASE, L:RB->base
    514   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
    515   |  add DISPATCH, GG_G2DISP
    516   |  mov PC, [BASE-8]			// Fetch PC of previous frame.
    517   |  mov_false RA
    518   |  mov RB, [BASE]			// RB = error message (L is no longer needed in RB).
    519   |  mov [BASE-16], RA			// Prepend false to error message.
    520   |  mov [BASE-8], RB			// Overwrites the slot PC was just fetched from.
    521   |  mov RA, -16			// Results start at BASE+RA = BASE-16.
    522   |  set_vmstate INTERP
    523   |  jmp ->vm_returnc			// Increments RD/MULTRES and returns.
    524   |
    525   |//-----------------------------------------------------------------------
    526   |//-- Grow stack for calls -----------------------------------------------
    527   |//-----------------------------------------------------------------------
    528   |
    529   |->vm_growstack_c:			// Grow stack for C function.
    530   |  mov CARG2d, LUA_MINSTACK		// n = LUA_MINSTACK; L->base/L->top are taken as already set (see 2:).
    531   |  jmp >2
    532   |
    533   |->vm_growstack_v:			// Grow stack for vararg Lua function.
    534   |  sub RD, 16				// LJ_FR2
    535   |  jmp >1
    536   |
    537   |->vm_growstack_f:			// Grow stack for fixarg Lua function.
    538   |  // BASE = new base, RD = nargs+1, RB = L, PC = first PC
    539   |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8 = top.
    540   |1:
    541   |  movzx RAd, byte [PC-4+PC2PROTO(framesize)]	// RA = framesize byte of the prototype.
    542   |  add PC, 4				// Must point after first instruction.
    543   |  mov L:RB->base, BASE
    544   |  mov L:RB->top, RD
    545   |  mov SAVE_PC, PC
    546   |  mov CARG2, RA			// n = framesize. On WIN64 this overwrites BASE (already saved in L->base).
    547   |2:
    548   |  // RB = L, L->base = new base, L->top = top
    549   |  mov CARG1, L:RB
    550   |  call extern lj_state_growstack	// (lua_State *L, int n)
    551   |  mov BASE, L:RB->base		// Reload both from L after the call.
    552   |  mov RD, L:RB->top
    553   |  mov LFUNC:RB, [BASE-16]		// Function slot of the frame.
    554   |  cleartp LFUNC:RB
    555   |  sub RD, BASE
    556   |  shr RDd, 3				// Byte distance -> number of 8 byte slots.
    557   |  add NARGS:RDd, 1			// RD = nargs+1 again.
    558   |  // BASE = new base, RB = LFUNC, RD = nargs+1
    559   |  ins_callt				// Just retry the call.
    560   |
    561   |//-----------------------------------------------------------------------
    562   |//-- Entry points into the assembler VM ---------------------------------
    563   |//-----------------------------------------------------------------------
    564   |
    565   |->vm_resume:				// Setup C frame and resume thread.
    566   |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
    567   |  saveregs
    568   |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
    569   |  mov SAVE_L, CARG1
    570   |  mov RA, CARG2			// RA = base argument.
    571   |  mov PCd, FRAME_CP			// Frame type for the entry at 2: below.
    572   |  xor RDd, RDd			// RD = 0, used as the zero source for the stores below.
    573   |  lea KBASE, [rsp+CFRAME_RESUME]	// KBASE = rsp + CFRAME_RESUME. Use the 64 bit rsp like every other stack reference.
    574   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
    575   |  add DISPATCH, GG_G2DISP
    576   |  mov SAVE_PC, RD			// Any value outside of bytecode is ok.
    577   |  mov SAVE_CFRAME, RD		// No previous C frame is recorded.
    578   |  mov SAVE_NRES, RDd			// nres1 = 0 and ef = 0, as in the signature above.
    579   |  mov SAVE_ERRF, RDd
    580   |  mov L:RB->cframe, KBASE
    581   |  cmp byte L:RB->status, RDL		// status == 0?
    582   |  je >2				// Initial resume (like a call).
    583   |
    584   |  // Resume after yield (like a return).
    585   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
    586   |  set_vmstate INTERP
    587   |  mov byte L:RB->status, RDL		// Clear the status byte (RDL is still zero).
    588   |  mov BASE, L:RB->base
    589   |  mov RD, L:RB->top
    590   |  sub RD, RA
    591   |  shr RDd, 3				// Slots between the base argument and L->top.
    592   |  add RDd, 1				// RD = nresults+1
    593   |  sub RA, BASE			// RA = resultofs
    594   |  mov PC, [BASE-8]			// Frame link of the frame being returned to.
    595   |  mov MULTRES, RDd
    596   |  test PCd, FRAME_TYPE
    597   |  jz ->BC_RET_Z			// Type bits clear: regular return to Lua.
    598   |  jmp ->vm_return
    599   |
    600   |->vm_pcall:				// Setup protected C frame and enter VM.
    601   |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
    602   |  saveregs
    603   |  mov PCd, FRAME_CP
    604   |  mov SAVE_ERRF, CARG4d		// Only the low 32 bits of ef are stored.
    605   |  jmp >1
    606   |
    607   |->vm_call:				// Setup C frame and enter VM.
    608   |  // (lua_State *L, TValue *base, int nres1)
    609   |  saveregs
    610   |  mov PCd, FRAME_C			// SAVE_ERRF is not written on this path.
    611   |
    612   |1:  // Entry point for vm_pcall above (PC = ftype).
    613   |  mov SAVE_NRES, CARG3d
    614   |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
    615   |  mov SAVE_L, CARG1
    616   |  mov RA, CARG2			// RA = base argument.
    617   |
    618   |  mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
    619   |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
    620   |  mov SAVE_CFRAME, KBASE
    621   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
    622   |  add DISPATCH, GG_G2DISP
    623   |  mov L:RB->cframe, rsp		// The new C frame is the current stack pointer.
    624   |
    625   |2:  // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
    626   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
    627   |  set_vmstate INTERP
    628   |  mov BASE, L:RB->base		// BASE = old base (used in vmeta_call).
    629   |  add PC, RA
    630   |  sub PC, BASE			// PC = frame delta + frame type
    631   |
    632   |  mov RD, L:RB->top
    633   |  sub RD, RA
    634   |  shr NARGS:RDd, 3			// Slots between the new base and L->top.
    635   |  add NARGS:RDd, 1			// RD = nargs+1
    636   |
    637   |->vm_call_dispatch:			// In: RA = new base, RD = nargs+1, PC = frame link.
    638   |  mov LFUNC:RB, [RA-16]		// Tagged value in the function slot.
    639   |  checkfunc LFUNC:RB, ->vmeta_call	// Ensure KBASE defined and != BASE.
    640   |
    641   |->vm_call_dispatch_f:			// In: RB = untagged function pointer.
    642   |  mov BASE, RA
    643   |  ins_call
    644   |  // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
    645   |
    646   |->vm_cpcall:				// Setup protected C frame, call C.
    647   |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
    648   |  saveregs
    649   |  mov L:RB, CARG1			// Caveat: CARG1 may be RA.
    650   |  mov SAVE_L, CARG1
    651   |  mov SAVE_PC, L:RB			// Any value outside of bytecode is ok.
    652   |
    653   |  mov KBASE, L:RB->stack		// Compute -savestack(L, L->top).
    654   |  sub KBASE, L:RB->top
    655   |   mov DISPATCH, L:RB->glref		// Setup pointer to dispatch table.
    656   |  mov SAVE_ERRF, 0			// No error function.
    657   |  mov SAVE_NRES, KBASEd		// Neg. delta means cframe w/o frame.
    658   |   add DISPATCH, GG_G2DISP
    659   |  // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
    660   |
    661   |  mov KBASE, L:RB->cframe		// Add our C frame to cframe chain.
    662   |  mov SAVE_CFRAME, KBASE
    663   |  mov L:RB->cframe, rsp
    664   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
    665   |
    666   |  call CARG4			// (lua_State *L, lua_CFunction func, void *ud). CARG1-3 were not modified above.
    667   |  // TValue * (new base) or NULL returned in rax (RC).
    668   |  test RC, RC
    669   |  jz ->vm_leave_cp			// No base? Just remove C frame.
    670   |  mov RA, RC				// RA = new base, as expected at 2:.
    671   |  mov PCd, FRAME_CP
    672   |  jmp <2				// Else continue with the call.
    673   |
    674   |//-----------------------------------------------------------------------
    675   |//-- Metamethod handling ------------------------------------------------
    676   |//-----------------------------------------------------------------------
    677   |
    678   |//-- Continuation dispatch ----------------------------------------------
    679   |
    680   |->cont_dispatch:
    681   |  // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
    682   |  add RA, BASE			// RA = address of the first result.
    683   |  and PC, -8				// PC = frame delta.
    684   |  mov RB, BASE			// RB = meta base.
    685   |  sub BASE, PC			// Restore caller BASE.
    686   |  mov aword [RA+RD*8-8], LJ_TNIL	// Ensure one valid arg.
    687   |  mov RC, RA				// ... in [RC]
    688   |  mov PC, [RB-24]			// Restore PC from [cont|PC].
    689   |  mov RA, qword [RB-32]		// May be negative on WIN64 with debug. RA = continuation.
    690   |.if FFI
    691   |  cmp RA, 1
    692   |  jbe >1				// cont is 0 or 1: special cases below.
    693   |.endif
    694   |  mov LFUNC:KBASE, [BASE-16]		// Function of the restored caller frame.
    695   |  cleartp LFUNC:KBASE
    696   |  mov KBASE, LFUNC:KBASE->pc
    697   |  mov KBASE, [KBASE+PC2PROTO(k)]	// KBASE = field k of that function's prototype.
    698   |  // BASE = base, RC = result, RB = meta base
    699   |  jmp RA				// Jump to continuation.
    700   |
    701   |.if FFI
    702   |1:					// Flags are still those of the cmp RA, 1 above.
    703   |  je ->cont_ffi_callback		// cont = 1: return from FFI callback.
    704   |  // cont = 0: Tail call from C function.
    705   |  sub RB, BASE
    706   |  shr RBd, 3				// Slots between caller base and meta base.
    707   |  lea RDd, [RBd-3]			// RD = that slot count minus 3.
    708   |  jmp ->vm_call_tail
    709   |.endif
    710   |
    711   |->cont_cat:				// BASE = base, RC = result, RB = mbase
    712   |  movzx RAd, PC_RB			// RA = operand B of the current instruction.
    713   |  sub RB, 32
    714   |  lea RA, [BASE+RA*8]		// RA = address of slot B.
    715   |  sub RA, RB
    716   |  je ->cont_ra			// mbase-32 equals slot B: store the result via cont_ra.
    717   |  neg RA
    718   |  shr RAd, 3				// RA = slots between slot B and mbase-32.
    719   |.if X64WIN				// RC is the copy temporary here because RA is CARG1 on WIN64.
    720   |  mov CARG3d, RAd
    721   |  mov L:CARG1, SAVE_L
    722   |  mov L:CARG1->base, BASE
    723   |  mov RC, [RC]
    724   |  mov [RB], RC			// Copy the result to [mbase-32].
    725   |  mov CARG2, RB			// Overwrites BASE (already saved in L->base).
    726   |.else
    727   |  mov L:CARG1, SAVE_L
    728   |  mov L:CARG1->base, BASE
    729   |  mov CARG3d, RAd			// Overwrites BASE (already saved in L->base).
    730   |  mov RA, [RC]
    731   |  mov [RB], RA			// Copy the result to [mbase-32].
    732   |  mov CARG2, RB
    733   |.endif
    734   |  jmp ->BC_CAT_Z			// Continue there with CARG1 = L, CARG2 = RB, CARG3 = count.
    735   |
    736   |//-- Table indexing metamethods -----------------------------------------
    737   |
    738   |->vmeta_tgets:			// In: RC = untagged GCstr * key, RB = table (used only for BC_GGET).
    739   |  settp STR:RC, LJ_TSTR		// STR:RC = GCstr *
    740   |  mov TMP1, STR:RC			// Build the key TValue in TMP1 ...
    741   |  lea RC, TMP1			// ... and pass its address as k.
    742   |  cmp PC_OP, BC_GGET
    743   |  jne >1				// Otherwise the table operand is reloaded from slot B.
    744   |  settp TAB:RA, TAB:RB, LJ_TTAB	// TAB:RB = GCtab *
    745   |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
    746   |  mov [RB], TAB:RA
    747   |  jmp >2
    748   |
    749   |->vmeta_tgetb:			// Key is the literal byte in operand C.
    750   |  movzx RCd, PC_RC
    751   |.if DUALNUM
    752   |  setint RC				// Tag it with LJ_TISNUM.
    753   |  mov TMP1, RC
    754   |.else
    755   |  cvtsi2sd xmm0, RCd			// Convert it to a double.
    756   |  movsd TMP1, xmm0
    757   |.endif
    758   |  lea RC, TMP1			// k = &TMP1.
    759   |  jmp >1
    760   |
    761   |->vmeta_tgetv:
    762   |  movzx RCd, PC_RC			// Reload TValue *k from RC.
    763   |  lea RC, [BASE+RC*8]
    764   |1:
    765   |  movzx RBd, PC_RB			// Reload TValue *t from RB.
    766   |  lea RB, [BASE+RB*8]
    767   |2:					// In: RB = TValue *o, RC = TValue *k.
    768   |  mov L:CARG1, SAVE_L
    769   |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
    770   |  mov CARG2, RB
    771   |  mov CARG3, RC
    772   |  mov L:RB, L:CARG1			// Keep L in RB across the call.
    773   |  mov SAVE_PC, PC
    774   |  call extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
    775   |  // TValue * (finished) or NULL (metamethod) returned in rax (RC).
    776   |  mov BASE, L:RB->base
    777   |  test RC, RC
    778   |  jz >3
    779   |->cont_ra:				// BASE = base, RC = result
    780   |  movzx RAd, PC_RA
    781   |  mov RB, [RC]
    782   |  mov [BASE+RA*8], RB		// Copy the result into slot A.
    783   |  ins_next
    784   |
    785   |3:  // Call __index metamethod.
    786   |  // BASE = base, L->top = new base, stack = cont/func/t/k
    787   |  mov RA, L:RB->top			// RA = new base.
    788   |  mov [RA-24], PC			// [cont|PC]
    789   |  lea PC, [RA+FRAME_CONT]
    790   |  sub PC, BASE			// PC = frame delta + FRAME_CONT.
    791   |  mov LFUNC:RB, [RA-16]		// Guaranteed to be a function here.
    792   |  mov NARGS:RDd, 2+1			// 2 args for func(t, k).
    793   |  cleartp LFUNC:RB
    794   |  jmp ->vm_call_dispatch_f
    795   |
    796   |->vmeta_tgetr:			// In: TAB:RB = table, RCd = integer key.
    797   |  mov CARG1, TAB:RB
    798   |  mov RB, BASE			// Save BASE.
    799   |  mov CARG2d, RCd			// Caveat: CARG2 == BASE
    800   |  call extern lj_tab_getinth		// (GCtab *t, int32_t key)
    801   |  // cTValue * or NULL returned in rax (RC).
    802   |  movzx RAd, PC_RA
    803   |  mov BASE, RB			// Restore BASE.
    804   |  test RC, RC
    805   |  jnz ->BC_TGETR_Z			// Found: continue there with RC = result pointer.
    806   |  mov ITYPE, LJ_TNIL			// Not found: continue with nil in ITYPE.
    807   |  jmp ->BC_TGETR2_Z
    808   |
    809   |//-----------------------------------------------------------------------
    810   |
    811   |->vmeta_tsets:			// In: RC = untagged GCstr * key, RB = table (used only for BC_GSET).
    812   |  settp STR:RC, LJ_TSTR		// STR:RC = GCstr *
    813   |  mov TMP1, STR:RC			// Build the key TValue in TMP1 ...
    814   |  lea RC, TMP1			// ... and pass its address as k.
    815   |  cmp PC_OP, BC_GSET
    816   |  jne >1				// Otherwise the table operand is reloaded from slot B.
    817   |  settp TAB:RA, TAB:RB, LJ_TTAB	// TAB:RB = GCtab *
    818   |  lea RB, [DISPATCH+DISPATCH_GL(tmptv)]  // Store fn->l.env in g->tmptv.
    819   |  mov [RB], TAB:RA
    820   |  jmp >2
    821   |
    822   |->vmeta_tsetb:			// Key is the literal byte in operand C.
    823   |  movzx RCd, PC_RC
    824   |.if DUALNUM
    825   |  setint RC				// Tag it with LJ_TISNUM.
    826   |  mov TMP1, RC
    827   |.else
    828   |  cvtsi2sd xmm0, RCd			// Convert it to a double.
    829   |  movsd TMP1, xmm0
    830   |.endif
    831   |  lea RC, TMP1			// k = &TMP1.
    832   |  jmp >1
    833   |
    834   |->vmeta_tsetv:
    835   |  movzx RCd, PC_RC			// Reload TValue *k from RC.
    836   |  lea RC, [BASE+RC*8]
    837   |1:
    838   |  movzx RBd, PC_RB			// Reload TValue *t from RB.
    839   |  lea RB, [BASE+RB*8]
    840   |2:					// In: RB = TValue *o, RC = TValue *k.
    841   |  mov L:CARG1, SAVE_L
    842   |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
    843   |  mov CARG2, RB
    844   |  mov CARG3, RC
    845   |  mov L:RB, L:CARG1			// Keep L in RB across the call.
    846   |  mov SAVE_PC, PC
    847   |  call extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
    848   |  // TValue * (finished) or NULL (metamethod) returned in rax (RC).
    849   |  mov BASE, L:RB->base
    850   |  test RC, RC
    851   |  jz >3
    852   |  // NOBARRIER: lj_meta_tset ensures the table is not black.
    853   |  movzx RAd, PC_RA
    854   |  mov RB, [BASE+RA*8]		// Value to store comes from slot A.
    855   |  mov [RC], RB			// Write it through the returned pointer.
    856   |->cont_nop:				// BASE = base, (RC = result)
    857   |  ins_next
    858   |
    859   |3:  // Call __newindex metamethod.
    860   |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
    861   |  mov RA, L:RB->top			// RA = new base.
    862   |  mov [RA-24], PC			// [cont|PC]
    863   |  movzx RCd, PC_RA
    864   |  // Copy value to third argument.
    865   |  mov RB, [BASE+RC*8]		// L in RB is no longer needed from here on.
    866   |  mov [RA+16], RB
    867   |  lea PC, [RA+FRAME_CONT]
    868   |  sub PC, BASE			// PC = frame delta + FRAME_CONT.
    869   |  mov LFUNC:RB, [RA-16]		// Guaranteed to be a function here.
    870   |  mov NARGS:RDd, 3+1			// 3 args for func(t, k, v).
    871   |  cleartp LFUNC:RB
    872   |  jmp ->vm_call_dispatch_f
    873   |
    874   |->vmeta_tsetr:			// Integer-key table set: calls lj_tab_setinth(L, TAB:RB, RC), resumes at BC_TSETR_Z.
    875   |.if X64WIN
    876   |  mov L:CARG1, SAVE_L
    877   |  mov CARG3d, RCd
    878   |  mov L:CARG1->base, BASE
    879   |  xchg CARG2, TAB:RB			// Caveat: CARG2 == BASE. The swap also leaves BASE saved in RB.
    880   |.else
    881   |  mov L:CARG1, SAVE_L
    882   |  mov CARG2, TAB:RB
    883   |  mov L:CARG1->base, BASE
    884   |  mov RB, BASE			// Save BASE.
    885   |  mov CARG3d, RCd			// Caveat: CARG3 == BASE.
    886   |.endif
    887   |  mov SAVE_PC, PC
    888   |  call extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
    889   |  // TValue * returned in rax (RC).
    890   |  movzx RAd, PC_RA			// Reload operand A after the C call.
    891   |  mov BASE, RB			// Restore BASE.
    892   |  jmp ->BC_TSETR_Z
    893   |
    894   |//-- Comparison metamethods ---------------------------------------------
    895   |
    896   |->vmeta_comp:			// Comparison fallback: lj_meta_comp on stack slots A and D.
    897   |  movzx RDd, PC_RD
    898   |  movzx RAd, PC_RA
    899   |  mov L:RB, SAVE_L
    900   |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 == BASE.
    901   |.if X64WIN
    902   |  lea CARG3, [BASE+RD*8]
    903   |  lea CARG2, [BASE+RA*8]		// Written last, since BASE is still needed above.
    904   |.else
    905   |  lea CARG2, [BASE+RA*8]
    906   |  lea CARG3, [BASE+RD*8]		// Written last, since BASE is still needed above.
    907   |.endif
    908   |  mov CARG1, L:RB			// Caveat: CARG1/CARG4 == RA.
    909   |  movzx CARG4d, PC_OP
    910   |  mov SAVE_PC, PC
    911   |  call extern lj_meta_comp	// (lua_State *L, TValue *o1, *o2, int op)
    912   |  // 0/1 or TValue * (metamethod) returned in rax (RC).
    913   |3:					// Shared result handling; other entries jump here with <3.
    914   |  mov BASE, L:RB->base
    915   |  cmp RC, 1
    916   |  ja ->vmeta_binop			// RC > 1 (unsigned): it is a TValue *, call the metamethod.
    917   |4:					// Entered with flags set: below (CF) means "do not branch".
    918   |  lea PC, [PC+4]			// Advance PC by 4; lea leaves the flags from the cmp intact.
    919   |  jb >6				// RC == 0: continue without branching.
    920   |5:
    921   |  movzx RDd, PC_RD
    922   |  branchPC RD				// Apply operand D to PC (macro defined outside this chunk).
    923   |6:
    924   |  ins_next
    925   |
    926   |->cont_condt:			// BASE = base, RC = result
    927   |  add PC, 4
    928   |  mov ITYPE, [RC]			// Load the result TValue.
    929   |  sar ITYPE, 47			// Shift the type tag down for the comparison below.
    930   |  cmp ITYPEd, LJ_TISTRUECOND		// Branch if result is true.
    931   |  jb <5				// Below LJ_TISTRUECOND: take the branch at label 5 of vmeta_comp.
    932   |  jmp <6				// Otherwise continue with the next instruction.
    933   |
    934   |->cont_condf:			// BASE = base, RC = result
    935   |  mov ITYPE, [RC]			// Load the result TValue.
    936   |  sar ITYPE, 47			// Shift the type tag down for the comparison below.
    937   |  cmp ITYPEd, LJ_TISTRUECOND		// Branch if result is false.
    938   |  jmp <4				// Label 4 of vmeta_comp advances PC and consumes these flags with jb.
    939   |
    940   |->vmeta_equal:			// Equality fallback: lj_meta_equal(L, RA, RD, RB); result handled at label 3.
    941   |  cleartp TAB:RD
    942   |  sub PC, 4				// Back PC up by 4; label 4 of vmeta_comp adds 4 again on the 0/1 path.
    943   |.if X64WIN
    944   |  mov CARG3, RD
    945   |  mov CARG4d, RBd			// 'ne' flag is passed in from RB.
    946   |  mov L:RB, SAVE_L
    947   |  mov L:RB->base, BASE		// Caveat: CARG2 == BASE.
    948   |  mov CARG2, RA
    949   |  mov CARG1, L:RB			// Caveat: CARG1 == RA.
    950   |.else
    951   |  mov CARG2, RA
    952   |  mov CARG4d, RBd			// Caveat: CARG4 == RA.
    953   |  mov L:RB, SAVE_L
    954   |  mov L:RB->base, BASE		// Caveat: CARG3 == BASE.
    955   |  mov CARG3, RD
    956   |  mov CARG1, L:RB
    957   |.endif
    958   |  mov SAVE_PC, PC
    959   |  call extern lj_meta_equal	// (lua_State *L, GCobj *o1, *o2, int ne)
    960   |  // 0/1 or TValue * (metamethod) returned in rax (RC).
    961   |  jmp <3
    962   |
    963   |->vmeta_equal_cd:			// Equality fallback via lj_meta_equal_cd; body only assembled with FFI.
    964   |.if FFI
    965   |  sub PC, 4				// Back PC up by 4; label 4 of vmeta_comp adds 4 again on the 0/1 path.
    966   |  mov L:RB, SAVE_L
    967   |  mov L:RB->base, BASE
    968   |  mov CARG1, L:RB
    969   |  mov CARG2d, dword [PC-4]		// Pass the 32 bit instruction word just before the adjusted PC.
    970   |  mov SAVE_PC, PC
    971   |  call extern lj_meta_equal_cd	// (lua_State *L, BCIns ins)
    972   |  // 0/1 or TValue * (metamethod) returned in rax (RC).
    973   |  jmp <3
    974   |.endif
    975   |
    976   |->vmeta_istype:			// Type-check fallback: lj_meta_istype(L, RA, RD), then next instruction.
    977   |  mov L:RB, SAVE_L
    978   |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
    979   |  mov CARG2d, RAd			// Read RA before CARG1 is written below.
    980   |  mov CARG3d, RDd
    981   |  mov L:CARG1, L:RB
    982   |  mov SAVE_PC, PC
    983   |  call extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
    984   |  mov BASE, L:RB->base		// Refetch BASE after the C call.
    985   |  jmp <6				// Label 6 of vmeta_comp: ins_next.
    986   |
    987   |//-- Arithmetic metamethods ---------------------------------------------
    988   |
    989   |->vmeta_arith_vno:			// Arithmetic fallbacks. The *o entries reload operands B/C first (DUALNUM only).
    990   |.if DUALNUM
    991   |  movzx RBd, PC_RB
    992   |  movzx RCd, PC_RC
    993   |.endif
    994   |->vmeta_arith_vn:			// rb = stack slot B, rc = KBASE entry C.
    995   |  lea RC, [KBASE+RC*8]
    996   |  jmp >1
    997   |
    998   |->vmeta_arith_nvo:
    999   |.if DUALNUM
   1000   |  movzx RBd, PC_RB
   1001   |  movzx RCd, PC_RC
   1002   |.endif
   1003   |->vmeta_arith_nv:			// Swapped order: rb = KBASE entry C, rc = stack slot B.
   1004   |  lea TMPR, [KBASE+RC*8]
   1005   |  lea RC, [BASE+RB*8]
   1006   |  mov RB, TMPR
   1007   |  jmp >2
   1008   |
   1009   |->vmeta_unm:				// Unary case: both operands point to stack slot D.
   1010   |  lea RC, [BASE+RD*8]
   1011   |  mov RB, RC
   1012   |  jmp >2
   1013   |
   1014   |->vmeta_arith_vvo:
   1015   |.if DUALNUM
   1016   |  movzx RBd, PC_RB
   1017   |  movzx RCd, PC_RC
   1018   |.endif
   1019   |->vmeta_arith_vv:			// rb = stack slot B, rc = stack slot C.
   1020   |  lea RC, [BASE+RC*8]
   1021   |1:
   1022   |  lea RB, [BASE+RB*8]
   1023   |2:					// Here: RB = TValue *rb, RC = TValue *rc, RA = slot index.
   1024   |  lea RA, [BASE+RA*8]
   1025   |.if X64WIN
   1026   |  mov CARG3, RB
   1027   |  mov CARG4, RC
   1028   |  movzx RCd, PC_OP			// RC is free again after the copy to CARG4.
   1029   |  mov ARG5d, RCd
   1030   |  mov L:RB, SAVE_L
   1031   |  mov L:RB->base, BASE		// Caveat: CARG2 == BASE.
   1032   |  mov CARG2, RA
   1033   |  mov CARG1, L:RB			// Caveat: CARG1 == RA.
   1034   |.else
   1035   |  movzx CARG5d, PC_OP
   1036   |  mov CARG2, RA
   1037   |  mov CARG4, RC			// Caveat: CARG4 == RA.
   1038   |  mov L:CARG1, SAVE_L
   1039   |  mov L:CARG1->base, BASE		// Caveat: CARG3 == BASE.
   1040   |  mov CARG3, RB
   1041   |  mov L:RB, L:CARG1
   1042   |.endif
   1043   |  mov SAVE_PC, PC
   1044   |  call extern lj_meta_arith	// (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
   1045   |  // NULL (finished) or TValue * (metamethod) returned in rax (RC).
   1046   |  mov BASE, L:RB->base
   1047   |  test RC, RC
   1048   |  jz ->cont_nop			// Finished; otherwise fall through into vmeta_binop.
   1049   |
   1050   |  // Call metamethod for binary op. Also reached by fallthrough from the code above.
   1051   |->vmeta_binop:
   1052   |  // BASE = base, RC = new base, stack = cont/func/o1/o2
   1053   |  mov RA, RC
   1054   |  sub RC, BASE			// RC = new base - BASE (byte delta).
   1055   |  mov [RA-24], PC			// [cont|PC]
   1056   |  lea PC, [RC+FRAME_CONT]		// PC = frame delta + FRAME_CONT; must precede the RD write (RD is RC).
   1057   |  mov NARGS:RDd, 2+1			// 2 args for func(o1, o2).
   1058   |  jmp ->vm_call_dispatch
   1059   |
   1060   |->vmeta_len:				// Length fallback: lj_meta_len on stack slot D.
   1061   |  movzx RDd, PC_RD
   1062   |  mov L:RB, SAVE_L
   1063   |  mov L:RB->base, BASE
   1064   |  lea CARG2, [BASE+RD*8]		// Caveat: CARG2 == BASE
   1065   |  mov L:CARG1, L:RB
   1066   |  mov SAVE_PC, PC
   1067   |  call extern lj_meta_len		// (lua_State *L, TValue *o)
   1068   |  // NULL (retry) or TValue * (metamethod) returned in rax (RC).
   1069   |  mov BASE, L:RB->base
   1070 #if LJ_52
   1071   |  test RC, RC
   1072   |  jne ->vmeta_binop			// Binop call for compatibility.
   1073   |  movzx RDd, PC_RD			// NULL: reload the operand and retry at BC_LEN_Z.
   1074   |  mov TAB:CARG1, [BASE+RD*8]
   1075   |  cleartp TAB:CARG1
   1076   |  jmp ->BC_LEN_Z
   1077 #else
   1078   |  jmp ->vmeta_binop			// Binop call for compatibility.
   1079 #endif
   1080   |
   1081   |//-- Call metamethod ----------------------------------------------------
   1082   |
   1083   |->vmeta_call_ra:			// Entry with RA = slot index: convert it to the new base pointer first.
   1084   |  lea RA, [BASE+RA*8+16]
   1085   |->vmeta_call:			// Resolve and call __call metamethod.
   1086   |  // BASE = old base, RA = new base, RC = nargs+1, PC = return
   1087   |  mov TMP1d, NARGS:RDd		// Save nargs+1 in TMP1 across the C call.
   1088   |  mov RB, RA				// Save RA (new base) in RB across the C call.
   1089   |.if X64WIN
   1090   |  mov L:TMPR, SAVE_L
   1091   |  mov L:TMPR->base, BASE		// Caveat: CARG2 is BASE.
   1092   |  lea CARG2, [RA-16]
   1093   |  lea CARG3, [RA+NARGS:RD*8-8]
   1094   |  mov CARG1, L:TMPR			// Caveat: CARG1 is RA.
   1095   |.else
   1096   |  mov L:CARG1, SAVE_L
   1097   |  mov L:CARG1->base, BASE		// Caveat: CARG3 is BASE.
   1098   |  lea CARG2, [RA-16]
   1099   |  lea CARG3, [RA+NARGS:RD*8-8]
   1100   |.endif
   1101   |  mov SAVE_PC, PC
   1102   |  call extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
   1103   |  mov RA, RB				// Restore RA (new base).
   1104   |  mov L:RB, SAVE_L
   1105   |  mov BASE, L:RB->base
   1106   |  mov NARGS:RDd, TMP1d		// Restore nargs+1.
   1107   |  mov LFUNC:RB, [RA-16]		// Load the function slot; still tagged until cleartp below.
   1108   |  add NARGS:RDd, 1			// One more argument than before the call.
   1109   |  // This is fragile. L->base must not move, KBASE must always be defined.
   1110   |  cmp KBASE, BASE			// Continue with CALLT if flag set.
   1111   |  je ->BC_CALLT_Z
   1112   |  cleartp LFUNC:RB
   1113   |  mov BASE, RA
   1114   |  ins_call				// Otherwise call resolved metamethod.
   1115   |
   1116   |//-- Argument coercion for 'for' statement ------------------------------
   1117   |
   1118   |->vmeta_for:				// Calls lj_meta_for(L, RA), then re-dispatches the instruction before PC.
   1119   |  mov L:RB, SAVE_L
   1120   |  mov L:RB->base, BASE
   1121   |  mov CARG2, RA			// Caveat: CARG2 == BASE
   1122   |  mov L:CARG1, L:RB			// Caveat: CARG1 == RA
   1123   |  mov SAVE_PC, PC
   1124   |  call extern lj_meta_for	// (lua_State *L, TValue *base)
   1125   |  mov BASE, L:RB->base
   1126   |  mov RCd, [PC-4]			// Re-decode the instruction word before PC:
   1127   |  movzx RAd, RCH			//   bits 8..15 -> RA,
   1128   |  movzx OP, RCL			//   bits 0..7 -> opcode,
   1129   |  shr RCd, 16				//   bits 16..31 stay in RC.
   1130   |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Retry FORI or JFORI.
   1131   |
   1132   |//-----------------------------------------------------------------------
   1133   |//-- Fast functions -----------------------------------------------------
   1134   |//-----------------------------------------------------------------------
   1135   |
   1136   |.macro .ffunc, name			// Fast function entry: defines label ->ff_<name>, no argument check.
   1137   |->ff_ .. name:
   1138   |.endmacro
   1139   |
   1140   |.macro .ffunc_1, name			// Entry requiring at least 1 argument (RD holds nargs+1).
   1141   |->ff_ .. name:
   1142   |  cmp NARGS:RDd, 1+1;  jb ->fff_fallback
   1143   |.endmacro
   1144   |
   1145   |.macro .ffunc_2, name			// Entry requiring at least 2 arguments.
   1146   |->ff_ .. name:
   1147   |  cmp NARGS:RDd, 2+1;  jb ->fff_fallback
   1148   |.endmacro
   1149   |
   1150   |.macro .ffunc_n, name, op		// 1 number argument; applies 'op xmm0, [BASE]' (load or compute).
   1151   |  .ffunc_1 name
   1152   |  checknumtp [BASE], ->fff_fallback
   1153   |  op xmm0, qword [BASE]
   1154   |.endmacro
   1155   |
   1156   |.macro .ffunc_n, name			// 1 number argument, loaded into xmm0.
   1157   |  .ffunc_n name, movsd
   1158   |.endmacro
   1159   |
   1160   |.macro .ffunc_nn, name			// 2 number arguments, loaded into xmm0 and xmm1.
   1161   |  .ffunc_2 name
   1162   |  checknumtp [BASE], ->fff_fallback
   1163   |  checknumtp [BASE+8], ->fff_fallback
   1164   |  movsd xmm0, qword [BASE]
   1165   |  movsd xmm1, qword [BASE+8]
   1166   |.endmacro
   1167   |
   1168   |// Inlined GC threshold check. Caveat: uses label 1. Clobbers RB and flags.
   1169   |.macro ffgccheck
   1170   |  mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
   1171   |  cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
   1172   |  jb >1				// total < threshold: skip the GC step.
   1173   |  call ->fff_gcstep
   1174   |1:
   1175   |.endmacro
   1176   |
   1177   |//-- Base library: checks -----------------------------------------------
   1178   |
   1179   |.ffunc_1 assert			// Fast path for a true condition: returns all arguments unchanged.
   1180   |  mov ITYPE, [BASE]
   1181   |  mov RB, ITYPE			// NOTE(review): RB is reloaded from [BASE] below before it is read here; looks redundant - confirm fff_fallback does not need it.
   1182   |  sar ITYPE, 47
   1183   |  cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback	// Not a true condition: use the fallback.
   1184   |  mov PC, [BASE-8]			// Fetch the frame link/PC word before it is overwritten by the copy loop.
   1185   |  mov MULTRES, RDd			// Remember nargs+1; it is restored as the result count at label 2.
   1186   |  mov RB, [BASE]
   1187   |  mov [BASE-16], RB			// First result = first argument.
   1188   |  sub RDd, 2
   1189   |  jz >2				// Exactly one argument: nothing more to copy.
   1190   |  mov RA, BASE
   1191   |1:					// Move each remaining argument down by 16 bytes.
   1192   |  add RA, 8
   1193   |  mov RB, [RA]
   1194   |  mov [RA-16], RB
   1195   |  sub RDd, 1
   1196   |  jnz <1
   1197   |2:
   1198   |  mov RDd, MULTRES
   1199   |  jmp ->fff_res_			// MULTRES is already set, so skip the store in fff_res.
   1200   |
   1201   |.ffunc_1 type				// Returns a string picked from this C function's upvalues by type tag.
   1202   |  mov RC, [BASE]
   1203   |  sar RC, 47				// RC = type tag of the argument.
   1204   |  mov RBd, LJ_TISNUM
   1205   |  cmp RCd, RBd
   1206   |  cmovb RCd, RBd			// Tags below LJ_TISNUM (unsigned) are clamped to LJ_TISNUM.
   1207   |  not RCd				// Bitwise complement of the tag is the upvalue index.
   1208   |2:
   1209   |  mov CFUNC:RB, [BASE-16]
   1210   |  cleartp CFUNC:RB
   1211   |  mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
   1212   |  mov PC, [BASE-8]
   1213   |  settp STR:RC, LJ_TSTR
   1214   |  mov [BASE-16], STR:RC
   1215   |  jmp ->fff_res1
   1216   |
   1217   |//-- Base library: getters and setters ---------------------------------
   1218   |
   1219   |.ffunc_1 getmetatable			// Returns mt.__metatable if present and non-nil, else the metatable, else nil.
   1220   |  mov TAB:RB, [BASE]
   1221   |  mov PC, [BASE-8]
   1222   |  checktab TAB:RB, >6
   1223   |1:  // Field metatable must be at same offset for GCtab and GCudata!
   1224   |  mov TAB:RB, TAB:RB->metatable
   1225   |2:
   1226   |  test TAB:RB, TAB:RB
   1227   |  mov aword [BASE-16], LJ_TNIL	// Default result nil; mov does not change the flags from test.
   1228   |  jz ->fff_res1
   1229   |  settp TAB:RC, TAB:RB, LJ_TTAB
   1230   |  mov [BASE-16], TAB:RC		// Store metatable as default result.
   1231   |  mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
   1232   |  mov RAd, TAB:RB->hmask
   1233   |  and RAd, STR:RC->hash		// RA = hash & hmask = node index of the chain head.
   1234   |  settp STR:RC, LJ_TSTR		// Tag the key so it can be compared with whole node keys.
   1235   |  imul RAd, #NODE
   1236   |  add NODE:RA, TAB:RB->node
   1237   |3:  // Rearranged logic, because we expect _not_ to find the key.
   1238   |  cmp NODE:RA->key, STR:RC
   1239   |  je >5
   1240   |4:
   1241   |  mov NODE:RA, NODE:RA->next		// Follow the collision chain.
   1242   |  test NODE:RA, NODE:RA
   1243   |  jnz <3
   1244   |  jmp ->fff_res1			// Not found, keep default result.
   1245   |5:
   1246   |  mov RB, NODE:RA->val
   1247   |  cmp RB, LJ_TNIL; je ->fff_res1	// Ditto for nil value.
   1248   |  mov [BASE-16], RB			// Return value of mt.__metatable.
   1249   |  jmp ->fff_res1
   1250   |
   1251   |6:					// Not a table; ITYPE is expected to hold the type tag from checktab.
   1252   |  cmp ITYPEd, LJ_TUDATA; je <1	// Userdata: use its own metatable field.
   1253   |  cmp ITYPEd, LJ_TISNUM; ja >7
   1254   |  mov ITYPEd, LJ_TISNUM		// Tags up to LJ_TISNUM (unsigned) share the LJ_TISNUM entry.
   1255   |7:
   1256   |  not ITYPEd				// Complement of the tag indexes the base metatable array.
   1257   |  mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
   1258   |  jmp <2
   1259   |
   1260   |.ffunc_2 setmetatable			// Fast path only: table without a metatable, new metatable is a table.
   1261   |  mov TAB:RB, [BASE]
   1262   |  mov TAB:TMPR, TAB:RB		// Keep the original tagged value for the result.
   1263   |  checktab TAB:RB, ->fff_fallback
   1264   |  // Fast path: no mt for table yet and not clearing the mt.
   1265   |  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
   1266   |  mov TAB:RA, [BASE+8]
   1267   |  checktab TAB:RA, ->fff_fallback
   1268   |  mov TAB:RB->metatable, TAB:RA
   1269   |  mov PC, [BASE-8]
   1270   |  mov [BASE-16], TAB:TMPR			// Return original table.
   1271   |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
   1272   |  jz >1
   1273   |  // Possible write barrier. Table is black, but skip iswhite(mt) check.
   1274   |  barrierback TAB:RB, RC
   1275   |1:
   1276   |  jmp ->fff_res1
   1277   |
   1278   |.ffunc_2 rawget			// rawget(t, k): copies the slot returned by lj_tab_get as the result.
   1279   |.if X64WIN
   1280   |  mov TAB:RA, [BASE]
   1281   |  checktab TAB:RA, ->fff_fallback
   1282   |  mov RB, BASE			// Save BASE.
   1283   |  lea CARG3, [BASE+8]
   1284   |  mov CARG2, TAB:RA			// Caveat: CARG2 == BASE.
   1285   |  mov CARG1, SAVE_L
   1286   |.else
   1287   |  mov TAB:CARG2, [BASE]
   1288   |  checktab TAB:CARG2, ->fff_fallback
   1289   |  mov RB, BASE			// Save BASE.
   1290   |  lea CARG3, [BASE+8]		// Caveat: CARG3 == BASE.
   1291   |  mov CARG1, SAVE_L
   1292   |.endif
   1293   |  call extern lj_tab_get	// (lua_State *L, GCtab *t, cTValue *key)
   1294   |  // cTValue * returned in rax (RD); it is dereferenced below without a NULL check.
   1295   |  mov BASE, RB			// Restore BASE.
   1296   |  // Copy table slot.
   1297   |  mov RB, [RD]
   1298   |  mov PC, [BASE-8]
   1299   |  mov [BASE-16], RB
   1300   |  jmp ->fff_res1
   1301   |
   1302   |//-- Base library: conversions ------------------------------------------
   1303   |
   1304   |.ffunc tonumber			// tonumber(x): returns x unchanged if it is already a number.
   1305   |  // Only handles the number case inline (without a base argument).
   1306   |  cmp NARGS:RDd, 1+1;  jne ->fff_fallback	// Exactly one argument.
   1307   |  mov RB, [BASE]
   1308   |  checknumber RB, ->fff_fallback
   1309   |  mov PC, [BASE-8]
   1310   |  mov [BASE-16], RB
   1311   |  jmp ->fff_res1
   1312   |
   1313   |.ffunc_1 tostring			// tostring(x): strings are returned as-is, numbers are formatted via C.
   1314   |  // Only handles the string or number case inline.
   1315   |  mov PC, [BASE-8]
   1316   |  mov STR:RB, [BASE]
   1317   |  checktp_nc STR:RB, LJ_TSTR, >3
   1318   |  // A __tostring method in the string base metatable is ignored.
   1319   |2:
   1320   |  mov [BASE-16], STR:RB		// Result = tagged string in RB.
   1321   |  jmp ->fff_res1
   1322   |3:  // Handle numbers inline, unless a number base metatable is present.
   1323   |  cmp ITYPEd, LJ_TISNUM;  ja ->fff_fallback_1
   1324   |  cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
   1325   |  jne ->fff_fallback
   1326   |  ffgccheck				// Caveat: uses label 1.
   1327   |  mov L:RB, SAVE_L
   1328   |  mov L:RB->base, BASE		// Add frame since C call can throw.
   1329   |  mov SAVE_PC, PC			// Redundant (but a defined value).
   1330   |.if not X64WIN
   1331   |  mov CARG2, BASE			// Otherwise: CARG2 == BASE
   1332   |.endif
   1333   |  mov L:CARG1, L:RB
   1334   |.if DUALNUM
   1335   |  call extern lj_strfmt_number	// (lua_State *L, cTValue *o)
   1336   |.else
   1337   |  call extern lj_strfmt_num		// (lua_State *L, lua_Number *np)
   1338   |.endif
   1339   |  // GCstr returned in rax (RD).
   1340   |  mov BASE, L:RB->base
   1341   |  settp STR:RB, RD, LJ_TSTR		// Tag the new string and return it via label 2.
   1342   |  jmp <2
   1343   |
   1344   |//-- Base library: iterators -------------------------------------------
   1345   |
   1346   |.ffunc_1 next				// next(t [, k]): key/value pair from lj_tab_next, or nil at the end.
   1347   |  je >2				// Missing 2nd arg?
   1348   |1:
   1349   |.if X64WIN
   1350   |  mov RA, [BASE]
   1351   |  checktab RA, ->fff_fallback
   1352   |.else
   1353   |  mov CARG2, [BASE]
   1354   |  checktab CARG2, ->fff_fallback
   1355   |.endif
   1356   |  mov L:RB, SAVE_L
   1357   |  mov L:RB->base, BASE		// Add frame since C call can throw.
   1358   |  mov L:RB->top, BASE		// Dummy frame length is ok.
   1359   |  mov PC, [BASE-8]
   1360   |.if X64WIN
   1361   |  lea CARG3, [BASE+8]
   1362   |  mov CARG2, RA			// Caveat: CARG2 == BASE.
   1363   |  mov CARG1, L:RB
   1364   |.else
   1365   |  lea CARG3, [BASE+8]		// Caveat: CARG3 == BASE.
   1366   |  mov CARG1, L:RB
   1367   |.endif
   1368   |  mov SAVE_PC, PC			// Needed for ITERN fallback.
   1369   |  call extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
   1370   |  // Flag returned in eax (RD).
   1371   |  mov BASE, L:RB->base
   1372   |  test RDd, RDd;  jz >3		// End of traversal?
   1373   |  // Copy key and value to results.
   1374   |  mov RB, [BASE+8]
   1375   |  mov RD, [BASE+16]
   1376   |  mov [BASE-16], RB
   1377   |  mov [BASE-8], RD
   1378   |->fff_res2:				// Return 2 results stored at BASE-16 and BASE-8.
   1379   |  mov RDd, 1+2
   1380   |  jmp ->fff_res
   1381   |2:  // Set missing 2nd arg to nil.
   1382   |  mov aword [BASE+8], LJ_TNIL
   1383   |  jmp <1
   1384   |3:  // End of traversal: return nil.
   1385   |  mov aword [BASE-16], LJ_TNIL
   1386   |  jmp ->fff_res1
   1387   |
   1388   |.ffunc_1 pairs			// pairs(t): returns (upvalue[0], t, nil); upvalue[0] is presumably next.
   1389   |  mov TAB:RB, [BASE]
   1390   |  mov TMPR, TAB:RB			// Keep the tagged table value for the second result.
   1391   |  checktab TAB:RB, ->fff_fallback
   1392 #if LJ_52
   1393   |  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback	// Tables with a metatable take the fallback.
   1394 #endif
   1395   |  mov CFUNC:RD, [BASE-16]
   1396   |  cleartp CFUNC:RD
   1397   |  mov CFUNC:RD, CFUNC:RD->upvalue[0]
   1398   |  settp CFUNC:RD, LJ_TFUNC
   1399   |  mov PC, [BASE-8]			// Read the frame word before [BASE-8] is overwritten below.
   1400   |  mov [BASE-16], CFUNC:RD
   1401   |  mov [BASE-8], TMPR
   1402   |  mov aword [BASE], LJ_TNIL
   1403   |  mov RDd, 1+3
   1404   |  jmp ->fff_res
   1405   |
   1406   |.ffunc_2 ipairs_aux			// Iterator step: returns (i+1, t[i+1]), or nothing if that slot is nil/absent.
   1407   |  mov TAB:RB, [BASE]
   1408   |  checktab TAB:RB, ->fff_fallback
   1409   |.if DUALNUM
   1410   |  mov RA, [BASE+8]
   1411   |  checkint RA, ->fff_fallback
   1412   |.else
   1413   |  checknumtp [BASE+8], ->fff_fallback
   1414   |  movsd xmm0, qword [BASE+8]
   1415   |.endif
   1416   |  mov PC, [BASE-8]
   1417   |.if DUALNUM
   1418   |  add RAd, 1				// i+1 as integer.
   1419   |  setint ITYPE, RA
   1420   |  mov [BASE-16], ITYPE		// First result: new index.
   1421   |.else
   1422   |  sseconst_1 xmm1, TMPR
   1423   |  addsd xmm0, xmm1			// i+1 as double.
   1424   |  cvttsd2si RAd, xmm0		// RA = truncated integer index.
   1425   |  movsd qword [BASE-16], xmm0	// First result: new index.
   1426   |.endif
   1427   |  cmp RAd, TAB:RB->asize;  jae >2	// Not in array part?
   1428   |  mov RD, TAB:RB->array
   1429   |  lea RD, [RD+RA*8]
   1430   |1:					// RD = pointer to the value slot.
   1431   |  cmp aword [RD], LJ_TNIL;  je ->fff_res0
   1432   |  // Copy array slot.
   1433   |  mov RB, [RD]
   1434   |  mov [BASE-8], RB
   1435   |  jmp ->fff_res2
   1436   |2:  // Check for empty hash part first. Otherwise call C function.
   1437   |  cmp dword TAB:RB->hmask, 0; je ->fff_res0
   1438   |.if X64WIN
   1439   |  mov TMPR, BASE			// BASE must be moved aside before CARG2 is written.
   1440   |  mov CARG2d, RAd
   1441   |  mov CARG1, TAB:RB
   1442   |  mov RB, TMPR			// Save BASE in RB across the call.
   1443   |.else
   1444   |  mov CARG1, TAB:RB
   1445   |  mov RB, BASE			// Save BASE.
   1446   |  mov CARG2d, RAd			// Caveat: CARG2 == BASE
   1447   |.endif
   1448   |  call extern lj_tab_getinth		// (GCtab *t, int32_t key)
   1449   |  // cTValue * or NULL returned in rax (RD).
   1450   |  mov BASE, RB			// Restore BASE.
   1451   |  test RD, RD
   1452   |  jnz <1
   1453   |->fff_res0:				// Return no results.
   1454   |  mov RDd, 1+0
   1455   |  jmp ->fff_res
   1456   |
   1457   |.ffunc_1 ipairs			// ipairs(t): returns (upvalue[0], t, 0); upvalue[0] is presumably ipairs_aux.
   1458   |  mov TAB:RB, [BASE]
   1459   |  mov TMPR, TAB:RB			// Keep the tagged table value for the second result.
   1460   |  checktab TAB:RB, ->fff_fallback
   1461 #if LJ_52
   1462   |  cmp aword TAB:RB->metatable, 0; jne ->fff_fallback	// Tables with a metatable take the fallback.
   1463 #endif
   1464   |  mov CFUNC:RD, [BASE-16]
   1465   |  cleartp CFUNC:RD
   1466   |  mov CFUNC:RD, CFUNC:RD->upvalue[0]
   1467   |  settp CFUNC:RD, LJ_TFUNC
   1468   |  mov PC, [BASE-8]			// Read the frame word before [BASE-8] is overwritten below.
   1469   |  mov [BASE-16], CFUNC:RD
   1470   |  mov [BASE-8], TMPR
   1471   |.if DUALNUM
   1472   |  mov64 RD, ((int64_t)LJ_TISNUM<<47)	// Integer-tagged value with payload 0.
   1473   |  mov [BASE], RD
   1474   |.else
   1475   |  mov qword [BASE], 0		// All-zero bits: the double 0.0.
   1476   |.endif
   1477   |  mov RDd, 1+3
   1478   |  jmp ->fff_res
   1479   |
   1480   |//-- Base library: catch errors ----------------------------------------
   1481   |
   1482   |.ffunc_1 pcall			// pcall(f, ...): shifts f and its args up one slot, then dispatches the call.
   1483   |  lea RA, [BASE+16]			// RA = new base for the protected call.
   1484   |  sub NARGS:RDd, 1			// f itself is not an argument of the inner call.
   1485   |  mov PCd, 16+FRAME_PCALL		// Frame delta (16 bytes) plus the pcall frame type.
   1486   |1:					// Shared tail; xpcall enters here with its own RA/RD/PC.
   1487   |  movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
   1488   |  shr RB, HOOK_ACTIVE_SHIFT
   1489   |  and RB, 1
   1490   |  add PC, RB				// Remember active hook before pcall.
   1491   |  // Note: this does a (harmless) copy of the function to the PC slot, too.
   1492   |  mov KBASE, RD			// KBASE is used as the loop counter here.
   1493   |2:					// Copy slots upwards by 8 bytes, highest index first.
   1494   |  mov RB, [RA+KBASE*8-24]
   1495   |  mov [RA+KBASE*8-16], RB
   1496   |  sub KBASE, 1
   1497   |  ja <2				// Loop while the counter stays above zero.
   1498   |  jmp ->vm_call_dispatch
   1499   |
   1500   |.ffunc_2 xpcall			// xpcall(f, handler, ...): swaps f/handler, then shares the pcall tail.
   1501   |  mov LFUNC:RA, [BASE+8]
   1502   |  checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback	// The 2nd argument must be a function.
   1503   |  mov LFUNC:RB, [BASE]		// Swap function and traceback.
   1504   |  mov [BASE], LFUNC:RA
   1505   |  mov [BASE+8], LFUNC:RB
   1506   |  lea RA, [BASE+24]			// RA = new base for the protected call.
   1507   |  sub NARGS:RDd, 2			// f and handler are not arguments of the inner call.
   1508   |  mov PCd, 24+FRAME_PCALL		// Frame delta (24 bytes) plus the pcall frame type.
   1509   |  jmp <1				// Continue at label 1 of pcall.
   1510   |
   1511   |//-- Coroutine library --------------------------------------------------
   1512   |
   1513   |.macro coroutine_resume_wrap, resume	// resume=1: coroutine.resume, resume=0: wrap helper.
   1514   |.if resume
   1515   |.ffunc_1 coroutine_resume
   1516   |  mov L:RB, [BASE]			// Coroutine is the first argument (type checked further below).
   1517   |  cleartp L:RB
   1518   |.else
   1519   |.ffunc coroutine_wrap_aux
   1520   |  mov CFUNC:RB, [BASE-16]
   1521   |  cleartp CFUNC:RB
   1522   |  mov L:RB, CFUNC:RB->upvalue[0].gcr	// Coroutine is held in upvalue[0] of the wrapper.
   1523   |  cleartp L:RB
   1524   |.endif
   1525   |  mov PC, [BASE-8]
   1526   |  mov SAVE_PC, PC
   1527   |  mov TMP1, L:RB			// TMP1 = coroutine, kept across the calls below.
   1528   |.if resume
   1529   |  checktptp [BASE], LJ_TTHREAD, ->fff_fallback
   1530   |.endif
   1531   |  cmp aword L:RB->cframe, 0; jne ->fff_fallback
   1532   |  cmp byte L:RB->status, LUA_YIELD;  ja ->fff_fallback
   1533   |  mov RA, L:RB->top			// mov keeps the flags from the status compare.
   1534   |  je >1				// Status != LUA_YIELD (i.e. 0)?
   1535   |  cmp RA, L:RB->base			// Check for presence of initial func.
   1536   |  je ->fff_fallback
   1537   |  mov PC, [RA-8]			// Move initial function up.
   1538   |  mov [RA], PC
   1539   |  add RA, 8
   1540   |1:
   1541   |.if resume
   1542   |  lea PC, [RA+NARGS:RD*8-16]		// Check stack space (-1-thread).
   1543   |.else
   1544   |  lea PC, [RA+NARGS:RD*8-8]		// Check stack space (-1).
   1545   |.endif
   1546   |  cmp PC, L:RB->maxstack; ja ->fff_fallback
   1547   |  mov L:RB->top, PC			// New top of the coroutine stack.
   1548   |
   1549   |  mov L:RB, SAVE_L			// From here on L:RB is the calling thread.
   1550   |  mov L:RB->base, BASE
   1551   |.if resume
   1552   |  add BASE, 8			// Keep resumed thread in stack for GC.
   1553   |.endif
   1554   |  mov L:RB->top, BASE
   1555   |.if resume
   1556   |  lea RB, [BASE+NARGS:RD*8-24]	// RB = end of source for stack move.
   1557   |.else
   1558   |  lea RB, [BASE+NARGS:RD*8-16]	// RB = end of source for stack move.
   1559   |.endif
   1560   |  sub RB, PC			// Relative to PC.
   1561   |
   1562   |  cmp PC, RA
   1563   |  je >3				// Nothing to copy.
   1564   |2:  // Move args to coroutine.
   1565   |  mov RC, [PC+RB]
   1566   |  mov [PC-8], RC
   1567   |  sub PC, 8
   1568   |  cmp PC, RA
   1569   |  jne <2
   1570   |3:
   1571   |  mov CARG2, RA
   1572   |  mov CARG1, TMP1
   1573   |  call ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
   1574   |
   1575   |  mov L:RB, SAVE_L
   1576   |  mov L:PC, TMP1			// L:PC = coroutine.
   1577   |  mov BASE, L:RB->base
   1578   |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   1579   |  set_vmstate INTERP
   1580   |
   1581   |  cmp eax, LUA_YIELD
   1582   |  ja >8				// Status above LUA_YIELD: error.
   1583   |4:
   1584   |  mov RA, L:PC->base
   1585   |  mov KBASE, L:PC->top
   1586   |  mov L:PC->top, RA			// Clear coroutine stack.
   1587   |  mov PC, KBASE
   1588   |  sub PC, RA				// PC = size of the results in bytes.
   1589   |  je >6				// No results?
   1590   |  lea RD, [BASE+PC]
   1591   |  shr PCd, 3				// PC = number of results.
   1592   |  cmp RD, L:RB->maxstack
   1593   |  ja >9				// Need to grow stack?
   1594   |
   1595   |  mov RB, BASE
   1596   |  sub RB, RA				// RB = destination offset relative to the source pointer RA.
   1597   |5:  // Move results from coroutine.
   1598   |  mov RD, [RA]
   1599   |  mov [RA+RB], RD
   1600   |  add RA, 8
   1601   |  cmp RA, KBASE
   1602   |  jne <5
   1603   |6:
   1604   |.if resume
   1605   |  lea RDd, [PCd+2]			// nresults+1 = 1 + true + results.
   1606   |  mov_true ITYPE			// Prepend true to results.
   1607   |  mov [BASE-8], ITYPE
   1608   |.else
   1609   |  lea RDd, [PCd+1]			// nresults+1 = 1 + results.
   1610   |.endif
   1611   |7:
   1612   |  mov PC, SAVE_PC
   1613   |  mov MULTRES, RDd
   1614   |.if resume
   1615   |  mov RA, -8				// Results start at BASE-8 (the prepended status).
   1616   |.else
   1617   |  xor RAd, RAd			// Results start at BASE.
   1618   |.endif
   1619   |  test PCd, FRAME_TYPE
   1620   |  jz ->BC_RET_Z			// No frame type bits set: return via BC_RET_Z.
   1621   |  jmp ->vm_return
   1622   |
   1623   |8:  // Coroutine returned with error (at co->top-1).
   1624   |.if resume
   1625   |  mov_false ITYPE			// Prepend false to results.
   1626   |  mov [BASE-8], ITYPE
   1627   |  mov RA, L:PC->top
   1628   |  sub RA, 8
   1629   |  mov L:PC->top, RA			// Clear error from coroutine stack.
   1630   |  // Copy error message.
   1631   |  mov RD, [RA]
   1632   |  mov [BASE], RD
   1633   |  mov RDd, 1+2			// nresults+1 = 1 + false + error.
   1634   |  jmp <7
   1635   |.else
   1636   |  mov CARG2, L:PC
   1637   |  mov CARG1, L:RB
   1638   |  call extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co)
   1639   |  // Error function does not return.
   1640   |.endif
   1641   |
   1642   |9:  // Handle stack expansion on return from yield.
   1643   |  mov L:RA, TMP1
   1644   |  mov L:RA->top, KBASE		// Undo coroutine stack clearing.
   1645   |  mov CARG2, PC			// n = number of results (PC was shifted right by 3 above).
   1646   |  mov CARG1, L:RB
   1647   |  call extern lj_state_growstack	// (lua_State *L, int n)
   1648   |  mov L:PC, TMP1
   1649   |  mov BASE, L:RB->base
   1650   |  jmp <4				// Retry the stack move.
   1651   |.endmacro
   1652   |
   1653   |  coroutine_resume_wrap 1		// coroutine.resume
   1654   |  coroutine_resume_wrap 0		// coroutine.wrap
   1655   |
   1656   |.ffunc coroutine_yield		// Fast yield; only taken when CFRAME_RESUME is set in L->cframe.
   1657   |  mov L:RB, SAVE_L
   1658   |  test aword L:RB->cframe, CFRAME_RESUME
   1659   |  jz ->fff_fallback
   1660   |  mov L:RB->base, BASE
   1661   |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8.
   1662   |  mov L:RB->top, RD
   1663   |  xor RDd, RDd
   1664   |  mov aword L:RB->cframe, RD		// L->cframe = NULL.
   1665   |  mov al, LUA_YIELD			// Upper bits of rax are zero from the xor above.
   1666   |  mov byte L:RB->status, al
   1667   |  jmp ->vm_leave_unw
   1668   |
   1669   |//-- Math library -------------------------------------------------------
   1670   |
   1671   |  .ffunc_1 math_abs			// math.abs(x); also hosts the shared fff_resi/fff_resRB exits under DUALNUM.
   1672   |  mov RB, [BASE]
   1673   |.if DUALNUM
   1674   |  checkint RB, >3
   1675   |  cmp RBd, 0; jns ->fff_resi		// Non-negative integer: return as is.
   1676   |  neg RBd; js >2			// Still negative after neg: the value was -2^31.
   1677   |->fff_resbit:
   1678   |->fff_resi:				// Return the integer in RBd.
   1679   |  setint RB
   1680   |->fff_resRB:				// Return the TValue in RB.
   1681   |  mov PC, [BASE-8]
   1682   |  mov [BASE-16], RB
   1683   |  jmp ->fff_res1
   1684   |2:
   1685   |  mov64 RB, U64x(41e00000,00000000)  // 2^31.
   1686   |  jmp ->fff_resRB
   1687   |3:
   1688   |  ja ->fff_fallback			// Uses the flags left by checkint: not a number either.
   1689   |.else
   1690   |  checknum RB, ->fff_fallback
   1691   |.endif
   1692   |  shl RB, 1				// Shift the sign bit (bit 63) out ...
   1693   |  shr RB, 1				// ... and shift a zero back in.
   1694   |  mov PC, [BASE-8]
   1695   |  mov [BASE-16], RB
   1696   |  jmp ->fff_res1
   1697   |
   1698   |.ffunc_n math_sqrt, sqrtsd		// math.sqrt(x): xmm0 = sqrt([BASE]), falls through into the result exits.
   1699   |->fff_resxmm0:				// Return the double in xmm0.
   1700   |  mov PC, [BASE-8]
   1701   |  movsd qword [BASE-16], xmm0
   1702   |  // fallthrough
   1703   |
   1704   |->fff_res1:				// Return 1 result stored at BASE-16; PC already loaded.
   1705   |  mov RDd, 1+1
   1706   |->fff_res:				// Return RD-1 results starting at BASE-16.
   1707   |  mov MULTRES, RDd
   1708   |->fff_res_:				// Same, but MULTRES has already been set by the caller.
   1709   |  test PCd, FRAME_TYPE
   1710   |  jnz >7
   1711   |5:
   1712   |  cmp PC_RB, RDL			// More results expected?
   1713   |  ja >6
   1714   |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
   1715   |  movzx RAd, PC_RA
   1716   |  neg RA
   1717   |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
   1718   |  ins_next
   1719   |
   1720   |6:  // Fill up results with nil.
   1721   |  mov aword [BASE+RD*8-24], LJ_TNIL
   1722   |  add RD, 1
   1723   |  jmp <5
   1724   |
   1725   |7:  // Non-standard return case.
   1726   |  mov RA, -16			// Results start at BASE+RA = BASE-16.
   1727   |  jmp ->vm_return
   1728   |
   1729   |.macro math_round, func		// Defines ff_math_<func>, implemented via ->vm_<func>_sse.
   1730   |  .ffunc math_ .. func
   1731   |.if DUALNUM
   1732   |  mov RB, [BASE]
   1733   |  checknumx RB, ->fff_resRB, je	// Integer argument: returned unchanged.
   1734   |  ja ->fff_fallback
   1735   |.else
   1736   |  checknumtp [BASE], ->fff_fallback
   1737   |.endif
   1738   |  movsd xmm0, qword [BASE]
   1739   |  call ->vm_ .. func .. _sse
   1740   |.if DUALNUM
   1741   |  cvttsd2si RBd, xmm0		// Try to return the result as an integer.
   1742   |  cmp RBd, 0x80000000		// 0x80000000 is also what cvttsd2si yields on overflow/NaN.
   1743   |  jne ->fff_resi
   1744   |  cvtsi2sd xmm1, RBd			// Ambiguous case: convert back and compare with the original.
   1745   |  ucomisd xmm0, xmm1
   1746   |  jp ->fff_resxmm0			// Unordered (NaN): return the double.
   1747   |  je ->fff_resi			// Exactly -2^31: return the integer.
   1748   |.endif
   1749   |  jmp ->fff_resxmm0
   1750   |.endmacro
   1751   |
   1752   |  math_round floor
   1753   |  math_round ceil
   1754   |
   1755   |.ffunc math_log			// math.log(x): inline only for exactly one number argument.
   1756   |  cmp NARGS:RDd, 1+1; jne ->fff_fallback	// Exactly one argument.
   1757   |  checknumtp [BASE], ->fff_fallback
   1758   |  movsd xmm0, qword [BASE]
   1759   |  mov RB, BASE			// Save BASE across the C call.
   1760   |  call extern log
   1761   |  mov BASE, RB			// Restore BASE.
   1762   |  jmp ->fff_resxmm0
   1763   |
   1764   |.macro math_extern, func		// ff_math_<func>: calls the external C function func(double) on one number.
   1765   |  .ffunc_n math_ .. func
   1766   |  mov RB, BASE			// Save BASE across the C call.
   1767   |  call extern func
   1768   |  mov BASE, RB			// Restore BASE.
   1769   |  jmp ->fff_resxmm0
   1770   |.endmacro
   1771   |
   1772   |.macro math_extern2, func		// Same for two number arguments (xmm0, xmm1).
   1773   |  .ffunc_nn math_ .. func
   1774   |  mov RB, BASE			// Save BASE across the C call.
   1775   |  call extern func
   1776   |  mov BASE, RB			// Restore BASE.
   1777   |  jmp ->fff_resxmm0
   1778   |.endmacro
   1779   |
   1780   |  math_extern log10
   1781   |  math_extern exp
   1782   |  math_extern sin
   1783   |  math_extern cos
   1784   |  math_extern tan
   1785   |  math_extern asin
   1786   |  math_extern acos
   1787   |  math_extern atan
   1788   |  math_extern sinh
   1789   |  math_extern cosh
   1790   |  math_extern tanh
   1791   |  math_extern2 pow
   1792   |  math_extern2 atan2
   1793   |  math_extern2 fmod
   1794   |
   1795   |.ffunc_2 math_ldexp			// math.ldexp(m, e) computed with x87 fscale.
   1796   |  checknumtp [BASE], ->fff_fallback
   1797   |  checknumtp [BASE+8], ->fff_fallback
   1798   |  fld qword [BASE+8]			// Exponent (ends up in st1).
   1799   |  fld qword [BASE]			// Mantissa in st0.
   1800   |  fscale				// st0 = st0 * 2^trunc(st1).
   1801   |  fpop1				// Presumably drops st1 (the exponent) -- see fpop1 macro.
   1802   |  mov PC, [BASE-8]
   1803   |  fstp qword [BASE-16]		// Store the result to the first result slot.
   1804   |  jmp ->fff_res1
   1805   |
   1806   |.ffunc_n math_frexp			// math.frexp(x): returns mantissa and exponent via C frexp().
   1807   |  mov RB, BASE			// BASE (rdx) is not callee-save; keep it in RB (rbp).
   1808   |.if X64WIN
   1809   |  lea CARG2, TMP1		// Caveat: CARG2 == BASE
   1810   |.else
   1811   |  lea CARG1, TMP1			// int *exp out-parameter points to TMP1.
   1812   |.endif
   1813   |  call extern frexp
   1814   |  mov BASE, RB			// Restore BASE after the C call.
   1815   |  mov RBd, TMP1d			// Exponent written by frexp().
   1816   |  mov PC, [BASE-8]
   1817   |  movsd qword [BASE-16], xmm0	// First result: mantissa.
   1818   |.if DUALNUM
   1819   |  setint RB
   1820   |  mov [BASE-8], RB			// Second result: exponent as integer.
   1821   |.else
   1822   |  cvtsi2sd xmm1, RBd
   1823   |  movsd qword [BASE-8], xmm1		// Second result: exponent as number.
   1824   |.endif
   1825   |  mov RDd, 1+2			// Two results.
   1826   |  jmp ->fff_res
   1827   |
   1828   |.ffunc_n math_modf			// math.modf(x): integral and fractional part via C modf().
   1829   |  mov RB, BASE			// BASE (rdx) is not callee-save; keep it in RB (rbp).
   1830   |.if X64WIN
   1831   |  lea CARG2, [BASE-16]	// Caveat: CARG2 == BASE
   1832   |.else
   1833   |  lea CARG1, [BASE-16]		// modf() writes its out-parameter straight to result slot 1.
   1834   |.endif
   1835   |  call extern modf
   1836   |  mov BASE, RB			// Restore BASE after the C call.
   1837   |  mov PC, [BASE-8]			// Load PC before the slot is overwritten below.
   1838   |  movsd qword [BASE-8], xmm0		// Second result: return value of modf().
   1839   |  mov RDd, 1+2			// Two results.
   1840   |  jmp ->fff_res
   1841   |
   1842   |.macro math_minmax, name, cmovop, sseop	// math.min/max over all args; RA = index of next arg + 1.
   1843   |  .ffunc_1 name			// Need >= 1 arg; with none, [BASE] is not an argument slot.
   1844   |  mov RAd, 2
   1845   |.if DUALNUM
   1846   |  mov RB, [BASE]
   1847   |  checkint RB, >4
   1848   |1:  // Handle integers.
   1849   |  cmp RAd, RDd; jae ->fff_resRB	// All args consumed: return integer in RB.
   1850   |  mov TMPR, [BASE+RA*8-8]
   1851   |  checkint TMPR, >3
   1852   |  cmp RBd, TMPRd
   1853   |  cmovop RB, TMPR			// Keep the smaller/larger of the two.
   1854   |  add RAd, 1
   1855   |  jmp <1
   1856   |3:
   1857   |  ja ->fff_fallback
   1858   |  // Convert intermediate result to number and continue below.
   1859   |  cvtsi2sd xmm0, RBd
   1860   |  jmp >6
   1861   |4:
   1862   |  ja ->fff_fallback
   1863   |.else
   1864   |  checknumtp [BASE], ->fff_fallback
   1865   |.endif
   1866   |
   1867   |  movsd xmm0, qword [BASE]
   1868   |5:  // Handle numbers or integers.
   1869   |  cmp RAd, RDd; jae ->fff_resxmm0	// All args consumed: return number in xmm0.
   1870   |.if DUALNUM
   1871   |  mov RB, [BASE+RA*8-8]
   1872   |  checknumx RB, >6, jb
   1873   |  ja ->fff_fallback
   1874   |  cvtsi2sd xmm1, RBd			// Integer arg: convert to number.
   1875   |  jmp >7
   1876   |.else
   1877   |  checknumtp [BASE+RA*8-8], ->fff_fallback
   1878   |.endif
   1879   |6:
   1880   |  movsd xmm1, qword [BASE+RA*8-8]
   1881   |7:
   1882   |  sseop xmm0, xmm1
   1883   |  add RAd, 1
   1884   |  jmp <5
   1885   |.endmacro
   1886   |
   1887   |  math_minmax math_min, cmovg, minsd
   1888   |  math_minmax math_max, cmovl, maxsd
   1889   |
   1890   |//-- String library -----------------------------------------------------
   1891   |
   1892   |.ffunc string_byte			// Only handle the 1-arg case here.
   1893   |  cmp NARGS:RDd, 1+1;  jne ->fff_fallback
   1894   |  mov STR:RB, [BASE]
   1895   |  checkstr STR:RB, ->fff_fallback
   1896   |  mov PC, [BASE-8]
   1897   |  cmp dword STR:RB->len, 1
   1898   |  jb ->fff_res0			// Return no results for empty string.
   1899   |  movzx RBd, byte STR:RB[1]		// First byte of the payload following the string header.
   1900   |.if DUALNUM
   1901   |  jmp ->fff_resi			// Return the byte as an integer.
   1902   |.else
   1903   |  cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
   1904   |.endif
   1905   |
   1906   |.ffunc string_char			// Only handle the 1-arg case here.
   1907   |  ffgccheck
   1908   |  cmp NARGS:RDd, 1+1;  jne ->fff_fallback	// *Exactly* 1 arg.
   1909   |.if DUALNUM
   1910   |  mov RB, [BASE]
   1911   |  checkint RB, ->fff_fallback
   1912   |.else
   1913   |  checknumtp [BASE], ->fff_fallback
   1914   |  cvttsd2si RBd, qword [BASE]
   1915   |.endif
   1916   |  cmp RBd, 255;  ja ->fff_fallback	// Unsigned compare also rejects negative values.
   1917   |  mov TMP1d, RBd			// Character byte ends up at the lowest address of TMP1.
   1918   |  mov TMPRd, 1			// String length = 1.
   1919   |  lea RD, TMP1			// Points to stack. Little-endian.
   1920   |->fff_newstr:			// In: RD = char *, TMPRd = length, BASE. Creates the string result.
   1921   |  mov L:RB, SAVE_L
   1922   |  mov L:RB->base, BASE
   1923   |  mov CARG3d, TMPRd			// Zero-extended to size_t.
   1924   |  mov CARG2, RD
   1925   |  mov CARG1, L:RB
   1926   |  mov SAVE_PC, PC
   1927   |  call extern lj_str_new		// (lua_State *L, char *str, size_t l)
   1928   |->fff_resstr:			// In: RD = GCstr *, RB = L. Returns the string as single result.
   1929   |  // GCstr * returned in eax (RD).
   1930   |  mov BASE, L:RB->base		// BASE (rdx) was clobbered by the C call.
   1931   |  mov PC, [BASE-8]
   1932   |  settp STR:RD, LJ_TSTR
   1933   |  mov [BASE-16], STR:RD
   1934   |  jmp ->fff_res1
   1935   |
   1936   |.ffunc string_sub			// string.sub(s, start [, end]); TMPRd = end, RAd = start.
   1937   |  ffgccheck
   1938   |  mov TMPRd, -1			// Default end = -1 (last character).
   1939   |  cmp NARGS:RDd, 1+2;  jb ->fff_fallback
   1940   |  jna >1				// Exactly two args: keep the default end.
   1941   |.if DUALNUM
   1942   |  mov TMPR, [BASE+16]
   1943   |  checkint TMPR, ->fff_fallback
   1944   |.else
   1945   |  checknumtp [BASE+16], ->fff_fallback
   1946   |  cvttsd2si TMPRd, qword [BASE+16]
   1947   |.endif
   1948   |1:
   1949   |  mov STR:RB, [BASE]
   1950   |  checkstr STR:RB, ->fff_fallback
   1951   |.if DUALNUM
   1952   |  mov ITYPE, [BASE+8]
   1953   |  mov RAd, ITYPEd			// Must clear hiword for lea below.
   1954   |  sar ITYPE, 47			// Extract the type tag.
   1955   |  cmp ITYPEd, LJ_TISNUM
   1956   |  jne ->fff_fallback
   1957   |.else
   1958   |  checknumtp [BASE+8], ->fff_fallback
   1959   |  cvttsd2si RAd, qword [BASE+8]
   1960   |.endif
   1961   |  mov RCd, STR:RB->len
   1962   |  cmp RCd, TMPRd			// len < end? (unsigned compare)
   1963   |  jb >5
   1964   |2:
   1965   |  test RAd, RAd			// start <= 0?
   1966   |  jle >7
   1967   |3:
   1968   |  sub TMPRd, RAd			// start > end?
   1969   |  jl ->fff_emptystr
   1970   |  lea RD, [STR:RB+RAd+#STR-1]	// Pointer to the character at (1-based) start.
   1971   |  add TMPRd, 1			// Length = end - start + 1.
   1972   |4:
   1973   |  jmp ->fff_newstr
   1974   |
   1975   |5:  // Negative end or overflow.
   1976   |  jl >6				// Flags from cmp above: signed len < end means overflow.
   1977   |  lea TMPRd, [TMPRd+RCd+1]		// end = end+(len+1)
   1978   |  jmp <2
   1979   |6:  // Overflow.
   1980   |  mov TMPRd, RCd			// end = len
   1981   |  jmp <2
   1982   |
   1983   |7:  // Negative start or underflow.
   1984   |  je >8				// start == 0 is treated like underflow.
   1985   |  add RAd, RCd			// start = start+(len+1)
   1986   |  add RAd, 1
   1987   |  jg <3				// start > 0?
   1988   |8:  // Underflow.
   1989   |  mov RAd, 1				// start = 1
   1990   |  jmp <3
   1991   |
   1992   |->fff_emptystr:  // Range underflow.
   1993   |  xor TMPRd, TMPRd			// Zero length. Any ptr in RD is ok.
   1994   |  jmp <4
   1995   |
   1996   |.macro ffstring_op, name		// string.<name>(s) via lj_buf_putstr_<name> on the global tmpbuf.
   1997   |  .ffunc_1 string_ .. name
   1998   |  ffgccheck
   1999   |.if X64WIN
   2000   |  mov STR:TMPR, [BASE]
   2001   |  checkstr STR:TMPR, ->fff_fallback
   2002   |.else
   2003   |  mov STR:CARG2, [BASE]
   2004   |  checkstr STR:CARG2, ->fff_fallback
   2005   |.endif
   2006   |  mov L:RB, SAVE_L
   2007   |   lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
   2008   |  mov L:RB->base, BASE
   2009   |.if X64WIN
   2010   |  mov STR:CARG2, STR:TMPR		// Caveat: CARG2 == BASE
   2011   |.endif
   2012   |   mov RC, SBUF:CARG1->b
   2013   |   mov SBUF:CARG1->L, L:RB
   2014   |   mov SBUF:CARG1->p, RC		// Reset the buffer: write pointer p = start b.
   2015   |  mov SAVE_PC, PC
   2016   |  call extern lj_buf_putstr_ .. name
   2017   |  mov CARG1, rax			// Returned value is passed on to lj_buf_tostr.
   2018   |  call extern lj_buf_tostr
   2019   |  jmp ->fff_resstr			// rax is returned as the string result; RB still holds L.
   2020   |.endmacro
   2021   |
   2022   |ffstring_op reverse
   2023   |ffstring_op lower
   2024   |ffstring_op upper
   2025   |
   2026   |//-- Bit library --------------------------------------------------------
   2027   |
   2028   |.macro .ffunc_bit, name, kind, fdef	// Prologue for bit.*: first arg converted to int32 in RBd.
   2029   |  fdef name
   2030   |.if kind == 2
   2031   |  sseconst_tobit xmm1, RB		// kind 2: load the constant first, it stays live in xmm1.
   2032   |.endif
   2033   |.if DUALNUM
   2034   |  mov RB, [BASE]
   2035   |  checkint RB, >1
   2036   |.if kind > 0
   2037   |  jmp >2				// Integer arg: continue after the macro.
   2038   |.else
   2039   |  jmp ->fff_resbit			// kind 0: an integer arg is already the result.
   2040   |.endif
   2041   |1:
   2042   |  ja ->fff_fallback			// Not a number.
   2043   |  movd xmm0, RB
   2044   |.else
   2045   |  checknumtp [BASE], ->fff_fallback
   2046   |  movsd xmm0, qword [BASE]
   2047   |.endif
   2048   |.if kind < 2
   2049   |  sseconst_tobit xmm1, RB
   2050   |.endif
   2051   |  addsd xmm0, xmm1			// Adding the tobit constant leaves the int32 in the low dword.
   2052   |  movd RBd, xmm0
   2053   |2:
   2054   |.endmacro
   2055   |
   2056   |.macro .ffunc_bit, name, kind		// Two-argument form: defaults fdef to .ffunc_1.
   2057   |  .ffunc_bit name, kind, .ffunc_1
   2058   |.endmacro
   2059   |
   2060   |.ffunc_bit bit_tobit, 0		// bit.tobit(x): the prologue already did the conversion.
   2061   |  jmp ->fff_resbit
   2062   |
   2063   |.macro .ffunc_bit_op, name, ins	// Variadic bit op: folds all args into RBd with <ins>.
   2064   |  .ffunc_bit name, 2
   2065   |  mov TMPRd, NARGS:RDd		// Save for fallback.
   2066   |  lea RD, [BASE+NARGS:RD*8-16]	// RD = pointer to the last argument.
   2067   |1:
   2068   |  cmp RD, BASE
   2069   |  jbe ->fff_resbit			// Walked back down to arg 1: done.
   2070   |.if DUALNUM
   2071   |  mov RA, [RD]
   2072   |  checkint RA, >2
   2073   |  ins RBd, RAd
   2074   |  sub RD, 8
   2075   |  jmp <1
   2076   |2:
   2077   |  ja ->fff_fallback_bit_op		// Not a number.
   2078   |  movd xmm0, RA
   2079   |.else
   2080   |  checknumtp [RD], ->fff_fallback_bit_op
   2081   |  movsd xmm0, qword [RD]
   2082   |.endif
   2083   |  addsd xmm0, xmm1			// xmm1 = tobit constant loaded by the prologue.
   2084   |  movd RAd, xmm0
   2085   |  ins RBd, RAd
   2086   |  sub RD, 8
   2087   |  jmp <1
   2088   |.endmacro
   2089   |
   2090   |.ffunc_bit_op bit_band, and
   2091   |.ffunc_bit_op bit_bor, or
   2092   |.ffunc_bit_op bit_bxor, xor
   2093   |
   2094   |.ffunc_bit bit_bswap, 1		// bit.bswap(x): byte-swap the int32 in RBd.
   2095   |  bswap RBd
   2096   |  jmp ->fff_resbit
   2097   |
   2098   |.ffunc_bit bit_bnot, 1		// bit.bnot(x): invert the int32 in RBd.
   2099   |  not RBd
   2100   |.if DUALNUM
   2101   |  jmp ->fff_resbit
   2102   |.else
   2103   |->fff_resbit:			// Non-DUALNUM: return the int32 in RBd as a number.
   2104   |  cvtsi2sd xmm0, RBd
   2105   |  jmp ->fff_resxmm0
   2106   |.endif
   2107   |
   2108   |->fff_fallback_bit_op:		// Fallback for .ffunc_bit_op, which reuses RD as a pointer.
   2109   |  mov NARGS:RDd, TMPRd		// Restore for fallback
   2110   |  jmp ->fff_fallback
   2111   |
   2112   |.macro .ffunc_bit_sh, name, ins	// bit shift/rotate: RBd = value, cl = shift count.
   2113   |.if DUALNUM
   2114   |  .ffunc_bit name, 1, .ffunc_2
   2115   |  // Note: no inline conversion from number for 2nd argument!
   2116   |  mov RA, [BASE+8]
   2117   |  checkint RA, ->fff_fallback
   2118   |.else
   2119   |  .ffunc_nn name
   2120   |  sseconst_tobit xmm2, RB
   2121   |  addsd xmm0, xmm2			// Convert both arguments with the tobit constant.
   2122   |  addsd xmm1, xmm2
   2123   |  movd RBd, xmm0
   2124   |  movd RAd, xmm1
   2125   |.endif
   2126   |  ins RBd, cl			// Assumes RA is ecx.
   2127   |  jmp ->fff_resbit
   2128   |.endmacro
   2129   |
   2130   |.ffunc_bit_sh bit_lshift, shl
   2131   |.ffunc_bit_sh bit_rshift, shr
   2132   |.ffunc_bit_sh bit_arshift, sar
   2133   |.ffunc_bit_sh bit_rol, rol
   2134   |.ffunc_bit_sh bit_ror, ror
   2135   |
   2136   |//-----------------------------------------------------------------------
   2137   |
   2138   |->fff_fallback_2:			// Fallback entry that forces nargs = 2.
   2139   |  mov NARGS:RDd, 1+2			// Other args are ignored, anyway.
   2140   |  jmp ->fff_fallback
   2141   |->fff_fallback_1:			// Fallback entry that forces nargs = 1.
   2142   |  mov NARGS:RDd, 1+1			// Other args are ignored, anyway.
   2143   |->fff_fallback:			// Call fast function fallback handler.
   2144   |  // BASE = new base, RD = nargs+1
   2145   |  mov L:RB, SAVE_L
   2146   |  mov PC, [BASE-8]			// Fallback may overwrite PC.
   2147   |  mov SAVE_PC, PC			// Redundant (but a defined value).
   2148   |  mov L:RB->base, BASE
   2149   |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8 = new top.
   2150   |  lea RA, [RD+8*LUA_MINSTACK]	// Ensure enough space for handler.
   2151   |  mov L:RB->top, RD
   2152   |  mov CFUNC:RD, [BASE-16]		// Function object of the current frame.
   2153   |  cleartp CFUNC:RD
   2154   |  cmp RA, L:RB->maxstack
   2155   |  ja >5				// Need to grow stack.
   2156   |  mov CARG1, L:RB
   2157   |  call aword CFUNC:RD->f		// (lua_State *L)
   2158   |  mov BASE, L:RB->base
   2159   |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
   2160   |  test RDd, RDd; jg ->fff_res	// Returned nresults+1?
   2161   |1:
   2162   |  mov RA, L:RB->top
   2163   |  sub RA, BASE
   2164   |  shr RAd, 3				// Number of stack slots between base and top.
   2165   |  test RDd, RDd			// Flags survive the lea/mov below until the jne.
   2166   |  lea NARGS:RDd, [RAd+1]
   2167   |  mov LFUNC:RB, [BASE-16]
   2168   |  jne ->vm_call_tail			// Returned -1?
   2169   |  cleartp LFUNC:RB
   2170   |  ins_callt				// Returned 0: retry fast path.
   2171   |
   2172   |// Reconstruct previous base for vmeta_call during tailcall.
   2173   |->vm_call_tail:
   2174   |  mov RA, BASE			// RA = current base; BASE is moved to the previous frame below.
   2175   |  test PCd, FRAME_TYPE
   2176   |  jnz >3				// Frame type bits set: delta is stored in the frame link.
   2177   |  movzx RBd, PC_RA
   2178   |  neg RB
   2179   |  lea BASE, [BASE+RB*8-16]		// base = base - (RB+2)*8
   2180   |  jmp ->vm_call_dispatch		// Resolve again for tailcall.
   2181   |3:
   2182   |  mov RB, PC
   2183   |  and RB, -8				// Strip the low three bits to get the delta.
   2184   |  sub BASE, RB
   2185   |  jmp ->vm_call_dispatch		// Resolve again for tailcall.
   2186   |
   2187   |5:  // Grow stack for fallback handler.
   2188   |  mov CARG2d, LUA_MINSTACK
   2189   |  mov CARG1, L:RB
   2190   |  call extern lj_state_growstack	// (lua_State *L, int n)
   2191   |  mov BASE, L:RB->base		// Reload BASE from L after the call.
   2192   |  xor RDd, RDd			// Simulate a return 0.
   2193   |  jmp <1				// Dumb retry (goes through ff first).
   2194   |
   2195   |->fff_gcstep:			// Call GC step function.
   2196   |  // BASE = new base, RD = nargs+1
   2197   |  pop RB				// Must keep stack at same level.
   2198   |  mov TMP1, RB			// Save return address
   2199   |  mov L:RB, SAVE_L
   2200   |  mov SAVE_PC, PC			// Redundant (but a defined value).
   2201   |  mov L:RB->base, BASE
   2202   |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8 = top.
   2203   |  mov CARG1, L:RB
   2204   |  mov L:RB->top, RD
   2205   |  call extern lj_gc_step		// (lua_State *L)
   2206   |  mov BASE, L:RB->base
   2207   |  mov RD, L:RB->top
   2208   |  sub RD, BASE
   2209   |  shr RDd, 3
   2210   |  add NARGS:RDd, 1			// Recompute RD = nargs+1 from top - base.
   2211   |  mov RB, TMP1
   2212   |  push RB				// Restore return address.
   2213   |  ret
   2214   |
   2215   |//-----------------------------------------------------------------------
   2216   |//-- Special dispatch targets -------------------------------------------
   2217   |//-----------------------------------------------------------------------
   2218   |
   2219   |->vm_record:				// Dispatch target for recording phase.
   2220   |.if JIT
   2221   |  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
   2222   |  test RDL, HOOK_VMEVENT		// No recording while in vmevent.
   2223   |  jnz >5
   2224   |  // Decrement the hookcount for consistency, but always do the call.
   2225   |  test RDL, HOOK_ACTIVE
   2226   |  jnz >1
   2227   |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
   2228   |  jz >1
   2229   |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
   2230   |  jmp >1				// Labels 1 and 5 are the next ones below (in vm_inshook).
   2231   |.endif
   2232   |
   2233   |->vm_rethook:			// Dispatch target for return hooks.
   2234   |  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
   2235   |  test RDL, HOOK_ACTIVE		// Hook already active?
   2236   |  jnz >5				// Yes: just re-dispatch to the static instruction.
   2237   |  jmp >1				// No: call lj_dispatch_ins (label 1 in vm_inshook).
   2238   |
   2239   |->vm_inshook:			// Dispatch target for instr/line hooks.
   2240   |  movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
   2241   |  test RDL, HOOK_ACTIVE		// Hook already active?
   2242   |  jnz >5
   2243   |
   2244   |  test RDL, LUA_MASKLINE|LUA_MASKCOUNT
   2245   |  jz >5				// Neither line nor count hook set.
   2246   |  dec dword [DISPATCH+DISPATCH_GL(hookcount)]
   2247   |  jz >1				// Count reached zero: call the dispatcher.
   2248   |  test RDL, LUA_MASKLINE
   2249   |  jz >5
   2250   |1:
   2251   |  mov L:RB, SAVE_L
   2252   |  mov L:RB->base, BASE
   2253   |  mov CARG2, PC			// Caveat: CARG2 == BASE
   2254   |  mov CARG1, L:RB
   2255   |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
   2256   |  call extern lj_dispatch_ins	// (lua_State *L, const BCIns *pc)
   2257   |3:
   2258   |  mov BASE, L:RB->base		// Reload BASE from L after the call.
   2259   |4:
   2260   |  movzx RAd, PC_RA			// Re-decode the operands of the current instruction.
   2261   |5:
   2262   |  movzx OP, PC_OP
   2263   |  movzx RDd, PC_RD
   2264   |  jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]	// Re-dispatch to static ins.
   2265   |
   2266   |->cont_hook:				// Continue from hook yield.
   2267   |  add PC, 4				// Advance PC by one bytecode instruction.
   2268   |  mov RA, [RB-40]			// Value saved in the continuation frame at RB-40.
   2269   |  mov MULTRES, RAd			// Restore MULTRES for *M ins.
   2270   |  jmp <4				// Re-decode and re-dispatch (label 4 in vm_inshook).
   2271   |
   2272   |->vm_hotloop:			// Hot loop counter underflow.
   2273   |.if JIT
   2274   |  mov LFUNC:RB, [BASE-16]		// Same as curr_topL(L).
   2275   |  cleartp LFUNC:RB
   2276   |  mov RB, LFUNC:RB->pc
   2277   |  movzx RDd, byte [RB+PC2PROTO(framesize)]
   2278   |  lea RD, [BASE+RD*8]		// top = base + framesize slots.
   2279   |  mov L:RB, SAVE_L
   2280   |  mov L:RB->base, BASE
   2281   |  mov L:RB->top, RD
   2282   |  mov CARG2, PC
   2283   |  lea CARG1, [DISPATCH+GG_DISP2J]	// CARG1 = jit_State *J.
   2284   |  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
   2285   |  mov SAVE_PC, PC
   2286   |  call extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
   2287   |  jmp <3				// Reload BASE and re-dispatch (label 3 in vm_inshook).
   2288   |.endif
   2289   |
   2290   |->vm_callhook:			// Dispatch target for call hooks.
   2291   |  mov SAVE_PC, PC
   2292   |.if JIT
   2293   |  jmp >1				// Skip the hot call marker below.
   2294   |.endif
   2295   |
   2296   |->vm_hotcall:			// Hot call counter underflow.
   2297   |.if JIT
   2298   |  mov SAVE_PC, PC
   2299   |  or PC, 1				// Marker for hot call.
   2300   |1:
   2301   |.endif
   2302   |  lea RD, [BASE+NARGS:RD*8-8]	// RD = BASE + nargs*8 = top.
   2303   |  mov L:RB, SAVE_L
   2304   |  mov L:RB->base, BASE
   2305   |  mov L:RB->top, RD
   2306   |  mov CARG2, PC
   2307   |  mov CARG1, L:RB
   2308   |  call extern lj_dispatch_call	// (lua_State *L, const BCIns *pc)
   2309   |  // ASMFunction returned in eax/rax (RD).
   2310   |  mov SAVE_PC, 0			// Invalidate for subsequent line hook.
   2311   |.if JIT
   2312   |  and PC, -2				// Clear the hot call marker again.
   2313   |.endif
   2314   |  mov BASE, L:RB->base
   2315   |  mov RA, RD				// Keep the returned target while RD is recomputed.
   2316   |  mov RD, L:RB->top
   2317   |  sub RD, BASE
   2318   |  mov RB, RA
   2319   |  movzx RAd, PC_RA
   2320   |  shr RDd, 3
   2321   |  add NARGS:RDd, 1			// RD = nargs+1 from top - base.
   2322   |  jmp RB				// Continue at the returned ASMFunction.
   2323   |
   2324   |->cont_stitch:			// Trace stitching.
   2325   |.if JIT
   2326   |  // BASE = base, RC = result, RB = mbase
   2327   |  mov TRACE:ITYPE, [RB-40]		// Save previous trace.
   2328   |  cleartp TRACE:ITYPE
   2329   |  mov TMPRd, MULTRES
   2330   |  movzx RAd, PC_RA
   2331   |  lea RA, [BASE+RA*8]		// Call base.
   2332   |  sub TMPRd, 1			// TMPRd = number of results.
   2333   |  jz >2
   2334   |1:  // Move results down.
   2335   |  mov RB, [RC]
   2336   |  mov [RA], RB
   2337   |  add RC, 8
   2338   |  add RA, 8
   2339   |  sub TMPRd, 1
   2340   |  jnz <1
   2341   |2:
   2342   |  movzx RCd, PC_RA
   2343   |  movzx RBd, PC_RB
   2344   |  add RC, RB
   2345   |  lea RC, [BASE+RC*8-8]		// RC = end of the wanted results (RA+RB-1 slots from BASE).
   2346   |3:
   2347   |  cmp RC, RA
   2348   |  ja >9				// More results wanted?
   2349   |
   2350   |  test TRACE:ITYPE, TRACE:ITYPE
   2351   |  jz ->cont_nop			// No previous trace.
   2352   |  movzx RBd, word TRACE:ITYPE->traceno
   2353   |  movzx RDd, word TRACE:ITYPE->link
   2354   |  cmp RDd, RBd
   2355   |  je ->cont_nop			// Blacklisted.
   2356   |  test RDd, RDd
   2357   |  jne =>BC_JLOOP			// Jump to stitched trace.
   2358   |
   2359   |  // Stitch a new trace to the previous trace.
   2360   |  mov [DISPATCH+DISPATCH_J(exitno)], RBd	// 32 bit store, same width as in vm_exit_handler.
   2361   |  mov L:RB, SAVE_L
   2362   |  mov L:RB->base, BASE
   2363   |  mov CARG2, PC
   2364   |  lea CARG1, [DISPATCH+GG_DISP2J]	// CARG1 = jit_State *J.
   2365   |  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
   2366   |  call extern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
   2367   |  mov BASE, L:RB->base
   2368   |  jmp ->cont_nop
   2369   |
   2370   |9:  // Fill up results with nil.
   2371   |  mov aword [RA], LJ_TNIL
   2372   |  add RA, 8
   2373   |  jmp <3
   2374   |.endif
   2375   |
   2376   |->vm_profhook:			// Dispatch target for profiler hook.
   2377 #if LJ_HASPROFILE
   2378   |  mov L:RB, SAVE_L
   2379   |  mov L:RB->base, BASE
   2380   |  mov CARG2, PC			// Caveat: CARG2 == BASE
   2381   |  mov CARG1, L:RB
   2382   |  call extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
   2383   |  mov BASE, L:RB->base		// Reload BASE from L after the call.
   2384   |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
   2385   |  sub PC, 4				// Step PC back by one bytecode instruction.
   2386   |  jmp ->cont_nop
   2387 #endif
   2388   |
   2389   |//-----------------------------------------------------------------------
   2390   |//-- Trace exit handler -------------------------------------------------
   2391   |//-----------------------------------------------------------------------
   2392   |
   2393   |// Called from an exit stub with the exit number on the stack.
   2394   |// The 16 bit exit number is stored with two (sign-extended) push imm8.
   2395   |->vm_exit_handler:
   2396   |.if JIT
   2397   |  push r13; push r12
   2398   |  push r11; push r10; push r9; push r8
   2399   |  push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp	// rbp = rsp before the two imm8 pushes (9*8+16).
   2400   |  push rbx; push rdx; push rcx; push rax
   2401   |  movzx RCd, byte [rbp-8]		// Reconstruct exit number.
   2402   |  mov RCH, byte [rbp-16]
   2403   |  mov [rbp-8], r15; mov [rbp-16], r14	// Reuse the two exit number slots for r15/r14.
   2404   |  // DISPATCH is preserved on-trace in LJ_GC64 mode.
   2405   |  mov RAd, [DISPATCH+DISPATCH_GL(vmstate)]	// Get trace number.
   2406   |  set_vmstate EXIT
   2407   |  mov [DISPATCH+DISPATCH_J(exitno)], RCd
   2408   |  mov [DISPATCH+DISPATCH_J(parent)], RAd
   2409   |.if X64WIN
   2410   |  sub rsp, 16*8+4*8			// Room for SSE regs + save area.
   2411   |.else
   2412   |  sub rsp, 16*8			// Room for SSE regs.
   2413   |.endif
   2414   |  add rbp, -128			// rbp = address of the saved rax; SSE regs go right below.
   2415   |  movsd qword [rbp-8],   xmm15; movsd qword [rbp-16],  xmm14
   2416   |  movsd qword [rbp-24],  xmm13; movsd qword [rbp-32],  xmm12
   2417   |  movsd qword [rbp-40],  xmm11; movsd qword [rbp-48],  xmm10
   2418   |  movsd qword [rbp-56],  xmm9;  movsd qword [rbp-64],  xmm8
   2419   |  movsd qword [rbp-72],  xmm7;  movsd qword [rbp-80],  xmm6
   2420   |  movsd qword [rbp-88],  xmm5;  movsd qword [rbp-96],  xmm4
   2421   |  movsd qword [rbp-104], xmm3;  movsd qword [rbp-112], xmm2
   2422   |  movsd qword [rbp-120], xmm1;  movsd qword [rbp-128], xmm0
   2423   |  // Caveat: RB is rbp.
   2424   |  mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
   2425   |  mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
   2426   |  mov aword [DISPATCH+DISPATCH_J(L)], L:RB
   2427   |  mov L:RB->base, BASE
   2428   |.if X64WIN
   2429   |  lea CARG2, [rsp+4*8]		// ExitState starts above the 4*8 byte save area.
   2430   |.else
   2431   |  mov CARG2, rsp			// ExitState = saved registers on the stack.
   2432   |.endif
   2433   |  lea CARG1, [DISPATCH+GG_DISP2J]	// CARG1 = jit_State *J.
   2434   |  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
   2435   |  call extern lj_trace_exit		// (jit_State *J, ExitState *ex)
   2436   |  // MULTRES or negated error code returned in eax (RD).
   2437   |  mov RA, L:RB->cframe
   2438   |  and RA, CFRAME_RAWMASK
   2439   |  mov [RA+CFRAME_OFS_L], L:RB	// Set SAVE_L (on-trace resume/yield).
   2440   |  mov BASE, L:RB->base
   2441   |  mov PC, [RA+CFRAME_OFS_PC]	// Get SAVE_PC.
   2442   |  jmp >1				// Enter vm_exit_interp below with RA = C frame.
   2443   |.endif
   2444   |->vm_exit_interp:
   2445   |  // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
   2446   |.if JIT
   2447   |  // Restore additional callee-save registers only used in compiled code.
   2448   |.if X64WIN
   2449   |  lea RA, [rsp+10*16+4*8]		// Direct entry: derive the C frame address from rsp.
   2450   |1:
   2451   |  movdqa xmm15, [RA-10*16]
   2452   |  movdqa xmm14, [RA-9*16]
   2453   |  movdqa xmm13, [RA-8*16]
   2454   |  movdqa xmm12, [RA-7*16]
   2455   |  movdqa xmm11, [RA-6*16]
   2456   |  movdqa xmm10, [RA-5*16]
   2457   |  movdqa xmm9, [RA-4*16]
   2458   |  movdqa xmm8, [RA-3*16]
   2459   |  movdqa xmm7, [RA-2*16]
   2460   |  mov rsp, RA			// Reposition stack to C frame.
   2461   |  movdqa xmm6, [RA-1*16]
   2462   |  mov r15, CSAVE_1
   2463   |  mov r14, CSAVE_2
   2464   |  mov r13, CSAVE_3
   2465   |  mov r12, CSAVE_4
   2466   |.else
   2467   |  lea RA, [rsp+16]			// Direct entry: derive the C frame address from rsp.
   2468   |1:
   2469   |  mov r13, [RA-8]
   2470   |  mov r12, [RA]
   2471   |  mov rsp, RA			// Reposition stack to C frame.
   2472   |.endif
   2473   |  test RDd, RDd; js >9		// Check for error from exit.
   2474   |  mov L:RB, SAVE_L
   2475   |  mov MULTRES, RDd
   2476   |  mov LFUNC:KBASE, [BASE-16]
   2477   |  cleartp LFUNC:KBASE
   2478   |  mov KBASE, LFUNC:KBASE->pc
   2479   |  mov KBASE, [KBASE+PC2PROTO(k)]	// KBASE = constants of the current prototype.
   2480   |  mov L:RB->base, BASE
   2481   |  mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
   2482   |  set_vmstate INTERP
   2483   |  // Modified copy of ins_next which handles function header dispatch, too.
   2484   |  mov RCd, [PC]
   2485   |  movzx RAd, RCH
   2486   |  movzx OP, RCL
   2487   |  add PC, 4
   2488   |  shr RCd, 16
   2489   |  cmp OP, BC_FUNCF			// Function header?
   2490   |  jb >3
   2491   |  cmp OP, BC_FUNCC+2			// Fast function?
   2492   |  jae >4
   2493   |2:
   2494   |  mov RCd, MULTRES			// RC/RD holds nres+1.
   2495   |3:
   2496   |  jmp aword [DISPATCH+OP*8]
   2497   |
   2498   |4:  // Check frame below fast function.
   2499   |  mov RC, [BASE-8]
   2500   |  test RCd, FRAME_TYPE
   2501   |  jnz <2				// Trace stitching continuation?
   2502   |  // Otherwise set KBASE for Lua function below fast function.
   2503   |  movzx RCd, byte [RC-3]		// RA operand byte of the instruction before the frame's PC.
   2504   |  neg RC
   2505   |  mov LFUNC:KBASE, [BASE+RC*8-32]
   2506   |  cleartp LFUNC:KBASE
   2507   |  mov KBASE, LFUNC:KBASE->pc
   2508   |  mov KBASE, [KBASE+PC2PROTO(k)]
   2509   |  jmp <2
   2510   |
   2511   |9:  // Rethrow error from the right C frame.
   2512   |  neg RD				// Make the error code positive again.
   2513   |  mov CARG1, L:RB			// NOTE(review): RB holds L only when coming from vm_exit_handler -- confirm for direct entry.
   2514   |  mov CARG2, RD
   2515   |  call extern lj_err_throw		// (lua_State *L, int errcode)
   2516   |.endif
   2517   |
   2518   |//-----------------------------------------------------------------------
   2519   |//-- Math helper functions ----------------------------------------------
   2520   |//-----------------------------------------------------------------------
   2521   |
   2522   |// FP value rounding. Called by math.floor/math.ceil fast functions
   2523   |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
   2524   |.macro vm_round, name, mode, cond	// mode: 0 = floor, 1 = ceil, 2 = trunc. cond is unused in the body.
   2525   |->name:
   2526   |->name .. _sse:
   2527   |  sseconst_abs xmm2, RD
   2528   |  sseconst_2p52 xmm3, RD
   2529   |  movaps xmm1, xmm0
   2530   |  andpd xmm1, xmm2			// |x|
   2531   |  ucomisd xmm3, xmm1			// No truncation if 2^52 <= |x|.
   2532   |  jbe >1				// Also taken for NaN (unordered): x is returned as is.
   2533   |  andnpd xmm2, xmm0			// Isolate sign bit.
   2534   |.if mode == 2		// trunc(x)?
   2535   |  movaps xmm0, xmm1
   2536   |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
   2537   |  subsd xmm1, xmm3
   2538   |  sseconst_1 xmm3, RD
   2539   |  cmpsd xmm0, xmm1, 1		// |x| < result?
   2540   |  andpd xmm0, xmm3
   2541   |  subsd xmm1, xmm0			// If yes, subtract 1.0.
   2542   |  orpd xmm1, xmm2			// Merge sign bit back in.
   2543   |.else
   2544   |  addsd xmm1, xmm3			// (|x| + 2^52) - 2^52
   2545   |  subsd xmm1, xmm3
   2546   |  orpd xmm1, xmm2			// Merge sign bit back in.
   2547   |  .if mode == 1		// ceil(x)?
   2548   |    sseconst_m1 xmm2, RD		// Must subtract -1 to preserve -0.
   2549   |    cmpsd xmm0, xmm1, 6		// x > result?
   2550   |  .else			// floor(x)?
   2551   |    sseconst_1 xmm2, RD
   2552   |    cmpsd xmm0, xmm1, 1		// x < result?
   2553   |  .endif
   2554   |  andpd xmm0, xmm2			// Compare mask selects the correction or 0.
   2555   |  subsd xmm1, xmm0			// If yes, subtract +-1.
   2556   |.endif
   2557   |  movaps xmm0, xmm1
   2558   |1:
   2559   |  ret
   2560   |.endmacro
   2561   |
   2562   |  vm_round vm_floor, 0, 1
   2563   |  vm_round vm_ceil,  1, JIT
   2564   |  vm_round vm_trunc, 2, JIT
   2565   |
   2566   |// FP modulo x%y. Called by BC_MOD* and vm_arith.
   2567   |->vm_mod:
   2568   |// Args in xmm0/xmm1, return value in xmm0.
   2569   |// Caveat: xmm0-xmm5 and RC (eax) modified!
   2570   |  movaps xmm5, xmm0			// Keep x for the final subtraction.
   2571   |  divsd xmm0, xmm1
   2572   |  sseconst_abs xmm2, RD
   2573   |  sseconst_2p52 xmm3, RD
   2574   |  movaps xmm4, xmm0
   2575   |  andpd xmm4, xmm2			// |x/y|
   2576   |  ucomisd xmm3, xmm4			// No truncation if 2^52 <= |x/y|.
   2577   |  jbe >1
   2578   |  andnpd xmm2, xmm0			// Isolate sign bit.
   2579   |  addsd xmm4, xmm3			// (|x/y| + 2^52) - 2^52
   2580   |  subsd xmm4, xmm3
   2581   |  orpd xmm4, xmm2			// Merge sign bit back in.
   2582   |  sseconst_1 xmm2, RD
   2583   |  cmpsd xmm0, xmm4, 1		// x/y < result?
   2584   |  andpd xmm0, xmm2
   2585   |  subsd xmm4, xmm0			// If yes, subtract 1.0.
   2586   |  movaps xmm0, xmm5
   2587   |  mulsd xmm1, xmm4
   2588   |  subsd xmm0, xmm1			// x - floor(x/y)*y
   2589   |  ret
   2590   |1:
   2591   |  mulsd xmm1, xmm0			// Quotient is used as is, without rounding.
   2592   |  movaps xmm0, xmm5
   2593   |  subsd xmm0, xmm1
   2594   |  ret
   2595   |
   2596   |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
   2597   |->vm_powi_sse:
   2598   |  cmp eax, 1; jle >6			// i<=1?
   2599   |  // Now 1 < (unsigned)i <= 0x80000000.
   2600   |1:  // Handle leading zeros.
   2601   |  test eax, 1; jnz >2
   2602   |  mulsd xmm0, xmm0			// Square the base for each zero low bit.
   2603   |  shr eax, 1
   2604   |  jmp <1
   2605   |2:
   2606   |  shr eax, 1; jz >5			// Only one bit was set: xmm0 is the result.
   2607   |  movaps xmm1, xmm0			// xmm1 accumulates the product.
   2608   |3:  // Handle trailing bits.
   2609   |  mulsd xmm0, xmm0
   2610   |  shr eax, 1; jz >4
   2611   |  jnc <3				// Bit clear: only square.
   2612   |  mulsd xmm1, xmm0
   2613   |  jmp <3
   2614   |4:
   2615   |  mulsd xmm0, xmm1
   2616   |5:
   2617   |  ret
   2618   |6:
   2619   |  je <5				// x^1 ==> x
   2620   |  jb >7				// x^0 ==> 1
   2621   |  neg eax				// Negative exponent: compute x^(-i), then invert.
   2622   |  call <1
   2623   |  sseconst_1 xmm1, RD
   2624   |  divsd xmm1, xmm0			// 1 / x^(-i)
   2625   |  movaps xmm0, xmm1
   2626   |  ret
   2627   |7:
   2628   |  sseconst_1 xmm0, RD
   2629   |  ret
   2630   |
   2631   |//-----------------------------------------------------------------------
   2632   |//-- Miscellaneous functions --------------------------------------------
   2633   |//-----------------------------------------------------------------------
   2634   |
   2635   |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
   2636   |->vm_cpuid:
   2637   |  mov eax, CARG1d			// CPUID function number f.
   2638   |  .if X64WIN; push rsi; mov rsi, CARG2; .endif	// Win64: rsi is callee-save; use it for res.
   2639   |  push rbx				// cpuid overwrites ebx, which is callee-save.
   2640   |  xor ecx, ecx			// ecx = 0 as input for cpuid.
   2641   |  cpuid
   2642   |  mov [rsi], eax			// res[0..3] = eax, ebx, ecx, edx.
   2643   |  mov [rsi+4], ebx
   2644   |  mov [rsi+8], ecx
   2645   |  mov [rsi+12], edx
   2646   |  pop rbx
   2647   |  .if X64WIN; pop rsi; .endif
   2648   |  ret				// eax still holds the value written to res[0].
   2649   |
   2650   |//-----------------------------------------------------------------------
   2651   |//-- Assertions ---------------------------------------------------------
   2652   |//-----------------------------------------------------------------------
   2653   |
   2654   |->assert_bad_for_arg_type:		// Breakpoint trap target.
   2655 #ifdef LUA_USE_ASSERT
   2656   |  int3
   2657 #endif
   2658   |  int3				// Emitted unconditionally, so the label is never empty.
   2659   |
   2660   |//-----------------------------------------------------------------------
   2661   |//-- FFI helper functions -----------------------------------------------
   2662   |//-----------------------------------------------------------------------
   2663   |
   2664   |// Handler for callback functions. Callback slot number in ah/al.
   2665   |->vm_ffi_callback:
   2666   |.if FFI
   2667   |.type CTSTATE, CTState, PC
   2668   |  saveregs_	// ebp/rbp already saved. rbp now holds global_State *.
   2669   |  lea DISPATCH, [rbp+GG_G2DISP]	// 64 bit addressing: must not truncate the pointer.
   2670   |  mov CTSTATE, GL:rbp->ctype_state
   2671   |  movzx eax, ax			// Slot number is in ah/al.
   2672   |  mov CTSTATE->cb.slot, eax
   2673   |  mov CTSTATE->cb.gpr[0], CARG1	// Save the incoming argument registers.
   2674   |  mov CTSTATE->cb.gpr[1], CARG2
   2675   |  mov CTSTATE->cb.gpr[2], CARG3
   2676   |  mov CTSTATE->cb.gpr[3], CARG4
   2677   |  movsd qword CTSTATE->cb.fpr[0], xmm0
   2678   |  movsd qword CTSTATE->cb.fpr[1], xmm1
   2679   |  movsd qword CTSTATE->cb.fpr[2], xmm2
   2680   |  movsd qword CTSTATE->cb.fpr[3], xmm3
   2681   |.if X64WIN
   2682   |  lea rax, [rsp+CFRAME_SIZE+4*8]	// Skip the 4*8 byte area above the frame.
   2683   |.else
   2684   |  lea rax, [rsp+CFRAME_SIZE]
   2685   |  mov CTSTATE->cb.gpr[4], CARG5
   2686   |  mov CTSTATE->cb.gpr[5], CARG6
   2687   |  movsd qword CTSTATE->cb.fpr[4], xmm4
   2688   |  movsd qword CTSTATE->cb.fpr[5], xmm5
   2689   |  movsd qword CTSTATE->cb.fpr[6], xmm6
   2690   |  movsd qword CTSTATE->cb.fpr[7], xmm7
   2691   |.endif
   2692   |  mov CTSTATE->cb.stack, rax		// Address just above the C frame.
   2693   |  mov CARG2, rsp
   2694   |  mov SAVE_PC, CTSTATE		// Any value outside of bytecode is ok.
   2695   |  mov CARG1, CTSTATE
   2696   |  call extern lj_ccallback_enter	// (CTState *cts, void *cf)
   2697   |  // lua_State * returned in eax (RD).
   2698   |  set_vmstate INTERP
   2699   |  mov BASE, L:RD->base
   2700   |  mov RD, L:RD->top
   2701   |  sub RD, BASE
   2702   |  mov LFUNC:RB, [BASE-16]
   2703   |  cleartp LFUNC:RB
   2704   |  shr RD, 3
   2705   |  add RD, 1				// RD = nargs+1 from top - base.
   2706   |  ins_callt
   2707   |.endif
   2708   |
   2709   |->cont_ffi_callback:			// Return from FFI callback.
   2710   |.if FFI
   2711   |  mov L:RA, SAVE_L  // RA = saved lua_State *.
   2712   |  mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]  // CTSTATE = g->ctype_state, addressed relative to DISPATCH.
   2713   |  mov aword CTSTATE->L, L:RA  // cts->L = L.
   2714   |  mov L:RA->base, BASE  // Sync base/top back into L before calling into C.
   2715   |  mov L:RA->top, RB
   2716   |  mov CARG1, CTSTATE
   2717   |  mov CARG2, RC  // Second argument (o) = RC.
   2718   |  call extern lj_ccallback_leave	// (CTState *cts, TValue *o)
   2719   |  mov rax, CTSTATE->cb.gpr[0]  // Load rax and xmm0 from the callback state (presumably the C return value set up by lj_ccallback_leave -- confirm).
   2720   |  movsd xmm0, qword CTSTATE->cb.fpr[0]
   2721   |  jmp ->vm_leave_unw
   2722   |.endif
   2723   |
   2724   |->vm_ffi_call:			// Call C function via FFI. Takes a CCallState * in CARG1; loads argument registers and stack slots from it, calls ->func and stores the return registers back.
   2725   |  // Caveat: needs special frame unwinding, see below.
   2726   |.if FFI
   2727   |  .type CCSTATE, CCallState, rbx  // CCSTATE names rbx, typed as CCallState *.
   2728   |  push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1  // Set up an rbp frame, save rbx at [rbp-8], keep the state pointer in rbx.
   2729   |
   2730   |  // Readjust stack.
   2731   |  mov eax, CCSTATE->spadj
   2732   |  sub rsp, rax  // Reserve spadj bytes of stack.
   2733   |
   2734   |  // Copy stack slots.
   2735   |  movzx ecx, byte CCSTATE->nsp
   2736   |  sub ecx, 1  // ecx = index of the last stack slot.
   2737   |  js >2  // nsp == 0: nothing to copy.
   2738   |1:
   2739   |  mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]  // rax = stack[rcx] from the CCallState.
   2740   |  mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax  // Store it CCALL_SPS_EXTRA slots above rsp.
   2741   |  sub ecx, 1
   2742   |  jns <1  // Loop downwards until index 0 has been copied.
   2743   |2:
   2744   |
   2745   |  movzx eax, byte CCSTATE->nfpr  // eax = nfpr; still in eax at the call below (the SysV ABI uses al as the vector register count for variadic callees).
   2746   |  mov CARG1, CCSTATE->gpr[0]  // Load the integer argument registers.
   2747   |  mov CARG2, CCSTATE->gpr[1]
   2748   |  mov CARG3, CCSTATE->gpr[2]
   2749   |  mov CARG4, CCSTATE->gpr[3]
   2750   |.if not X64WIN
   2751   |  mov CARG5, CCSTATE->gpr[4]
   2752   |  mov CARG6, CCSTATE->gpr[5]
   2753   |.endif
   2754   |  test eax, eax; jz >5  // nfpr == 0: skip all FP register loads.
   2755   |  movaps xmm0, CCSTATE->fpr[0]
   2756   |  movaps xmm1, CCSTATE->fpr[1]
   2757   |  movaps xmm2, CCSTATE->fpr[2]
   2758   |  movaps xmm3, CCSTATE->fpr[3]
   2759   |.if not X64WIN
   2760   |  cmp eax, 4; jbe >5  // Only load xmm4-xmm7 when nfpr > 4.
   2761   |  movaps xmm4, CCSTATE->fpr[4]
   2762   |  movaps xmm5, CCSTATE->fpr[5]
   2763   |  movaps xmm6, CCSTATE->fpr[6]
   2764   |  movaps xmm7, CCSTATE->fpr[7]
   2765   |.endif
   2766   |5:
   2767   |
   2768   |  call aword CCSTATE->func  // Indirect call through the function pointer stored in the state.
   2769   |
   2770   |  mov CCSTATE->gpr[0], rax  // Store the return registers back into the state.
   2771   |  movaps CCSTATE->fpr[0], xmm0
   2772   |.if not X64WIN
   2773   |  mov CCSTATE->gpr[1], rdx  // Non-Windows only: also capture rdx and xmm1.
   2774   |  movaps CCSTATE->fpr[1], xmm1
   2775   |.endif
   2776   |
   2777   |  mov rbx, [rbp-8]; leave; ret  // Restore rbx from its frame slot; leave discards the adjusted stack.
   2778   |.endif
   2779   |// Note: vm_ffi_call must be the last function in this object file!
   2780   |
   2781   |//-----------------------------------------------------------------------
   2782 }
   2783 
   2784 /* Generate the code for a single instruction. */
   2785 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   2786 {
   2787   int vk = 0;
   2788   |// Note: aligning all instructions does not pay off.
   2789   |=>defop:
   2790 
   2791   switch (op) {
   2792 
   2793   /* -- Comparison ops ---------------------------------------------------- */
   2794 
   2795   /* Remember: all ops branch for a true comparison, fall through otherwise. */
   2796 
   2797   |.macro jmp_comp, lt, ge, le, gt, target  // Emits exactly one of the four given jump mnemonics to target, chosen at build time from the C variable op.
   2798   ||switch (op) {
   2799   ||case BC_ISLT:
   2800   |   lt target
   2801   ||break;
   2802   ||case BC_ISGE:
   2803   |   ge target
   2804   ||break;
   2805   ||case BC_ISLE:
   2806   |   le target
   2807   ||break;
   2808   ||case BC_ISGT:
   2809   |   gt target
   2810   ||break;
   2811   ||default: break;  /* Shut up GCC. */
   2812   ||}
   2813   |.endmacro
   2814 
   2815   case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:  /* All four ordered comparisons share this template; jmp_comp selects the jump per op. */
   2816     |  // RA = src1, RD = src2, JMP with RD = target
   2817     |  ins_AD
   2818     |  mov ITYPE, [BASE+RA*8]  // ITYPE = raw 64 bit slot of src1.
   2819     |  mov RB, [BASE+RD*8]  // RB = raw 64 bit slot of src2.
   2820     |  mov RA, ITYPE  // Keep unshifted copies of both values in RA/RD.
   2821     |  mov RD, RB
   2822     |  sar ITYPE, 47  // Shift down so the upper bits can be compared against LJ_TISNUM.
   2823     |  sar RB, 47
   2824     |.if DUALNUM
   2825     |  cmp ITYPEd, LJ_TISNUM; jne >7
   2826     |  cmp RBd, LJ_TISNUM; jne >8
   2827     |  add PC, 4  // Both operands are integers. PC += 4 (presumably steps over the following JMP word read via PC_RD below).
   2828     |  cmp RAd, RDd  // Signed 32 bit compare of src1 with src2.
   2829     |  jmp_comp jge, jl, jg, jle, >9  // Inverted conditions: go to 9 (no branch) when the comparison does not hold.
   2830     |6:
   2831     |  movzx RDd, PC_RD  // Comparison holds: fetch the jump operand and branch.
   2832     |  branchPC RD
   2833     |9:
   2834     |  ins_next
   2835     |
   2836     |7:  // RA is not an integer.
   2837     |  ja ->vmeta_comp  // Flags are still those of the compare against LJ_TISNUM.
   2838     |  // RA is a number.
   2839     |  cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
   2840     |  // RA is a number, RD is an integer.
   2841     |  cvtsi2sd xmm0, RDd  // Convert the integer src2 to double; src1 is loaded at label 2.
   2842     |  jmp >2
   2843     |
   2844     |8:  // RA is an integer, RD is not an integer.
   2845     |  ja ->vmeta_comp
   2846     |  // RA is an integer, RD is a number.
   2847     |  cvtsi2sd xmm1, RAd  // Convert the integer src1 to double.
   2848     |  movd xmm0, RD
   2849     |  jmp >3
   2850     |.else
   2851     |  cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp  // Without DUALNUM anything at or above LJ_TISNUM goes to the metamethod path.
   2852     |  cmp RBd, LJ_TISNUM; jae ->vmeta_comp
   2853     |.endif
   2854     |1:
   2855     |  movd xmm0, RD  // xmm0 = src2 bits.
   2856     |2:
   2857     |  movd xmm1, RA  // xmm1 = src1 bits.
   2858     |3:
   2859     |  add PC, 4
   2860     |  ucomisd xmm0, xmm1  // Compares src2 (xmm0) with src1 (xmm1), hence the operand-swapped conditions below.
   2861     |  // Unordered: all of ZF CF PF set, ordered: PF clear.
   2862     |  // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
   2863     |.if DUALNUM
   2864     |  jmp_comp jbe, ja, jb, jae, <9  // Reuse the integer path's tail: 9 = no branch, 6 = take branch.
   2865     |  jmp <6
   2866     |.else
   2867     |  jmp_comp jbe, ja, jb, jae, >1  // Skip the branch when the comparison does not hold.
   2868     |  movzx RDd, PC_RD
   2869     |  branchPC RD
   2870     |1:
   2871     |  ins_next
   2872     |.endif
   2873     break;
   2874 
   2875   case BC_ISEQV: case BC_ISNEV:  /* Equality of two slots. The iseqne_fp / iseqne_end tails below are also entered via goto from the ISEQS/ISEQN/ISEQP cases. */
   2876     vk = op == BC_ISEQV;  /* vk = 1 for the "equal" variant, 0 for "not equal". */
   2877     |  ins_AD	// RA = src1, RD = src2, JMP with RD = target
   2878     |  mov RB, [BASE+RD*8]  // RB = raw slot of src2.
   2879     |  mov ITYPE, [BASE+RA*8]  // ITYPE = raw slot of src1.
   2880     |  add PC, 4
   2881     |  mov RD, RB  // Keep unshifted copies in RD/RA.
   2882     |  mov RA, ITYPE
   2883     |  sar RB, 47  // Shifted copies are compared against type constants below.
   2884     |  sar ITYPE, 47
   2885     |.if DUALNUM
   2886     |  cmp RBd, LJ_TISNUM; jne >7
   2887     |  cmp ITYPEd, LJ_TISNUM; jne >8
   2888     |  cmp RDd, RAd  // Both integers: plain 32 bit compare.
   2889     if (vk) {
   2890       |  jne >9
   2891     } else {
   2892       |  je >9
   2893     }
   2894     |  movzx RDd, PC_RD
   2895     |  branchPC RD
   2896     |9:
   2897     |  ins_next
   2898     |
   2899     |7:  // RD is not an integer.
   2900     |  ja >5
   2901     |  // RD is a number.
   2902     |  movd xmm1, RD
   2903     |  cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
   2904     |  // RD is a number, RA is an integer.
   2905     |  cvtsi2sd xmm0, RAd
   2906     |  jmp >2
   2907     |
   2908     |8:  // RD is an integer, RA is not an integer.
   2909     |  ja >5
   2910     |  // RD is an integer, RA is a number.
   2911     |  cvtsi2sd xmm1, RDd
   2912     |  jmp >1
   2913     |
   2914     |.else
   2915     |  cmp RBd, LJ_TISNUM; jae >5  // Either operand at or above LJ_TISNUM: handle at label 5.
   2916     |  cmp ITYPEd, LJ_TISNUM; jae >5
   2917     |  movd xmm1, RD
   2918     |.endif
   2919     |1:
   2920     |  movd xmm0, RA  // xmm0 = src1 bits.
   2921     |2:
   2922     |  ucomisd xmm0, xmm1
   2923     |4:
   2924   iseqne_fp:  /* Shared tail: flags come from a preceding ucomisd. */
   2925     if (vk) {
   2926       |  jp >2				// Unordered means not equal.
   2927       |  jne >2
   2928     } else {
   2929       |  jp >2				// Unordered means not equal.
   2930       |  je >1
   2931     }
   2932   iseqne_end:  /* Shared tail: defines label 1 (equal) and label 2 (not equal); which one branches depends on vk. */
   2933     if (vk) {
   2934       |1:				// EQ: Branch to the target.
   2935       |  movzx RDd, PC_RD
   2936       |  branchPC RD
   2937       |2:				// NE: Fallthrough to next instruction.
   2938       |.if not FFI
   2939       |3:  // Without FFI, label 3 is just another name for the not-equal outcome.
   2940       |.endif
   2941     } else {
   2942       |.if not FFI
   2943       |3:  // Without FFI, label 3 is just another name for the not-equal outcome.
   2944       |.endif
   2945       |2:				// NE: Branch to the target.
   2946       |  movzx RDd, PC_RD
   2947       |  branchPC RD
   2948       |1:				// EQ: Fallthrough to next instruction.
   2949     }
   2950     if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
   2951 		       op == BC_ISEQN || op == BC_ISNEN)) {  /* These ops already emitted an ins_next at label 9 in DUALNUM builds. */
   2952       |  jmp <9
   2953     } else {
   2954       |  ins_next
   2955     }
   2956     |
   2957     if (op == BC_ISEQV || op == BC_ISNEV) {
   2958       |5:  // Either or both types are not numbers.
   2959       |.if FFI
   2960       |  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
   2961       |  cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
   2962       |.endif
   2963       |  cmp RA, RD  // Full 64 bit compare of the raw slots.
   2964       |  je <1				// Same GCobjs or pvalues?
   2965       |  cmp RBd, ITYPEd
   2966       |  jne <2				// Not the same type?
   2967       |  cmp RBd, LJ_TISTABUD
   2968       |  ja <2				// Different objects and not table/ud?
   2969       |
   2970       |  // Different tables or userdatas. Need to check __eq metamethod.
   2971       |  // Field metatable must be at same offset for GCtab and GCudata!
   2972       |  cleartp TAB:RA
   2973       |  mov TAB:RB, TAB:RA->metatable
   2974       |  test TAB:RB, TAB:RB
   2975       |  jz <2				// No metatable?
   2976       |  test byte TAB:RB->nomm, 1<<MM_eq
   2977       |  jnz <2				// Or 'no __eq' flag set?
   2978       if (vk) {
   2979 	|  xor RBd, RBd			// ne = 0
   2980       } else {
   2981 	|  mov RBd, 1			// ne = 1
   2982       }
   2983       |  jmp ->vmeta_equal		// Handle __eq metamethod.
   2984     } else {
   2985       |.if FFI
   2986       |3:  // With FFI, label 3 is the type-mismatch exit used by the other ISEQ*/ISNE* cases.
   2987       |  cmp ITYPEd, LJ_TCDATA
   2988       if (LJ_DUALNUM && vk) {
   2989 	|  jne <9
   2990       } else {
   2991 	|  jne <2
   2992       }
   2993       |  jmp ->vmeta_equal_cd
   2994       |.endif
   2995     }
   2996     break;
   2997   case BC_ISEQS: case BC_ISNES:  /* Compare a slot with a string constant, then reuse the shared iseqne_end tail. */
   2998     vk = op == BC_ISEQS;
   2999     |  ins_AND	// RA = src, RD = str const, JMP with RD = target
   3000     |  mov RB, [BASE+RA*8]
   3001     |  add PC, 4
   3002     |  checkstr RB, >3  // Not a string: go to label 3.
   3003     |  cmp RB, [KBASE+RD*8]  // Compare RB with the constant slot at KBASE[RD].
   3004   iseqne_test:  /* Also entered via goto from BC_ISEQP/ISNEP when LJ_HASFFI is off; flags come from the preceding cmp. */
   3005     if (vk) {
   3006       |  jne >2
   3007     } else {
   3008       |  je >1
   3009     }
   3010     goto iseqne_end;
   3011   case BC_ISEQN: case BC_ISNEN:  /* Compare a slot with a number constant, then reuse the shared iseqne_fp tail. */
   3012     vk = op == BC_ISEQN;
   3013     |  ins_AD	// RA = src, RD = num const, JMP with RD = target
   3014     |  mov RB, [BASE+RA*8]
   3015     |  add PC, 4
   3016     |.if DUALNUM
   3017     |  checkint RB, >7
   3018     |  mov RD, [KBASE+RD*8]  // RD = constant slot (replaces the constant index).
   3019     |  checkint RD, >8
   3020     |  cmp RBd, RDd  // Both integers: plain 32 bit compare.
   3021     if (vk) {
   3022       |  jne >9
   3023     } else {
   3024       |  je >9
   3025     }
   3026     |  movzx RDd, PC_RD
   3027     |  branchPC RD
   3028     |9:
   3029     |  ins_next
   3030     |
   3031     |7:  // RA is not an integer.
   3032     |  ja >3
   3033     |  // RA is a number.
   3034     |  mov RD, [KBASE+RD*8]
   3035     |  checkint RD, >1
   3036     |  // RA is a number, RD is an integer.
   3037     |  cvtsi2sd xmm0, RDd
   3038     |  jmp >2
   3039     |
   3040     |8:  // RA is an integer, RD is a number.
   3041     |  cvtsi2sd xmm0, RBd
   3042     |  movd xmm1, RD
   3043     |  ucomisd xmm0, xmm1
   3044     |  jmp >4  // Flags already set: skip the memory-operand ucomisd at label 2.
   3045     |1:
   3046     |  movd xmm0, RD
   3047     |.else
   3048     |  checknum RB, >3
   3049     |1:
   3050     |  movsd xmm0, qword [KBASE+RD*8]  // xmm0 = number constant.
   3051     |.endif
   3052     |2:
   3053     |  ucomisd xmm0, qword [BASE+RA*8]  // Compare against the source slot directly in memory.
   3054     |4:
   3055     goto iseqne_fp;
   3056   case BC_ISEQP: case BC_ISNEP:  /* Compare the shifted upper bits of a slot with the primitive type operand. */
   3057     vk = op == BC_ISEQP;
   3058     |  ins_AND	// RA = src, RD = primitive type (~), JMP with RD = target
   3059     |  mov RB, [BASE+RA*8]
   3060     |  sar RB, 47  // Shift down so the low 32 bits can be compared with RD.
   3061     |  add PC, 4
   3062     |  cmp RBd, RDd
   3063     if (!LJ_HASFFI) goto iseqne_test;  /* Without FFI the generic test/tail code suffices. */
   3064     if (vk) {
   3065       |  jne >3
   3066       |  movzx RDd, PC_RD
   3067       |  branchPC RD
   3068       |2:
   3069       |  ins_next
   3070       |3:
   3071       |  cmp RBd, LJ_TCDATA; jne <2  // Mismatch and not cdata: fall through without branching.
   3072       |  jmp ->vmeta_equal_cd
   3073     } else {
   3074       |  je >2
   3075       |  cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd  // Mismatch on a cdata value: defer to vmeta_equal_cd.
   3076       |  movzx RDd, PC_RD
   3077       |  branchPC RD
   3078       |2:
   3079       |  ins_next
   3080     }
   3081     break;
   3082 
   3083   /* -- Unary test and copy ops ------------------------------------------- */
   3084 
   3085   case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:  /* Test a slot against LJ_TISTRUECOND; the *C variants also copy it to RA when branching. */
   3086     |  ins_AD	// RA = dst or unused, RD = src, JMP with RD = target
   3087     |  mov ITYPE, [BASE+RD*8]
   3088     |  add PC, 4
   3089     if (op == BC_ISTC || op == BC_ISFC) {
   3090       |  mov RB, ITYPE  // Keep the unshifted value for the copy below.
   3091     }
   3092     |  sar ITYPE, 47
   3093     |  cmp ITYPEd, LJ_TISTRUECOND
   3094     if (op == BC_IST || op == BC_ISTC) {
   3095       |  jae >1  // IST/ISTC: no branch when at or above LJ_TISTRUECOND (unsigned).
   3096     } else {
   3097       |  jb >1  // ISF/ISFC: no branch when below LJ_TISTRUECOND (unsigned).
   3098     }
   3099     if (op == BC_ISTC || op == BC_ISFC) {
   3100       |  mov [BASE+RA*8], RB  // Copy src to dst only on the branch-taken path.
   3101     }
   3102     |  movzx RDd, PC_RD
   3103     |  branchPC RD
   3104     |1:					// Fallthrough to the next instruction.
   3105     |  ins_next
   3106     break;
   3107 
   3108   case BC_ISTYPE:  /* Type assertion: falls through when the slot's shifted upper bits plus RD sum to zero. */
   3109     |  ins_AD	// RA = src, RD = -type
   3110     |  mov RB, [BASE+RA*8]
   3111     |  sar RB, 47
   3112     |  add RBd, RDd  // Zero result (ZF set) means the type matches, since RD holds -type.
   3113     |  jne ->vmeta_istype
   3114     |  ins_next
   3115     break;
   3116   case BC_ISNUM:  /* Number assertion: anything failing checknumtp goes to vmeta_istype. */
   3117     |  ins_AD	// RA = src, RD = -(TISNUM-1)
   3118     |  checknumtp [BASE+RA*8], ->vmeta_istype  // RD is not read on this path.
   3119     |  ins_next
   3120     break;
   3121 
   3122   /* -- Unary ops --------------------------------------------------------- */
   3123 
   3124   case BC_MOV:  /* Copy one 64 bit slot to another. */
   3125     |  ins_AD	// RA = dst, RD = src
   3126     |  mov RB, [BASE+RD*8]  // Load the whole 8 byte slot.
   3127     |  mov [BASE+RA*8], RB
   3128     |  ins_next_  // Note: uses the ins_next_ variant (trailing underscore), unlike most cases here.
   3129     break;
   3130   case BC_NOT:  /* Branch-free logical not: builds a primitive value in the same ~(x << 47) encoding BC_KPRI stores. */
   3131     |  ins_AD	// RA = dst, RD = src
   3132     |  mov RB, [BASE+RD*8]
   3133     |  sar RB, 47
   3134     |  mov RCd, 2
   3135     |  cmp RB, LJ_TISTRUECOND  // Sets CF when RB is below LJ_TISTRUECOND (unsigned).
   3136     |  sbb RCd, 0  // RC = 2 - CF, i.e. 1 or 2 (presumably the false/true primitives -- confirm against the type constants).
   3137     |  shl RC, 47
   3138     |  not RC
   3139     |  mov [BASE+RA*8], RC
   3140     |  ins_next
   3141     break;
   3142   case BC_UNM:  /* Unary minus: integer negate with overflow fallback (DUALNUM) or sign-bit flip on a double. */
   3143     |  ins_AD	// RA = dst, RD = src
   3144     |  mov RB, [BASE+RD*8]
   3145     |.if DUALNUM
   3146     |  checkint RB, >5
   3147     |  neg RBd
   3148     |  jo >4  // Negation overflowed the 32 bit integer range.
   3149     |  setint RB
   3150     |9:
   3151     |  mov [BASE+RA*8], RB
   3152     |  ins_next
   3153     |4:
   3154     |  mov64 RB, U64x(41e00000,00000000)  // 2^31.
   3155     |  jmp <9
   3156     |5:
   3157     |  ja ->vmeta_unm  // Flags still from checkint: above means not a number.
   3158     |.else
   3159     |  checknum RB, ->vmeta_unm
   3160     |.endif
   3161     |  mov64 RD, U64x(80000000,00000000)  // Mask with only bit 63 set.
   3162     |  xor RB, RD  // Flip bit 63 of the double's bit pattern.
   3163     |.if DUALNUM
   3164     |  jmp <9
   3165     |.else
   3166     |  mov [BASE+RA*8], RB
   3167     |  ins_next
   3168     |.endif
   3169     break;
   3170   case BC_LEN:  /* Length operator: inline for strings, lj_tab_len() for tables, vmeta_len otherwise. */
   3171     |  ins_AD	// RA = dst, RD = src
   3172     |  mov RD, [BASE+RD*8]
   3173     |  checkstr RD, >2  // Not a string: try the table path at label 2.
   3174     |.if DUALNUM
   3175     |  mov RDd, dword STR:RD->len
   3176     |1:
   3177     |  setint RD
   3178     |  mov [BASE+RA*8], RD
   3179     |.else
   3180     |  xorps xmm0, xmm0  // Clear xmm0 before the conversion (avoids a dependency on its old contents).
   3181     |  cvtsi2sd xmm0, dword STR:RD->len
   3182     |1:
   3183     |  movsd qword [BASE+RA*8], xmm0
   3184     |.endif
   3185     |  ins_next
   3186     |2:
   3187     |  cmp ITYPEd, LJ_TTAB; jne ->vmeta_len  // ITYPE is expected to still hold the type from checkstr above.
   3188     |  mov TAB:CARG1, TAB:RD
   3189 #if LJ_52
   3190     |  mov TAB:RB, TAB:RD->metatable
   3191     |  cmp TAB:RB, 0
   3192     |  jnz >9  // Table has a metatable: check for __len at label 9.
   3193     |3:
   3194 #endif
   3195     |->BC_LEN_Z:
   3196     |  mov RB, BASE			// Save BASE.
   3197     |  call extern lj_tab_len		// (GCtab *t)
   3198     |  // Length of table returned in eax (RD).
   3199     |.if DUALNUM
   3200     |  // Nothing to do.
   3201     |.else
   3202     |  cvtsi2sd xmm0, RDd
   3203     |.endif
   3204     |  mov BASE, RB			// Restore BASE.
   3205     |  movzx RAd, PC_RA  // Re-read the RA operand via PC_RA after the C call.
   3206     |  jmp <1  // Share the store and dispatch with the string path.
   3207 #if LJ_52
   3208     |9:  // Check for __len.
   3209     |  test byte TAB:RB->nomm, 1<<MM_len
   3210     |  jnz <3  // 'no __len' flag set: use the plain table length.
   3211     |  jmp ->vmeta_len			// 'no __len' flag NOT set: check.
   3212 #endif
   3213     break;
   3214 
   3215   /* -- Binary ops -------------------------------------------------------- */
   3216 
   3217     |.macro ins_arithpre, sseins, ssereg  // Type-checks both operands, loads the first into xmm0 and applies sseins ssereg, <second operand>.
   3218     |  ins_ABC
   3219     ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);  /* 0 selects the _vn arm, 1 the _nv arm, anything else the _vv arm. */
   3220     ||switch (vk) {
   3221     ||case 0:
   3222     |   checknumtp [BASE+RB*8], ->vmeta_arith_vn
   3223     |   .if DUALNUM
   3224     |     checknumtp [KBASE+RC*8], ->vmeta_arith_vn
   3225     |   .endif
   3226     |   movsd xmm0, qword [BASE+RB*8]  // First operand: stack slot RB.
   3227     |   sseins ssereg, qword [KBASE+RC*8]  // Second operand: constant RC.
   3228     ||  break;
   3229     ||case 1:
   3230     |   checknumtp [BASE+RB*8], ->vmeta_arith_nv
   3231     |   .if DUALNUM
   3232     |     checknumtp [KBASE+RC*8], ->vmeta_arith_nv
   3233     |   .endif
   3234     |   movsd xmm0, qword [KBASE+RC*8]  // First operand: constant RC.
   3235     |   sseins ssereg, qword [BASE+RB*8]  // Second operand: stack slot RB.
   3236     ||  break;
   3237     ||default:
   3238     |   checknumtp [BASE+RB*8], ->vmeta_arith_vv
   3239     |   checknumtp [BASE+RC*8], ->vmeta_arith_vv
   3240     |   movsd xmm0, qword [BASE+RB*8]  // Both operands are stack slots.
   3241     |   sseins ssereg, qword [BASE+RC*8]
   3242     ||  break;
   3243     ||}
   3244     |.endmacro
   3245     |
   3246     |.macro ins_arithdn, intins  // Integer arithmetic: both operands must pass checkint; overflow or a failed check exits to the matching vmeta_arith_*o label.
   3247     |  ins_ABC
   3248     ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);  /* 0 selects the _vno arm, 1 the _nvo arm, anything else the _vvo arm. */
   3249     ||switch (vk) {
   3250     ||case 0:
   3251     |   mov RB, [BASE+RB*8]
   3252     |   mov RC, [KBASE+RC*8]
   3253     |   checkint RB, ->vmeta_arith_vno
   3254     |   checkint RC, ->vmeta_arith_vno
   3255     |   intins RBd, RCd; jo ->vmeta_arith_vno  // Result in RB.
   3256     ||  break;
   3257     ||case 1:
   3258     |   mov RB, [BASE+RB*8]
   3259     |   mov RC, [KBASE+RC*8]
   3260     |   checkint RB, ->vmeta_arith_nvo
   3261     |   checkint RC, ->vmeta_arith_nvo
   3262     |   intins RCd, RBd; jo ->vmeta_arith_nvo  // Operands swapped: result in RC.
   3263     ||  break;
   3264     ||default:
   3265     |   mov RB, [BASE+RB*8]
   3266     |   mov RC, [BASE+RC*8]
   3267     |   checkint RB, ->vmeta_arith_vvo
   3268     |   checkint RC, ->vmeta_arith_vvo
   3269     |   intins RBd, RCd; jo ->vmeta_arith_vvo  // Result in RB.
   3270     ||  break;
   3271     ||}
   3272     ||if (vk == 1) {
   3273     |   setint RC  // Store from whichever register received the result.
   3274     |   mov [BASE+RA*8], RC
   3275     ||} else {
   3276     |   setint RB
   3277     |   mov [BASE+RA*8], RB
   3278     ||}
   3279     |  ins_next
   3280     |.endmacro
   3281     |
   3282     |.macro ins_arithpost  // Stores the double result in xmm0 to the destination slot RA.
   3283     |  movsd qword [BASE+RA*8], xmm0
   3284     |.endmacro
   3285     |
   3286     |.macro ins_arith, sseins  // One-operand form: floating-point only (operate on xmm0, store, dispatch).
   3287     |  ins_arithpre sseins, xmm0
   3288     |  ins_arithpost
   3289     |  ins_next
   3290     |.endmacro
   3291     |
   3292     |.macro ins_arith, intins, sseins  // Two-operand form: integer path under DUALNUM, otherwise the floating-point path.
   3293     |.if DUALNUM
   3294     |  ins_arithdn intins
   3295     |.else
   3296     |  ins_arith, sseins  // Invokes ins_arith with only the SSE instruction.
   3297     |.endif
   3298     |.endmacro
   3299 
   3300     |  // RA = dst, RB = src1 or num const, RC = src2 or num const
   3301   case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:  /* Integer add under DUALNUM, addsd otherwise. */
   3302     |  ins_arith add, addsd
   3303     break;
   3304   case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:  /* Integer sub under DUALNUM, subsd otherwise. */
   3305     |  ins_arith sub, subsd
   3306     break;
   3307   case BC_MULVN: case BC_MULNV: case BC_MULVV:  /* Integer imul under DUALNUM, mulsd otherwise. */
   3308     |  ins_arith imul, mulsd
   3309     break;
   3310   case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:  /* Division only has the one-operand (floating-point) form. */
   3311     |  ins_arith divsd
   3312     break;
   3313   case BC_MODVN:  /* Modulo: operands are loaded into xmm0/xmm1, then ->vm_mod is called. */
   3314     |  ins_arithpre movsd, xmm1  // xmm0 = first operand, xmm1 = second operand.
   3315     |->BC_MODVN_Z:
   3316     |  call ->vm_mod
   3317     |  ins_arithpost
   3318     |  ins_next
   3319     break;
   3320   case BC_MODNV: case BC_MODVV:  /* Same operand setup, then share the MODVN tail. */
   3321     |  ins_arithpre movsd, xmm1
   3322     |  jmp ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
   3323     break;
   3324   case BC_POW:  /* Power operator via the external pow() function. */
   3325     |  ins_arithpre movsd, xmm1  // xmm0 = first operand, xmm1 = second operand.
   3326     |  mov RB, BASE  // Save BASE in RB across the call.
   3327     |  call extern pow
   3328     |  movzx RAd, PC_RA  // Re-read the RA operand via PC_RA after the C call.
   3329     |  mov BASE, RB  // Restore BASE.
   3330     |  ins_arithpost
   3331     |  ins_next
   3332     break;
   3333 
   3334   case BC_CAT:  /* Concatenation via lj_meta_cat(); a non-NULL return continues at vmeta_binop. */
   3335     |  ins_ABC	// RA = dst, RB = src_start, RC = src_end
   3336     |  mov L:CARG1, SAVE_L
   3337     |  mov L:CARG1->base, BASE  // Sync L->base before calling into C.
   3338     |  lea CARG2, [BASE+RC*8]  // top argument = address of slot src_end.
   3339     |  mov CARG3d, RCd
   3340     |  sub CARG3d, RBd  // left argument = src_end - src_start.
   3341     |->BC_CAT_Z:
   3342     |  mov L:RB, L:CARG1  // Keep L in RB across the call.
   3343     |  mov SAVE_PC, PC
   3344     |  call extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
   3345     |  // NULL (finished) or TValue * (metamethod) returned in eax (RC).
   3346     |  mov BASE, L:RB->base  // Reload BASE from L->base.
   3347     |  test RC, RC
   3348     |  jnz ->vmeta_binop
   3349     |  movzx RBd, PC_RB			// Copy result to Stk[RA] from Stk[RB].
   3350     |  movzx RAd, PC_RA
   3351     |  mov RC, [BASE+RB*8]
   3352     |  mov [BASE+RA*8], RC
   3353     |  ins_next
   3354     break;
   3355 
   3356   /* -- Constant ops ------------------------------------------------------ */
   3357 
   3358   case BC_KSTR:  /* Load a string constant, tag it as LJ_TSTR and store it to RA. */
   3359     |  ins_AND	// RA = dst, RD = str const (~)
   3360     |  mov RD, [KBASE+RD*8]  // Fetch the constant from the KBASE table.
   3361     |  settp RD, LJ_TSTR
   3362     |  mov [BASE+RA*8], RD
   3363     |  ins_next
   3364     break;
   3365   case BC_KCDATA:  /* Load a cdata constant, tag it as LJ_TCDATA and store it to RA. Emits no code unless FFI is enabled. */
   3366     |.if FFI
   3367     |  ins_AND	// RA = dst, RD = cdata const (~)
   3368     |  mov RD, [KBASE+RD*8]  // Fetch the constant from the KBASE table.
   3369     |  settp RD, LJ_TCDATA
   3370     |  mov [BASE+RA*8], RD
   3371     |  ins_next
   3372     |.endif
   3373     break;
   3374   case BC_KSHORT:  /* Store a sign-extended 16 bit literal: as an integer under DUALNUM, as a double otherwise. */
   3375     |  ins_AD	// RA = dst, RD = signed int16 literal
   3376     |.if DUALNUM
   3377     |  movsx RDd, RDW  // Sign-extend the 16 bit literal to 32 bits.
   3378     |  setint RD
   3379     |  mov [BASE+RA*8], RD
   3380     |.else
   3381     |  movsx RDd, RDW			// Sign-extend literal.
   3382     |  cvtsi2sd xmm0, RDd  // Convert to double.
   3383     |  movsd qword [BASE+RA*8], xmm0
   3384     |.endif
   3385     |  ins_next
   3386     break;
   3387   case BC_KNUM:  /* Copy a number constant from the KBASE table to slot RA. */
   3388     |  ins_AD	// RA = dst, RD = num const
   3389     |  movsd xmm0, qword [KBASE+RD*8]
   3390     |  movsd qword [BASE+RA*8], xmm0
   3391     |  ins_next
   3392     break;
   3393   case BC_KPRI:  /* Store a primitive value, built as ~(RD << 47). */
   3394     |  ins_AD	// RA = dst, RD = primitive type (~)
   3395     |  shl RD, 47
   3396     |  not RD  // Invert all 64 bits of the shifted operand.
   3397     |  mov [BASE+RA*8], RD
   3398     |  ins_next
   3399     break;
   3400   case BC_KNIL:  /* Fill the slot range dst_start..dst_end (inclusive) with LJ_TNIL. */
   3401     |  ins_AD	// RA = dst_start, RD = dst_end
   3402     |  lea RA, [BASE+RA*8+8]  // RA = address of the slot after dst_start.
   3403     |  lea RD, [BASE+RD*8]  // RD = address of slot dst_end.
   3404     |  mov RB, LJ_TNIL
   3405     |  mov [RA-8], RB			// Sets minimum 2 slots.
   3406     |1:
   3407     |  mov [RA], RB
   3408     |  add RA, 8
   3409     |  cmp RA, RD
   3410     |  jbe <1  // Inclusive upper bound: loop while RA <= RD (unsigned).
   3411     |  ins_next
   3412     break;
   3413 
   3414   /* -- Upvalue and function ops ------------------------------------------ */
   3415 
   3416   case BC_UGET:  /* Read an upvalue of the current function into slot RA. */
   3417     |  ins_AD	// RA = dst, RD = upvalue #
   3418     |  mov LFUNC:RB, [BASE-16]  // Tagged function value from the slot at BASE-16.
   3419     |  cleartp LFUNC:RB
   3420     |  mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]  // RB = fn->uvptr[RD].
   3421     |  mov RB, UPVAL:RB->v  // RB = pointer to the upvalue's value.
   3422     |  mov RD, [RB]
   3423     |  mov [BASE+RA*8], RD
   3424     |  ins_next
   3425     break;
   3426   case BC_USETV:  /* Store a stack slot into an upvalue, with a GC write barrier check for closed upvalues. */
   3427 #define TV2MARKOFS \
   3428  ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
   3429     |  ins_AD	// RA = upvalue #, RD = src
   3430     |  mov LFUNC:RB, [BASE-16]
   3431     |  cleartp LFUNC:RB
   3432     |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]  // RB = fn->uvptr[RA].
   3433     |  cmp byte UPVAL:RB->closed, 0  // Flags are consumed by the jz four lines below; the movs in between do not modify flags.
   3434     |  mov RB, UPVAL:RB->v
   3435     |  mov RA, [BASE+RD*8]
   3436     |  mov [RB], RA  // Perform the store first, then decide whether a barrier is needed.
   3437     |  jz >1  // closed == 0: no barrier check.
   3438     |  // Check barrier for closed upvalue.
   3439     |  test byte [RB+TV2MARKOFS], LJ_GC_BLACK		// isblack(uv)
   3440     |  jnz >2
   3441     |1:
   3442     |  ins_next
   3443     |
   3444     |2:  // Upvalue is black. Check if new value is collectable and white.
   3445     |  mov RD, RA
   3446     |  sar RD, 47
   3447     |  sub RDd, LJ_TISGCV
   3448     |  cmp RDd, LJ_TNUMX - LJ_TISGCV			// tvisgcv(v)
   3449     |  jbe <1
   3450     |  cleartp GCOBJ:RA
   3451     |  test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(v)
   3452     |  jz <1
   3453     |  // Crossed a write barrier. Move the barrier forward.
   3454     |.if not X64WIN
   3455     |  mov CARG2, RB  // tv argument = pointer to the upvalue's value.
   3456     |  mov RB, BASE			// Save BASE.
   3457     |.else
   3458     |  xchg CARG2, RB			// Save BASE (CARG2 == BASE).
   3459     |.endif
   3460     |  lea GL:CARG1, [DISPATCH+GG_DISP2G]  // g argument = DISPATCH + GG_DISP2G.
   3461     |  call extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
   3462     |  mov BASE, RB			// Restore BASE.
   3463     |  jmp <1
   3464     break;
   3465 #undef TV2MARKOFS
   3466   case BC_USETS:  /* Store a string constant into an upvalue, with a GC write barrier check. */
   3467     |  ins_AND	// RA = upvalue #, RD = str const (~)
   3468     |  mov LFUNC:RB, [BASE-16]
   3469     |  cleartp LFUNC:RB
   3470     |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]  // RB = fn->uvptr[RA].
   3471     |  mov STR:RA, [KBASE+RD*8]  // RA = untagged string constant.
   3472     |  mov RD, UPVAL:RB->v  // RD = pointer to the upvalue's value.
   3473     |  settp STR:ITYPE, STR:RA, LJ_TSTR  // ITYPE = tagged copy; RA stays untagged for the mark test below.
   3474     |  mov [RD], STR:ITYPE
   3475     |  test byte UPVAL:RB->marked, LJ_GC_BLACK		// isblack(uv)
   3476     |  jnz >2
   3477     |1:
   3478     |  ins_next
   3479     |
   3480     |2:  // Check if string is white and ensure upvalue is closed.
   3481     |  test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES	// iswhite(str)
   3482     |  jz <1
   3483     |  cmp byte UPVAL:RB->closed, 0
   3484     |  jz <1
   3485     |  // Crossed a write barrier. Move the barrier forward.
   3486     |  mov RB, BASE			// Save BASE (CARG2 == BASE).
   3487     |  mov CARG2, RD  // tv argument = pointer to the upvalue's value.
   3488     |  lea GL:CARG1, [DISPATCH+GG_DISP2G]
   3489     |  call extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
   3490     |  mov BASE, RB			// Restore BASE.
   3491     |  jmp <1
   3492     break;
   3493   case BC_USETN:  /* Store a number constant into an upvalue; this case contains no barrier check. */
   3494     |  ins_AD	// RA = upvalue #, RD = num const
   3495     |  mov LFUNC:RB, [BASE-16]
   3496     |  cleartp LFUNC:RB
   3497     |  movsd xmm0, qword [KBASE+RD*8]  // xmm0 = number constant.
   3498     |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]  // RB = fn->uvptr[RA].
   3499     |  mov RA, UPVAL:RB->v
   3500     |  movsd qword [RA], xmm0
   3501     |  ins_next
   3502     break;
   3503   case BC_USETP:  /* Store a primitive value, built as ~(RD << 47), into an upvalue; no barrier check here. */
   3504     |  ins_AD	// RA = upvalue #, RD = primitive type (~)
   3505     |  mov LFUNC:RB, [BASE-16]
   3506     |  cleartp LFUNC:RB
   3507     |  mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]  // RB = fn->uvptr[RA].
   3508     |  shl RD, 47
   3509     |  not RD
   3510     |  mov RA, UPVAL:RB->v
   3511     |  mov [RA], RD
   3512     |  ins_next
   3513     break;
   3514   case BC_UCLO:  /* Branch, and call lj_func_closeuv() first if L->openupval is non-NULL. */
   3515     |  ins_AD	// RA = level, RD = target
   3516     |  branchPC RD			// Do this first to free RD.
   3517     |  mov L:RB, SAVE_L
   3518     |  cmp aword L:RB->openupval, 0
   3519     |  je >1  // openupval is NULL: skip the C call.
   3520     |  mov L:RB->base, BASE
   3521     |  lea CARG2, [BASE+RA*8]		// Caveat: CARG2 == BASE
   3522     |  mov L:CARG1, L:RB		// Caveat: CARG1 == RA
   3523     |  call extern lj_func_closeuv	// (lua_State *L, TValue *level)
   3524     |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
   3525     |1:
   3526     |  ins_next
   3527     break;
   3528 
   3529   case BC_FNEW:  /* Create a closure from a prototype constant via lj_func_newL_gc() and store it to RA. */
   3530     |  ins_AND	// RA = dst, RD = proto const (~) (holding function prototype)
   3531     |  mov L:RB, SAVE_L
   3532     |  mov L:RB->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
   3533     |  mov CARG3, [BASE-16]  // parent argument = current function (tag cleared on the next line).
   3534     |  cleartp CARG3
   3535     |  mov CARG2, [KBASE+RD*8]		// Fetch GCproto *.
   3536     |  mov CARG1, L:RB
   3537     |  mov SAVE_PC, PC
   3538     |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
   3539     |  call extern lj_func_newL_gc
   3540     |  // GCfuncL * returned in eax (RC).
   3541     |  mov BASE, L:RB->base  // Reload BASE from L->base after the call.
   3542     |  movzx RAd, PC_RA  // Re-read the RA operand via PC_RA after the C call.
   3543     |  settp LFUNC:RC, LJ_TFUNC
   3544     |  mov [BASE+RA*8], LFUNC:RC
   3545     |  ins_next
   3546     break;
   3547 
   3548   /* -- Table ops --------------------------------------------------------- */
   3549 
   3550   case BC_TNEW:  /* Create a new table via lj_tab_new(), running a GC step first when gc.total >= gc.threshold. */
   3551     |  ins_AD	// RA = dst, RD = hbits|asize
   3552     |  mov L:RB, SAVE_L
   3553     |  mov L:RB->base, BASE
   3554     |  mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
   3555     |  cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
   3556     |  mov SAVE_PC, PC  // mov leaves the flags from the cmp intact for the jae below.
   3557     |  jae >5
   3558     |1:
   3559     |  mov CARG3d, RDd
   3560     |  and RDd, 0x7ff  // asize = low 11 bits of the operand.
   3561     |  shr CARG3d, 11  // hbits = remaining upper bits.
   3562     |  cmp RDd, 0x7ff
   3563     |  je >3
   3564     |2:
   3565     |  mov L:CARG1, L:RB
   3566     |  mov CARG2d, RDd
   3567     |  call extern lj_tab_new  // (lua_State *L, int32_t asize, uint32_t hbits)
   3568     |  // Table * returned in eax (RC).
   3569     |  mov BASE, L:RB->base
   3570     |  movzx RAd, PC_RA  // Re-read the RA operand via PC_RA after the C call.
   3571     |  settp TAB:RC, LJ_TTAB
   3572     |  mov [BASE+RA*8], TAB:RC
   3573     |  ins_next
   3574     |3:  // Turn 0x7ff into 0x801.
   3575     |  mov RDd, 0x801
   3576     |  jmp <2
   3577     |5:
   3578     |  mov L:CARG1, L:RB
   3579     |  call extern lj_gc_step_fixtop	// (lua_State *L)
   3580     |  movzx RDd, PC_RD  // Re-read the RD operand via PC_RD after the C call.
   3581     |  jmp <1
   3582     break;
   3583   case BC_TDUP:  /* Duplicate a template table via lj_tab_dup(), running a GC step first when gc.total >= gc.threshold. */
   3584     |  ins_AND	// RA = dst, RD = table const (~) (holding template table)
   3585     |  mov L:RB, SAVE_L
   3586     |  mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
   3587     |  mov SAVE_PC, PC
   3588     |  cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
   3589     |  mov L:RB->base, BASE  // mov leaves the flags from the cmp intact for the jae below.
   3590     |  jae >3
   3591     |2:
   3592     |  mov TAB:CARG2, [KBASE+RD*8]	// Caveat: CARG2 == BASE
   3593     |  mov L:CARG1, L:RB		// Caveat: CARG1 == RA
   3594     |  call extern lj_tab_dup		// (lua_State *L, Table *kt)
   3595     |  // Table * returned in eax (RC).
   3596     |  mov BASE, L:RB->base
   3597     |  movzx RAd, PC_RA  // Re-read the RA operand via PC_RA after the C call.
   3598     |  settp TAB:RC, LJ_TTAB
   3599     |  mov [BASE+RA*8], TAB:RC
   3600     |  ins_next
   3601     |3:
   3602     |  mov L:CARG1, L:RB
   3603     |  call extern lj_gc_step_fixtop	// (lua_State *L)
   3604     |  movzx RDd, PC_RD			// Need to reload RD.
   3605     |  not RD  // Re-apply the inversion to the freshly loaded operand (presumably what ins_AND did on entry -- confirm).
   3606     |  jmp <2
   3607     break;
   3608 
   3609   case BC_GGET:  /* Global get: load the function's env table and the string key, then reuse the TGETS lookup. */
   3610     |  ins_AND	// RA = dst, RD = str const (~)
   3611     |  mov LFUNC:RB, [BASE-16]
   3612     |  cleartp LFUNC:RB
   3613     |  mov TAB:RB, LFUNC:RB->env  // RB = fn->env.
   3614     |  mov STR:RC, [KBASE+RD*8]  // RC = string constant (the key).
   3615     |  jmp ->BC_TGETS_Z
   3616     break;
   3617   case BC_GSET:  /* Global set: load the function's env table and the string key, then continue at BC_TSETS_Z. */
   3618     |  ins_AND	// RA = src, RD = str const (~)
   3619     |  mov LFUNC:RB, [BASE-16]
   3620     |  cleartp LFUNC:RB
   3621     |  mov TAB:RB, LFUNC:RB->env  // RB = fn->env.
   3622     |  mov STR:RC, [KBASE+RD*8]  // RC = string constant (the key).
   3623     |  jmp ->BC_TSETS_Z
   3624     break;
   3625 
   3626   case BC_TGETV:  /* Table get with a variable key: inline array-part fast path, string keys go to BC_TGETS_Z, the rest to vmeta_tgetv. */
   3627     |  ins_ABC	// RA = dst, RB = table, RC = key
   3628     |  mov TAB:RB, [BASE+RB*8]
   3629     |  mov RC, [BASE+RC*8]
   3630     |  checktab TAB:RB, ->vmeta_tgetv
   3631     |
   3632     |  // Integer key?
   3633     |.if DUALNUM
   3634     |  checkint RC, >5
   3635     |.else
   3636     |  // Convert number to int and back and compare.
   3637     |  checknum RC, >5
   3638     |  movd xmm0, RC
   3639     |  cvttsd2si RCd, xmm0  // Truncating double -> int32 conversion.
   3640     |  cvtsi2sd xmm1, RCd
   3641     |  ucomisd xmm0, xmm1
   3642     |  jne ->vmeta_tgetv		// Generic numeric key? Use fallback.
   3643     |.endif
   3644     |  cmp RCd, TAB:RB->asize		// Takes care of unordered, too.
   3645     |  jae ->vmeta_tgetv		// Not in array part? Use fallback.
   3646     |  shl RCd, 3  // Index -> byte offset (8 bytes per slot).
   3647     |  add RC, TAB:RB->array  // RC = address of the array slot.
   3648     |  // Get array slot.
   3649     |  mov ITYPE, [RC]
   3650     |  cmp ITYPE, LJ_TNIL		// Avoid overwriting RB in fastpath.
   3651     |  je >2
   3652     |1:
   3653     |  mov [BASE+RA*8], ITYPE
   3654     |  ins_next
   3655     |
   3656     |2:  // Check for __index if table value is nil.
   3657     |  mov TAB:TMPR, TAB:RB->metatable
   3658     |  test TAB:TMPR, TAB:TMPR
   3659     |  jz <1  // No metatable: store the nil.
   3660     |  test byte TAB:TMPR->nomm, 1<<MM_index
   3661     |  jz ->vmeta_tgetv			// 'no __index' flag NOT set: check.
   3662     |  jmp <1
   3663     |
   3664     |5:  // String key?
   3665     |  cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv  // ITYPE is expected to still hold the key's type from the check above.
   3666     |  cleartp STR:RC
   3667     |  jmp ->BC_TGETS_Z
   3668     break;
   3669   case BC_TGETS:  /* Table get, constant string key: walk the hash chain; a nil/missing value consults the metatable's __index flag. */
   3670     |  ins_ABC	// RA = dst, RB = table, RC = str const (~)
   3671     |  mov TAB:RB, [BASE+RB*8]
   3672     |  not RC				// Undo the (~) encoding of the operand.
   3673     |  mov STR:RC, [KBASE+RC*8]
   3674     |  checktab TAB:RB, ->vmeta_tgets
   3675     |->BC_TGETS_Z:	// RB = GCtab *, RC = GCstr *
   3676     |  mov TMPRd, TAB:RB->hmask
   3677     |  and TMPRd, STR:RC->hash		// TMPR = hash & hmask.
   3678     |  imul TMPRd, #NODE			// Scale the index by #NODE to a byte offset.
   3679     |  add NODE:TMPR, TAB:RB->node	// TMPR = first node of the chain.
   3680     |  settp ITYPE, STR:RC, LJ_TSTR	// ITYPE = tagged string key to compare against.
   3681     |1:  // Compare the key of the current node.
   3682     |  cmp NODE:TMPR->key, ITYPE
   3683     |  jne >4
   3684     |  // Get node value.
   3685     |  mov ITYPE, NODE:TMPR->val
   3686     |  cmp ITYPE, LJ_TNIL
   3687     |  je >5				// Key found, but nil value?
   3688     |2:  // Store the result into the destination slot.
   3689     |  mov [BASE+RA*8], ITYPE
   3690     |  ins_next
   3691     |
   3692     |4:  // Follow hash chain.
   3693     |  mov NODE:TMPR, NODE:TMPR->next
   3694     |  test NODE:TMPR, NODE:TMPR
   3695     |  jnz <1
   3696     |  // End of hash chain: key not found, nil result.
   3697     |  mov ITYPE, LJ_TNIL
   3698     |
   3699     |5:  // Check for __index if table value is nil.
   3700     |  mov TAB:TMPR, TAB:RB->metatable
   3701     |  test TAB:TMPR, TAB:TMPR
   3702     |  jz <2				// No metatable: done.
   3703     |  test byte TAB:TMPR->nomm, 1<<MM_index
   3704     |  jnz <2				// 'no __index' flag set: done.
   3705     |  jmp ->vmeta_tgets		// Caveat: preserve STR:RC.
   3706     break;
   3707   case BC_TGETB:  /* Table get, byte literal key: only the array part is handled inline, otherwise vmeta_tgetb. */
   3708     |  ins_ABC	// RA = dst, RB = table, RC = byte literal
   3709     |  mov TAB:RB, [BASE+RB*8]
   3710     |  checktab TAB:RB, ->vmeta_tgetb
   3711     |  cmp RCd, TAB:RB->asize
   3712     |  jae ->vmeta_tgetb		// Not in array part? Use fallback.
   3713     |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
   3714     |  add RC, TAB:RB->array		// RC = address of the array slot.
   3715     |  // Get array slot.
   3716     |  mov ITYPE, [RC]
   3717     |  cmp ITYPE, LJ_TNIL
   3718     |  je >2
   3719     |1:  // Store the loaded value (possibly nil) into the destination slot.
   3720     |  mov [BASE+RA*8], ITYPE
   3721     |  ins_next
   3722     |
   3723     |2:  // Check for __index if table value is nil.
   3724     |  mov TAB:TMPR, TAB:RB->metatable
   3725     |  test TAB:TMPR, TAB:TMPR
   3726     |  jz <1				// No metatable: nil is the result.
   3727     |  test byte TAB:TMPR->nomm, 1<<MM_index
   3728     |  jz ->vmeta_tgetb			// 'no __index' flag NOT set: check.
   3729     |  jmp <1				// Flag set: nil is the result.
   3730     break;
   3731   case BC_TGETR:  /* Array-part get with an integer key: no inline table type check and no inline metatable check. */
   3732     |  ins_ABC	// RA = dst, RB = table, RC = key
   3733     |  mov TAB:RB, [BASE+RB*8]
   3734     |  cleartp TAB:RB			// Tag is stripped without a type check.
   3735     |.if DUALNUM
   3736     |  mov RCd, dword [BASE+RC*8]
   3737     |.else
   3738     |  cvttsd2si RCd, qword [BASE+RC*8]
   3739     |.endif
   3740     |  cmp RCd, TAB:RB->asize
   3741     |  jae ->vmeta_tgetr		// Not in array part? Use fallback.
   3742     |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
   3743     |  add RC, TAB:RB->array		// RC = address of the array slot.
   3744     |  // Get array slot.
   3745     |->BC_TGETR_Z:			// Global entry: expects RC = slot address. Users are outside this chunk.
   3746     |  mov ITYPE, [RC]
   3747     |->BC_TGETR2_Z:			// Global entry: expects ITYPE = value to store. Users are outside this chunk.
   3748     |  mov [BASE+RA*8], ITYPE
   3749     |  ins_next
   3750     break;
   3751 
   3752   case BC_TSETV:  /* Table set, variable key: inline array fast path; string keys continue at BC_TSETS_Z; everything else goes to vmeta_tsetv. */
   3753     |  ins_ABC	// RA = src, RB = table, RC = key
   3754     |  mov TAB:RB, [BASE+RB*8]
   3755     |  mov RC, [BASE+RC*8]
   3756     |  checktab TAB:RB, ->vmeta_tsetv
   3757     |
   3758     |  // Integer key?
   3759     |.if DUALNUM
   3760     |  checkint RC, >5
   3761     |.else
   3762     |  // Convert number to int and back and compare.
   3763     |  checknum RC, >5
   3764     |  movd xmm0, RC
   3765     |  cvttsd2si RCd, xmm0
   3766     |  cvtsi2sd xmm1, RCd
   3767     |  ucomisd xmm0, xmm1
   3768     |  jne ->vmeta_tsetv		// Generic numeric key? Use fallback.
   3769     |.endif
   3770     |  cmp RCd, TAB:RB->asize		// Takes care of unordered, too.
   3771     |  jae ->vmeta_tsetv		// Not in array part? Use fallback.
   3772     |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
   3773     |  add RC, TAB:RB->array		// RC = address of the array slot.
   3774     |  cmp aword [RC], LJ_TNIL
   3775     |  je >3				// Previous value is nil?
   3776     |1:  // Check whether the table needs a write barrier.
   3777     |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
   3778     |  jnz >7
   3779     |2:  // Set array slot.
   3780     |  mov RB, [BASE+RA*8]		// RB is reused for the value; the table pointer is dead from here on.
   3781     |  mov [RC], RB
   3782     |  ins_next
   3783     |
   3784     |3:  // Check for __newindex if previous value is nil.
   3785     |  mov TAB:TMPR, TAB:RB->metatable
   3786     |  test TAB:TMPR, TAB:TMPR
   3787     |  jz <1				// No metatable: plain store.
   3788     |  test byte TAB:TMPR->nomm, 1<<MM_newindex
   3789     |  jz ->vmeta_tsetv			// 'no __newindex' flag NOT set: check.
   3790     |  jmp <1				// Flag set: plain store.
   3791     |
   3792     |5:  // String key? NOTE(review): assumes ITYPE still holds the key's type tag from the check macro above -- confirm.
   3793     |  cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
   3794     |  cleartp STR:RC
   3795     |  jmp ->BC_TSETS_Z
   3796     |
   3797     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   3798     |  barrierback TAB:RB, TMPR	// NOTE(review): second operand is presumably a scratch register -- confirm in the macro.
   3799     |  jmp <2
   3800     break;
   3801   case BC_TSETS:  /* Table set, constant string key: search the hash chain; a missing key is inserted via lj_tab_newkey unless __newindex must be consulted. */
   3802     |  ins_ABC	// RA = src, RB = table, RC = str const (~)
   3803     |  mov TAB:RB, [BASE+RB*8]
   3804     |  not RC				// Undo the (~) encoding of the operand.
   3805     |  mov STR:RC, [KBASE+RC*8]
   3806     |  checktab TAB:RB, ->vmeta_tsets
   3807     |->BC_TSETS_Z:	// RB = GCtab *, RC = GCstr *
   3808     |  mov TMPRd, TAB:RB->hmask
   3809     |  and TMPRd, STR:RC->hash		// TMPR = hash & hmask.
   3810     |  imul TMPRd, #NODE			// Scale the index by #NODE to a byte offset.
   3811     |  mov byte TAB:RB->nomm, 0		// Clear metamethod cache.
   3812     |  add NODE:TMPR, TAB:RB->node	// TMPR = first node of the chain.
   3813     |  settp ITYPE, STR:RC, LJ_TSTR	// ITYPE = tagged string key to compare against.
   3814     |1:  // Compare the key of the current node.
   3815     |  cmp NODE:TMPR->key, ITYPE
   3816     |  jne >5
   3817     |  // Ok, key found. Assumes: offsetof(Node, val) == 0
   3818     |  cmp aword [TMPR], LJ_TNIL
   3819     |  je >4				// Previous value is nil?
   3820     |2:  // Check whether the table needs a write barrier. TMPR = value slot.
   3821     |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
   3822     |  jnz >7
   3823     |3:  // Set node value.
   3824     |  mov ITYPE, [BASE+RA*8]
   3825     |  mov [TMPR], ITYPE
   3826     |  ins_next
   3827     |
   3828     |4:  // Check for __newindex if previous value is nil.
   3829     |  mov TAB:ITYPE, TAB:RB->metatable	// ITYPE serves as scratch here; TMPR must keep the node pointer.
   3830     |  test TAB:ITYPE, TAB:ITYPE
   3831     |  jz <2				// No metatable: plain store.
   3832     |  test byte TAB:ITYPE->nomm, 1<<MM_newindex
   3833     |  jz ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
   3834     |  jmp <2				// Flag set: plain store.
   3835     |
   3836     |5:  // Follow hash chain.
   3837     |  mov NODE:TMPR, NODE:TMPR->next
   3838     |  test NODE:TMPR, NODE:TMPR
   3839     |  jnz <1
   3840     |  // End of hash chain: key not found, add a new one.
   3841     |
   3842     |  // But check for __newindex first.
   3843     |  mov TAB:TMPR, TAB:RB->metatable
   3844     |  test TAB:TMPR, TAB:TMPR
   3845     |  jz >6				// No metatable: continue.
   3846     |  test byte TAB:TMPR->nomm, 1<<MM_newindex
   3847     |  jz ->vmeta_tsets			// 'no __newindex' flag NOT set: check.
   3848     |6:  // Insert the key: sync L->base and SAVE_PC, then call into C.
   3849     |  mov TMP1, ITYPE			// Spill the tagged key so it can be passed by address.
   3850     |  mov L:CARG1, SAVE_L
   3851     |  mov L:CARG1->base, BASE
   3852     |  lea CARG3, TMP1
   3853     |  mov CARG2, TAB:RB
   3854     |  mov SAVE_PC, PC
   3855     |  call extern lj_tab_newkey	// (lua_State *L, GCtab *t, TValue *k)
   3856     |  // Handles write barrier for the new key. TValue * returned in rax (RC).
   3857     |  mov L:CARG1, SAVE_L
   3858     |  mov BASE, L:CARG1->base		// Reload BASE from L after the C call.
   3859     |  mov TMPR, rax			// TMPR = new value slot, as expected at 2: and 3:.
   3860     |  movzx RAd, PC_RA			// Restore RA.
   3861     |  jmp <2				// Must check write barrier for value.
   3862     |
   3863     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   3864     |  barrierback TAB:RB, ITYPE	// ITYPE is reloaded at 3:, so it is free here. TMPR holds the slot.
   3865     |  jmp <3
   3866     break;
   3867   case BC_TSETB:  /* Table set, byte literal key: only the array part is handled inline, otherwise vmeta_tsetb. */
   3868     |  ins_ABC	// RA = src, RB = table, RC = byte literal
   3869     |  mov TAB:RB, [BASE+RB*8]
   3870     |  checktab TAB:RB, ->vmeta_tsetb
   3871     |  cmp RCd, TAB:RB->asize
   3872     |  jae ->vmeta_tsetb		// Not in array part? Use fallback.
   3873     |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
   3874     |  add RC, TAB:RB->array		// RC = address of the array slot.
   3875     |  cmp aword [RC], LJ_TNIL
   3876     |  je >3				// Previous value is nil?
   3877     |1:  // Check whether the table needs a write barrier.
   3878     |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
   3879     |  jnz >7
   3880     |2:	 // Set array slot.
   3881     |  mov ITYPE, [BASE+RA*8]
   3882     |  mov [RC], ITYPE
   3883     |  ins_next
   3884     |
   3885     |3:  // Check for __newindex if previous value is nil.
   3886     |  mov TAB:TMPR, TAB:RB->metatable
   3887     |  test TAB:TMPR, TAB:TMPR
   3888     |  jz <1				// No metatable: plain store.
   3889     |  test byte TAB:TMPR->nomm, 1<<MM_newindex
   3890     |  jz ->vmeta_tsetb			// 'no __newindex' flag NOT set: check.
   3891     |  jmp <1				// Flag set: plain store.
   3892     |
   3893     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   3894     |  barrierback TAB:RB, TMPR	// NOTE(review): second operand is presumably a scratch register -- confirm in the macro.
   3895     |  jmp <2
   3896     break;
   3897   case BC_TSETR:  /* Array-part set with an integer key: no inline table type check and no inline metatable check. */
   3898     |  ins_ABC	// RA = src, RB = table, RC = key
   3899     |  mov TAB:RB, [BASE+RB*8]
   3900     |  cleartp TAB:RB			// Tag is stripped without a type check.
   3901     |.if DUALNUM
   3902     |  mov RC, [BASE+RC*8]		// Full 64 bit load; only RCd is used below.
   3903     |.else
   3904     |  cvttsd2si RCd, qword [BASE+RC*8]
   3905     |.endif
   3906     |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
   3907     |  jnz >7
   3908     |2:
   3909     |  cmp RCd, TAB:RB->asize
   3910     |  jae ->vmeta_tsetr		// Not in array part? Use fallback.
   3911     |  shl RCd, 3			// Index -> byte offset (8 bytes per slot).
   3912     |  add RC, TAB:RB->array		// RC = address of the array slot.
   3913     |  // Set array slot.
   3914     |->BC_TSETR_Z:			// Global entry: expects RC = slot address. Users are outside this chunk.
   3915     |  mov ITYPE, [BASE+RA*8]
   3916     |  mov [RC], ITYPE
   3917     |  ins_next
   3918     |
   3919     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   3920     |  barrierback TAB:RB, TMPR	// NOTE(review): second operand is presumably a scratch register -- confirm in the macro.
   3921     |  jmp <2
   3922     break;
   3923 
   3924   case BC_TSETM:  /* Store MULTRES-1 consecutive slots starting at RA into the array part of the table at RA-1, beginning at the constant start index. */
   3925     |  ins_AD	// RA = base (table at base-1), RD = num const (start index)
   3926     |1:  // Entry point, also used to retry after resizing the array part.
   3927     |  mov TMPRd, dword [KBASE+RD*8]	// Integer constant is in lo-word.
   3928     |  lea RA, [BASE+RA*8]		// RA = address of the first source slot.
   3929     |  mov TAB:RB, [RA-8]		// Guaranteed to be a table.
   3930     |  cleartp TAB:RB
   3931     |  test byte TAB:RB->marked, LJ_GC_BLACK	// isblack(table)
   3932     |  jnz >7
   3933     |2:
   3934     |  mov RDd, MULTRES
   3935     |  sub RDd, 1			// RD = number of values to copy.
   3936     |  jz >4				// Nothing to copy?
   3937     |  add RDd, TMPRd			// Compute needed size.
   3938     |  cmp RDd, TAB:RB->asize
   3939     |  ja >5				// Doesn't fit into array part?
   3940     |  sub RDd, TMPRd			// Back to the plain copy count.
   3941     |  shl TMPRd, 3			// Start index -> byte offset.
   3942     |  add TMPR, TAB:RB->array		// TMPR = address of the first destination slot.
   3943     |3:  // Copy result slots to table.
   3944     |  mov RB, [RA]			// RB is reused for the value; the table pointer is dead from here on.
   3945     |  add RA, 8
   3946     |  mov [TMPR], RB
   3947     |  add TMPR, 8
   3948     |  sub RDd, 1
   3949     |  jnz <3
   3950     |4:
   3951     |  ins_next
   3952     |
   3953     |5:  // Need to resize array part.
   3954     |  mov L:CARG1, SAVE_L
   3955     |  mov L:CARG1->base, BASE		// Caveat: CARG2/CARG3 may be BASE.
   3956     |  mov CARG2, TAB:RB
   3957     |  mov CARG3d, RDd			// Requested array size computed above.
   3958     |  mov L:RB, L:CARG1			// Keep L in RB across the call.
   3959     |  mov SAVE_PC, PC
   3960     |  call extern lj_tab_reasize	// (lua_State *L, GCtab *t, int nasize)
   3961     |  mov BASE, L:RB->base		// Reload BASE from L after the C call.
   3962     |  movzx RAd, PC_RA			// Restore RA.
   3963     |  movzx RDd, PC_RD			// Restore RD.
   3964     |  jmp <1				// Retry.
   3965     |
   3966     |7:  // Possible table write barrier for any value. Skip valiswhite check.
   3967     |  barrierback TAB:RB, RD		// RD is reloaded from MULTRES at 2:, so it is free here.
   3968     |  jmp <2
   3969     break;
   3970 
   3971   /* -- Calls and vararg handling ----------------------------------------- */
   3972 
   3973   case BC_CALL: case BC_CALLM:  /* Call: type-check the callee in slot RA, point BASE 16 bytes above it and continue via ins_call. */
   3974     |  ins_A_C	// RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
   3975     if (op == BC_CALLM) {
   3976       |  add NARGS:RDd, MULTRES		// CALLM only: add MULTRES to the arg count operand.
   3977     }
   3978     |  mov LFUNC:RB, [BASE+RA*8]
   3979     |  checkfunc LFUNC:RB, ->vmeta_call_ra	// Not a function? Use fallback.
   3980     |  lea BASE, [BASE+RA*8+16]		// BASE = address of callee slot + 16.
   3981     |  ins_call
   3982     break;
   3983 
   3984   case BC_CALLMT:  /* Tail call with extra args: adds MULTRES to the arg count, then relies on BC_CALLT following directly. */
   3985     |  ins_AD	// RA = base, RD = extra_nargs
   3986     |  add NARGS:RDd, MULTRES
   3987     |  // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
   3988     break;
   3989   case BC_CALLT:  /* Tail call: copy the callee and its args down over the current frame, then continue via ins_callt. */
   3990     |  ins_AD	// RA = base, RD = nargs+1
   3991     |  lea RA, [BASE+RA*8+16]		// RA = address of callee slot + 16 (source of the args).
   3992     |  mov KBASE, BASE			// Use KBASE for move + vmeta_call hint.
   3993     |  mov LFUNC:RB, [RA-16]		// Tagged callee.
   3994     |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
   3995     |->BC_CALLT_Z:
   3996     |  mov PC, [BASE-8]			// Frame link word of the current frame.
   3997     |  test PCd, FRAME_TYPE
   3998     |  jnz >7
   3999     |1:
   4000     |  mov [BASE-16], LFUNC:RB		// Copy func+tag down, reloaded below.
   4001     |  mov MULTRES, NARGS:RDd		// Save nargs+1; RD is used as loop counter.
   4002     |  sub NARGS:RDd, 1
   4003     |  jz >3				// No args to move?
   4004     |2:  // Move args down.
   4005     |  mov RB, [RA]
   4006     |  add RA, 8
   4007     |  mov [KBASE], RB
   4008     |  add KBASE, 8
   4009     |  sub NARGS:RDd, 1
   4010     |  jnz <2
   4011     |
   4012     |  mov LFUNC:RB, [BASE-16]		// RB was clobbered by the copy loop: reload func+tag.
   4013     |3:
   4014     |  cleartp LFUNC:RB
   4015     |  mov NARGS:RDd, MULTRES		// Restore nargs+1.
   4016     |  cmp byte LFUNC:RB->ffid, 1	// (> FF_C) Calling a fast function?
   4017     |  ja >5
   4018     |4:
   4019     |  ins_callt
   4020     |
   4021     |5:  // Tailcall to a fast function.
   4022     |  test PCd, FRAME_TYPE		// Lua frame below?
   4023     |  jnz <4
   4024     |  movzx RAd, PC_RA
   4025     |  neg RA
   4026     |  mov LFUNC:KBASE, [BASE+RA*8-32]	// Need to prepare KBASE.
   4027     |  cleartp LFUNC:KBASE
   4028     |  mov KBASE, LFUNC:KBASE->pc
   4029     |  mov KBASE, [KBASE+PC2PROTO(k)]
   4030     |  jmp <4
   4031     |
   4032     |7:  // Tailcall from a vararg function.
   4033     |  sub PC, FRAME_VARG
   4034     |  test PCd, FRAME_TYPEP
   4035     |  jnz >8				// Vararg frame below?
   4036     |  sub BASE, PC			// Need to relocate BASE/KBASE down.
   4037     |  mov KBASE, BASE
   4038     |  mov PC, [BASE-8]			// Reload the link word of the relocated frame.
   4039     |  jmp <1
   4040     |8:
   4041     |  add PCd, FRAME_VARG		// Undo the subtraction above: PC holds the original link word again.
   4042     |  jmp <1
   4043     break;
   4044 
   4045   case BC_ITERC:  /* Generic iterator call: copy callable, state and control var up into a fresh call frame and call with 2 args. */
   4046     |  ins_A	// RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
   4047     |  lea RA, [BASE+RA*8+16]		// fb = base+2
   4048     |  mov RB, [RA-32]			// Copy state. fb[0] = fb[-4].
   4049     |  mov RC, [RA-24]			// Copy control var. fb[1] = fb[-3].
   4050     |  mov [RA], RB
   4051     |  mov [RA+8], RC
   4052     |  mov LFUNC:RB, [RA-40]		// Copy callable. fb[-2] = fb[-5]
   4053     |  mov [RA-16], LFUNC:RB
   4054     |  mov NARGS:RDd, 2+1		// Handle like a regular 2-arg call.
   4055     |  checkfunc LFUNC:RB, ->vmeta_call	// Not a function? Use fallback.
   4056     |  mov BASE, RA			// New frame base = fb.
   4057     |  ins_call
   4058     break;
   4059 
   4060   case BC_ITERN:  /* Inline table traversal: walks the array part, then the hash part, with the control var's low 32 bits as running index. */
   4061     |  ins_A	// RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
   4062     |.if JIT
   4063     |  // NYI: add hotloop, record BC_ITERN.
   4064     |.endif
   4065     |  mov TAB:RB, [BASE+RA*8-16]	// Table being traversed (not type-checked here).
   4066     |  cleartp TAB:RB
   4067     |  mov RCd, [BASE+RA*8-8]		// Get index from control var.
   4068     |  mov TMPRd, TAB:RB->asize
   4069     |  add PC, 4				// Step PC by 4 bytes; presumably so PC_RD at 2: reads the following ITERL -- confirm.
   4070     |  mov ITYPE, TAB:RB->array
   4071     |1:  // Traverse array part.
   4072     |  cmp RCd, TMPRd; jae >5		// Index points after array part?
   4073     |  cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
   4074     |.if not DUALNUM
   4075     |  cvtsi2sd xmm0, RCd
   4076     |.endif
   4077     |  // Copy array slot to returned value.
   4078     |  mov RB, [ITYPE+RC*8]		// RB is reused for the value; the table pointer is dead on this path.
   4079     |  mov [BASE+RA*8+8], RB
   4080     |  // Return array index as a numeric key.
   4081     |.if DUALNUM
   4082     |  setint ITYPE, RC
   4083     |  mov [BASE+RA*8], ITYPE
   4084     |.else
   4085     |  movsd qword [BASE+RA*8], xmm0
   4086     |.endif
   4087     |  add RCd, 1
   4088     |  mov [BASE+RA*8-8], RCd		// Update control var.
   4089     |2:
   4090     |  movzx RDd, PC_RD			// Get target from ITERL.
   4091     |  branchPC RD
   4092     |3:
   4093     |  ins_next
   4094     |
   4095     |4:  // Skip holes in array part.
   4096     |  add RCd, 1
   4097     |  jmp <1
   4098     |
   4099     |5:  // Traverse hash part.
   4100     |  sub RCd, TMPRd			// RC = index relative to the hash part.
   4101     |6:
   4102     |  cmp RCd, TAB:RB->hmask; ja <3	// End of iteration? Branch to ITERL+1.
   4103     |  imul ITYPEd, RCd, #NODE		// Scale the index by #NODE to a byte offset.
   4104     |  add NODE:ITYPE, TAB:RB->node	// ITYPE = address of the node.
   4105     |  cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
   4106     |  lea TMPRd, [RCd+TMPRd+1]		// Next control var value = hash index + asize + 1.
   4107     |  // Copy key and value from hash slot.
   4108     |  mov RB, NODE:ITYPE->key
   4109     |  mov RC, NODE:ITYPE->val
   4110     |  mov [BASE+RA*8], RB
   4111     |  mov [BASE+RA*8+8], RC
   4112     |  mov [BASE+RA*8-8], TMPRd		// Update control var.
   4113     |  jmp <2
   4114     |
   4115     |7:  // Skip holes in hash part.
   4116     |  add RCd, 1
   4117     |  jmp <6
   4118     break;
   4119 
   4120   case BC_ISNEXT:  /* Guard for the specialized ITERN loop: check callable/table/control var, else patch the bytecode back to JMP + ITERC. */
   4121     |  ins_AD	// RA = base, RD = target (points to ITERN)
   4122     |  mov CFUNC:RB, [BASE+RA*8-24]
   4123     |  checkfunc CFUNC:RB, >5				// Iterator must be a function ...
   4124     |  checktptp [BASE+RA*8-16], LJ_TTAB, >5		// ... the state must be a table ...
   4125     |  cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5	// ... the control var must be nil ...
   4126     |  cmp byte CFUNC:RB->ffid, FF_next_N; jne >5	// ... and the function must have ffid FF_next_N.
   4127     |  branchPC RD
   4128     |  mov64 TMPR, U64x(fffe7fff, 00000000)	// Low 32 bits are zero: ITERN reads them as its start index.
   4129     |  mov [BASE+RA*8-8], TMPR		// Initialize control var.
   4130     |1:
   4131     |  ins_next
   4132     |5:  // Despecialize bytecode if any of the checks fail.
   4133     |  mov PC_OP, BC_JMP			// Rewrite the opcode addressed by PC_OP to BC_JMP.
   4134     |  branchPC RD
   4135     |  mov byte [PC], BC_ITERC		// Rewrite the opcode byte at the branch target to BC_ITERC.
   4136     |  jmp <1
   4137     break;
   4138 
   4139   case BC_VARG:  /* Copy vararg slots to RA: either a fixed count padded with nil (RB != 0) or all of them (RB == 0, which also sets MULTRES). */
   4140     |  ins_ABC	// RA = base, RB = nresults+1, RC = numparams
   4141     |  lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
   4142     |  lea RA, [BASE+RA*8]		// RA = address of the first destination slot.
   4143     |  sub TMPR, [BASE-8]		// Subtract the frame link word stored below BASE.
   4144     |  // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
   4145     |  test RB, RB
   4146     |  jz >5				// Copy all varargs?
   4147     |  lea RB, [RA+RB*8-8]		// RB = end of the destination range.
   4148     |  cmp TMPR, BASE			// No vararg slots?
   4149     |  jnb >2
   4150     |1:  // Copy vararg slots to destination slots.
   4151     |  mov RC, [TMPR-16]
   4152     |  add TMPR, 8
   4153     |  mov [RA], RC
   4154     |  add RA, 8
   4155     |  cmp RA, RB			// All destination slots filled?
   4156     |  jnb >3
   4157     |  cmp TMPR, BASE			// No more vararg slots?
   4158     |  jb <1
   4159     |2:  // Fill up remainder with nil.
   4160     |  mov aword [RA], LJ_TNIL
   4161     |  add RA, 8
   4162     |  cmp RA, RB
   4163     |  jb <2
   4164     |3:
   4165     |  ins_next
   4166     |
   4167     |5:  // Copy all varargs.
   4168     |  mov MULTRES, 1			// MULTRES = 0+1
   4169     |  mov RC, BASE
   4170     |  sub RC, TMPR			// RC = size of the vararg area in bytes.
   4171     |  jbe <3				// No vararg slots?
   4172     |  mov RBd, RCd
   4173     |  shr RBd, 3			// Bytes -> slots.
   4174     |  add RBd, 1
   4175     |  mov MULTRES, RBd			// MULTRES = #varargs+1
   4176     |  mov L:RB, SAVE_L
   4177     |  add RC, RA			// RC = end of the destination range.
   4178     |  cmp RC, L:RB->maxstack
   4179     |  ja >7				// Need to grow stack?
   4180     |6:  // Copy all vararg slots.
   4181     |  mov RC, [TMPR-16]
   4182     |  add TMPR, 8
   4183     |  mov [RA], RC
   4184     |  add RA, 8
   4185     |  cmp TMPR, BASE			// No more vararg slots?
   4186     |  jb <6
   4187     |  jmp <3
   4188     |
   4189     |7:  // Grow stack for varargs.
   4190     |  mov L:RB->base, BASE
   4191     |  mov L:RB->top, RA
   4192     |  mov SAVE_PC, PC
   4193     |  sub TMPR, BASE			// Need delta, because BASE may change.
   4194     |  mov TMP1hi, TMPRd			// Spill the 32 bit delta across the call.
   4195     |  mov CARG2d, MULTRES
   4196     |  sub CARG2d, 1			// n = #varargs.
   4197     |  mov CARG1, L:RB
   4198     |  call extern lj_state_growstack	// (lua_State *L, int n)
   4199     |  mov BASE, L:RB->base		// Reload BASE from L after the C call.
   4200     |  movsxd TMPR, TMP1hi		// Sign-extend: the delta is negative here (path only reached with TMPR < BASE).
   4201     |  mov RA, L:RB->top		// Reload the destination pointer saved in L->top above.
   4202     |  add TMPR, BASE			// Rebuild the absolute source pointer.
   4203     |  jmp <6
   4204     break;
   4205 
   4206   /* -- Returns ----------------------------------------------------------- */
   4207 
   4208   case BC_RETM:  /* Return with extra results: adds MULTRES to the result count, then relies on BC_RET following directly. */
   4209     |  ins_AD	// RA = results, RD = extra_nresults
   4210     |  add RDd, MULTRES			// MULTRES >=1, so RD >=1.
   4211     |  // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
   4212     break;
   4213 
   4214   case BC_RET: case BC_RET0: case BC_RET1:  /* Return: move results below BASE, nil-fill up to the count read from PC_RB, then restore the frame below. RET0/RET1 skip the copy loop. */
   4215     |  ins_AD	// RA = results, RD = nresults+1
   4216     if (op != BC_RET0) {
   4217       |  shl RAd, 3			// RA = byte offset of the first result (unused by RET0).
   4218     }
   4219     |1:
   4220     |  mov PC, [BASE-8]			// Frame link word of the current frame.
   4221     |  mov MULTRES, RDd			// Save nresults+1.
   4222     |  test PCd, FRAME_TYPE		// Check frame type marker.
   4223     |  jnz >7				// Not returning to a fixarg Lua func?
   4224     switch (op) {
   4225     case BC_RET:
   4226       |->BC_RET_Z:
   4227       |  mov KBASE, BASE		// Use KBASE for result move.
   4228       |  sub RDd, 1
   4229       |  jz >3
   4230       |2:  // Move results down.
   4231       |  mov RB, [KBASE+RA]
   4232       |  mov [KBASE-16], RB
   4233       |  add KBASE, 8
   4234       |  sub RDd, 1
   4235       |  jnz <2
   4236       |3:
   4237       |  mov RDd, MULTRES		// Note: MULTRES may be >255.
   4238       |  movzx RBd, PC_RB		// So cannot compare with RDL!
   4239       |5:
   4240       |  cmp RBd, RDd			// More results expected?
   4241       |  ja >6
   4242       break;
   4243     case BC_RET1:
   4244       |  mov RB, [BASE+RA]		// Move the single result down.
   4245       |  mov [BASE-16], RB
   4246       /* fallthrough */
   4247     case BC_RET0:
   4248       |5:
   4249       |  cmp PC_RB, RDL			// More results expected?
   4250       |  ja >6
   4251     default:
   4252       break;
   4253     }
   4254     |  movzx RAd, PC_RA
   4255     |  neg RA
   4256     |  lea BASE, [BASE+RA*8-16]		// base = base - (RA+2)*8
   4257     |  mov LFUNC:KBASE, [BASE-16]	// Function of the frame being returned to.
   4258     |  cleartp LFUNC:KBASE
   4259     |  mov KBASE, LFUNC:KBASE->pc
   4260     |  mov KBASE, [KBASE+PC2PROTO(k)]	// Restore KBASE from that function's prototype.
   4261     |  ins_next
   4262     |
   4263     |6:  // Fill up results with nil.
   4264     if (op == BC_RET) {
   4265       |  mov aword [KBASE-16], LJ_TNIL	// Note: relies on shifted base.
   4266       |  add KBASE, 8
   4267     } else {
   4268       |  mov aword [BASE+RD*8-24], LJ_TNIL
   4269     }
   4270     |  add RD, 1
   4271     |  jmp <5
   4272     |
   4273     |7:  // Non-standard return case.
   4274     |  lea RB, [PC-FRAME_VARG]
   4275     |  test RBd, FRAME_TYPEP
   4276     |  jnz ->vm_return			// Not a vararg frame: handled by vm_return.
   4277     |  // Return from vararg function: relocate BASE down and RA up.
   4278     |  sub BASE, RB
   4279     if (op != BC_RET0) {
   4280       |  add RA, RB
   4281     }
   4282     |  jmp <1				// Retry with the relocated frame.
   4283     break;
   4284 
   4285   /* -- Loops and branches ------------------------------------------------ */
   4286 
   4287   |.define FOR_IDX,  [RA]
   4288   |.define FOR_STOP, [RA+8]
   4289   |.define FOR_STEP, [RA+16]
   4290   |.define FOR_EXT,  [RA+24]
   4291 
   4292   case BC_FORL:  /* Emits only the hotloop macro (JIT builds), then relies on BC_IFORL following directly. */
   4293     |.if JIT
   4294     |  hotloop RBd
   4295     |.endif
   4296     | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
   4297     break;
   4298 
   4299   case BC_JFORI:  /* Numeric 'for': FORI/JFORI validate the loop slots and do the initial test; IFORL/JFORL add the step and re-test. J* variants enter BC_JLOOP. */
   4300   case BC_JFORL:
   4301 #if !LJ_HASJIT
   4302     break;
   4303 #endif
   4304   case BC_FORI:
   4305   case BC_IFORL:
   4306     vk = (op == BC_IFORL || op == BC_JFORL);  /* vk: loop-iteration variant; type checks are skipped and the step is added. */
   4307     |  ins_AJ	// RA = base, RD = target (after end of loop or start of loop)
   4308     |  lea RA, [BASE+RA*8]		// RA = address of the loop slots (FOR_IDX/STOP/STEP/EXT).
   4309     if (LJ_DUALNUM) {
   4310       |  mov RB, FOR_IDX
   4311       |  checkint RB, >9			// Index not an integer: try the FP variant.
   4312       |  mov TMPR, FOR_STOP
   4313       if (!vk) {
   4314 	|  checkint TMPR, ->vmeta_for
   4315 	|  mov ITYPE, FOR_STEP
   4316 	|  test ITYPEd, ITYPEd; js >5	// Negative step (sign of the low 32 bits)?
   4317 	|  sar ITYPE, 47;
   4318 	|  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
   4319       } else {
   4320 #ifdef LUA_USE_ASSERT
   4321 	|  checkinttp FOR_STOP, ->assert_bad_for_arg_type
   4322 	|  checkinttp FOR_STEP, ->assert_bad_for_arg_type
   4323 #endif
   4324 	|  mov ITYPE, FOR_STEP
   4325 	|  test ITYPEd, ITYPEd; js >5	// Negative step (sign of the low 32 bits)?
   4326 	|  add RBd, ITYPEd; jo >1		// idx += step; on overflow the loop ends.
   4327 	|  setint RB
   4328 	|  mov FOR_IDX, RB
   4329       }
   4330       |  cmp RBd, TMPRd			// idx vs. stop (positive step).
   4331       |  mov FOR_EXT, RB			// Copy the index to the FOR_EXT slot (mov keeps the flags).
   4332       if (op == BC_FORI) {
   4333 	|  jle >7
   4334 	|1:
   4335 	|6:
   4336 	|  branchPC RD
   4337       } else if (op == BC_JFORI) {
   4338 	|  branchPC RD
   4339 	|  movzx RDd, PC_RD
   4340 	|  jle =>BC_JLOOP
   4341 	|1:
   4342 	|6:
   4343       } else if (op == BC_IFORL) {
   4344 	|  jg >7
   4345 	|6:
   4346 	|  branchPC RD
   4347 	|1:
   4348       } else {
   4349 	|  jle =>BC_JLOOP
   4350 	|1:
   4351 	|6:
   4352       }
   4353       |7:
   4354       |  ins_next
   4355       |
   4356       |5:  // Invert check for negative step.
   4357       if (!vk) {
   4358 	|  sar ITYPE, 47;
   4359 	|  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
   4360       } else {
   4361 	|  add RBd, ITYPEd; jo <1		// idx += step; on overflow the loop ends.
   4362 	|  setint RB
   4363 	|  mov FOR_IDX, RB
   4364       }
   4365       |  cmp RBd, TMPRd			// idx vs. stop (negative step).
   4366       |  mov FOR_EXT, RB
   4367       if (op == BC_FORI) {
   4368 	|  jge <7
   4369       } else if (op == BC_JFORI) {
   4370 	|  branchPC RD
   4371 	|  movzx RDd, PC_RD
   4372 	|  jge =>BC_JLOOP
   4373       } else if (op == BC_IFORL) {
   4374 	|  jl <7
   4375       } else {
   4376 	|  jge =>BC_JLOOP
   4377       }
   4378       |  jmp <6
   4379       |9:  // Fallback to FP variant.
   4380       if (!vk) {
   4381 	|  jae ->vmeta_for		// NOTE(review): consumes the flags left by the checkint macro at the top -- confirm in the macro.
   4382       }
   4383     } else if (!vk) {
   4384       |  checknumtp FOR_IDX, ->vmeta_for
   4385     }
   4386     if (!vk) {
   4387       |  checknumtp FOR_STOP, ->vmeta_for
   4388     } else {
   4389 #ifdef LUA_USE_ASSERT
   4390       |  checknumtp FOR_STOP, ->assert_bad_for_arg_type
   4391       |  checknumtp FOR_STEP, ->assert_bad_for_arg_type
   4392 #endif
   4393     }
   4394     |  mov RB, FOR_STEP
   4395     if (!vk) {
   4396       |  checknum RB, ->vmeta_for
   4397     }
   4398     |  movsd xmm0, qword FOR_IDX
   4399     |  movsd xmm1, qword FOR_STOP
   4400     if (vk) {
   4401       |  addsd xmm0, qword FOR_STEP	// idx += step.
   4402       |  movsd qword FOR_IDX, xmm0
   4403       |  test RB, RB; js >3		// Negative step (sign bit of the double)?
   4404     } else {
   4405       |  jl >3				// NOTE(review): consumes the flags left by checknum above (movsd keeps them); presumably selects a negative step -- confirm in the macro.
   4406     }
   4407     |  ucomisd xmm1, xmm0		// Compare stop with idx (positive step).
   4408     |1:
   4409     |  movsd qword FOR_EXT, xmm0		// Copy the index to the FOR_EXT slot (movsd keeps the flags).
   4410     if (op == BC_FORI) {
   4411       |.if DUALNUM
   4412       |  jnb <7
   4413       |.else
   4414       |  jnb >2
   4415       |  branchPC RD
   4416       |.endif
   4417     } else if (op == BC_JFORI) {
   4418       |  branchPC RD
   4419       |  movzx RDd, PC_RD
   4420       |  jnb =>BC_JLOOP
   4421     } else if (op == BC_IFORL) {
   4422       |.if DUALNUM
   4423       |  jb <7
   4424       |.else
   4425       |  jb >2
   4426       |  branchPC RD
   4427       |.endif
   4428     } else {
   4429       |  jnb =>BC_JLOOP
   4430     }
   4431     |.if DUALNUM
   4432     |  jmp <6
   4433     |.else
   4434     |2:
   4435     |  ins_next
   4436     |.endif
   4437     |
   4438     |3:  // Invert comparison if step is negative.
   4439     |  ucomisd xmm0, xmm1
   4440     |  jmp <1
   4441     break;
   4442 
   4443   case BC_ITERL:  /* Emits only the hotloop macro (JIT builds), then relies on BC_IITERL following directly. */
   4444     |.if JIT
   4445     |  hotloop RBd
   4446     |.endif
   4447     | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
   4448     break;
   4449 
   4450   case BC_JITERL:  /* Iterator loop back-edge: if the slot at RA is non-nil, copy it to the control var and loop (IITERL branches, JITERL enters BC_JLOOP). */
   4451 #if !LJ_HASJIT
   4452     break;
   4453 #endif
   4454   case BC_IITERL:
   4455     |  ins_AJ	// RA = base, RD = target
   4456     |  lea RA, [BASE+RA*8]
   4457     |  mov RB, [RA]			// Value in the slot at RA.
   4458     |  cmp RB, LJ_TNIL; je >1		// Stop if iterator returned nil.
   4459     if (op == BC_JITERL) {
   4460       |  mov [RA-8], RB			// Save control var.
   4461       |  jmp =>BC_JLOOP
   4462     } else {
   4463       |  branchPC RD			// Otherwise save control var + branch.
   4464       |  mov [RA-8], RB
   4465     }
   4466     |1:
   4467     |  ins_next
   4468     break;
   4469 
   4470   case BC_LOOP:  /* Generic loop marker: emits only the hotloop macro (JIT builds), then relies on BC_ILOOP following directly. */
   4471     |  ins_A	// RA = base, RD = target (loop extent)
   4472     |  // Note: RA/RD is only used by trace recorder to determine scope/extent
   4473     |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
   4474     |.if JIT
   4475     |  hotloop RBd
   4476     |.endif
   4477     | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
   4478     break;
   4479 
   4480   case BC_ILOOP:  /* Loop marker without hot counting: does nothing but continue with the next instruction. */
   4481     |  ins_A	// RA = base, RD = target (loop extent)
   4482     |  ins_next
   4483     break;
   4484 
   4485   case BC_JLOOP:  /* Enter a compiled trace: look up its mcode by trace number, publish BASE and L, save extra callee-save registers, jump. Empty without JIT. */
   4486     |.if JIT
   4487     |  ins_AD	// RA = base (ignored), RD = traceno
   4488     |  mov RA, [DISPATCH+DISPATCH_J(trace)]	// RA = trace table.
   4489     |  mov TRACE:RD, [RA+RD*8]		// RD = trace object for traceno.
   4490     |  mov RD, TRACE:RD->mcode		// RD = entry address of the machine code.
   4491     |  mov L:RB, SAVE_L
   4492     |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
   4493     |  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
   4494     |  // Save additional callee-save registers only used in compiled code.
   4495     |.if X64WIN
   4496     |  mov CSAVE_4, r12
   4497     |  mov CSAVE_3, r13
   4498     |  mov CSAVE_2, r14
   4499     |  mov CSAVE_1, r15
   4500     |  mov RA, rsp			// RA = old rsp; xmm6-xmm15 are stored just below it.
   4501     |  sub rsp, 10*16+4*8
   4502     |  movdqa [RA-1*16], xmm6
   4503     |  movdqa [RA-2*16], xmm7
   4504     |  movdqa [RA-3*16], xmm8
   4505     |  movdqa [RA-4*16], xmm9
   4506     |  movdqa [RA-5*16], xmm10
   4507     |  movdqa [RA-6*16], xmm11
   4508     |  movdqa [RA-7*16], xmm12
   4509     |  movdqa [RA-8*16], xmm13
   4510     |  movdqa [RA-9*16], xmm14
   4511     |  movdqa [RA-10*16], xmm15
   4512     |.else
   4513     |  sub rsp, 16
   4514     |  mov [rsp+16], r12
   4515     |  mov [rsp+8], r13
   4516     |.endif
   4517     |  jmp RD				// Continue in the trace's machine code.
   4518     |.endif
   4519     break;
   4520 
   4521   case BC_JMP:  /* Unconditional jump: apply the branch target in RD to PC and continue. */
   4522     |  ins_AJ	// RA = unused, RD = target
   4523     |  branchPC RD
   4524     |  ins_next
   4525     break;
   4526 
   4527   /* -- Function headers -------------------------------------------------- */
   4528 
   4529    /*
   4530    ** Reminder: A function may be called with func/args above L->maxstack,
   4531    ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
   4532    ** too. This means all FUNC* ops (including fast functions) must check
   4533    ** for stack overflow _before_ adding more slots!
   4534    */
   4535 
   4536   case BC_FUNCF:  /* Function header: emits only the hotcall macro (JIT builds), then relies on BC_IFUNCF following directly. */
   4537     |.if JIT
   4538     |  hotcall RBd
   4539     |.endif
   4540   case BC_FUNCV:  /* NYI: compiled vararg functions. Emits nothing of its own. */
   4541     | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
   4542     break;
   4543 
   4544   case BC_JFUNCF:  /* Fixed-arg Lua function header: load KBASE, check the stack limit, nil-fill missing parameters; JFUNCF then enters BC_JLOOP. */
   4545 #if !LJ_HASJIT
   4546     break;
   4547 #endif
   4548   case BC_IFUNCF:
   4549     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
   4550     |  mov KBASE, [PC-4+PC2PROTO(k)]
   4551     |  mov L:RB, SAVE_L
   4552     |  lea RA, [BASE+RA*8]		// Top of frame.
   4553     |  cmp RA, L:RB->maxstack
   4554     |  ja ->vm_growstack_f		// Need to grow stack.
   4555     |  movzx RAd, byte [PC-4+PC2PROTO(numparams)]
   4556     |  cmp NARGS:RDd, RAd		// Check for missing parameters.
   4557     |  jbe >3
   4558     |2:
   4559     if (op == BC_JFUNCF) {
   4560       |  movzx RDd, PC_RD		// Reload RD (the nil-fill loop modifies it) as BC_JLOOP's operand.
   4561       |  jmp =>BC_JLOOP
   4562     } else {
   4563       |  ins_next
   4564     }
   4565     |
   4566     |3:  // Clear missing parameters.
   4567     |  mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
   4568     |  add NARGS:RDd, 1
   4569     |  cmp NARGS:RDd, RAd
   4570     |  jbe <3				// Repeat until nargs+1 exceeds numparams.
   4571     |  jmp <2
   4572     break;
   4573 
   4574   case BC_JFUNCV:
   4575 #if !LJ_HASJIT
   4576     break;
   4577 #endif
   4578     | int3  // NYI: compiled vararg functions
   4579     break;  /* NYI: compiled vararg functions. */
   4580 
   4581   case BC_IFUNCV:
   4582     |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
   4583     |  lea RBd, [NARGS:RD*8+FRAME_VARG+8]
   4584     |  lea RD, [BASE+NARGS:RD*8+8]
   4585     |  mov LFUNC:KBASE, [BASE-16]
   4586     |  mov [RD-8], RB			// Store delta + FRAME_VARG.
   4587     |  mov [RD-16], LFUNC:KBASE		// Store copy of LFUNC.
   4588     |  mov L:RB, SAVE_L
   4589     |  lea RA, [RD+RA*8]
   4590     |  cmp RA, L:RB->maxstack
   4591     |  ja ->vm_growstack_v		// Need to grow stack.
   4592     |  mov RA, BASE
   4593     |  mov BASE, RD
   4594     |  movzx RBd, byte [PC-4+PC2PROTO(numparams)]
   4595     |  test RBd, RBd
   4596     |  jz >2
   4597     |  add RA, 8
   4598     |1:  // Copy fixarg slots up to new frame.
   4599     |  add RA, 8
   4600     |  cmp RA, BASE
   4601     |  jnb >3				// Less args than parameters?
   4602     |  mov KBASE, [RA-16]
   4603     |  mov [RD], KBASE
   4604     |  add RD, 8
   4605     |  mov aword [RA-16], LJ_TNIL	// Clear old fixarg slot (help the GC).
   4606     |  sub RBd, 1
   4607     |  jnz <1
   4608     |2:
   4609     if (op == BC_JFUNCV) {
   4610       |  movzx RDd, PC_RD
   4611       |  jmp =>BC_JLOOP
   4612     } else {
   4613       |  mov KBASE, [PC-4+PC2PROTO(k)]
   4614       |  ins_next
   4615     }
   4616     |
   4617     |3:  // Clear missing parameters.
   4618     |  mov aword [RD], LJ_TNIL
   4619     |  add RD, 8
   4620     |  sub RBd, 1
   4621     |  jnz <3
   4622     |  jmp <2
   4623     break;
   4624 
   4625   case BC_FUNCC:
   4626   case BC_FUNCCW:
   4627     |  ins_AD  // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
   4628     |  mov CFUNC:RB, [BASE-16]
   4629     |  cleartp CFUNC:RB
   4630     |  mov KBASE, CFUNC:RB->f
   4631     |  mov L:RB, SAVE_L
   4632     |  lea RD, [BASE+NARGS:RD*8-8]
   4633     |  mov L:RB->base, BASE
   4634     |  lea RA, [RD+8*LUA_MINSTACK]
   4635     |  cmp RA, L:RB->maxstack
   4636     |  mov L:RB->top, RD
   4637     if (op == BC_FUNCC) {
   4638       |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
   4639     } else {
   4640       |  mov CARG2, KBASE
   4641       |  mov CARG1, L:RB		// Caveat: CARG1 may be RA.
   4642     }
   4643     |  ja ->vm_growstack_c		// Need to grow stack.
   4644     |  set_vmstate C
   4645     if (op == BC_FUNCC) {
   4646       |  call KBASE			// (lua_State *L)
   4647     } else {
   4648       |  // (lua_State *L, lua_CFunction f)
   4649       |  call aword [DISPATCH+DISPATCH_GL(wrapf)]
   4650     }
   4651     |  // nresults returned in eax (RD).
   4652     |  mov BASE, L:RB->base
   4653     |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
   4654     |  set_vmstate INTERP
   4655     |  lea RA, [BASE+RD*8]
   4656     |  neg RA
   4657     |  add RA, L:RB->top		// RA = (L->top-(L->base+nresults))*8
   4658     |  mov PC, [BASE-8]			// Fetch PC of caller.
   4659     |  jmp ->vm_returnc
   4660     break;
   4661 
   4662   /* ---------------------------------------------------------------------- */
   4663 
   4664   default:
   4665     fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
   4666     exit(2);
   4667     break;
   4668   }
   4669 }
   4670 
   4671 static int build_backend(BuildCtx *ctx)
   4672 {
          /* Emit the whole backend: build_subroutines() output first, then one
          ** build_ins() call per opcode, in ascending opcode order.
          ** Returns BC__MAX, the same count that is passed to dasm_growpc().
          */
   4673   int bc = 0, nops = BC__MAX;
   4674   dasm_growpc(Dst, nops);
   4675   build_subroutines(ctx);
          /* Switch DynASM output to the code_op section before the opcodes. */
   4676   |.code_op
   4677   for (; bc < nops; bc++)
   4678     build_ins(ctx, (BCOp)bc, bc);
   4679   return nops;
   4680 }
   4681 
   4682 /* Emit pseudo frame-info for all assembler functions.
        **
        ** Writes hand-encoded DWARF call-frame records (CIEs and FDEs) as
        ** assembler text to ctx->fp, selected by ctx->mode:
        **   BUILD_elfasm:  a .debug_frame section, plus an .eh_frame section
        **                  unless LJ_NO_UNWIND is set.
        **   BUILD_machasm: an __eh_frame section with one FDE per symbol
        **                  (this case is only compiled if LJ_NO_UNWIND is unset).
        **   other modes:   nothing is emitted.
        ** With LJ_HASFFI, lj_vm_ffi_call gets a separate FDE, since its CFA is
        ** defined relative to rbp instead of a fixed CFRAME_SIZE offset.
        */
   4683 static void emit_asm_debug(BuildCtx *ctx)
   4684 {
          /* Byte offset of the vm_ffi_call global from the start of the code.
          ** The ELF output uses it as the length of the main FDE and gives the
          ** ffi_call FDE the remaining codesz - fcofs bytes, i.e. everything
          ** from that global up to the end of the code.
          */
   4685   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
   4686   switch (ctx->mode) {
   4687   case BUILD_elfasm:
   4688     fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
            /* CIE for .debug_frame: id 0xffffffff, version 1, empty augmentation,
            ** code alignment 1, data alignment -8, return address column 0x10.
            ** Initial rules: def_cfa r7 (rsp) + 8; return address at CFA-8.
            */
   4689     fprintf(ctx->fp,
   4690 	".Lframe0:\n"
   4691 	"\t.long .LECIE0-.LSCIE0\n"
   4692 	".LSCIE0:\n"
   4693 	"\t.long 0xffffffff\n"
   4694 	"\t.byte 0x1\n"
   4695 	"\t.string \"\"\n"
   4696 	"\t.uleb128 0x1\n"
   4697 	"\t.sleb128 -8\n"
   4698 	"\t.byte 0x10\n"
   4699 	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
   4700 	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
   4701 	"\t.align 8\n"
   4702 	".LECIE0:\n\n");
            /* FDE covering fcofs bytes starting at .Lbegin, with the CFA at
            ** CFRAME_SIZE. The register offsets below are factored by the data
            ** alignment (-8), e.g. rbp is recorded at CFA-16.
            ** Only with LJ_NO_UNWIND are the r13/r12 slots recorded here, too.
            */
   4703     fprintf(ctx->fp,
   4704 	".LSFDE0:\n"
   4705 	"\t.long .LEFDE0-.LASFDE0\n"
   4706 	".LASFDE0:\n"
   4707 	"\t.long .Lframe0\n"
   4708 	"\t.quad .Lbegin\n"
   4709 	"\t.quad %d\n"
   4710 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
   4711 	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
   4712 	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
   4713 	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
   4714 	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
   4715 #if LJ_NO_UNWIND
   4716 	"\t.byte 0x8d\n\t.uleb128 0x6\n"	/* offset r13 */
   4717 	"\t.byte 0x8c\n\t.uleb128 0x7\n"	/* offset r12 */
   4718 #endif
   4719 	"\t.align 8\n"
   4720 	".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
   4721 #if LJ_HASFFI
            /* FDE for lj_vm_ffi_call, covering the remaining codesz - fcofs bytes.
            ** Only rbp and rbx are recorded and the CFA register becomes rbp.
            */
   4722     fprintf(ctx->fp,
   4723 	".LSFDE1:\n"
   4724 	"\t.long .LEFDE1-.LASFDE1\n"
   4725 	".LASFDE1:\n"
   4726 	"\t.long .Lframe0\n"
   4727 	"\t.quad lj_vm_ffi_call\n"
   4728 	"\t.quad %d\n"
   4729 	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
   4730 	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
   4731 	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
   4732 	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
   4733 	"\t.align 8\n"
   4734 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
   4735 #endif
   4736 #if !LJ_NO_UNWIND
            /* The same frame layout again, this time as an .eh_frame section.
            ** Solaris (__sun__ && __svr4__) gets the @unwind section type.
            */
   4737 #if (defined(__sun__) && defined(__svr4__))
   4738     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
   4739 #else
   4740     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
   4741 #endif
            /* CIE with id 0 and augmentation "zPR": 6 bytes of augmentation data
            ** holding the personality encoding, a pc-relative reference to
            ** lj_err_unwind_dwarf and the pointer encoding used by the FDEs.
            */
   4742     fprintf(ctx->fp,
   4743 	".Lframe1:\n"
   4744 	"\t.long .LECIE1-.LSCIE1\n"
   4745 	".LSCIE1:\n"
   4746 	"\t.long 0\n"
   4747 	"\t.byte 0x1\n"
   4748 	"\t.string \"zPR\"\n"
   4749 	"\t.uleb128 0x1\n"
   4750 	"\t.sleb128 -8\n"
   4751 	"\t.byte 0x10\n"
   4752 	"\t.uleb128 6\n"			/* augmentation length */
   4753 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
   4754 	"\t.long lj_err_unwind_dwarf-.\n"
   4755 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
   4756 	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
   4757 	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
   4758 	"\t.align 8\n"
   4759 	".LECIE1:\n\n");
            /* Main FDE: refers back to its CIE by relative offset and encodes
            ** the start address pc-relative in 32 bits; length is fcofs bytes.
            ** Unlike .debug_frame, r13/r12 are never recorded here.
            */
   4760     fprintf(ctx->fp,
   4761 	".LSFDE2:\n"
   4762 	"\t.long .LEFDE2-.LASFDE2\n"
   4763 	".LASFDE2:\n"
   4764 	"\t.long .LASFDE2-.Lframe1\n"
   4765 	"\t.long .Lbegin-.\n"
   4766 	"\t.long %d\n"
   4767 	"\t.uleb128 0\n"			/* augmentation length */
   4768 	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
   4769 	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
   4770 	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
   4771 	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
   4772 	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
   4773 	"\t.align 8\n"
   4774 	".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
   4775 #if LJ_HASFFI
            /* Second CIE with augmentation "zR" only, i.e. without a personality
            ** routine. It is used solely by the lj_vm_ffi_call FDE below.
            */
   4776     fprintf(ctx->fp,
   4777 	".Lframe2:\n"
   4778 	"\t.long .LECIE2-.LSCIE2\n"
   4779 	".LSCIE2:\n"
   4780 	"\t.long 0\n"
   4781 	"\t.byte 0x1\n"
   4782 	"\t.string \"zR\"\n"
   4783 	"\t.uleb128 0x1\n"
   4784 	"\t.sleb128 -8\n"
   4785 	"\t.byte 0x10\n"
   4786 	"\t.uleb128 1\n"			/* augmentation length */
   4787 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
   4788 	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
   4789 	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
   4790 	"\t.align 8\n"
   4791 	".LECIE2:\n\n");
            /* FDE for lj_vm_ffi_call: same rules as in .debug_frame above. */
   4792     fprintf(ctx->fp,
   4793 	".LSFDE3:\n"
   4794 	"\t.long .LEFDE3-.LASFDE3\n"
   4795 	".LASFDE3:\n"
   4796 	"\t.long .LASFDE3-.Lframe2\n"
   4797 	"\t.long lj_vm_ffi_call-.\n"
   4798 	"\t.long %d\n"
   4799 	"\t.uleb128 0\n"			/* augmentation length */
   4800 	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
   4801 	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
   4802 	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
   4803 	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
   4804 	"\t.align 8\n"
   4805 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
   4806 #endif
   4807 #endif
   4808     break;
   4809 #if !LJ_NO_UNWIND
   4810   /* Mental note: never let Apple design an assembler.
   4811   ** Or a linker. Or a plastic case. But I digress.
   4812   */
   4813   case BUILD_machasm: {
   4814 #if LJ_HASFFI
            /* Size of _lj_vm_ffi_call as found in the symbol loop (0 = not seen). */
   4815     int fcsize = 0;
   4816 #endif
   4817     int i;
   4818     fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
            /* CIE shared by all regular symbols. Mach-O flavour of the "zPR" CIE:
            ** LEB128 values are written as plain .byte directives (128-8 is the
            ** one-byte SLEB128 encoding of -8) and the personality routine is
            ** referenced indirectly via its GOT entry.
            */
   4819     fprintf(ctx->fp,
   4820 	"EH_frame1:\n"
   4821 	"\t.set L$set$x,LECIEX-LSCIEX\n"
   4822 	"\t.long L$set$x\n"
   4823 	"LSCIEX:\n"
   4824 	"\t.long 0\n"
   4825 	"\t.byte 0x1\n"
   4826 	"\t.ascii \"zPR\\0\"\n"
   4827 	"\t.byte 0x1\n"
   4828 	"\t.byte 128-8\n"
   4829 	"\t.byte 0x10\n"
   4830 	"\t.byte 6\n"				/* augmentation length */
   4831 	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
   4832 	"\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
   4833 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
   4834 	"\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
   4835 	"\t.byte 0x80+0x10\n\t.byte 0x1\n"
   4836 	"\t.align 3\n"
   4837 	"LECIEX:\n\n");
            /* One FDE per non-empty symbol, labelled <name>.eh; the symbol index
            ** keeps the local labels unique. A symbol's size is the distance to
            ** the next symbol's offset.
            ** NOTE(review): the last iteration reads ctx->sym[nsym], so the table
            ** presumably carries a trailing end entry -- confirm.
            ** NOTE(review): CFRAME_SIZE is written with a single .byte where a
            ** ULEB128 is expected, which is only valid below 128 -- confirm.
            */
   4838     for (i = 0; i < ctx->nsym; i++) {
   4839       const char *name = ctx->sym[i].name;
   4840       int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
   4841       if (size == 0) continue;
   4842 #if LJ_HASFFI
              /* Handled after the loop, since it needs a different CIE. */
   4843       if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
   4844 #endif
   4845       fprintf(ctx->fp,
   4846 	  "%s.eh:\n"
   4847 	  "LSFDE%d:\n"
   4848 	  "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
   4849 	  "\t.long L$set$%d\n"
   4850 	  "LASFDE%d:\n"
   4851 	  "\t.long LASFDE%d-EH_frame1\n"
   4852 	  "\t.long %s-.\n"
   4853 	  "\t.long %d\n"
   4854 	  "\t.byte 0\n"				/* augmentation length */
   4855 	  "\t.byte 0xe\n\t.byte %d\n"		/* def_cfa_offset */
   4856 	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
   4857 	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
   4858 	  "\t.byte 0x8f\n\t.byte 0x4\n"		/* offset r15 */
   4859 	  "\t.byte 0x8e\n\t.byte 0x5\n"		/* offset r14 */
   4860 	  "\t.align 3\n"
   4861 	  "LEFDE%d:\n\n",
   4862 	  name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
   4863     }
   4864 #if LJ_HASFFI
            /* Emit the "zR" CIE (no personality) and the _lj_vm_ffi_call FDE, but
            ** only if that symbol was seen with a non-zero size in the loop.
            */
   4865     if (fcsize) {
   4866       fprintf(ctx->fp,
   4867 	  "EH_frame2:\n"
   4868 	  "\t.set L$set$y,LECIEY-LSCIEY\n"
   4869 	  "\t.long L$set$y\n"
   4870 	  "LSCIEY:\n"
   4871 	  "\t.long 0\n"
   4872 	  "\t.byte 0x1\n"
   4873 	  "\t.ascii \"zR\\0\"\n"
   4874 	  "\t.byte 0x1\n"
   4875 	  "\t.byte 128-8\n"
   4876 	  "\t.byte 0x10\n"
   4877 	  "\t.byte 1\n"				/* augmentation length */
   4878 	  "\t.byte 0x1b\n"			/* pcrel|sdata4 */
   4879 	  "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
   4880 	  "\t.byte 0x80+0x10\n\t.byte 0x1\n"
   4881 	  "\t.align 3\n"
   4882 	  "LECIEY:\n\n");
   4883       fprintf(ctx->fp,
   4884 	  "_lj_vm_ffi_call.eh:\n"
   4885 	  "LSFDEY:\n"
   4886 	  "\t.set L$set$yy,LEFDEY-LASFDEY\n"
   4887 	  "\t.long L$set$yy\n"
   4888 	  "LASFDEY:\n"
   4889 	  "\t.long LASFDEY-EH_frame2\n"
   4890 	  "\t.long _lj_vm_ffi_call-.\n"
   4891 	  "\t.long %d\n"
   4892 	  "\t.byte 0\n"				/* augmentation length */
   4893 	  "\t.byte 0xe\n\t.byte 16\n"		/* def_cfa_offset */
   4894 	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
   4895 	  "\t.byte 0xd\n\t.byte 0x6\n"		/* def_cfa_register rbp */
   4896 	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
   4897 	  "\t.align 3\n"
   4898 	  "LEFDEY:\n\n", fcsize);
   4899     }
   4900 #endif
   4901     fprintf(ctx->fp, ".subsections_via_symbols\n");
   4902     }
   4903     break;
   4904 #endif
   4905   default:  /* Difficult for other modes. */
   4906     break;
   4907   }
   4908 }
   4909