ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

vm_ppc.dasc (140359B)


      1 |// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
      2 |// Bytecode interpreter, fast functions and helper functions.
      3 |// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      4 |
      5 |.arch ppc  // Select DynASM's PPC backend.
      6 |.section code_op, code_sub  // Two code sections; build_subroutines switches to .code_sub.
      7 |
      8 |.actionlist build_actionlist  // C identifier for the generated action list.
      9 |.globals GLOB_  // Prefix for the generated enum of global labels (->name).
     10 |.globalnames globnames  // C identifier for the array of global label names.
     11 |.externnames extnames  // C identifier for the array of names referenced via "extern".
     12 |
     13 |// Note: The ragged indentation of the instructions is intentional.
     14 |//       The starting columns indicate data dependencies.
     15 |
     16 |//-----------------------------------------------------------------------
     17 |
     18 |// DynASM defines used by the PPC port:
     19 |//
     20 |// P64     64 bit pointers (only for GPR64 testing).
     21 |//         Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
     22 |// GPR64   64 bit registers (but possibly 32 bit pointers, e.g. PS3).
     23 |//         Affects reg saves, stack layout, carry/overflow/dot flags etc.
     24 |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
     25 |// TOC     Need table of contents (64 bit or 32 bit variant, e.g. PS3).
     26 |//         Function pointers are really a struct: code, TOC, env (optional).
     27 |// TOCENV  Function pointers have an environment pointer, too (not on PS3).
     28 |// PPE     Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
     29 |//         Must avoid (slow) micro-coded instructions.
     30 |
     31 |.if P64  // 64 bit pointers: pointer-sized loads/stores use doubleword instructions.
     32 |.define TOC, 1  // P64 always enables TOC handling ...
     33 |.define TOCENV, 1  // ... and the environment pointer.
     34 |.macro lpx, a, b, c; ldx a, b, c; .endmacro  // Load pointer, indexed form.
     35 |.macro lp, a, b; ld a, b; .endmacro  // Load pointer.
     36 |.macro stp, a, b; std a, b; .endmacro  // Store pointer.
     37 |.define decode_OPP, decode_OP8  // Opcode scaled by 8 to index pointer-sized dispatch entries.
     38 |.if FFI
     39 |// Missing: Calling conventions, 64 bit regs, TOC.
     40 |.error lib_ffi not yet implemented for PPC64
     41 |.endif
     42 |.else  // 32 bit pointers: the same macro names map to word instructions.
     43 |.macro lpx, a, b, c; lwzx a, b, c; .endmacro
     44 |.macro lp, a, b; lwz a, b; .endmacro
     45 |.macro stp, a, b; stw a, b; .endmacro
     46 |.define decode_OPP, decode_OP4  // Opcode scaled by 4.
     47 |.endif
     48 |
     49 |// Convenience macros for TOC handling.
     50 |.if TOC
     51 |// Linker needs a TOC patch area for every external call relocation.
     52 |.macro blex, target; bl extern target@plt; nop; .endmacro  // External call followed by a nop (the patch area noted above).
     53 |.macro .toc, a, b; a, b; .endmacro  // Emit the instruction "a, b" (TOC builds only).
     54 |.if P64  // Offsets are 1 and 2 pointer sizes. Presumably the TOC/env slots of a function descriptor (code, TOC, env) -- confirm at use sites.
     55 |.define TOC_OFS,	 8
     56 |.define ENV_OFS,	16
     57 |.else
     58 |.define TOC_OFS,	4
     59 |.define ENV_OFS,	8
     60 |.endif
     61 |.else  // No TOC.
     62 |.macro blex, target; bl extern target@plt; .endmacro  // Plain external call.
     63 |.macro .toc, a, b; .endmacro  // Expands to nothing without TOC.
     64 |.endif
     65 |.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro  // Emit "a, b" only when TOCENV is set.
     66 |
     67 |.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro  // Emit the instruction "a, b" only when GPR64 is set.
     68 |
     69 |.macro andix., y, a, i  // y = a & i (immediate i), with cr0 reflecting the result.
     70 |.if PPE  // PPE: use rlwinm + cmpwi instead of andi. (presumably micro-coded there, see the PPE note above).
     71 |  rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i)  // Keeps the bit range from highest to lowest set bit of i (assuming lj_fls/lj_ffs = find last/first set). Matches a & i only if i is one contiguous run of bits.
     72 |  cmpwi y, 0  // Set cr0 from the masked value, as the record form would.
     73 |.else
     74 |  andi. y, a, i
     75 |.endif
     76 |.endmacro
     77 |
     78 |.macro clrso, reg  // Clear the XER overflow state. reg is clobbered only in the PPE variant.
     79 |.if PPE
     80 |  li reg, 0
     81 |  mtxer reg  // XER = 0 (clears all XER fields, not just SO/OV).
     82 |.else
     83 |  mcrxr cr0  // Moves XER[0:3] into cr0 and clears those XER bits. Clobbers cr0.
     84 |.endif
     85 |.endmacro
     86 |
     87 |.macro checkov, reg, noov  // Branch to noov if the XER OV bit is clear. Clears the overflow state either way. Clobbers cr0 (and reg on PPE).
     88 |.if PPE
     89 |  mfxer reg
     90 |  add reg, reg, reg  // Shift left by one: the OV bit (just below SO) lands in the sign bit.
     91 |  cmpwi reg, 0  // cr0.lt <=> OV was set.
     92 |   li reg, 0
     93 |   mtxer reg  // XER = 0.
     94 |  bgey noov  // Sign bit clear: no overflow.
     95 |.else
     96 |  mcrxr cr0  // cr0 = {SO, OV, CA, 0}, so OV ends up in cr0.gt. XER[0:3] is cleared.
     97 |  bley noov  // cr0.gt clear: no overflow.
     98 |.endif
     99 |.endmacro
    100 |
    101 |//-----------------------------------------------------------------------
    102 |
    103 |// Fixed register assignments for the interpreter.
    104 |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
    105 |
    106 |// The following must be C callee-save (but BASE is often refetched).
    107 |.define BASE,		r14	// Base of current Lua stack frame.
    108 |.define KBASE,		r15	// Constants of current Lua function.
    109 |.define PC,		r16	// Next PC.
    110 |.define DISPATCH,	r17	// Opcode dispatch table.
    111 |.define LREG,		r18	// Register holding lua_State (also in SAVE_L).
    112 |.define MULTRES,	r19	// Size of multi-result: (nresults+1)*8.
    113 |.define JGL,		r31	// On-trace: global_State + 32768.
    114 |
    115 |// Constants for type-comparisons, stores and conversions. C callee-save.
    116 |.define TISNUM,	r22	// Loaded with LJ_TISNUM on VM entry.
    117 |.define TISNIL,	r23	// Loaded with LJ_TNIL on VM entry.
    118 |.define ZERO,		r24	// Loaded with 0 on VM entry.
    119 |.define TOBIT,		f30	// 2^52 + 2^51.
    120 |.define TONUM,		f31	// 2^52 + 2^51 + 2^31.
    121 |
    122 |// The following temporaries are not saved across C calls, except for RA.
    123 |.define RA,		r20	// Callee-save.
    124 |.define RB,		r10	// Operand B*8 after decode_RB8 in ins_NEXT2.
    125 |.define RC,		r11	// Operand C*8 after decode_RC8 in ins_NEXT2. Also used as nargs*8.
    126 |.define RD,		r12	// Operand D*8 after decode_RD8 in ins_NEXT2.
    127 |.define INS,		r7	// Overlaps CARG5.
    128 |
    129 |.define TMP0,		r0
    130 |.define TMP1,		r8
    131 |.define TMP2,		r9
    132 |.define TMP3,		r6	// Overlaps CARG4.
    133 |
    134 |// Saved temporaries.
    135 |.define SAVE0,		r21	// Within the r14-r31 range saved by saveregs.
    136 |
    137 |// Calling conventions.
    138 |.define CARG1,		r3
    139 |.define CARG2,		r4
    140 |.define CARG3,		r5
    141 |.define CARG4,		r6	// Overlaps TMP3.
    142 |.define CARG5,		r7	// Overlaps INS.
    143 |
    144 |.define FARG1,		f1
    145 |.define FARG2,		f2
    146 |
    147 |.define CRET1,		r3	// Same register as CARG1.
    148 |.define CRET2,		r4	// Same register as CARG2.
    149 |
    150 |.define TOCREG,	r2	// TOC register (only used by C code).
    151 |.define ENVREG,	r11	// Environment pointer (nested C functions).
    152 |
    153 |// Stack layout while in interpreter. Must match with lj_frame.h.
    154 |.if GPR64  // 64 bit registers: GPR save slots are 8 bytes wide.
    155 |.if FRAME32  // 64 bit registers with the 32 bit frame layout (see FRAME32 above).
    156 |
    157 |//			456(sp) // \ 32/64 bit C frame info
    158 |.define TONUM_LO,	452(sp) // |
    159 |.define TONUM_HI,	448(sp) // |
    160 |.define TMPD_LO,	444(sp) // |
    161 |.define TMPD_HI,	440(sp) // |
    162 |.define SAVE_CR,	432(sp) // | 64 bit CR save.
    163 |.define SAVE_ERRF,	424(sp) //  > Parameter save area.
    164 |.define SAVE_NRES,	420(sp) // |
    165 |.define SAVE_L,	416(sp) // |
    166 |.define SAVE_PC,	412(sp) // |
    167 |.define SAVE_MULTRES,	408(sp) // |
    168 |.define SAVE_CFRAME,	400(sp) // / 64 bit C frame chain.
    169 |//			392(sp) // Reserved.
    170 |.define CFRAME_SPACE,	384     // Delta for sp.
    171 |// Back chain for sp:	384(sp) <-- sp entering interpreter
    172 |.define SAVE_LR,	376(sp) // 32 bit LR stored in hi-part.
    173 |.define SAVE_GPR_,	232     // .. 232+18*8: 64 bit GPR saves.
    174 |.define SAVE_FPR_,	88      // .. 88+18*8: 64 bit FPR saves.
    175 |//			80(sp) // Needed for 16 byte stack frame alignment.
    176 |//			16(sp)  // Callee parameter save area (ABI mandated).
    177 |//			8(sp)   // Reserved
    178 |// Back chain for sp:	0(sp)   <-- sp while in interpreter
    179 |// 32 bit sp stored in hi-part of 0(sp).
    180 |
    181 |.define TMPD_BLO,	447(sp)  // TMPD_LO+3: last byte of the low word.
    182 |.define TMPD,		TMPD_HI  // Address of the whole 8 byte temporary.
    183 |.define TONUM_D,	TONUM_HI  // Address of the whole 8 byte TONUM temporary.
    184 |
    185 |.else  // GPR64 with the 64 bit frame layout.
    186 |
    187 |//			508(sp) // \ 32 bit C frame info.
    188 |.define SAVE_ERRF,	472(sp) // |
    189 |.define SAVE_NRES,	468(sp) // |
    190 |.define SAVE_L,	464(sp) //  > Parameter save area.
    191 |.define SAVE_PC,	460(sp) // |
    192 |.define SAVE_MULTRES,	456(sp) // |
    193 |.define SAVE_CFRAME,	448(sp) // / 64 bit C frame chain.
    194 |.define SAVE_LR,	416(sp)  // CFRAME_SPACE+16, i.e. inside the frame of the caller.
    195 |.define CFRAME_SPACE,	400     // Delta for sp.
    196 |// Back chain for sp:	400(sp) <-- sp entering interpreter
    197 |.define SAVE_FPR_,	256     // .. 256+18*8: 64 bit FPR saves.
    198 |.define SAVE_GPR_,	112     // .. 112+18*8: 64 bit GPR saves.
    199 |//			48(sp)  // Callee parameter save area (ABI mandated).
    200 |.define SAVE_TOC,	40(sp)  // TOC save area.
    201 |.define TMPD_LO,	36(sp)  // \ Link editor temp (ABI mandated).
    202 |.define TMPD_HI,	32(sp)  // /
    203 |.define TONUM_LO,	28(sp)  // \ Compiler temp (ABI mandated).
    204 |.define TONUM_HI,	24(sp)  // /
    205 |// Next frame lr:	16(sp)
    206 |.define SAVE_CR,	8(sp)  // 64 bit CR save.
    207 |// Back chain for sp:	0(sp)	<-- sp while in interpreter
    208 |
    209 |.define TMPD_BLO,	39(sp)  // TMPD_LO+3: last byte of the low word.
    210 |.define TMPD,		TMPD_HI  // Address of the whole 8 byte temporary.
    211 |.define TONUM_D,	TONUM_HI  // Address of the whole 8 byte TONUM temporary.
    212 |
    213 |.endif
    214 |.else  // 32 bit registers: GPR save slots are 4 bytes wide.
    215 |
    216 |.define SAVE_LR,	276(sp)  // CFRAME_SPACE+4, i.e. inside the frame of the caller.
    217 |.define CFRAME_SPACE,	272     // Delta for sp.
    218 |// Back chain for sp:	272(sp) <-- sp entering interpreter
    219 |.define SAVE_FPR_,	128     // .. 128+18*8: 64 bit FPR saves.
    220 |.define SAVE_GPR_,	56      // .. 56+18*4: 32 bit GPR saves.
    221 |.define SAVE_CR,	52(sp)  // 32 bit CR save.
    222 |.define SAVE_ERRF,	48(sp)  // 32 bit C frame info.
    223 |.define SAVE_NRES,	44(sp)
    224 |.define SAVE_CFRAME,	40(sp)
    225 |.define SAVE_L,	36(sp)
    226 |.define SAVE_PC,	32(sp)
    227 |.define SAVE_MULTRES,	28(sp)
    228 |.define UNUSED1,	24(sp)
    229 |.define TMPD_LO,	20(sp)
    230 |.define TMPD_HI,	16(sp)
    231 |.define TONUM_LO,	12(sp)
    232 |.define TONUM_HI,	8(sp)
    233 |// Next frame lr:	4(sp)
    234 |// Back chain for sp:	0(sp)	<-- sp while in interpreter
    235 |
    236 |.define TMPD_BLO,	23(sp)  // TMPD_LO+3: last byte of the low word.
    237 |.define TMPD,		TMPD_HI  // Address of the whole 8 byte temporary.
    238 |.define TONUM_D,	TONUM_HI  // Address of the whole 8 byte TONUM temporary.
    239 |
    240 |.endif
    241 |
    242 |.macro save_, reg  // Store GPR r<reg> and FPR f<reg> into slot (reg-14) of the save areas. Used with reg = 14..31.
    243 |.if GPR64
    244 |  std r..reg, SAVE_GPR_+(reg-14)*8(sp)  // 8 byte GPR slots.
    245 |.else
    246 |  stw r..reg, SAVE_GPR_+(reg-14)*4(sp)  // 4 byte GPR slots.
    247 |.endif
    248 |  stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)  // FPR slots are always 8 bytes.
    249 |.endmacro
    250 |.macro rest_, reg  // Inverse of save_: reload r<reg> and f<reg> from their slots.
    251 |.if GPR64
    252 |  ld r..reg, SAVE_GPR_+(reg-14)*8(sp)
    253 |.else
    254 |  lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
    255 |.endif
    256 |  lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
    257 |.endmacro
    258 |
    259 |.macro saveregs  // Allocate the C frame and save r14-r31, f14-f31, LR and CR (plus TOC in TOC builds). Clobbers r0.
    260 |.if GPR64 and not FRAME32
    261 |  stdu sp, -CFRAME_SPACE(sp)  // Store back chain and move sp down in one instruction.
    262 |.else
    263 |  stwu sp, -CFRAME_SPACE(sp)  // Same, with a 32 bit back chain.
    264 |.endif
    265 |  save_ 14; save_ 15; save_ 16
    266 |  mflr r0  // Fetch LR early. It is stored a few saves later.
    267 |  save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22
    268 |.if GPR64 and not FRAME32
    269 |  std r0, SAVE_LR
    270 |.else
    271 |  stw r0, SAVE_LR
    272 |.endif
    273 |  save_ 23; save_ 24; save_ 25
    274 |  mfcr r0  // r0 is reused for the CR image.
    275 |  save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31
    276 |.if GPR64
    277 |  std r0, SAVE_CR
    278 |.else
    279 |  stw r0, SAVE_CR
    280 |.endif
    281 |  .toc std TOCREG, SAVE_TOC  // TOC builds only (the .toc macro is empty otherwise).
    282 |.endmacro
    283 |
    284 |.macro restoreregs  // Undo saveregs: reload LR, CR fields and r14-r31/f14-f31, then pop the C frame. Clobbers r0 and r12.
    285 |.if GPR64 and not FRAME32
    286 |  ld r0, SAVE_LR
    287 |.else
    288 |  lwz r0, SAVE_LR
    289 |.endif
    290 |.if GPR64
    291 |  ld r12, SAVE_CR
    292 |.else
    293 |  lwz r12, SAVE_CR
    294 |.endif
    295 |  rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19
    296 |  mtlr r0;
    297 |.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif  // Mask 0x38 selects CR fields 2-4. PPE restores them one field at a time (0x20 here, 0x10 and 0x08 below).
    298 |  rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25
    299 |.if PPE; mtocrf 0x10, r12; .endif  // CR field 3.
    300 |  rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31
    301 |.if PPE; mtocrf 0x08, r12; .endif  // CR field 4.
    302 |  addi sp, sp, CFRAME_SPACE  // Pop the frame allocated by saveregs.
    303 |.endmacro
    304 |
    305 |// Type definitions. Some of these are only used for documentation.
    306 |.type L,		lua_State,	LREG  // L->field addresses lua_State fields via LREG by default.
    307 |.type GL,		global_State  // No default register: used as GL:reg->field.
    308 |.type TVALUE,		TValue
    309 |.type GCOBJ,		GCobj
    310 |.type STR,		GCstr
    311 |.type TAB,		GCtab
    312 |.type LFUNC,		GCfuncL  // E.g. LFUNC:RB->pc in ins_callt.
    313 |.type CFUNC,		GCfuncC
    314 |.type PROTO,		GCproto
    315 |.type UPVAL,		GCupval
    316 |.type NODE,		Node
    317 |.type NARGS8,		int  // Annotates a register holding nargs*8 (e.g. NARGS8:RC).
    318 |.type TRACE,		GCtrace
    319 |.type SBUF,		SBuf
    320 |
    321 |//-----------------------------------------------------------------------
    322 |
    323 |// Trap for not-yet-implemented parts.
    324 |.macro NYI; tw 4, sp, sp; .endmacro  // Trap-if-equal on sp == sp, i.e. always traps.
    325 |
    326 |// int/FP conversions.
    327 |.macro tonum_i, freg, reg  // freg = (double)(signed int)reg. Clobbers reg and the TONUM_LO stack slot.
    328 |  xoris reg, reg, 0x8000  // Flip the sign bit: biases the signed value by 2^31.
    329 |  stw reg, TONUM_LO  // Low word of the double. TONUM_HI is preset to 0x43380000 on VM entry.
    330 |  lfd freg, TONUM_D  // freg = 2^52 + 2^51 + biased value.
    331 |  fsub freg, freg, TONUM  // Subtract 2^52 + 2^51 + 2^31 to remove the constant and the bias.
    332 |.endmacro
    333 |
    334 |.macro tonum_u, freg, reg  // freg = (double)(unsigned int)reg. Clobbers the TONUM_LO stack slot. reg is preserved.
    335 |  stw reg, TONUM_LO
    336 |  lfd freg, TONUM_D  // freg = 2^52 + 2^51 + reg.
    337 |  fsub freg, freg, TOBIT  // Subtract 2^52 + 2^51.
    338 |.endmacro
    339 |
    340 |.macro toint, reg, freg, tmpfreg  // reg = (int)freg, rounding toward zero. Clobbers tmpfreg and the TMPD stack slot.
    341 |  fctiwz tmpfreg, freg  // Convert to a 32 bit integer held in the low word of tmpfreg.
    342 |  stfd tmpfreg, TMPD  // Move FPR -> GPR through memory.
    343 |  lwz reg, TMPD_LO
    344 |.endmacro
    345 |
    346 |.macro toint, reg, freg  // Two-operand form: uses freg as its own temporary, so freg is overwritten.
    347 |  toint reg, freg, freg
    348 |.endmacro
    349 |
    350 |//-----------------------------------------------------------------------
    351 |
    352 |// Access to frame relative to BASE.
    353 |.define FRAME_PC,	-8  // Word at BASE-8: frame link / saved PC (see lwz PC, FRAME_PC(BASE)).
    354 |.define FRAME_FUNC,	-4  // Word at BASE-4: function object (see lwz LFUNC:RB, FRAME_FUNC(BASE)).
    355 |
    356 |// Instruction decode.
    357 |.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro  // dst = (ins & 0xff) * 4.
    358 |.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro  // dst = (ins & 0xff) * 8.
    359 |.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro  // dst = ((ins >> 8) & 0xff) * 8.
    360 |.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro  // dst = (ins >> 24) * 8.
    361 |.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro  // dst = ((ins >> 16) & 0xff) * 8.
    362 |.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro  // dst = (ins >> 16) * 8.
    363 |
    364 |.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro  // dst = ins & 0xff (unscaled opcode).
    365 |.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro  // dst = (ins >> 16) * 4.
    366 |
    367 |// Instruction fetch.
    368 |.macro ins_NEXT1  // Fetch the next instruction word into INS and advance PC.
    369 |  lwz INS, 0(PC)
    370 |   addi PC, PC, 4
    371 |.endmacro
    372 |// Instruction decode+dispatch. Note: optimized for e300!
    373 |.macro ins_NEXT2  // Decode INS into RA/RB/RC/RD (all scaled by 8) and jump to the opcode handler. Clobbers TMP0, TMP1, ctr.
    374 |  decode_OPP TMP1, INS  // TMP1 = opcode * pointer size.
    375 |  lpx TMP0, DISPATCH, TMP1  // Handler address from the dispatch table.
    376 |  mtctr TMP0
    377 |   decode_RB8 RB, INS  // All operand forms are decoded. RD overlaps the RB/RC fields.
    378 |   decode_RD8 RD, INS
    379 |   decode_RA8 RA, INS
    380 |   decode_RC8 RC, INS
    381 |  bctr
    382 |.endmacro
    383 |.macro ins_NEXT  // Full fetch + decode + dispatch.
    384 |  ins_NEXT1
    385 |  ins_NEXT2
    386 |.endmacro
    387 |
    388 |// Instruction footer.
    389 |.if 1  // Compile-time switch: 1 selects replicated dispatch. The .else branch is only used if this is changed to 0.
    390 |  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
    391 |  .define ins_next, ins_NEXT
    392 |  .define ins_next_, ins_NEXT
    393 |  .define ins_next1, ins_NEXT1
    394 |  .define ins_next2, ins_NEXT2
    395 |.else
    396 |  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
    397 |  // Affects only certain kinds of benchmarks (and only with -j off).
    398 |  .macro ins_next
    399 |    b ->ins_next
    400 |  .endmacro
    401 |  .macro ins_next1  // Empty: the fetch happens at the shared ->ins_next.
    402 |  .endmacro
    403 |  .macro ins_next2
    404 |    b ->ins_next
    405 |  .endmacro
    406 |  .macro ins_next_  // Defines the single shared ->ins_next dispatch site.
    407 |  ->ins_next:
    408 |    ins_NEXT
    409 |  .endmacro
    410 |.endif
    411 |
    412 |// Call decode and dispatch.
    413 |.macro ins_callt  // Dispatch to the first instruction of the function in RB. Clobbers PC, INS, TMP0, TMP1, RA, ctr.
    414 |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
    415 |  lwz PC, LFUNC:RB->pc  // PC = start of the callee's bytecode.
    416 |  lwz INS, 0(PC)
    417 |   addi PC, PC, 4
    418 |  decode_OPP TMP1, INS
    419 |   decode_RA8 RA, INS
    420 |  lpx TMP0, DISPATCH, TMP1
    421 |   add RA, RA, BASE  // RA = BASE + A*8 (a pointer, unlike in ins_NEXT2).
    422 |  mtctr TMP0
    423 |  bctr
    424 |.endmacro
    425 |
    426 |.macro ins_call  // Like ins_callt, but first records the caller PC in the new frame.
    427 |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
    428 |  stw PC, FRAME_PC(BASE)
    429 |  ins_callt
    430 |.endmacro
    431 |
    432 |//-----------------------------------------------------------------------
    433 |
    434 |// Macros to test operand types.
    435 |.macro checknum, reg; cmplw reg, TISNUM; .endmacro  // Unsigned compare of a type tag against TISNUM. Result in cr0.
    436 |.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro  // Same, with the result in the given CR field.
    437 |.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro  // The checks below set cr0.eq if the tag in reg matches.
    438 |.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro
    439 |.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro
    440 |.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro
    441 |
    442 |.macro branch_RD  // PC += RD/2 - BCBIAS_J*4, i.e. a biased jump by D instructions (RD = D*8). Clobbers TMP0.
    443 |  srwi TMP0, RD, 1  // D*8 -> D*4 (byte offset).
    444 |  addis PC, PC, -(BCBIAS_J*4 >> 16)  // Remove the bias. Only exact if BCBIAS_J*4 is a multiple of 65536 (assumed, not shown here).
    445 |  add PC, PC, TMP0
    446 |.endmacro
    447 |
    448 |// Assumes DISPATCH is relative to GL. DISPATCH_GL(f) is used as a displacement: DISPATCH_GL(f)(DISPATCH).
    449 #define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
    450 #define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
    451 |// PC2PROTO(f): offset of GCproto field f minus sizeof(GCproto), i.e. relative to the end of the GCproto struct.
    452 #define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
    453 |
    454 |.macro hotcheck, delta, target  // Decrement the 16 bit hot counter selected by PC and branch to target when it goes negative. Clobbers TMP1, TMP2, cr0.
    455 |  rlwinm TMP1, PC, 31, 25, 30  // TMP1 = (PC >> 1) & 0x7e: halfword offset of one of 64 counters.
    456 |  addi TMP1, TMP1, GG_DISP2HOT  // Counters are addressed relative to DISPATCH.
    457 |  lhzx TMP2, DISPATCH, TMP1
    458 |  addic. TMP2, TMP2, -delta  // Record form: cr0 reflects the new counter value.
    459 |  sthx TMP2, DISPATCH, TMP1  // The decremented value is stored back even when branching.
    460 |  blt target
    461 |.endmacro
    462 |
    463 |.macro hotloop  // Hot counter check for loops: decrement by HOTCOUNT_LOOP, go to ->vm_hotloop on underflow.
    464 |  hotcheck HOTCOUNT_LOOP, ->vm_hotloop
    465 |.endmacro
    466 |
    467 |.macro hotcall  // Hot counter check for calls: decrement by HOTCOUNT_CALL, go to ->vm_hotcall on underflow.
    468 |  hotcheck HOTCOUNT_CALL, ->vm_hotcall
    469 |.endmacro
    470 |
    471 |// Set current VM state. Uses TMP0.
    472 |.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro  // TMP0 = ~LJ_VMST_<st> (token-pasted state name).
    473 |.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro  // Store TMP0 to global_State.vmstate. TMP0 must still hold the value from li_vmstate.
    474 |
    475 |// Move table write barrier back. Overwrites mark and tmp.
    476 |.macro barrierback, tab, mark, tmp  // mark must hold the current tab->marked on entry. Pushes tab onto the head of gc.grayagain.
    477 |  lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)  // tmp = old list head.
    478 |  // Assumes LJ_GC_BLACK is 0x04.
    479 |   rlwinm mark, mark, 0, 30, 28		// black2gray(tab)
    480 |  stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)  // tab becomes the new list head.
    481 |   stb mark, tab->marked  // Write back the mark byte with bit 0x04 cleared.
    482 |  stw tmp, tab->gclist  // Link the old head behind tab.
    483 |.endmacro
    484 |
    485 |//-----------------------------------------------------------------------
    486 
    487 /* Generate subroutines used by opcodes and other parts of the VM. */
    488 /* The .code_sub section should be last to help static branch prediction. */
    489 static void build_subroutines(BuildCtx *ctx)
    490 {
    491   |.code_sub
    492   |
    493   |//-----------------------------------------------------------------------
    494   |//-- Return handling ----------------------------------------------------
    495   |//-----------------------------------------------------------------------
    496   |
    497   |->vm_returnp:
    498   |  // See vm_return. Also: TMP2 = previous base.
    499   |  andix. TMP0, PC, FRAME_P  // Test the FRAME_P bit of the frame link held in PC.
    500   |   li TMP1, LJ_TTRUE  // Preloaded for the pcall path below.
    501   |  beq ->cont_dispatch  // Bit clear: not a pcall frame, handle it as a continuation frame.
    502   |
    503   |  // Return from pcall or xpcall fast func.
    504   |  lwz PC, FRAME_PC(TMP2)		// Fetch PC of previous frame.
    505   |  mr BASE, TMP2			// Restore caller base.
    506   |  // Prepending may overwrite the pcall frame, so do it at the end.
    507   |   stwu TMP1, FRAME_PC(RA)		// Prepend true to results.
    508   |
    509   |->vm_returnc:
    510   |  addi RD, RD, 8			// RD = (nresults+1)*8.
    511   |   andix. TMP0, PC, FRAME_TYPE  // cr0.eq <=> frame type bits are zero. Consumed by the last branch below.
    512   |  cmpwi cr1, RD, 0  // Uses cr1 so the cr0 result above stays intact.
    513   |  li CRET1, LUA_YIELD  // Status returned if the branch to vm_unwind_c_eh is taken.
    514   |  beq cr1, ->vm_unwind_c_eh  // RD == 0 after the increment: leave the VM with LUA_YIELD.
    515   |  mr MULTRES, RD
    516   |   beq ->BC_RET_Z			// Handle regular return to Lua.
    517   |
    518   |->vm_return:
    519   |  // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
    520   |  // TMP0 = PC & FRAME_TYPE
    521   |  cmpwi TMP0, FRAME_C
    522   |   rlwinm TMP2, PC, 0, 0, 28  // TMP2 = PC with the low 3 bits cleared (frame delta).
    523   |    li_vmstate C
    524   |   sub TMP2, BASE, TMP2		// TMP2 = previous base.
    525   |  bney ->vm_returnp  // Not a FRAME_C frame.
    526   |
    527   |  addic. TMP1, RD, -8  // TMP1 = nresults*8. cr0.eq if there is nothing to copy.
    528   |   stp TMP2, L->base
    529   |   lwz TMP2, SAVE_NRES
    530   |    subi BASE, BASE, 8  // Results are copied down, starting at BASE-8.
    531   |    st_vmstate
    532   |   slwi TMP2, TMP2, 3  // TMP2 = SAVE_NRES*8, same scale as RD.
    533   |  beq >2
    534   |1:
    535   |  addic. TMP1, TMP1, -8
    536   |   lfd f0, 0(RA)  // Copy one 8 byte slot from RA to BASE.
    537   |    addi RA, RA, 8
    538   |   stfd f0, 0(BASE)
    539   |    addi BASE, BASE, 8
    540   |  bney <1
    541   |
    542   |2:
    543   |  cmpw TMP2, RD			// More/less results wanted?
    544   |  bne >6
    545   |3:
    546   |  stp BASE, L->top			// Store new top.
    547   |
    548   |->vm_leave_cp:
    549   |  lp TMP0, SAVE_CFRAME		// Restore previous C frame.
    550   |   li CRET1, 0			// Ok return status for vm_pcall.
    551   |  stp TMP0, L->cframe
    552   |
    553   |->vm_leave_unw:
    554   |  restoreregs
    555   |  blr
    556   |
    557   |6:
    558   |  ble >7				// Less results wanted?
    559   |  // More results wanted. Check stack size and fill up results with nil.
    560   |  lwz TMP1, L->maxstack
    561   |  cmplw BASE, TMP1
    562   |  bge >8  // BASE has reached maxstack: grow the stack first.
    563   |  stw TISNIL, 0(BASE)  // Append one nil ...
    564   |  addi RD, RD, 8  // ... count it ...
    565   |  addi BASE, BASE, 8
    566   |  b <2  // ... and compare again.
    567   |
    568   |7:  // Less results wanted.
    569   |  subfic TMP3, TMP2, 0		// LUA_MULTRET+1 case?
    570   |   sub TMP0, RD, TMP2
    571   |  subfe TMP1, TMP1, TMP1		// TMP1 = TMP2 == 0 ? 0 : -1
    572   |   and TMP0, TMP0, TMP1
    573   |  sub BASE, BASE, TMP0		// Either keep top or shrink it.
    574   |  b <3
    575   |
    576   |8:  // Corner case: need to grow stack for filling up results.
    577   |  // This can happen if:
    578   |  // - A C function grows the stack (a lot).
    579   |  // - The GC shrinks the stack in between.
    580   |  // - A return back from a lua_call() with (high) nresults adjustment.
    581   |  stp BASE, L->top			// Save current top held in BASE (yes).
    582   |   mr SAVE0, RD  // RD is not preserved across the C call. SAVE0 is.
    583   |  srwi CARG2, TMP2, 3  // n = SAVE_NRES (TMP2 was scaled by 8).
    584   |  mr CARG1, L
    585   |  bl extern lj_state_growstack	// (lua_State *L, int n)
    586   |    lwz TMP2, SAVE_NRES  // Recompute TMP2: it is a temporary not saved across C calls.
    587   |   mr RD, SAVE0
    588   |    slwi TMP2, TMP2, 3
    589   |  lp BASE, L->top			// Need the (realloced) L->top in BASE.
    590   |  b <2
    591   |
    592   |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
    593   |  // (void *cframe, int errcode)
    594   |  mr sp, CARG1  // sp = cframe. Must happen before the next line: CRET1 and CARG1 are both r3.
    595   |  mr CRET1, CARG2  // Return value = errcode.
    596   |->vm_unwind_c_eh:			// Landing pad for external unwinder.
    597   |  lwz L, SAVE_L  // Reload L from the (restored) C frame.
    598   |  .toc ld TOCREG, SAVE_TOC
    599   |   li TMP0, ~LJ_VMST_C
    600   |  lwz GL:TMP1, L->glref
    601   |   stw TMP0, GL:TMP1->vmstate  // vmstate = ~LJ_VMST_C, written via GL since DISPATCH is not set up here.
    602   |  b ->vm_leave_unw  // Restore registers and return CRET1.
    603   |
    604   |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
    605   |  // (void *cframe)
    606   |.if GPR64
    607   |  rldicr sp, CARG1, 0, 61  // sp = cframe with the low 2 bits cleared.
    608   |.else
    609   |  rlwinm sp, CARG1, 0, 0, 29  // Same for 32 bit registers.
    610   |.endif
    611   |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
    612   |  lwz L, SAVE_L
    613   |  .toc ld TOCREG, SAVE_TOC
    614   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
    615   |  lp BASE, L->base
    616   |     lus TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
    617   |   lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
    618   |     li ZERO, 0
    619   |     stw TMP3, TMPD  // Constants go through the TMPD slot to reach an FPR (lfs below).
    620   |  li TMP1, LJ_TFALSE
    621   |     ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
    622   |     li TISNIL, LJ_TNIL
    623   |    li_vmstate INTERP
    624   |     lfs TOBIT, TMPD
    625   |  lwz PC, FRAME_PC(BASE)		// Fetch PC of previous frame.
    626   |  la RA, -8(BASE)			// Results start at BASE-8.
    627   |     stw TMP3, TMPD
    628   |   addi DISPATCH, DISPATCH, GG_G2DISP
    629   |  stw TMP1, 0(RA)			// Prepend false to error message.
    630   |  li RD, 16				// 2 results: false + error message.
    631   |    st_vmstate
    632   |     lfs TONUM, TMPD
    633   |  b ->vm_returnc
    634   |
    635   |//-----------------------------------------------------------------------
    636   |//-- Grow stack for calls -----------------------------------------------
    637   |//-----------------------------------------------------------------------
    638   |
    639   |->vm_growstack_c:			// Grow stack for C function.
    640   |  li CARG2, LUA_MINSTACK  // n = LUA_MINSTACK.
    641   |  b >2
    642   |
    643   |->vm_growstack_l:			// Grow stack for Lua function.
    644   |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
    645   |  add RC, BASE, RC  // RC = BASE + nargs*8 = top.
    646   |   sub RA, RA, BASE  // RA = framesize*8.
    647   |  stp BASE, L->base
    648   |   addi PC, PC, 4			// Must point after first instruction.
    649   |  stp RC, L->top
    650   |   srwi CARG2, RA, 3  // n = framesize.
    651   |2:
    652   |  // L->base = new base, L->top = top
    653   |   stw PC, SAVE_PC
    654   |  mr CARG1, L
    655   |  bl extern lj_state_growstack	// (lua_State *L, int n)
    656   |  lp BASE, L->base  // Reload base and top from L after the call.
    657   |  lp RC, L->top
    658   |  lwz LFUNC:RB, FRAME_FUNC(BASE)
    659   |  sub RC, RC, BASE  // Back to RC = nargs*8.
    660   |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
    661   |  ins_callt				// Just retry the call.
    662   |
    663   |//-----------------------------------------------------------------------
    664   |//-- Entry points into the assembler VM ---------------------------------
    665   |//-----------------------------------------------------------------------
    666   |
    667   |->vm_resume:				// Setup C frame and resume thread.
    668   |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
    669   |  saveregs
    670   |  mr L, CARG1
    671   |    lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
    672   |  mr BASE, CARG2
    673   |    lbz TMP1, L->status
    674   |   stw L, SAVE_L
    675   |  li PC, FRAME_CP
    676   |  addi TMP0, sp, CFRAME_RESUME  // C frame pointer tagged with CFRAME_RESUME.
    677   |    addi DISPATCH, DISPATCH, GG_G2DISP
    678   |   stw CARG3, SAVE_NRES  // CARG3 is 0 per the signature above, so these three stores zero the slots.
    679   |    cmplwi TMP1, 0  // L->status == 0?
    680   |   stw CARG3, SAVE_ERRF
    681   |   stp CARG3, SAVE_CFRAME
    682   |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
    683   |  stp TMP0, L->cframe
    684   |    beq >3  // Status 0: continue at the shared entry (label 3 in vm_call).
    685   |
    686   |  // Resume after yield (like a return).
    687   |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
    688   |  mr RA, BASE  // RA = base argument, used as the result pointer.
    689   |   lp BASE, L->base
    690   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
    691   |   lp TMP1, L->top
    692   |  lwz PC, FRAME_PC(BASE)
    693   |     lus TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
    694   |    stb CARG3, L->status  // L->status = CARG3 (0 per the signature).
    695   |     stw TMP3, TMPD
    696   |     ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
    697   |     lfs TOBIT, TMPD
    698   |   sub RD, TMP1, BASE
    699   |     stw TMP3, TMPD
    700   |     lus TMP0, 0x4338		// Hiword of 2^52 + 2^51 (double)
    701   |   addi RD, RD, 8  // RD = (L->top - L->base) + 8.
    702   |     stw TMP0, TONUM_HI  // Preset high word used by tonum_i/tonum_u.
    703   |    li_vmstate INTERP
    704   |     li ZERO, 0
    705   |    st_vmstate
    706   |  andix. TMP0, PC, FRAME_TYPE  // TMP0 = PC & FRAME_TYPE, as vm_return expects.
    707   |   mr MULTRES, RD
    708   |     lfs TONUM, TMPD
    709   |     li TISNIL, LJ_TNIL
    710   |  beq ->BC_RET_Z  // Frame type bits zero: return to Lua.
    711   |  b ->vm_return
    712   |
    713   |->vm_pcall:				// Setup protected C frame and enter VM.
    714   |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
    715   |  saveregs
    716   |  li PC, FRAME_CP  // Frame type for the shared entry below.
    717   |  stw CARG4, SAVE_ERRF  // Record ef.
    718   |  b >1  // Join the common entry in vm_call.
    719   |
    720   |->vm_call:				// Setup C frame and enter VM.
    721   |  // (lua_State *L, TValue *base, int nres1)
    722   |  saveregs
    723   |  li PC, FRAME_C
    724   |
    725   |1:  // Entry point for vm_pcall above (PC = ftype).
    726   |  lp TMP1, L:CARG1->cframe  // Previous C frame, saved below.
    727   |    mr L, CARG1
    728   |   stw CARG3, SAVE_NRES
    729   |    lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
    730   |   stw CARG1, SAVE_L
    731   |     mr BASE, CARG2
    732   |    addi DISPATCH, DISPATCH, GG_G2DISP
    733   |   stw CARG1, SAVE_PC		// Any value outside of bytecode is ok.
    734   |  stp TMP1, SAVE_CFRAME
    735   |  stp sp, L->cframe			// Add our C frame to cframe chain.
    736   |
    737   |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
    738   |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
    739   |  lp TMP2, L->base			// TMP2 = old base (used in vmeta_call).
    740   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
    741   |   lp TMP1, L->top
    742   |     lus TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
    743   |  add PC, PC, BASE
    744   |     stw TMP3, TMPD
    745   |     li ZERO, 0
    746   |     ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
    747   |     lfs TOBIT, TMPD
    748   |  sub PC, PC, TMP2			// PC = frame delta + frame type
    749   |     stw TMP3, TMPD
    750   |     lus TMP0, 0x4338		// Hiword of 2^52 + 2^51 (double)
    751   |   sub NARGS8:RC, TMP1, BASE  // RC = L->top - base = nargs*8.
    752   |     stw TMP0, TONUM_HI  // Preset high word used by tonum_i/tonum_u.
    753   |    li_vmstate INTERP
    754   |     lfs TONUM, TMPD
    755   |     li TISNIL, LJ_TNIL
    756   |    st_vmstate
    757   |
    758   |->vm_call_dispatch:
    759   |  // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
    760   |  lwz TMP0, FRAME_PC(BASE)  // Word at BASE-8, checked below as the type tag of the called value.
    761   |   lwz LFUNC:RB, FRAME_FUNC(BASE)
    762   |  checkfunc TMP0; bne ->vmeta_call  // Not a function: go to vmeta_call.
    763   |
    764   |->vm_call_dispatch_f:
    765   |  ins_call
    766   |  // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
    767   |
    768   |->vm_cpcall:				// Setup protected C frame, call C.
    769   |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
    770   |  saveregs
    771   |  mr L, CARG1
    772   |   lwz TMP0, L:CARG1->stack
    773   |  stw CARG1, SAVE_L
    774   |   lp TMP1, L->top
    775   |     lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
    776   |  stw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
    777   |   sub TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
    778   |    lp TMP1, L->cframe
    779   |     addi DISPATCH, DISPATCH, GG_G2DISP
    780   |  .toc lp CARG4, 0(CARG4)  // TOC builds: cp points to a function descriptor, so fetch the code address.
    781   |  li TMP2, 0
    782   |   stw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
    783   |  stw TMP2, SAVE_ERRF		// No error function.
    784   |    stp TMP1, SAVE_CFRAME
    785   |    stp sp, L->cframe		// Add our C frame to cframe chain.
    786   |     stw L, DISPATCH_GL(cur_L)(DISPATCH)
    787   |  mtctr CARG4
    788   |  bctrl			// (lua_State *L, lua_CFunction func, void *ud)
    789   |.if PPE  // PPE: separate mr + cmpwi instead of the record form mr.
    790   |  mr BASE, CRET1
    791   |  cmpwi CRET1, 0
    792   |.else
    793   |  mr. BASE, CRET1  // BASE = returned base. cr0.eq if it is NULL.
    794   |.endif
    795   |   li PC, FRAME_CP
    796   |  bne <3				// Else continue with the call.
    797   |  b ->vm_leave_cp			// No base? Just remove C frame.
    798   |
    799   |//-----------------------------------------------------------------------
    800   |//-- Metamethod handling ------------------------------------------------
    801   |//-----------------------------------------------------------------------
    802   |
    803   |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
    804   |// stack, so BASE doesn't need to be reloaded across these calls.
    805   |
    806   |//-- Continuation dispatch ----------------------------------------------
    807   |
    808   |->cont_dispatch:
    809   |  // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
    810   |  lwz TMP0, -12(BASE)		// Continuation.
    811   |   mr RB, BASE
    812   |   mr BASE, TMP2			// Restore caller BASE.
    813   |    lwz LFUNC:TMP1, FRAME_FUNC(TMP2)  // Function object of the caller frame.
    814   |.if FFI
    815   |  cmplwi TMP0, 1  // Continuation values 0 and 1 are special (handled at 1: below).
    816   |.endif
    817   |     lwz PC, -16(RB)			// Restore PC from [cont|PC].
    818   |   subi TMP2, RD, 8  // TMP2 = nresults*8.
    819   |    lwz TMP1, LFUNC:TMP1->pc
    820   |   stwx TISNIL, RA, TMP2		// Ensure one valid arg.
    821   |.if FFI
    822   |  ble >1  // Uses the cmplwi result from above.
    823   |.endif
    824   |    lwz KBASE, PC2PROTO(k)(TMP1)  // KBASE = GCproto.k, addressed backwards from func->pc.
    825   |  // BASE = base, RA = resultptr, RB = meta base
    826   |  mtctr TMP0
    827   |  bctr				// Jump to continuation.
    828   |
    829   |.if FFI
    830   |1:
    831   |  beq ->cont_ffi_callback		// cont = 1: return from FFI callback.
    832   |  // cont = 0: tailcall from C function.
    833   |  subi TMP1, RB, 16
    834   |  sub RC, TMP1, BASE
    835   |  b ->vm_call_tail
    836   |.endif
    837   |
    838   |->cont_cat:				// RA = resultptr, RB = meta base. Continuation after a concat metamethod.
    839   |  lwz INS, -4(PC)  // Re-fetch the instruction preceding PC.
    840   |   subi CARG2, RB, 16  // CARG2 = meta base - 16: slot that receives the result.
    841   |  decode_RB8 SAVE0, INS
    842   |   lfd f0, 0(RA)  // f0 = metamethod result.
    843   |  add TMP1, BASE, SAVE0  // TMP1 = BASE + decoded RB operand.
    844   |   stp BASE, L->base
    845   |  cmplw TMP1, CARG2
    846   |   sub CARG3, CARG2, TMP1  // CARG3 = distance between the two slots.
    847   |  decode_RA8 RA, INS
    848   |   stfd f0, 0(CARG2)
    849   |  bney ->BC_CAT_Z  // Slots differ: continue at BC_CAT_Z with CARG2/CARG3 set.
    850   |   stfdx f0, BASE, RA  // Slots equal: store the result in the RA operand slot.
    851   |  b ->cont_nop
    852   |
    853   |//-- Table indexing metamethods -----------------------------------------
    854   |
    855   |->vmeta_tgets1:  // Object = stack slot RB, key = string STR:RC.
    856   |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)  // Key TValue is built in the tmptv global slot.
    857   |  li TMP0, LJ_TSTR
    858   |   decode_RB8 RB, INS
    859   |  stw STR:RC, 4(CARG3)
    860   |   add CARG2, BASE, RB
    861   |  stw TMP0, 0(CARG3)
    862   |  b >1
    863   |
    864   |->vmeta_tgets:  // Object = table TAB:RB (built in tmptv), key = string STR:RC (built in tmptv2).
    865   |  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
    866   |  li TMP0, LJ_TTAB
    867   |  stw TAB:RB, 4(CARG2)
    868   |   la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
    869   |  stw TMP0, 0(CARG2)
    870   |   li TMP1, LJ_TSTR
    871   |   stw STR:RC, 4(CARG3)
    872   |   stw TMP1, 0(CARG3)
    873   |  b >1
    874   |
    875   |->vmeta_tgetb:			// TMP0 = index. Object = stack slot RB, key built in tmptv.
    876   |.if not DUALNUM
    877   |  tonum_u f0, TMP0
    878   |.endif
    879   |   decode_RB8 RB, INS
    880   |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
    881   |   add CARG2, BASE, RB
    882   |.if DUALNUM
    883   |  stw TISNUM, 0(CARG3)
    884   |  stw TMP0, 4(CARG3)
    885   |.else
    886   |  stfd f0, 0(CARG3)
    887   |.endif
    888   |  b >1
    889   |
    890   |->vmeta_tgetv:  // Object = stack slot RB, key = stack slot RC.
    891   |  decode_RB8 RB, INS
    892   |   decode_RC8 RC, INS
    893   |  add CARG2, BASE, RB
    894   |   add CARG3, BASE, RC
    895   |1:  // Common tail for all vmeta_tget* entries: CARG2 = object, CARG3 = key.
    896   |  stp BASE, L->base
    897   |  mr CARG1, L
    898   |  stw PC, SAVE_PC
    899   |  bl extern lj_meta_tget		// (lua_State *L, TValue *o, TValue *k)
    900   |  // Returns TValue * (finished) or NULL (metamethod).
    901   |  cmplwi CRET1, 0
    902   |  beq >3
    903   |   lfd f0, 0(CRET1)
    904   |  ins_next1
    905   |   stfdx f0, BASE, RA  // Copy the found value into the RA operand slot.
    906   |  ins_next2
    907   |
    908   |3:  // Call __index metamethod.
    909   |  // BASE = base, L->top = new base, stack = cont/func/t/k
    910   |  subfic TMP1, BASE, FRAME_CONT  // TMP1 = FRAME_CONT - old base.
    911   |  lp BASE, L->top
    912   |  stw PC, -16(BASE)			// [cont|PC]
    913   |   add PC, TMP1, BASE  // PC = (new base - old base) + FRAME_CONT.
    914   |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
    915   |   li NARGS8:RC, 16			// 2 args for func(t, k).
    916   |  b ->vm_call_dispatch_f
    917   |
    918   |->vmeta_tgetr:  // Hash-part lookup. Arguments are not set up here; presumably the caller sets CARG1/CARG2 - confirm.
    919   |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
    920   |  // Returns cTValue * or NULL.
    921   |  cmplwi CRET1, 0
    922   |  beq >1
    923   |  lfd f14, 0(CRET1)  // Found: pass the value to BC_TGETR_Z in f14.
    924   |  b ->BC_TGETR_Z
    925   |1:
    926   |  stwx TISNIL, BASE, RA  // Not found: store TISNIL as the tag word of the RA operand slot.
    927   |  b ->cont_nop
    928   |
    929   |//-----------------------------------------------------------------------
    930   |
    931   |->vmeta_tsets1:  // Object = stack slot RB, key = string STR:RC.
    932   |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)  // Key TValue is built in the tmptv global slot.
    933   |  li TMP0, LJ_TSTR
    934   |   decode_RB8 RB, INS
    935   |  stw STR:RC, 4(CARG3)
    936   |   add CARG2, BASE, RB
    937   |  stw TMP0, 0(CARG3)
    938   |  b >1
    939   |
    940   |->vmeta_tsets:  // Object = table TAB:RB (built in tmptv), key = string STR:RC (built in tmptv2).
    941   |  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
    942   |  li TMP0, LJ_TTAB
    943   |  stw TAB:RB, 4(CARG2)
    944   |   la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
    945   |  stw TMP0, 0(CARG2)
    946   |   li TMP1, LJ_TSTR
    947   |   stw STR:RC, 4(CARG3)
    948   |   stw TMP1, 0(CARG3)
    949   |  b >1
    950   |
    951   |->vmeta_tsetb:			// TMP0 = index. Object = stack slot RB, key built in tmptv.
    952   |.if not DUALNUM
    953   |  tonum_u f0, TMP0
    954   |.endif
    955   |   decode_RB8 RB, INS
    956   |  la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
    957   |   add CARG2, BASE, RB
    958   |.if DUALNUM
    959   |  stw TISNUM, 0(CARG3)
    960   |  stw TMP0, 4(CARG3)
    961   |.else
    962   |  stfd f0, 0(CARG3)
    963   |.endif
    964   |  b >1
    965   |
    966   |->vmeta_tsetv:  // Object = stack slot RB, key = stack slot RC.
    967   |  decode_RB8 RB, INS
    968   |   decode_RC8 RC, INS
    969   |  add CARG2, BASE, RB
    970   |   add CARG3, BASE, RC
    971   |1:  // Common tail for all vmeta_tset* entries: CARG2 = object, CARG3 = key.
    972   |  stp BASE, L->base
    973   |  mr CARG1, L
    974   |  stw PC, SAVE_PC
    975   |  bl extern lj_meta_tset		// (lua_State *L, TValue *o, TValue *k)
    976   |  // Returns TValue * (finished) or NULL (metamethod).
    977   |  cmplwi CRET1, 0
    978   |   lfdx f0, BASE, RA  // f0 = value to store (RA operand slot); used on both paths below.
    979   |  beq >3
    980   |  // NOBARRIER: lj_meta_tset ensures the table is not black.
    981   |  ins_next1
    982   |   stfd f0, 0(CRET1)  // Store the value into the returned slot.
    983   |  ins_next2
    984   |
    985   |3:  // Call __newindex metamethod.
    986   |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
    987   |  subfic TMP1, BASE, FRAME_CONT  // TMP1 = FRAME_CONT - old base.
    988   |  lp BASE, L->top
    989   |  stw PC, -16(BASE)			// [cont|PC]
    990   |   add PC, TMP1, BASE  // PC = (new base - old base) + FRAME_CONT.
    991   |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
    992   |   li NARGS8:RC, 24			// 3 args for func(t, k, v)
    993   |  stfd f0, 16(BASE)			// Copy value to third argument.
    994   |  b ->vm_call_dispatch_f
    995   |
    996   |->vmeta_tsetr:  // Hash-part store. CARG1..CARG3 are not set up here; presumably the caller does - confirm.
    997   |  stp BASE, L->base
    998   |  stw PC, SAVE_PC
    999   |  bl extern lj_tab_setinth  // (lua_State *L, GCtab *t, int32_t key)
   1000   |  // Returns TValue *.
   1001   |  stfd f14, 0(CRET1)  // f14 presumably holds the value to store, set by the caller - confirm.
   1002   |  b ->cont_nop
   1003   |
   1004   |//-- Comparison metamethods ---------------------------------------------
   1005   |
   1006   |->vmeta_comp:  // Comparison fallback: call lj_meta_comp, then branch or call the metamethod.
   1007   |  mr CARG1, L
   1008   |   subi PC, PC, 4  // Step PC back one instruction; label 4: below re-reads the word at PC.
   1009   |.if DUALNUM
   1010   |  mr CARG2, RA
   1011   |.else
   1012   |  add CARG2, BASE, RA
   1013   |.endif
   1014   |   stw PC, SAVE_PC
   1015   |.if DUALNUM
   1016   |  mr CARG3, RD
   1017   |.else
   1018   |  add CARG3, BASE, RD
   1019   |.endif
   1020   |   stp BASE, L->base
   1021   |  decode_OP1 CARG4, INS
   1022   |  bl extern lj_meta_comp  // (lua_State *L, TValue *o1, *o2, int op)
   1023   |  // Returns 0/1 or TValue * (metamethod).
   1024   |3:  // Also entered from vmeta_equal/vmeta_equal_cd with CRET1 = 0/1 or TValue *.
   1025   |  cmplwi CRET1, 1
   1026   |  bgt ->vmeta_binop  // Anything above 1 is a pointer: call the metamethod.
   1027   |  subfic CRET1, CRET1, 0  // CRET1 = 0 - CRET1: turns 0/1 into a 0/-1 mask.
   1028   |4:  // Also entered from cont_condt/cont_condf with CRET1 = 0/-1 mask.
   1029   |  lwz INS, 0(PC)
   1030   |   addi PC, PC, 4
   1031   |  decode_RD4 TMP2, INS
   1032   |  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Remove the jump bias from the decoded offset.
   1033   |  and TMP2, TMP2, CRET1  // Mask 0 cancels the jump, mask -1 keeps the offset.
   1034   |  add PC, PC, TMP2
   1035   |->cont_nop:  // Shared exit: just dispatch the next instruction.
   1036   |  ins_next
   1037   |
   1038   |->cont_ra:				// RA = resultptr. Copies the result to the RA operand slot.
   1039   |  lwz INS, -4(PC)  // Re-fetch the instruction preceding PC.
   1040   |   lfd f0, 0(RA)
   1041   |  decode_RA8 TMP1, INS
   1042   |   stfdx f0, BASE, TMP1
   1043   |  b ->cont_nop
   1044   |
   1045   |->cont_condt:			// RA = resultptr
   1046   |  lwz TMP0, 0(RA)  // TMP0 = word at offset 0 of the result (compared against LJ_TTRUE below).
   1047   |  .gpr64 extsw TMP0, TMP0
   1048   |  subfic TMP0, TMP0, LJ_TTRUE	// Branch if result is true.
   1049   |  subfe CRET1, CRET1, CRET1  // CRET1 = 0 if the subfic set carry, else -1.
   1050   |  not CRET1, CRET1  // Invert the mask (cont_condf uses it uninverted).
   1051   |  b <4
   1052   |
   1053   |->cont_condf:			// RA = resultptr
   1054   |  lwz TMP0, 0(RA)  // TMP0 = word at offset 0 of the result (compared against LJ_TTRUE below).
   1055   |  .gpr64 extsw TMP0, TMP0
   1056   |  subfic TMP0, TMP0, LJ_TTRUE	// Branch if result is false.
   1057   |  subfe CRET1, CRET1, CRET1  // CRET1 = 0 if the subfic set carry, else -1 (jump mask for label 4).
   1058   |  b <4
   1059   |
   1060   |->vmeta_equal:  // Equality fallback: call lj_meta_equal, then reuse the vmeta_comp tail.
   1061   |  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
   1062   |  subi PC, PC, 4  // Step PC back one instruction; the shared tail re-reads the word at PC.
   1063   |   stp BASE, L->base
   1064   |  mr CARG1, L
   1065   |   stw PC, SAVE_PC
   1066   |  bl extern lj_meta_equal  // (lua_State *L, GCobj *o1, *o2, int ne)
   1067   |  // Returns 0/1 or TValue * (metamethod).
   1068   |  b <3  // Label 3: in vmeta_comp decides between branch and metamethod call.
   1069   |
   1070   |->vmeta_equal_cd:  // Body exists only with FFI; otherwise the label falls through to the next one.
   1071   |.if FFI
   1072   |  mr CARG2, INS  // Pass the whole instruction word as the op argument.
   1073   |  subi PC, PC, 4
   1074   |   stp BASE, L->base
   1075   |  mr CARG1, L
   1076   |   stw PC, SAVE_PC
   1077   |  bl extern lj_meta_equal_cd		// (lua_State *L, BCIns op)
   1078   |  // Returns 0/1 or TValue * (metamethod).
   1079   |  b <3  // Label 3: in vmeta_comp decides between branch and metamethod call.
   1080   |.endif
   1081   |
   1082   |->vmeta_istype:  // Type-check fallback: call lj_meta_istype, then continue at cont_nop.
   1083   |  subi PC, PC, 4
   1084   |   stp BASE, L->base
   1085   |   srwi CARG2, RA, 3  // RA >> 3 is passed as the BCReg ra argument.
   1086   |   mr CARG1, L
   1087   |   srwi CARG3, RD, 3  // RD >> 3 is passed as the BCReg tp argument.
   1088   |  stw PC, SAVE_PC
   1089   |  bl extern lj_meta_istype  // (lua_State *L, BCReg ra, BCReg tp)
   1090   |  b ->cont_nop
   1091   |
   1092   |//-- Arithmetic metamethods ---------------------------------------------
   1093   |
   1094   |->vmeta_arith_nv:  // rb = constant slot KBASE+RC, rc = stack slot BASE+RB.
   1095   |  add CARG3, KBASE, RC
   1096   |  add CARG4, BASE, RB
   1097   |  b >1
   1098   |->vmeta_arith_nv2:  // Body exists only with DUALNUM; otherwise this label falls through to vmeta_unm.
   1099   |.if DUALNUM
   1100   |  mr CARG3, RC
   1101   |  mr CARG4, RB
   1102   |  b >1
   1103   |.endif
   1104   |
   1105   |->vmeta_unm:  // Unary minus: the same operand (RD) is passed as both rb and rc.
   1106   |  mr CARG3, RD
   1107   |  mr CARG4, RD
   1108   |  b >1
   1109   |
   1110   |->vmeta_arith_vn:  // rb = stack slot BASE+RB, rc = constant slot KBASE+RC.
   1111   |  add CARG3, BASE, RB
   1112   |  add CARG4, KBASE, RC
   1113   |  b >1
   1114   |
   1115   |->vmeta_arith_vv:  // rb = stack slot BASE+RB, rc = stack slot BASE+RC.
   1116   |  add CARG3, BASE, RB
   1117   |  add CARG4, BASE, RC
   1118   |.if DUALNUM
   1119   |  b >1
   1120   |.endif
   1121   |->vmeta_arith_vn2:  // With DUALNUM: RB/RC are used as operand pointers as-is.
   1122   |->vmeta_arith_vv2:  // Without DUALNUM both *2 labels are empty and alias label 1: below.
   1123   |.if DUALNUM
   1124   |  mr CARG3, RB
   1125   |  mr CARG4, RC
   1126   |.endif
   1127   |1:  // Common tail: CARG3 = rb, CARG4 = rc.
   1128   |  add CARG2, BASE, RA
   1129   |   stp BASE, L->base
   1130   |  mr CARG1, L
   1131   |   stw PC, SAVE_PC
   1132   |  decode_OP1 CARG5, INS		// Caveat: CARG5 overlaps INS.
   1133   |  bl extern lj_meta_arith  // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
   1134   |  // Returns NULL (finished) or TValue * (metamethod).
   1135   |  cmplwi CRET1, 0
   1136   |  beq ->cont_nop
   1137   |
   1138   |  // Call metamethod for binary op.
   1139   |->vmeta_binop:
   1140   |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
   1141   |  sub TMP1, CRET1, BASE  // TMP1 = new base - old base.
   1142   |   stw PC, -16(CRET1)		// [cont|PC]
   1143   |   mr TMP2, BASE  // TMP2 = old base for vm_call_dispatch.
   1144   |  addi PC, TMP1, FRAME_CONT  // PC = frame delta + FRAME_CONT.
   1145   |   mr BASE, CRET1
   1146   |  li NARGS8:RC, 16			// 2 args for func(o1, o2).
   1147   |  b ->vm_call_dispatch
   1148   |
   1149   |->vmeta_len:  // Length fallback: call lj_meta_len, then either call __len or retry at BC_LEN_Z.
   1150   |  mr SAVE0, CARG1  // Preserve the incoming CARG1 across the C call for the retry path.
   1151   |  mr CARG2, RD
   1152   |   stp BASE, L->base
   1153   |  mr CARG1, L
   1154   |   stw PC, SAVE_PC
   1155   |  bl extern lj_meta_len		// (lua_State *L, TValue *o)
   1156   |  // Returns NULL (retry) or TValue * (metamethod base).
   1157   |  cmplwi CRET1, 0
   1158   |  bne ->vmeta_binop			// Binop call for compatibility.
   1159   |  mr CARG1, SAVE0  // Retry: restore CARG1 and re-enter the inline path.
   1160   |  b ->BC_LEN_Z
   1161   |
   1162   |//-- Call metamethod ----------------------------------------------------
   1163   |
   1164   |->vmeta_call:			// Resolve and call __call metamethod.
   1165   |  // TMP2 = old base, BASE = new base, RC = nargs*8
   1166   |  mr CARG1, L
   1167   |   stp TMP2, L->base			// This is the caller's base!
   1168   |  subi CARG2, BASE, 8  // CARG2 = BASE - 8, passed as the func argument.
   1169   |   stw PC, SAVE_PC
   1170   |  add CARG3, BASE, RC  // CARG3 = BASE + nargs*8, passed as the top argument.
   1171   |   mr SAVE0, NARGS8:RC  // Keep nargs*8 across the C call.
   1172   |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
   1173   |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// Guaranteed to be a function here.
   1174   |   addi NARGS8:RC, SAVE0, 8		// Got one more argument now.
   1175   |  ins_call
   1176   |
   1177   |->vmeta_callt:			// Resolve __call for BC_CALLT.
   1178   |  // BASE = old base, RA = new base, RC = nargs*8
   1179   |  mr CARG1, L
   1180   |   stp BASE, L->base
   1181   |  subi CARG2, RA, 8  // CARG2 = RA - 8, passed as the func argument.
   1182   |   stw PC, SAVE_PC
   1183   |  add CARG3, RA, RC  // CARG3 = RA + nargs*8, passed as the top argument.
   1184   |   mr SAVE0, NARGS8:RC  // Keep nargs*8 across the C call.
   1185   |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
   1186   |  lwz TMP1, FRAME_PC(BASE)  // TMP1 = FRAME_PC word of the current frame, for BC_CALLT_Z.
   1187   |   addi NARGS8:RC, SAVE0, 8		// Got one more argument now.
   1188   |   lwz LFUNC:RB, FRAME_FUNC(RA)	// Guaranteed to be a function here.
   1189   |  b ->BC_CALLT_Z
   1190   |
   1191   |//-- Argument coercion for 'for' statement ------------------------------
   1192   |
   1193   |->vmeta_for:  // Call lj_meta_for on the slots at RA, then re-dispatch the loop instruction.
   1194   |  mr CARG1, L
   1195   |   stp BASE, L->base
   1196   |  mr CARG2, RA
   1197   |   stw PC, SAVE_PC
   1198   |  mr SAVE0, INS  // Keep the instruction word across the C call.
   1199   |  bl extern lj_meta_for	// (lua_State *L, TValue *base)
   1200   |.if JIT
   1201   |   decode_OP1 TMP0, SAVE0
   1202   |.endif
   1203   |  decode_RA8 RA, SAVE0  // Re-decode the operands from the saved instruction.
   1204   |.if JIT
   1205   |   cmpwi TMP0, BC_JFORI
   1206   |.endif
   1207   |  decode_RD8 RD, SAVE0
   1208   |.if JIT
   1209   |   beqy =>BC_JFORI  // JIT builds: re-enter BC_JFORI if that was the opcode.
   1210   |.endif
   1211   |  b =>BC_FORI
   1212   |
   1213   |//-----------------------------------------------------------------------
   1214   |//-- Fast functions -----------------------------------------------------
   1215   |//-----------------------------------------------------------------------
   1216   |
   1217   |.macro .ffunc, name
   1218   |->ff_ .. name:  // Defines the global label ff_<name> only; no argument checks.
   1219   |.endmacro
   1220   |
   1221   |.macro .ffunc_1, name
   1222   |->ff_ .. name:  // Entry with at least one argument; leaves cr0 = compare of NARGS8 with 8.
   1223   |  cmplwi NARGS8:RC, 8
   1224   |   lwz CARG3, 0(BASE)  // CARG3 = word at offset 0 of arg 1 (the word the type checks test).
   1225   |    lwz CARG1, 4(BASE)  // CARG1 = word at offset 4 of arg 1.
   1226   |  blt ->fff_fallback  // Fewer than one argument.
   1227   |.endmacro
   1228   |
   1229   |.macro .ffunc_2, name
   1230   |->ff_ .. name:  // Entry with at least two arguments.
   1231   |  cmplwi NARGS8:RC, 16
   1232   |   lwz CARG3, 0(BASE)  // CARG3/CARG1 = words 0/4 of arg 1.
   1233   |    lwz CARG4, 8(BASE)  // CARG4/CARG2 = words 0/4 of arg 2.
   1234   |   lwz CARG1, 4(BASE)
   1235   |    lwz CARG2, 12(BASE)
   1236   |  blt ->fff_fallback  // Fewer than two arguments.
   1237   |.endmacro
   1238   |
   1239   |.macro .ffunc_n, name
   1240   |->ff_ .. name:  // Entry with one argument that must pass checknum; FARG1 = arg 1.
   1241   |  cmplwi NARGS8:RC, 8
   1242   |   lwz CARG3, 0(BASE)
   1243   |    lfd FARG1, 0(BASE)
   1244   |  blt ->fff_fallback  // Fewer than one argument.
   1245   |  checknum CARG3; bge ->fff_fallback
   1246   |.endmacro
   1247   |
   1248   |.macro .ffunc_nn, name
   1249   |->ff_ .. name:  // Entry with two arguments that must both pass checknum; FARG1/FARG2 = args.
   1250   |  cmplwi NARGS8:RC, 16
   1251   |   lwz CARG3, 0(BASE)
   1252   |    lfd FARG1, 0(BASE)
   1253   |   lwz CARG4, 8(BASE)
   1254   |    lfd FARG2, 8(BASE)
   1255   |  blt ->fff_fallback  // Fewer than two arguments.
   1256   |  checknum CARG3; bge ->fff_fallback
   1257   |  checknum CARG4; bge ->fff_fallback
   1258   |.endmacro
   1259   |
   1260   |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
   1261   |.macro ffgccheck
   1262   |  lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
   1263   |  lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
   1264   |  cmplw TMP0, TMP1
   1265   |  bgel ->fff_gcstep  // Branch-and-link to fff_gcstep when gc.total >= gc.threshold (unsigned).
   1266   |.endmacro
   1267   |
   1268   |//-- Base library: checks -----------------------------------------------
   1269   |
   1270   |.ffunc_1 assert  // Returns all arguments unchanged unless arg 1 fails the tag test below.
   1271   |  li TMP1, LJ_TFALSE
   1272   |   la RA, -8(BASE)
   1273   |  cmplw cr1, CARG3, TMP1  // Uses cr1 so that cr0 from .ffunc_1 stays live for the beq below.
   1274   |    lwz PC, FRAME_PC(BASE)
   1275   |  bge cr1, ->fff_fallback  // Tag word >= LJ_TFALSE (unsigned): take the fallback.
   1276   |   stw CARG3, 0(RA)
   1277   |  addi RD, NARGS8:RC, 8		// Compute (nresults+1)*8.
   1278   |   stw CARG1, 4(RA)
   1279   |  beq ->fff_res			// Done if exactly 1 argument.
   1280   |  li TMP1, 8
   1281   |  subi RC, RC, 8
   1282   |1:  // Copy the remaining arguments down by one slot (BASE+off -> RA+off).
   1283   |  cmplw TMP1, RC
   1284   |   lfdx f0, BASE, TMP1
   1285   |   stfdx f0, RA, TMP1
   1286   |    addi TMP1, TMP1, 8
   1287   |  bney <1
   1288   |  b ->fff_res
   1289   |
   1290   |.ffunc type  // Returns an upvalue of the function, selected by the type tag of arg 1.
   1291   |  cmplwi NARGS8:RC, 8
   1292   |   lwz CARG1, 0(BASE)  // CARG1 = tag word of arg 1.
   1293   |  blt ->fff_fallback
   1294   |  .gpr64 extsw CARG1, CARG1
   1295   |  subfc TMP0, TISNUM, CARG1  // TMP0 = tag - TISNUM; carry set iff tag >= TISNUM (unsigned).
   1296   |  subfe TMP2, CARG1, CARG1  // TMP2 = 0 if carry, else -1.
   1297   |  orc TMP1, TMP2, TMP0  // TMP1 = ~(tag - TISNUM) if carry, else -1.
   1298   |  addi TMP1, TMP1, ~LJ_TISNUM+1  // Net: ~tag if tag >= TISNUM, else ~LJ_TISNUM (assumes TISNUM holds LJ_TISNUM - confirm).
   1299   |  slwi TMP1, TMP1, 3  // Scale the index by 8 (one upvalue slot).
   1300   |   la TMP2, CFUNC:RB->upvalue
   1301   |  lfdx FARG1, TMP2, TMP1
   1302   |  b ->fff_resn
   1303   |
   1304   |//-- Base library: getters and setters ---------------------------------
   1305   |
   1306   |.ffunc_1 getmetatable  // Returns mt.__metatable if present and non-nil, else the metatable, else nil.
   1307   |  checktab CARG3; bne >6
   1308   |1:  // Field metatable must be at same offset for GCtab and GCudata!
   1309   |  lwz TAB:CARG1, TAB:CARG1->metatable
   1310   |2:  // TAB:CARG1 = metatable or NULL.
   1311   |  li CARG3, LJ_TNIL
   1312   |   cmplwi TAB:CARG1, 0
   1313   |  lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
   1314   |   beq ->fff_restv  // No metatable: return nil.
   1315   |  lwz TMP0, TAB:CARG1->hmask
   1316   |   li CARG3, LJ_TTAB			// Use metatable as default result.
   1317   |  lwz TMP1, STR:RC->hash
   1318   |  lwz NODE:TMP2, TAB:CARG1->node
   1319   |  and TMP1, TMP1, TMP0		// idx = str->hash & tab->hmask
   1320   |  slwi TMP0, TMP1, 5
   1321   |  slwi TMP1, TMP1, 3
   1322   |  sub TMP1, TMP0, TMP1
   1323   |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
   1324   |3:  // Rearranged logic, because we expect _not_ to find the key.
   1325   |  lwz CARG4, NODE:TMP2->key
   1326   |   lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
   1327   |    lwz CARG2, NODE:TMP2->val
   1328   |     lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
   1329   |  checkstr CARG4; bne >4
   1330   |   cmpw TMP0, STR:RC; beq >5  // Same string object as the __metatable name: key found.
   1331   |4:  // Follow the node chain.
   1332   |  lwz NODE:TMP2, NODE:TMP2->next
   1333   |  cmplwi NODE:TMP2, 0
   1334   |  beq ->fff_restv			// Not found, keep default result.
   1335   |  b <3
   1336   |5:  // Key found: CARG2/TMP1 = the two words of its value.
   1337   |  checknil CARG2
   1338   |  beq ->fff_restv			// Ditto for nil value.
   1339   |  mr CARG3, CARG2			// Return value of mt.__metatable.
   1340   |  mr CARG1, TMP1
   1341   |  b ->fff_restv
   1342   |
   1343   |6:  // Not a table.
   1344   |  cmpwi CARG3, LJ_TUDATA; beq <1  // Userdata: read its metatable field like a table's.
   1345   |  .gpr64 extsw CARG3, CARG3
   1346   |  subfc TMP0, TISNUM, CARG3  // Same branch-free tag-to-index mapping as in ff_type.
   1347   |  subfe TMP2, CARG3, CARG3
   1348   |  orc TMP1, TMP2, TMP0
   1349   |  addi TMP1, TMP1, ~LJ_TISNUM+1
   1350   |  slwi TMP1, TMP1, 2  // Scale the index by 4 (one gcroot entry).
   1351   |   la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
   1352   |  lwzx TAB:CARG1, TMP2, TMP1  // Load the base metatable for this type.
   1353   |  b <2
   1354   |
   1355   |.ffunc_2 setmetatable  // Inline path only; every other case goes to fff_fallback.
   1356   |  // Fast path: no mt for table yet and not clearing the mt.
   1357   |   checktab CARG3; bne ->fff_fallback  // Arg 1 must be a table.
   1358   |  lwz TAB:TMP1, TAB:CARG1->metatable
   1359   |   checktab CARG4; bne ->fff_fallback  // Arg 2 must be a table.
   1360   |  cmplwi TAB:TMP1, 0
   1361   |   lbz TMP3, TAB:CARG1->marked
   1362   |  bne ->fff_fallback  // Table already has a metatable.
   1363   |   andix. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
   1364   |    stw TAB:CARG2, TAB:CARG1->metatable
   1365   |   beq ->fff_restv  // Not black: no barrier needed; CARG3/CARG1 (arg 1) is the result.
   1366   |  barrierback TAB:CARG1, TMP3, TMP0
   1367   |  b ->fff_restv
   1368   |
   1369   |.ffunc rawget  // rawget(t, k) via lj_tab_get; falls back unless arg 1 is a table.
   1370   |  cmplwi NARGS8:RC, 16
   1371   |   lwz CARG4, 0(BASE)
   1372   |    lwz TAB:CARG2, 4(BASE)
   1373   |  blt ->fff_fallback  // Fewer than two arguments.
   1374   |  checktab CARG4; bne ->fff_fallback
   1375   |   la CARG3, 8(BASE)  // Key = pointer to arg 2.
   1376   |   mr CARG1, L
   1377   |  bl extern lj_tab_get  // (lua_State *L, GCtab *t, cTValue *key)
   1378   |  // Returns cTValue *.
   1379   |  lfd FARG1, 0(CRET1)  // Load the found TValue as a raw 8-byte value.
   1380   |  b ->fff_resn
   1381   |
   1382   |//-- Base library: conversions ------------------------------------------
   1383   |
   1384   |.ffunc tonumber
   1385   |  // Only handles the number case inline (without a base argument).
   1386   |  cmplwi NARGS8:RC, 8
   1387   |   lwz CARG1, 0(BASE)
   1388   |    lfd FARG1, 0(BASE)
   1389   |  bne ->fff_fallback			// Exactly one argument.
   1390   |   checknum CARG1; bgt ->fff_fallback
   1391   |  b ->fff_resn  // Return arg 1 unchanged.
   1392   |
   1393   |.ffunc_1 tostring
   1394   |  // Only handles the string or number case inline.
   1395   |  checkstr CARG3
   1396   |  // A __tostring method in the string base metatable is ignored.
   1397   |  beq ->fff_restv			// String argument? Return it unchanged.
   1398   |  // Handle numbers inline, unless a number base metatable is present.
   1399   |  lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
   1400   |  checknum CARG3
   1401   |  cmplwi cr1, TMP0, 0
   1402   |   stp BASE, L->base			// Add frame since C call can throw.
   1403   |  crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq  // cr0.eq = checknum gave "greater" || number base metatable != NULL.
   1404   |   stw PC, SAVE_PC			// Redundant (but a defined value).
   1405   |  beq ->fff_fallback
   1406   |  ffgccheck
   1407   |  mr CARG1, L
   1408   |  mr CARG2, BASE  // Pointer to arg 1.
   1409   |.if DUALNUM
   1410   |  bl extern lj_strfmt_number		// (lua_State *L, cTValue *o)
   1411   |.else
   1412   |  bl extern lj_strfmt_num		// (lua_State *L, lua_Number *np)
   1413   |.endif
   1414   |  // Returns GCstr *.
   1415   |  li CARG3, LJ_TSTR  // Result tag; the GCstr * is expected in CARG1 (assumes CRET1 aliases CARG1 - confirm).
   1416   |  b ->fff_restv
   1417   |
   1418   |//-- Base library: iterators -------------------------------------------
   1419   |
   1420   |.ffunc next  // next(t [, k]) via lj_tab_next; falls back unless arg 1 is a table.
   1421   |  cmplwi NARGS8:RC, 8
   1422   |   lwz CARG1, 0(BASE)
   1423   |    lwz TAB:CARG2, 4(BASE)
   1424   |  blt ->fff_fallback  // No arguments.
   1425   |   stwx TISNIL, BASE, NARGS8:RC	// Set missing 2nd arg to nil.
   1426   |  checktab CARG1
   1427   |   lwz PC, FRAME_PC(BASE)
   1428   |  bne ->fff_fallback
   1429   |   stp BASE, L->base			// Add frame since C call can throw.
   1430   |  mr CARG1, L
   1431   |   stp BASE, L->top			// Dummy frame length is ok.
   1432   |  la CARG3, 8(BASE)  // Key = pointer to arg 2.
   1433   |   stw PC, SAVE_PC
   1434   |  bl extern lj_tab_next	// (lua_State *L, GCtab *t, TValue *key)
   1435   |  // Returns 0 at end of traversal.
   1436   |  cmplwi CRET1, 0
   1437   |   li CARG3, LJ_TNIL
   1438   |  beq ->fff_restv			// End of traversal: return nil.
   1439   |  lfd f0, 8(BASE)			// Copy key and value to results.
   1440   |   la RA, -8(BASE)
   1441   |  lfd f1, 16(BASE)
   1442   |  stfd f0, 0(RA)
   1443   |   li RD, (2+1)*8  // Two results.
   1444   |  stfd f1, 8(RA)
   1445   |  b ->fff_res
   1446   |
   1447   |.ffunc_1 pairs  // Inline path for tables without a metatable: returns upvalue[0], t, nil.
   1448   |  checktab CARG3
   1449   |   lwz PC, FRAME_PC(BASE)
   1450   |  bne ->fff_fallback
   1451   |   lwz TAB:TMP2, TAB:CARG1->metatable
   1452   |  lfd f0, CFUNC:RB->upvalue[0]
   1453   |   cmplwi TAB:TMP2, 0
   1454   |  la RA, -8(BASE)
   1455   |   bne ->fff_fallback  // Table has a metatable: take the fallback.
   1456   |   stw TISNIL, 8(BASE)  // Third result = nil.
   1457   |  li RD, (3+1)*8
   1458   |  stfd f0, 0(RA)  // First result = upvalue[0]; the table at 0(BASE) becomes the second.
   1459   |  b ->fff_res
   1460   |
   1461   |.ffunc ipairs_aux  // Iterator step: returns i+1, t[i+1], or nothing once t[i+1] is nil.
   1462   |  cmplwi NARGS8:RC, 16
   1463   |   lwz CARG3, 0(BASE)
   1464   |    lwz TAB:CARG1, 4(BASE)
   1465   |   lwz CARG4, 8(BASE)
   1466   |.if DUALNUM
   1467   |    lwz TMP2, 12(BASE)
   1468   |.else
   1469   |    lfd FARG2, 8(BASE)
   1470   |.endif
   1471   |  blt ->fff_fallback  // Fewer than two arguments.
   1472   |  checktab CARG3
   1473   |  checknum cr1, CARG4
   1474   |   lwz PC, FRAME_PC(BASE)
   1475   |.if DUALNUM
   1476   |  bne ->fff_fallback
   1477   |  bne cr1, ->fff_fallback
   1478   |.else
   1479   |    lus TMP0, 0x3ff0  // 0x3ff00000:00000000 is the double 1.0, built in TMPD.
   1480   |    stw ZERO, TMPD_LO
   1481   |  bne ->fff_fallback
   1482   |    stw TMP0, TMPD_HI
   1483   |  bge cr1, ->fff_fallback
   1484   |    lfd FARG1, TMPD
   1485   |  toint TMP2, FARG2, f0
   1486   |.endif
   1487   |   lwz TMP0, TAB:CARG1->asize
   1488   |   lwz TMP1, TAB:CARG1->array
   1489   |.if not DUALNUM
   1490   |  fadd FARG2, FARG2, FARG1  // Index + 1.0 as a double.
   1491   |.endif
   1492   |  addi TMP2, TMP2, 1  // Index + 1 as an integer.
   1493   |   la RA, -8(BASE)
   1494   |  cmplw TMP0, TMP2
   1495   |.if DUALNUM
   1496   |  stw TISNUM, 0(RA)
   1497   |   slwi TMP3, TMP2, 3
   1498   |  stw TMP2, 4(RA)
   1499   |.else
   1500   |   slwi TMP3, TMP2, 3
   1501   |  stfd FARG2, 0(RA)  // First result = new index.
   1502   |.endif
   1503   |  ble >2				// Not in array part?
   1504   |  lwzx TMP2, TMP1, TMP3  // TMP2 = tag word of array[index], tested by checknil below.
   1505   |  lfdx f0, TMP1, TMP3
   1506   |1:  // TMP2 = tag word of the value, f0 = the value.
   1507   |  checknil TMP2
   1508   |   li RD, (0+1)*8
   1509   |  beq ->fff_res			// End of iteration, return 0 results.
   1510   |   li RD, (2+1)*8
   1511   |  stfd f0, 8(RA)
   1512   |  b ->fff_res
   1513   |2:  // Check for empty hash part first. Otherwise call C function.
   1514   |  lwz TMP0, TAB:CARG1->hmask
   1515   |  cmplwi TMP0, 0
   1516   |   li RD, (0+1)*8
   1517   |  beq ->fff_res
   1518   |   mr CARG2, TMP2
   1519   |  bl extern lj_tab_getinth		// (GCtab *t, int32_t key)
   1520   |  // Returns cTValue * or NULL.
   1521   |  cmplwi CRET1, 0
   1522   |   li RD, (0+1)*8
   1523   |  beq ->fff_res
   1524   |  lwz TMP2, 0(CRET1)
   1525   |  lfd f0, 0(CRET1)
   1526   |  b <1
   1527   |
   1528   |.ffunc_1 ipairs  // Inline path for tables without a metatable: returns upvalue[0], t, 0.
   1529   |  checktab CARG3
   1530   |   lwz PC, FRAME_PC(BASE)
   1531   |  bne ->fff_fallback
   1532   |   lwz TAB:TMP2, TAB:CARG1->metatable
   1533   |  lfd f0, CFUNC:RB->upvalue[0]
   1534   |   cmplwi TAB:TMP2, 0
   1535   |  la RA, -8(BASE)
   1536   |   bne ->fff_fallback  // Table has a metatable: take the fallback.
   1537   |.if DUALNUM
   1538   |  stw TISNUM, 8(BASE)  // Third result = integer 0 (tag word here, payload word below).
   1539   |.else
   1540   |  stw ZERO, 8(BASE)  // Third result = double 0.0 (both words zero).
   1541   |.endif
   1542   |   stw ZERO, 12(BASE)
   1543   |  li RD, (3+1)*8
   1544   |  stfd f0, 0(RA)  // First result = upvalue[0]; the table at 0(BASE) becomes the second.
   1545   |  b ->fff_res
   1546   |
   1547   |//-- Base library: catch errors ----------------------------------------
   1548   |
   1549   |.ffunc pcall  // Set up a FRAME_PCALL frame and call arg 1 with the remaining arguments.
   1550   |  cmplwi NARGS8:RC, 8
   1551   |   lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
   1552   |  blt ->fff_fallback  // No function argument.
   1553   |   mr TMP2, BASE  // TMP2 = old base for vm_call_dispatch.
   1554   |   la BASE, 8(BASE)  // New base is one slot up.
   1555   |  // Remember active hook before pcall.
   1556   |  rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31  // TMP3 = (hookmask >> HOOK_ACTIVE_SHIFT) & 1.
   1557   |   subi NARGS8:RC, NARGS8:RC, 8
   1558   |  addi PC, TMP3, 8+FRAME_PCALL  // PC = frame delta 8 + FRAME_PCALL + hook-active bit.
   1559   |  b ->vm_call_dispatch
   1560   |
   1561   |.ffunc xpcall  // Like pcall, but arg 2 must be a function; swaps args 1 and 2 in place first.
   1562   |  cmplwi NARGS8:RC, 16
   1563   |   lwz CARG4, 8(BASE)
   1564   |    lfd FARG2, 8(BASE)
   1565   |    lfd FARG1, 0(BASE)
   1566   |  blt ->fff_fallback  // Fewer than two arguments.
   1567   |  lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
   1568   |   mr TMP2, BASE  // TMP2 = old base for vm_call_dispatch.
   1569   |  checkfunc CARG4; bne ->fff_fallback  // Traceback must be a function.
   1570   |   la BASE, 16(BASE)  // New base is two slots up.
   1571   |  // Remember active hook before pcall.
   1572   |  rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31  // TMP1 = (hookmask >> HOOK_ACTIVE_SHIFT) & 1.
   1573   |    stfd FARG2, 0(TMP2)		// Swap function and traceback.
   1574   |  subi NARGS8:RC, NARGS8:RC, 16
   1575   |    stfd FARG1, 8(TMP2)
   1576   |  addi PC, TMP1, 16+FRAME_PCALL  // PC = frame delta 16 + FRAME_PCALL + hook-active bit.
   1577   |  b ->vm_call_dispatch
   1578   |
   1579   |//-- Coroutine library --------------------------------------------------
   1580   |
   1581   |.macro coroutine_resume_wrap, resume
   1582   |.if resume
   1583   |.ffunc_1 coroutine_resume  // resume != 0: the coroutine is arg 1.
   1584   |  cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback
   1585   |.else
   1586   |.ffunc coroutine_wrap_aux  // resume == 0: the coroutine is upvalue[0] of the function.
   1587   |  lwz L:CARG1, CFUNC:RB->upvalue[0].gcr
   1588   |.endif
   1589   |  lbz TMP0, L:CARG1->status
   1590   |   lp TMP1, L:CARG1->cframe
   1591   |    lp CARG2, L:CARG1->top
   1592   |  cmplwi cr0, TMP0, LUA_YIELD
   1593   |    lp TMP2, L:CARG1->base
   1594   |   cmplwi cr1, TMP1, 0
   1595   |   lwz TMP0, L:CARG1->maxstack
   1596   |    cmplw cr7, CARG2, TMP2
   1597   |   lwz PC, FRAME_PC(BASE)
   1598   |  crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq		// st>LUA_YIELD || cframe!=0
   1599   |   add TMP2, CARG2, NARGS8:RC  // TMP2 = co->top + nargs*8: coroutine top after the copy.
   1600   |  crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq	// base==top && st!=LUA_YIELD
   1601   |   cmplw cr1, TMP2, TMP0  // cr1 is reused: new top vs. co->maxstack.
   1602   |  cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
   1603   |   stw PC, SAVE_PC
   1604   |  cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt		// cond1 || cond2 || stackov
   1605   |   stp BASE, L->base
   1606   |  blt cr6, ->fff_fallback
   1607   |1:
   1608   |.if resume
   1609   |  addi BASE, BASE, 8			// Keep resumed thread in stack for GC.
   1610   |  subi NARGS8:RC, NARGS8:RC, 8
   1611   |  subi TMP2, TMP2, 8
   1612   |.endif
   1613   |  stp TMP2, L:CARG1->top
   1614   |  li TMP1, 0
   1615   |  stp BASE, L->top
   1616   |2:  // Move args to coroutine.
   1617   |  cmpw TMP1, NARGS8:RC
   1618   |   lfdx f0, BASE, TMP1
   1619   |  beq >3
   1620   |   stfdx f0, CARG2, TMP1
   1621   |  addi TMP1, TMP1, 8
   1622   |  b <2
   1623   |3:
   1624   |  li CARG3, 0
   1625   |   mr L:SAVE0, L:CARG1  // Keep the coroutine pointer across the call.
   1626   |  li CARG4, 0
   1627   |  bl ->vm_resume			// (lua_State *L, TValue *base, 0, 0)
   1628   |  // Returns thread status.
   1629   |4:  // Also re-entered from 9: with CRET1 = 0 after growing the stack.
   1630   |  lp TMP2, L:SAVE0->base
   1631   |   cmplwi CRET1, LUA_YIELD
   1632   |  lp TMP3, L:SAVE0->top
   1633   |    li_vmstate INTERP
   1634   |  lp BASE, L->base
   1635   |    stw L, DISPATCH_GL(cur_L)(DISPATCH)
   1636   |    st_vmstate
   1637   |   bgt >8  // Status above LUA_YIELD: error path.
   1638   |  sub RD, TMP3, TMP2  // RD = nresults*8 = co->top - co->base.
   1639   |   lwz TMP0, L->maxstack
   1640   |  cmplwi RD, 0
   1641   |   add TMP1, BASE, RD
   1642   |  beq >6				// No results?
   1643   |  cmplw TMP1, TMP0
   1644   |   li TMP1, 0
   1645   |  bgt >9				// Need to grow stack?
   1646   |
   1647   |  subi TMP3, RD, 8  // TMP3 = offset of the last result.
   1648   |   stp TMP2, L:SAVE0->top		// Clear coroutine stack.
   1649   |5:  // Move results from coroutine.
   1650   |  cmplw TMP1, TMP3
   1651   |   lfdx f0, TMP2, TMP1
   1652   |   stfdx f0, BASE, TMP1
   1653   |    addi TMP1, TMP1, 8
   1654   |  bne <5
   1655   |6:
   1656   |  andix. TMP0, PC, FRAME_TYPE  // cr0 is consumed by the beq after label 7:.
   1657   |.if resume
   1658   |  li TMP1, LJ_TTRUE
   1659   |   la RA, -8(BASE)
   1660   |  stw TMP1, -8(BASE)			// Prepend true to results.
   1661   |  addi RD, RD, 16
   1662   |.else
   1663   |  mr RA, BASE
   1664   |  addi RD, RD, 8
   1665   |.endif
   1666   |7:  // RA = results, RD = (nresults+1)*8, cr0 = frame type test.
   1667   |    stw PC, SAVE_PC
   1668   |   mr MULTRES, RD
   1669   |  beq ->BC_RET_Z  // Frame type bits of PC are zero.
   1670   |  b ->vm_return
   1671   |
   1672   |8:  // Coroutine returned with error (at co->top-1).
   1673   |.if resume
   1674   |  andix. TMP0, PC, FRAME_TYPE
   1675   |  la TMP3, -8(TMP3)
   1676   |   li TMP1, LJ_TFALSE
   1677   |  lfd f0, 0(TMP3)
   1678   |   stp TMP3, L:SAVE0->top		// Remove error from coroutine stack.
   1679   |    li RD, (2+1)*8
   1680   |   stw TMP1, -8(BASE)		// Prepend false to results.
   1681   |    la RA, -8(BASE)
   1682   |  stfd f0, 0(BASE)			// Copy error message.
   1683   |  b <7
   1684   |.else
   1685   |  mr CARG1, L
   1686   |  mr CARG2, L:SAVE0
   1687   |  bl extern lj_ffh_coroutine_wrap_err  // (lua_State *L, lua_State *co). NOTE(review): no branch follows, so this presumably never returns - confirm.
   1688   |.endif
   1689   |
   1690   |9:  // Handle stack expansion on return from yield.
   1691   |  mr CARG1, L
   1692   |  srwi CARG2, RD, 3  // n = number of result slots.
   1693   |  bl extern lj_state_growstack	// (lua_State *L, int n)
   1694   |  li CRET1, 0  // Status 0, so the retry at 4: takes the non-error path.
   1695   |  b <4
   1696   |.endmacro
   1697   |
   1698   |  coroutine_resume_wrap 1		// coroutine.resume
   1699   |  coroutine_resume_wrap 0		// coroutine.wrap
   1700   |
   1701   |.ffunc coroutine_yield  // Inline yield; falls back unless L->cframe has CFRAME_RESUME set.
   1702   |  lp TMP0, L->cframe
   1703   |   add TMP1, BASE, NARGS8:RC
   1704   |   stp BASE, L->base
   1705   |  andix. TMP0, TMP0, CFRAME_RESUME
   1706   |   stp TMP1, L->top  // L->top = BASE + nargs*8.
   1707   |    li CRET1, LUA_YIELD
   1708   |  beq ->fff_fallback  // CFRAME_RESUME bit is clear.
   1709   |   stp ZERO, L->cframe
   1710   |    stb CRET1, L->status  // Mark the thread as yielded; CRET1 is also the return status.
   1711   |  b ->vm_leave_unw
   1712   |
   1713   |//-- Math library -------------------------------------------------------
   1714   |
   1715   |.ffunc_1 math_abs  // abs(x); falls through into fff_restv on the floating-point path.
   1716   |  checknum CARG3
   1717   |.if DUALNUM
   1718   |  bne >2  // Not the integer tag: try the floating-point path.
   1719   |  srawi TMP1, CARG1, 31  // TMP1 = sign mask (0 or -1).
   1720   |  xor TMP2, TMP1, CARG1
   1721   |.if GPR64
   1722   |  lus TMP0, 0x8000
   1723   |  sub CARG1, TMP2, TMP1  // CARG1 = (x ^ mask) - mask = |x|.
   1724   |  cmplw CARG1, TMP0
   1725   |  beq >1  // Result is 0x80000000: not representable as a positive int32.
   1726   |.else
   1727   |  sub. CARG1, TMP2, TMP1  // CARG1 = (x ^ mask) - mask = |x|; sets cr0.
   1728   |  blt >1  // Still negative: not representable as a positive int32.
   1729   |.endif
   1730   |->fff_resi:  // Return an integer result (stored from CRET1; assumes it aliases CARG1 - confirm).
   1731   |  lwz PC, FRAME_PC(BASE)
   1732   |  la RA, -8(BASE)
   1733   |  stw TISNUM, -8(BASE)
   1734   |  stw CRET1, -4(BASE)
   1735   |  b ->fff_res1
   1736   |1:
   1737   |  lus CARG3, 0x41e0	// 2^31.
   1738   |  li CARG1, 0  // Low word of the double 2^31.
   1739   |  b ->fff_restv
   1740   |2:
   1741   |.endif
   1742   |  bge ->fff_fallback  // Uses cr0 from the checknum above: not a number.
   1743   |  rlwinm CARG3, CARG3, 0, 1, 31  // Clear bit 0 (the sign bit) of the high word.
   1744   |  // Fallthrough.
   1745   |
   1746   |->fff_restv:  // Return one result given as a TValue in two registers.
   1747   |  // CARG3/CARG1 = TValue result.
   1748   |  lwz PC, FRAME_PC(BASE)
   1749   |   stw CARG3, -8(BASE)
   1750   |  la RA, -8(BASE)
   1751   |   stw CARG1, -4(BASE)
   1752   |->fff_res1:  // Return exactly one result that is already stored at RA.
   1753   |  // RA = results, PC = return.
   1754   |  li RD, (1+1)*8
   1755   |->fff_res:  // Generic fast-function return.
   1756   |  // RA = results, RD = (nresults+1)*8, PC = return.
   1757   |  andix. TMP0, PC, FRAME_TYPE
   1758   |   mr MULTRES, RD
   1759   |  bney ->vm_return  // Frame type bits set: use the generic return path.
   1760   |  lwz INS, -4(PC)  // Otherwise PC is a bytecode pointer: fetch the calling instruction.
   1761   |  decode_RB8 RB, INS
   1762   |5:
   1763   |  cmplw RB, RD			// More results expected?
   1764   |   decode_RA8 TMP0, INS
   1765   |  bgt >6
   1766   |  ins_next1
   1767   |  // Adjust BASE. KBASE is assumed to be set for the calling frame.
   1768   |   sub BASE, RA, TMP0
   1769   |  ins_next2
   1770   |
   1771   |6:  // Fill up results with nil.
   1772   |  subi TMP1, RD, 8
   1773   |   addi RD, RD, 8
   1774   |  stwx TISNIL, RA, TMP1  // Store the nil tag word into the next result slot.
   1775   |  b <5
   1776   |
   1777   |.macro math_extern, func
   1778   |  .ffunc_n math_ .. func	// Defines fast function math_<func> taking one number argument.
   1779   |  blex func	// Call the external function named func.
   1780   |  b ->fff_resn	// Return the number in FARG1.
   1781   |.endmacro
   1782   |
   1783   |.macro math_extern2, func
   1784   |  .ffunc_nn math_ .. func	// Defines fast function math_<func> taking two number arguments.
   1785   |  blex func	// Call the external function named func.
   1786   |  b ->fff_resn	// Return the number in FARG1.
   1787   |.endmacro
   1788   |
   1789   |.macro math_round, func
   1790   |  .ffunc_1 math_ .. func	// Inline floor/ceil: returns an integer when the result fits, else calls the external func.
   1791   |   checknum CARG3; beqy ->fff_restv	// Integer argument: return it unchanged.
   1792   |  rlwinm TMP2, CARG3, 12, 21, 31	// TMP2 = 11 bit biased exponent from the hiword.
   1793   |   bge ->fff_fallback	// Not a number.
   1794   |  addic. TMP2, TMP2, -1023		// exp = exponent(x) - 1023
   1795   |  cmplwi cr1, TMP2, 31		// 0 <= exp < 31?
   1796   |   subfic TMP0, TMP2, 31	// TMP0 = 31 - exp = right shift to extract the integer part.
   1797   |  blt >3
   1798   |  slwi TMP1, CARG3, 11
   1799   |   srwi TMP3, CARG1, 21
   1800   |  oris TMP1, TMP1, 0x8000	// Insert the implicit leading 1 of the mantissa.
   1801   |   addi TMP2, TMP2, 1
   1802   |  or TMP1, TMP1, TMP3	// TMP1 = top 32 bits of the mantissa, left-aligned.
   1803   |   slwi CARG2, CARG1, 11	// CARG2 = remaining low mantissa bits.
   1804   |  bge cr1, >4
   1805   |   slw TMP3, TMP1, TMP2
   1806   |  srw RD, TMP1, TMP0	// RD = integer part of |x|.
   1807   |   or TMP3, TMP3, CARG2	// TMP3 != 0 iff |x| has a fractional part.
   1808   |  srawi TMP2, CARG3, 31	// TMP2 = sign mask of x: 0 or -1.
   1809   |.if "func" == "floor"
   1810   |  and TMP1, TMP3, TMP2	// Fraction bits count only for negative x.
   1811   |  addic TMP0, TMP1, -1	// Carry = (TMP1 != 0).
   1812   |  subfe TMP1, TMP0, TMP1	// TMP1 = carry, i.e. 1 if rounding away from zero is needed.
   1813   |  add CARG1, RD, TMP1
   1814   |  xor CARG1, CARG1, TMP2
   1815   |  sub CARG1, CARG1, TMP2	// Apply the sign: (v ^ mask) - mask.
   1816   |  b ->fff_resi
   1817   |.else
   1818   |  andc TMP1, TMP3, TMP2	// Fraction bits count only for non-negative x.
   1819   |  addic TMP0, TMP1, -1	// Carry = (TMP1 != 0).
   1820   |  subfe TMP1, TMP0, TMP1	// TMP1 = carry, i.e. 1 if rounding away from zero is needed.
   1821   |  add CARG1, RD, TMP1
   1822   |  cmpw CARG1, RD	// Signed compare detects wrap-around of the increment.
   1823   |  xor CARG1, CARG1, TMP2
   1824   |  sub CARG1, CARG1, TMP2	// Apply the sign: (v ^ mask) - mask.
   1825   |  bge ->fff_resi
   1826   |  // Overflow to 2^31.
   1827   |  lus CARG3, 0x41e0			// 2^31.
   1828   |  li CARG1, 0
   1829   |  b ->fff_restv
   1830   |.endif
   1831   |3:  // |x| < 1
   1832   |  slwi TMP2, CARG3, 1	// Shift out the sign bit of the hiword.
   1833   |   srawi TMP1, CARG3, 31	// TMP1 = sign mask of x: 0 or -1.
   1834   |  or TMP2, CARG1, TMP2		// ztest = (hi+hi) | lo
   1835   |.if "func" == "floor"
   1836   |  and TMP1, TMP2, TMP1		// (ztest & sign) == 0 ? 0 : -1
   1837   |  subfic TMP2, TMP1, 0	// Carry = (TMP1 == 0).
   1838   |  subfe CARG1, CARG1, CARG1	// CARG1 = carry - 1.
   1839   |.else
   1840   |  andc TMP1, TMP2, TMP1		// (ztest & ~sign) == 0 ? 0 : 1
   1841   |  addic TMP2, TMP1, -1	// Carry = (TMP1 != 0).
   1842   |  subfe CARG1, TMP2, TMP1	// CARG1 = carry.
   1843   |.endif
   1844   |  b ->fff_resi
   1845   |4:  // exp >= 31. Check for -(2^31).
   1846   |  xoris TMP1, TMP1, 0x8000	// Zero iff the top mantissa word is exactly the implicit 1.
   1847   |  srawi TMP2, CARG3, 31
   1848   |.if "func" == "floor"
   1849   |  or TMP1, TMP1, CARG2	// floor also requires the low mantissa bits to be zero.
   1850   |.endif
   1851   |.if PPE
   1852   |  orc TMP1, TMP1, TMP2
   1853   |  cmpwi TMP1, 0
   1854   |.else
   1855   |  orc. TMP1, TMP1, TMP2	// OR with ~sign mask: can only be zero for negative x.
   1856   |.endif
   1857   |  crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq	// Also require exp == 31 (cr1 from the cmplwi above).
   1858   |  lus CARG1, 0x8000			// -(2^31).
   1859   |  beqy ->fff_resi
   1860   |5:
   1861   |  lfd FARG1, 0(BASE)	// Result does not fit an integer: call the external function on the double.
   1862   |  blex func
   1863   |  b ->fff_resn
   1864   |.endmacro
   1865   |
   1866   |.if DUALNUM
   1867   |  math_round floor	// Inline implementation, instantiated as math_floor.
   1868   |  math_round ceil	// Inline implementation, instantiated as math_ceil.
   1869   |.else
   1870   |  // NYI: use internal implementation.
   1871   |  math_extern floor	// Plain call to the external floor.
   1872   |  math_extern ceil	// Plain call to the external ceil.
   1873   |.endif
   1874   |
   1875   |.if SQRT
   1876   |.ffunc_n math_sqrt	// Uses the fsqrt instruction when the SQRT define is set.
   1877   |  fsqrt FARG1, FARG1
   1878   |  b ->fff_resn
   1879   |.else
   1880   |  math_extern sqrt	// Otherwise call the external sqrt.
   1881   |.endif
   1882   |
   1883   |.ffunc math_log	// math.log fast path for a single number argument; anything else falls back.
   1884   |  cmplwi NARGS8:RC, 8
   1885   |   lwz CARG3, 0(BASE)	// First word of argument 1 (checked by checknum below).
   1886   |    lfd FARG1, 0(BASE)	// Argument 1 loaded as a double.
   1887   |  bne ->fff_fallback			// Need exactly 1 argument.
   1888   |  checknum CARG3; bge ->fff_fallback
   1889   |  blex log
   1890   |  b ->fff_resn
   1891   |
   1892   |  math_extern log10	// One-argument wrappers: each expands to a math_<name> fast function calling <name>.
   1893   |  math_extern exp
   1894   |  math_extern sin
   1895   |  math_extern cos
   1896   |  math_extern tan
   1897   |  math_extern asin
   1898   |  math_extern acos
   1899   |  math_extern atan
   1900   |  math_extern sinh
   1901   |  math_extern cosh
   1902   |  math_extern tanh
   1903   |  math_extern2 pow	// Two-argument wrappers.
   1904   |  math_extern2 atan2
   1905   |  math_extern2 fmod
   1906   |
   1907   |.if DUALNUM
   1908   |.ffunc math_ldexp	// DUALNUM: needs (number, integer); the integer goes straight into the int argument register.
   1909   |  cmplwi NARGS8:RC, 16
   1910   |   lwz CARG3, 0(BASE)
   1911   |    lfd FARG1, 0(BASE)
   1912   |   lwz CARG4, 8(BASE)
   1913   |.if GPR64
   1914   |    lwz CARG2, 12(BASE)	// Integer argument register differs between the GPR64 and 32 bit variants.
   1915   |.else
   1916   |    lwz CARG1, 12(BASE)
   1917   |.endif
   1918   |  blt ->fff_fallback	// Fewer than 2 arguments.
   1919   |  checknum CARG3; bge ->fff_fallback
   1920   |  checknum CARG4; bne ->fff_fallback
   1921   |.else
   1922   |.ffunc_nn math_ldexp	// Non-DUALNUM: both arguments are numbers; convert the 2nd to an integer.
   1923   |.if GPR64
   1924   |  toint CARG2, FARG2
   1925   |.else
   1926   |  toint CARG1, FARG2
   1927   |.endif
   1928   |.endif
   1929   |  blex ldexp
   1930   |  b ->fff_resn
   1931   |
   1932   |.ffunc_n math_frexp	// math.frexp: returns two results, mantissa and exponent.
   1933   |.if GPR64
   1934   |  la CARG2, DISPATCH_GL(tmptv)(DISPATCH)	// Pointer argument: frexp writes the exponent into g->tmptv.
   1935   |.else
   1936   |  la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
   1937   |.endif
   1938   |   lwz PC, FRAME_PC(BASE)
   1939   |  blex frexp
   1940   |   lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH)	// TMP1 = exponent written by frexp.
   1941   |   la RA, -8(BASE)
   1942   |.if not DUALNUM
   1943   |   tonum_i FARG2, TMP1
   1944   |.endif
   1945   |  stfd FARG1, 0(RA)	// Result 1: the double returned in FARG1.
   1946   |  li RD, (2+1)*8	// Two results.
   1947   |.if DUALNUM
   1948   |   stw TISNUM, 8(RA)	// Result 2 stored as a tagged integer.
   1949   |   stw TMP1, 12(RA)
   1950   |.else
   1951   |   stfd FARG2, 8(RA)	// Result 2 stored as a double.
   1952   |.endif
   1953   |  b ->fff_res
   1954   |
   1955   |.ffunc_n math_modf	// math.modf: returns two results.
   1956   |.if GPR64
   1957   |  la CARG2, -8(BASE)	// Pointer argument: modf writes its output directly into result slot 1.
   1958   |.else
   1959   |  la CARG1, -8(BASE)
   1960   |.endif
   1961   |   lwz PC, FRAME_PC(BASE)
   1962   |  blex modf
   1963   |   la RA, -8(BASE)
   1964   |  stfd FARG1, 0(BASE)	// Return value of modf goes into result slot 2 (RA+8 == BASE).
   1965   |  li RD, (2+1)*8	// Two results.
   1966   |  b ->fff_res
   1967   |
   1968   |.macro math_minmax, name, ismax
   1969   |.if DUALNUM
   1970   |  .ffunc_1 name	// Folds all arguments into a running min/max; stays in integers until a number is seen.
   1971   |  checknum CARG3
   1972   |   addi TMP1, BASE, 8	// TMP1 = pointer to the next argument.
   1973   |   add TMP2, BASE, NARGS8:RC	// TMP2 = end of the arguments.
   1974   |  bne >4
   1975   |1:  // Handle integers.
   1976   |  lwz CARG4, 0(TMP1)
   1977   |   cmplw cr1, TMP1, TMP2
   1978   |  lwz CARG2, 4(TMP1)
   1979   |   bge cr1, ->fff_resi	// All arguments consumed: return the integer in CARG1.
   1980   |  checknum CARG4
   1981   |   xoris TMP0, CARG1, 0x8000	// Flip sign bits so the unsigned carry below orders signed values.
   1982   |   xoris TMP3, CARG2, 0x8000
   1983   |  bne >3
   1984   |  subfc TMP3, TMP3, TMP0	// TMP3 = CARG1 - CARG2; carry set iff CARG1 >= CARG2 (signed).
   1985   |  subfe TMP0, TMP0, TMP0	// TMP0 = carry - 1: 0 if CARG1 >= CARG2, else -1.
   1986   |.if ismax
   1987   |  andc TMP3, TMP3, TMP0
   1988   |.else
   1989   |  and TMP3, TMP3, TMP0
   1990   |.endif
   1991   |  add CARG1, TMP3, CARG2	// CARG2 plus either 0 or the difference selects the min/max.
   1992   |.if GPR64
   1993   |  rldicl CARG1, CARG1, 0, 32	// Clear the upper 32 bits.
   1994   |.endif
   1995   |   addi TMP1, TMP1, 8
   1996   |  b <1
   1997   |3:
   1998   |  bge ->fff_fallback	// Next argument is not a number.
   1999   |  // Convert intermediate result to number and continue below.
   2000   |  tonum_i FARG1, CARG1
   2001   |  lfd FARG2, 0(TMP1)
   2002   |  b >6
   2003   |4:
   2004   |   lfd FARG1, 0(BASE)
   2005   |  bge ->fff_fallback	// First argument is not a number.
   2006   |5:  // Handle numbers.
   2007   |  lwz CARG4, 0(TMP1)
   2008   |   cmplw cr1, TMP1, TMP2
   2009   |  lfd FARG2, 0(TMP1)
   2010   |   bge cr1, ->fff_resn	// All arguments consumed: return the number in FARG1.
   2011   |  checknum CARG4; bge >7
   2012   |6:
   2013   |  fsub f0, FARG1, FARG2	// Sign of the difference drives the fsel below.
   2014   |   addi TMP1, TMP1, 8
   2015   |.if ismax
   2016   |  fsel FARG1, f0, FARG1, FARG2	// f0 >= 0 ? FARG1 : FARG2.
   2017   |.else
   2018   |  fsel FARG1, f0, FARG2, FARG1	// f0 >= 0 ? FARG2 : FARG1.
   2019   |.endif
   2020   |  b <5
   2021   |7:  // Convert integer to number and continue above.
   2022   |   lwz CARG2, 4(TMP1)
   2023   |  bne ->fff_fallback	// Neither number nor integer.
   2024   |  tonum_i FARG2, CARG2
   2025   |  b <6
   2026   |.else
   2027   |  .ffunc_n name	// Non-DUALNUM: every argument must be a number.
   2028   |  li TMP1, 8	// TMP1 = byte offset of the next argument from BASE.
   2029   |1:
   2030   |   lwzx CARG2, BASE, TMP1
   2031   |   lfdx FARG2, BASE, TMP1
   2032   |  cmplw cr1, TMP1, NARGS8:RC
   2033   |   checknum CARG2
   2034   |  bge cr1, ->fff_resn	// All arguments consumed: return the number in FARG1.
   2035   |   bge ->fff_fallback
   2036   |  fsub f0, FARG1, FARG2	// Sign of the difference drives the fsel below.
   2037   |   addi TMP1, TMP1, 8
   2038   |.if ismax
   2039   |  fsel FARG1, f0, FARG1, FARG2	// f0 >= 0 ? FARG1 : FARG2.
   2040   |.else
   2041   |  fsel FARG1, f0, FARG2, FARG1	// f0 >= 0 ? FARG2 : FARG1.
   2042   |.endif
   2043   |  b <1
   2044   |.endif
   2045   |.endmacro
   2046   |
   2047   |  math_minmax math_min, 0	// ismax = 0 selects the minimum.
   2048   |  math_minmax math_max, 1	// ismax = 1 selects the maximum.
   2049   |
   2050   |//-- String library -----------------------------------------------------
   2051   |
   2052   |.ffunc string_byte			// Only handle the 1-arg case here.
   2053   |  cmplwi NARGS8:RC, 8
   2054   |   lwz CARG3, 0(BASE)
   2055   |    lwz STR:CARG1, 4(BASE)
   2056   |  bne ->fff_fallback			// Need exactly 1 argument.
   2057   |   checkstr CARG3
   2058   |   bne ->fff_fallback	// Argument is not a string.
   2059   |  lwz TMP0, STR:CARG1->len
   2060   |.if DUALNUM
   2061   |   lbz CARG1, STR:CARG1[1]		// Access is always ok (NUL at end).
   2062   |   li RD, (0+1)*8	// Result count for the empty-string case: zero results.
   2063   |   lwz PC, FRAME_PC(BASE)
   2064   |  cmplwi TMP0, 0
   2065   |   la RA, -8(BASE)
   2066   |  beqy ->fff_res	// Empty string: return nothing.
   2067   |  b ->fff_resi	// Otherwise return the first byte as an integer.
   2068   |.else
   2069   |   lbz TMP1, STR:CARG1[1]		// Access is always ok (NUL at end).
   2070   |  addic TMP3, TMP0, -1		// RD = ((str->len != 0)+1)*8
   2071   |  subfe RD, TMP3, TMP0
   2072   |   stw TMP1, TONUM_LO		// Inlined tonum_u f0, TMP1.
   2073   |  addi RD, RD, 1
   2074   |   lfd f0, TONUM_D
   2075   |  la RA, -8(BASE)
   2076   |  lwz PC, FRAME_PC(BASE)
   2077   |   fsub f0, f0, TOBIT
   2078   |  slwi RD, RD, 3
   2079   |   stfd f0, 0(RA)	// Stored unconditionally; RD decides whether it counts as a result.
   2080   |  b ->fff_res
   2081   |.endif
   2082   |
   2083   |.ffunc string_char			// Only handle the 1-arg case here.
   2084   |  ffgccheck
   2085   |  cmplwi NARGS8:RC, 8
   2086   |   lwz CARG3, 0(BASE)
   2087   |.if DUALNUM
   2088   |    lwz TMP0, 4(BASE)
   2089   |  bne ->fff_fallback			// Exactly 1 argument.
   2090   |  checknum CARG3; bne ->fff_fallback
   2091   |   la CARG2, 7(BASE)	// Address of the last byte of the integer at 4(BASE), used as the 1-char source.
   2092   |.else
   2093   |    lfd FARG1, 0(BASE)
   2094   |  bne ->fff_fallback			// Exactly 1 argument.
   2095   |  checknum CARG3; bge ->fff_fallback
   2096   |  toint TMP0, FARG1
   2097   |   la CARG2, TMPD_BLO	// NOTE(review): presumably the low byte of the temp slot written by toint -- confirm.
   2098   |.endif
   2099   |   li CARG3, 1	// String length 1.
   2100   |  cmplwi TMP0, 255; bgt ->fff_fallback	// Unsigned compare: rejects values above 255 and negative values.
   2101   |->fff_newstr:	// Entry: CARG2 = char *str, CARG3 = length.
   2102   |  mr CARG1, L
   2103   |  stp BASE, L->base
   2104   |  stw PC, SAVE_PC
   2105   |  bl extern lj_str_new		// (lua_State *L, char *str, size_t l)
   2106   |->fff_resstr:
   2107   |  // Returns GCstr *.
   2108   |  lp BASE, L->base	// Re-load BASE after the call.
   2109   |  li CARG3, LJ_TSTR	// Tag word for the returned string.
   2110   |  b ->fff_restv
   2111   |
   2112   |.ffunc string_sub	// string.sub(str, start [, end]): clamps the range, then creates the substring via fff_newstr.
   2113   |  ffgccheck
   2114   |  cmplwi NARGS8:RC, 16
   2115   |   lwz CARG3, 16(BASE)
   2116   |.if not DUALNUM
   2117   |    lfd f0, 16(BASE)
   2118   |.endif
   2119   |   lwz TMP0, 0(BASE)
   2120   |    lwz STR:CARG1, 4(BASE)
   2121   |  blt ->fff_fallback	// Fewer than 2 arguments.
   2122   |   lwz CARG2, 8(BASE)
   2123   |.if DUALNUM
   2124   |    lwz TMP1, 12(BASE)
   2125   |.else
   2126   |    lfd f1, 8(BASE)
   2127   |.endif
   2128   |   li TMP2, -1	// Default end = -1 when only 2 arguments are given.
   2129   |  beq >1	// Exactly 2 arguments: skip the checks on the 3rd.
   2130   |.if DUALNUM
   2131   |  checknum CARG3
   2132   |   lwz TMP2, 20(BASE)
   2133   |  bne ->fff_fallback
   2134   |1:
   2135   |  checknum CARG2; bne ->fff_fallback
   2136   |.else
   2137   |  checknum CARG3; bge ->fff_fallback
   2138   |  toint TMP2, f0
   2139   |1:
   2140   |  checknum CARG2; bge ->fff_fallback
   2141   |.endif
   2142   |  checkstr TMP0; bne ->fff_fallback
   2143   |.if not DUALNUM
   2144   |   toint TMP1, f1
   2145   |.endif
   2146   |   lwz TMP0, STR:CARG1->len
   2147   |  cmplw TMP0, TMP2			// len < end? (unsigned compare)
   2148   |   addi TMP3, TMP2, 1
   2149   |  blt >5
   2150   |2:
   2151   |  cmpwi TMP1, 0			// start <= 0?
   2152   |   add TMP3, TMP1, TMP0
   2153   |  ble >7
   2154   |3:
   2155   |  sub CARG3, TMP2, TMP1
   2156   |    addi CARG2, STR:CARG1, #STR-1
   2157   |  srawi TMP0, CARG3, 31	// Mask: -1 if end - start is negative.
   2158   |   addi CARG3, CARG3, 1
   2159   |    add CARG2, CARG2, TMP1	// CARG2 = string data + start - 1.
   2160   |  andc CARG3, CARG3, TMP0	// Length = end - start + 1, or 0 if end - start is negative.
   2161   |.if GPR64
   2162   |  rldicl CARG2, CARG2, 0, 32
   2163   |  rldicl CARG3, CARG3, 0, 32
   2164   |.endif
   2165   |  b ->fff_newstr
   2166   |
   2167   |5:  // Negative end or overflow.
   2168   |  cmpw TMP0, TMP2			// len >= end? (signed compare)
   2169   |   add TMP2, TMP0, TMP3		// Negative end: end = end+len+1.
   2170   |  bge <2
   2171   |   mr TMP2, TMP0			// Overflow: end = len.
   2172   |  b <2
   2173   |
   2174   |7:  // Negative start or underflow.
   2175   |  .gpr64 extsw TMP1, TMP1
   2176   |  addic CARG3, TMP1, -1	// Carry = (start != 0).
   2177   |  subfe CARG3, CARG3, CARG3	// CARG3 = carry - 1: -1 if start == 0, else 0.
   2178   |   srawi CARG2, TMP3, 31		// Note: modifies carry.
   2179   |  andc TMP3, TMP3, CARG3	// start == 0: force start+len to 0.
   2180   |   andc TMP1, TMP3, CARG2	// start+len negative: clamp to 0.
   2181   |  addi TMP1, TMP1, 1			// start = 1 + (start ? start+len : 0)
   2182   |  b <3
   2183   |
   2184   |.macro ffstring_op, name
   2185   |  .ffunc string_ .. name	// Fast function string_<name>: runs lj_buf_putstr_<name> on the global temp buffer.
   2186   |  ffgccheck
   2187   |  cmplwi NARGS8:RC, 8
   2188   |   lwz CARG3, 0(BASE)
   2189   |    lwz STR:CARG2, 4(BASE)
   2190   |  blt ->fff_fallback	// No argument given.
   2191   |  checkstr CARG3
   2192   |   la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)	// CARG1 = &g->tmpbuf.
   2193   |  bne ->fff_fallback	// Argument is not a string.
   2194   |   lwz TMP0, SBUF:CARG1->b
   2195   |  stw L, SBUF:CARG1->L
   2196   |  stp BASE, L->base
   2197   |  stw PC, SAVE_PC
   2198   |   stw TMP0, SBUF:CARG1->p	// Reset the buffer: p = b.
   2199   |  bl extern lj_buf_putstr_ .. name
   2200   |  bl extern lj_buf_tostr	// NOTE(review): no argument setup here; presumably the previous call's return value is already in CARG1 -- confirm.
   2201   |  b ->fff_resstr
   2202   |.endmacro
   2203   |
   2204   |ffstring_op reverse	// Expands to string_reverse, calling lj_buf_putstr_reverse.
   2205   |ffstring_op lower	// Expands to string_lower, calling lj_buf_putstr_lower.
   2206   |ffstring_op upper	// Expands to string_upper, calling lj_buf_putstr_upper.
   2207   |
   2208   |//-- Bit library --------------------------------------------------------
   2209   |
   2210   |.macro .ffunc_bit, name
   2211   |.if DUALNUM
   2212   |  .ffunc_1 bit_..name	// Prologue for bit_<name>: leaves the first argument as a 32 bit integer in CARG1.
   2213   |  checknum CARG3; bnel ->fff_tobit_fb	// Not an integer: call the conversion helper, which returns here via blr.
   2214   |.else
   2215   |  .ffunc_n bit_..name
   2216   |  fadd FARG1, FARG1, TOBIT	// Add the TOBIT bias; the integer is then read from the low word below.
   2217   |  stfd FARG1, TMPD
   2218   |  lwz CARG1, TMPD_LO
   2219   |.endif
   2220   |.endmacro
   2221   |
   2222   |.macro .ffunc_bit_op, name, ins
   2223   |  .ffunc_bit name	// Variadic bit op: folds every further argument into CARG1 using ins.
   2224   |  addi TMP1, BASE, 8	// TMP1 = pointer to the next argument.
   2225   |  add TMP2, BASE, NARGS8:RC	// TMP2 = end of the arguments.
   2226   |1:
   2227   |  lwz CARG4, 0(TMP1)
   2228   |   cmplw cr1, TMP1, TMP2
   2229   |.if DUALNUM
   2230   |  lwz CARG2, 4(TMP1)
   2231   |.else
   2232   |  lfd FARG1, 0(TMP1)
   2233   |.endif
   2234   |   bgey cr1, ->fff_resi	// All arguments consumed: return the integer in CARG1.
   2235   |  checknum CARG4
   2236   |.if DUALNUM
   2237   |  bnel ->fff_bitop_fb	// Not an integer: the helper converts the number at 0(TMP1) into CARG2.
   2238   |.else
   2239   |  fadd FARG1, FARG1, TOBIT
   2240   |  bge ->fff_fallback	// Not a number.
   2241   |  stfd FARG1, TMPD
   2242   |  lwz CARG2, TMPD_LO
   2243   |.endif
   2244   |  ins CARG1, CARG1, CARG2
   2245   |   addi TMP1, TMP1, 8
   2246   |  b <1
   2247   |.endmacro
   2248   |
   2249   |.ffunc_bit_op band, and	// bit_band: folds arguments with the and instruction.
   2250   |.ffunc_bit_op bor, or	// bit_bor: folds arguments with the or instruction.
   2251   |.ffunc_bit_op bxor, xor	// bit_bxor: folds arguments with the xor instruction.
   2252   |
   2253   |.ffunc_bit bswap	// bit_bswap: reverse the byte order of the 32 bit value in CARG1.
   2254   |  rotlwi TMP0, CARG1, 8	// Rotate left by 8: puts two of the four bytes in their final place.
   2255   |  rlwimi TMP0, CARG1, 24, 0, 7	// Insert the byte for the top 8 bits.
   2256   |  rlwimi TMP0, CARG1, 24, 16, 23	// Insert the byte for bits 16..23.
   2257   |  mr CRET1, TMP0
   2258   |  b ->fff_resi
   2259   |
   2260   |.ffunc_bit bnot	// bit_bnot: bitwise complement of the first argument.
   2261   |  not CRET1, CARG1
   2262   |  b ->fff_resi
   2263   |
   2264   |.macro .ffunc_bit_sh, name, ins, shmod
   2265   |.if DUALNUM
   2266   |  .ffunc_2 bit_..name	// Two-argument shift/rotate: CARG1 = value, CARG2 = count; shmod selects count pre-processing.
   2267   |  checknum CARG3; bnel ->fff_tobit_fb
   2268   |  // Note: no inline conversion from number for 2nd argument!
   2269   |  checknum CARG4; bne ->fff_fallback
   2270   |.else
   2271   |  .ffunc_nn bit_..name
   2272   |  fadd FARG1, FARG1, TOBIT
   2273   |  fadd FARG2, FARG2, TOBIT
   2274   |  stfd FARG1, TMPD
   2275   |  lwz CARG1, TMPD_LO
   2276   |  stfd FARG2, TMPD	// The same temp slot is reused for the second conversion.
   2277   |  lwz CARG2, TMPD_LO
   2278   |.endif
   2279   |.if shmod == 1
   2280   |  rlwinm CARG2, CARG2, 0, 27, 31	// Keep only the low 5 bits of the count.
   2281   |.elif shmod == 2
   2282   |  neg CARG2, CARG2	// Negate the count: a rotate right is done as a rotate left.
   2283   |.endif
   2284   |  ins CRET1, CARG1, CARG2
   2285   |  b ->fff_resi
   2286   |.endmacro
   2287   |
   2288   |.ffunc_bit_sh lshift, slw, 1	// shmod 1: count masked to 5 bits.
   2289   |.ffunc_bit_sh rshift, srw, 1
   2290   |.ffunc_bit_sh arshift, sraw, 1
   2291   |.ffunc_bit_sh rol, rotlw, 0	// shmod 0: count used as is.
   2292   |.ffunc_bit_sh ror, rotlw, 2	// shmod 2: count negated, so rotlw rotates right.
   2293   |
   2294   |.ffunc_bit tobit	// bit_tobit: the .ffunc_bit prologue already did the conversion.
   2295   |.if DUALNUM
   2296   |  b ->fff_resi
   2297   |.else
   2298   |->fff_resi:	// Non-DUALNUM variant: integer results are converted to a number.
   2299   |  tonum_i FARG1, CRET1
   2300   |.endif
   2301   |->fff_resn:	// Return the number in FARG1 as the single result.
   2302   |  lwz PC, FRAME_PC(BASE)
   2303   |  la RA, -8(BASE)
   2304   |  stfd FARG1, -8(BASE)
   2305   |  b ->fff_res1
   2306   |
   2307   |// Fallback FP number to bit conversion.
   2308   |->fff_tobit_fb:	// Called via bnel: converts the number at 0(BASE) to an integer in CARG1.
   2309   |.if DUALNUM
   2310   |  lfd FARG1, 0(BASE)
   2311   |  bgt ->fff_fallback	// cr0 still holds the caller's checknum result: not a number.
   2312   |  fadd FARG1, FARG1, TOBIT
   2313   |  stfd FARG1, TMPD
   2314   |  lwz CARG1, TMPD_LO
   2315   |  blr
   2316   |.endif
   2317   |->fff_bitop_fb:	// Called via bnel: converts the number at 0(TMP1) to an integer in CARG2.
   2318   |.if DUALNUM
   2319   |  lfd FARG1, 0(TMP1)
   2320   |  bgt ->fff_fallback	// cr0 still holds the caller's checknum result: not a number.
   2321   |  fadd FARG1, FARG1, TOBIT
   2322   |  stfd FARG1, TMPD
   2323   |  lwz CARG2, TMPD_LO
   2324   |  blr
   2325   |.endif
   2326   |
   2327   |//-----------------------------------------------------------------------
   2328   |
   2329   |->fff_fallback:			// Call fast function fallback handler.
   2330   |  // BASE = new base, RB = CFUNC, RC = nargs*8
   2331   |  lp TMP3, CFUNC:RB->f	// TMP3 = function pointer of the fallback handler.
   2332   |    add TMP1, BASE, NARGS8:RC	// TMP1 = top of the arguments.
   2333   |   lwz PC, FRAME_PC(BASE)		// Fallback may overwrite PC.
   2334   |    addi TMP0, TMP1, 8*LUA_MINSTACK	// Stack limit needed by the handler.
   2335   |     lwz TMP2, L->maxstack
   2336   |   stw PC, SAVE_PC			// Redundant (but a defined value).
   2337   |  .toc lp TMP3, 0(TMP3)
   2338   |  cmplw TMP0, TMP2
   2339   |     stp BASE, L->base
   2340   |    stp TMP1, L->top
   2341   |   mr CARG1, L
   2342   |  bgt >5				// Need to grow stack.
   2343   |  mtctr TMP3
   2344   |  bctrl				// (lua_State *L)
   2345   |  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
   2346   |  lp BASE, L->base
   2347   |  cmpwi CRET1, 0
   2348   |   slwi RD, CRET1, 3	// RD = (nresults+1)*8 for the positive case.
   2349   |   la RA, -8(BASE)
   2350   |  bgt ->fff_res			// Returned nresults+1?
   2351   |1:  // Returned 0 or -1: retry fast path.
   2352   |  lp TMP0, L->top
   2353   |   lwz LFUNC:RB, FRAME_FUNC(BASE)
   2354   |  sub NARGS8:RC, TMP0, BASE	// Recompute nargs*8 from L->top.
   2355   |  bne ->vm_call_tail			// Returned -1?
   2356   |  ins_callt				// Returned 0: retry fast path.
   2357   |
   2358   |// Reconstruct previous base for vmeta_call during tailcall.
   2359   |->vm_call_tail:
   2360   |  andix. TMP0, PC, FRAME_TYPE
   2361   |   rlwinm TMP1, PC, 0, 0, 28	// TMP1 = PC with the low 3 bits cleared (used when frame type bits are set).
   2362   |  bne >3
   2363   |  lwz INS, -4(PC)
   2364   |  decode_RA8 TMP1, INS
   2365   |  addi TMP1, TMP1, 8	// No frame type bits: delta = decoded RA operand + 8.
   2366   |3:
   2367   |  sub TMP2, BASE, TMP1	// TMP2 = previous base.
   2368   |  b ->vm_call_dispatch		// Resolve again for tailcall.
   2369   |
   2370   |5:  // Grow stack for fallback handler.
   2371   |  li CARG2, LUA_MINSTACK
   2372   |  bl extern lj_state_growstack	// (lua_State *L, int n)
   2373   |  lp BASE, L->base
   2374   |  cmpw TMP0, TMP0			// Set 4*cr0+eq to force retry.
   2375   |  b <1
   2376   |
   2377   |->fff_gcstep:			// Call GC step function.
   2378   |  // BASE = new base, RC = nargs*8
   2379   |  mflr SAVE0	// Keep the return address across the call below.
   2380   |   stp BASE, L->base
   2381   |  add TMP0, BASE, NARGS8:RC
   2382   |   stw PC, SAVE_PC			// Redundant (but a defined value).
   2383   |  stp TMP0, L->top
   2384   |  mr CARG1, L
   2385   |  bl extern lj_gc_step		// (lua_State *L)
   2386   |   lp BASE, L->base	// Re-load BASE from L->base after the call.
   2387   |  mtlr SAVE0
   2388   |    lp TMP0, L->top
   2389   |   sub NARGS8:RC, TMP0, BASE	// Recompute nargs*8 from L->top.
   2390   |   lwz CFUNC:RB, FRAME_FUNC(BASE)
   2391   |  blr
   2392   |
   2393   |//-----------------------------------------------------------------------
   2394   |//-- Special dispatch targets -------------------------------------------
   2395   |//-----------------------------------------------------------------------
   2396   |
   2397   |->vm_record:				// Dispatch target for recording phase.
   2398   |.if JIT
   2399   |  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
   2400   |  andix. TMP0, TMP3, HOOK_VMEVENT	// No recording while in vmevent.
   2401   |  bne >5
   2402   |  // Decrement the hookcount for consistency, but always do the call.
   2403   |   lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
   2404   |  andix. TMP0, TMP3, HOOK_ACTIVE
   2405   |  bne >1	// Hook already active: leave hookcount untouched.
   2406   |   subi TMP2, TMP2, 1
   2407   |  andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
   2408   |  beqy >1	// Neither line nor count hook set: do not store the decrement.
   2409   |   stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
   2410   |  b >1	// Targets label 1: further down, past vm_rethook.
   2411   |.endif
   2412   |
   2413   |->vm_rethook:			// Dispatch target for return hooks.
   2414   |  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
   2415   |  andix. TMP0, TMP3, HOOK_ACTIVE	// Hook already active?
   2416   |  beq >1	// Not active: go on to label 1 further down.
   2417   |5:  // Re-dispatch to static ins.
   2418   |  addi TMP1, TMP1, GG_DISP2STATIC	// Assumes decode_OPP TMP1, INS.
   2419   |  lpx TMP0, DISPATCH, TMP1	// Load the handler address from the dispatch table at the adjusted offset.
   2420   |  mtctr TMP0
   2421   |  bctr
   2422   |
   2423   |->vm_inshook:			// Dispatch target for instr/line hooks.
   2424   |  lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
   2425   |  lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
   2426   |  andix. TMP0, TMP3, HOOK_ACTIVE	// Hook already active?
   2427   |   rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0	// Rotate so the LUA_HOOKLINE bit lands in the sign bit; keep only the top and bottom bit.
   2428   |  bne <5
   2429   |
   2430   |   cmpwi cr1, TMP0, 0
   2431   |  addic. TMP2, TMP2, -1	// Decrement hookcount; cr0 reflects the new value.
   2432   |   beq cr1, <5	// Neither kept bit set: plain re-dispatch.
   2433   |  stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
   2434   |  beq >1	// hookcount reached zero: make the call.
   2435   |   bge cr1, <5	// Sign bit (line hook bit) clear: plain re-dispatch.
   2436   |1:
   2437   |  mr CARG1, L
   2438   |   stw MULTRES, SAVE_MULTRES
   2439   |  mr CARG2, PC
   2440   |   stp BASE, L->base
   2441   |  // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
   2442   |  bl extern lj_dispatch_ins		// (lua_State *L, const BCIns *pc)
   2443   |3:
   2444   |  lp BASE, L->base	// Re-load BASE from L->base after the call.
   2445   |4:  // Re-dispatch to static ins.
   2446   |  lwz INS, -4(PC)	// Re-fetch the instruction word before PC and decode all operands.
   2447   |  decode_OPP TMP1, INS
   2448   |   decode_RB8 RB, INS
   2449   |  addi TMP1, TMP1, GG_DISP2STATIC
   2450   |   decode_RD8 RD, INS
   2451   |  lpx TMP0, DISPATCH, TMP1
   2452   |   decode_RA8 RA, INS
   2453   |   decode_RC8 RC, INS
   2454   |  mtctr TMP0
   2455   |  bctr
   2456   |
   2457   |->cont_hook:				// Continue from hook yield.
   2458   |  addi PC, PC, 4	// Advance PC by one instruction word; the re-dispatch reads the word at PC-4.
   2459   |  lwz MULTRES, -20(RB)		// Restore MULTRES for *M ins.
   2460   |  b <4
   2461   |
   2462   |->vm_hotloop:			// Hot loop counter underflow.
   2463   |.if JIT
   2464   |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
   2465   |   addi CARG1, DISPATCH, GG_DISP2J	// CARG1 = jit_State *J.
   2466   |   stw PC, SAVE_PC
   2467   |  lwz TMP1, LFUNC:TMP1->pc
   2468   |   mr CARG2, PC
   2469   |   stw L, DISPATCH_J(L)(DISPATCH)
   2470   |  lbz TMP1, PC2PROTO(framesize)(TMP1)
   2471   |   stp BASE, L->base
   2472   |  slwi TMP1, TMP1, 3
   2473   |  add TMP1, BASE, TMP1
   2474   |  stp TMP1, L->top	// L->top = BASE + framesize*8.
   2475   |  bl extern lj_trace_hot		// (jit_State *J, const BCIns *pc)
   2476   |  b <3	// Re-load BASE and re-dispatch via the code after vm_inshook.
   2477   |.endif
   2478   |
   2479   |->vm_callhook:			// Dispatch target for call hooks.
   2480   |  mr CARG2, PC
   2481   |.if JIT
   2482   |  b >1
   2483   |.endif
   2484   |
   2485   |->vm_hotcall:			// Hot call counter underflow.
   2486   |.if JIT
   2487   |  ori CARG2, PC, 1	// Pass PC with the low bit set, which distinguishes this entry from vm_callhook.
   2488   |1:
   2489   |.endif
   2490   |  add TMP0, BASE, RC
   2491   |   stw PC, SAVE_PC
   2492   |  mr CARG1, L
   2493   |   stp BASE, L->base
   2494   |  sub RA, RA, BASE	// Keep RA as an offset, since BASE is re-loaded after the call.
   2495   |   stp TMP0, L->top
   2496   |  bl extern lj_dispatch_call		// (lua_State *L, const BCIns *pc)
   2497   |  // Returns ASMFunction.
   2498   |  lp BASE, L->base
   2499   |   lp TMP0, L->top
   2500   |   stw ZERO, SAVE_PC			// Invalidate for subsequent line hook.
   2501   |  sub NARGS8:RC, TMP0, BASE
   2502   |  add RA, BASE, RA	// Rebase RA onto the re-loaded BASE.
   2503   |  lwz LFUNC:RB, FRAME_FUNC(BASE)
   2504   |  lwz INS, -4(PC)
   2505   |  mtctr CRET1	// Jump to the returned ASMFunction.
   2506   |  bctr
   2507   |
   2508   |->cont_stitch:			// Trace stitching.
   2509   |.if JIT
   2510   |  // RA = resultptr, RB = meta base
   2511   |  lwz INS, -4(PC)
   2512   |    lwz TRACE:TMP2, -20(RB)		// Save previous trace.
   2513   |   addic. TMP1, MULTRES, -8	// TMP1 = number of results * 8.
   2514   |  decode_RA8 RC, INS			// Call base.
   2515   |   beq >2	// No results to move.
   2516   |1:  // Move results down.
   2517   |  lfd f0, 0(RA)
   2518   |   addic. TMP1, TMP1, -8
   2519   |    addi RA, RA, 8
   2520   |  stfdx f0, BASE, RC
   2521   |    addi RC, RC, 8
   2522   |   bne <1
   2523   |2:
   2524   |   decode_RA8 RA, INS
   2525   |   decode_RB8 RB, INS
   2526   |   add RA, RA, RB	// RA = sum of the decoded RA and RB operands: the limit compared against RC below.
   2527   |3:
   2528   |   cmplw RA, RC
   2529   |   bgt >9				// More results wanted?
   2530   |
   2531   |  lhz TMP3, TRACE:TMP2->traceno
   2532   |  lhz RD, TRACE:TMP2->link
   2533   |  cmpw RD, TMP3
   2534   |   cmpwi cr1, RD, 0
   2535   |  beq ->cont_nop			// Blacklisted.
   2536   |    slwi RD, RD, 3
   2537   |   bne cr1, =>BC_JLOOP		// Jump to stitched trace.
   2538   |
   2539   |  // Stitch a new trace to the previous trace.
   2540   |  stw TMP3, DISPATCH_J(exitno)(DISPATCH)
   2541   |  stp L, DISPATCH_J(L)(DISPATCH)
   2542   |  stp BASE, L->base
   2543   |  addi CARG1, DISPATCH, GG_DISP2J
   2544   |  mr CARG2, PC
   2545   |  bl extern lj_dispatch_stitch	// (jit_State *J, const BCIns *pc)
   2546   |  lp BASE, L->base
   2547   |  b ->cont_nop
   2548   |
   2549   |9:
   2550   |  stwx TISNIL, BASE, RC	// Write TISNIL into the first word of the next wanted slot.
   2551   |  addi RC, RC, 8
   2552   |  b <3
   2553   |.endif
   2554   |
   2555   |->vm_profhook:			// Dispatch target for profiler hook.
   2556 #if LJ_HASPROFILE
   2557   |  mr CARG1, L
   2558   |   stw MULTRES, SAVE_MULTRES
   2559   |  mr CARG2, PC
   2560   |   stp BASE, L->base
   2561   |  bl extern lj_dispatch_profile	// (lua_State *L, const BCIns *pc)
   2562   |  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
   2563   |  lp BASE, L->base
   2564   |  subi PC, PC, 4	// Step PC back by one instruction word before continuing at cont_nop.
   2565   |  b ->cont_nop
   2566 #endif
   2567   |
   2568   |//-----------------------------------------------------------------------
   2569   |//-- Trace exit handler -------------------------------------------------
   2570   |//-----------------------------------------------------------------------
   2571   |
   2572   |.macro savex_, a, b, c, d
   2573   |  stfd f..a, 16+a*8(sp)	// Store four FPRs; FPR n goes to sp + 16 + n*8.
   2574   |  stfd f..b, 16+b*8(sp)
   2575   |  stfd f..c, 16+c*8(sp)
   2576   |  stfd f..d, 16+d*8(sp)
   2577   |.endmacro
   2578   |
   2579   |->vm_exit_handler:	// Saves the register state on the stack and calls lj_trace_exit.
   2580   |.if JIT
   2581   |  addi sp, sp, -(16+32*8+32*4)	// Frame: 16 byte header, then 32*8 bytes for FPRs, then 32*4 bytes for GPRs.
   2582   |  stmw r2, 16+32*8+2*4(sp)	// Save r2..r31 into the GPR area.
   2583   |    addi DISPATCH, JGL, -GG_DISP2G-32768
   2584   |    li CARG2, ~LJ_VMST_EXIT
   2585   |   lwz CARG1, 16+32*8+32*4(sp)	// Get stack chain.
   2586   |    stw CARG2, DISPATCH_GL(vmstate)(DISPATCH)
   2587   |  savex_ 0,1,2,3
   2588   |   stw CARG1, 0(sp)			// Store extended stack chain.
   2589   |   clrso TMP1
   2590   |  savex_ 4,5,6,7
   2591   |   addi CARG2, sp, 16+32*8+32*4	// Recompute original value of sp.
   2592   |  savex_ 8,9,10,11
   2593   |   stw CARG2, 16+32*8+1*4(sp)	// Store sp in RID_SP.
   2594   |  savex_ 12,13,14,15
   2595   |   mflr CARG3
   2596   |   li TMP1, 0
   2597   |  savex_ 16,17,18,19
   2598   |   stw TMP1, 16+32*8+0*4(sp)		// Clear RID_TMP.
   2599   |  savex_ 20,21,22,23
   2600   |   lhz CARG4, 2(CARG3)		// Load trace number.
   2601   |  savex_ 24,25,26,27
   2602   |  lwz L, DISPATCH_GL(cur_L)(DISPATCH)
   2603   |  savex_ 28,29,30,31
   2604   |   sub CARG3, TMP0, CARG3		// Compute exit number.
   2605   |  lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
   2606   |   srwi CARG3, CARG3, 2
   2607   |  stp L, DISPATCH_J(L)(DISPATCH)
   2608   |   subi CARG3, CARG3, 2
   2609   |  stp BASE, L->base
   2610   |   stw CARG4, DISPATCH_J(parent)(DISPATCH)
   2611   |  stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)	// Clear g->jit_base (TMP1 is 0).
   2612   |  addi CARG1, DISPATCH, GG_DISP2J
   2613   |   stw CARG3, DISPATCH_J(exitno)(DISPATCH)
   2614   |  addi CARG2, sp, 16	// CARG2 = start of the saved register area.
   2615   |  bl extern lj_trace_exit		// (jit_State *J, ExitState *ex)
   2616   |  // Returns MULTRES (unscaled) or negated error code.
   2617   |  lp TMP1, L->cframe
   2618   |  lwz TMP2, 0(sp)
   2619   |   lp BASE, L->base
   2620   |.if GPR64
   2621   |  rldicr sp, TMP1, 0, 61	// sp = L->cframe with the low 2 bits cleared.
   2622   |.else
   2623   |  rlwinm sp, TMP1, 0, 0, 29	// sp = L->cframe with the low 2 bits cleared.
   2624   |.endif
   2625   |   lwz PC, SAVE_PC			// Get SAVE_PC.
   2626   |  stw TMP2, 0(sp)
   2627   |  stw L, SAVE_L			// Set SAVE_L (on-trace resume/yield).
   2628   |  b >1
   2629   |.endif
   2630   |->vm_exit_interp:	// Re-enters the interpreter at PC, or rethrows if CARG1 is negative.
   2631   |.if JIT
   2632   |  // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
   2633   |  lwz L, SAVE_L
   2634   |  addi DISPATCH, JGL, -GG_DISP2G-32768
   2635   |  stp BASE, L->base
   2636   |1:
   2637   |  cmpwi CARG1, 0
   2638   |  blt >9				// Check for error from exit.
   2639   |  lwz LFUNC:RB, FRAME_FUNC(BASE)
   2640   |   slwi MULTRES, CARG1, 3	// Scale the unscaled MULTRES by 8.
   2641   |    li TMP2, 0
   2642   |   stw MULTRES, SAVE_MULTRES
   2643   |  lwz TMP1, LFUNC:RB->pc
   2644   |    stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)	// Clear g->jit_base.
   2645   |  lwz KBASE, PC2PROTO(k)(TMP1)
   2646   |  // Setup type comparison constants.
   2647   |  li TISNUM, LJ_TISNUM
   2648   |  lus TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
   2649   |  stw TMP3, TMPD
   2650   |  li ZERO, 0
   2651   |  ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
   2652   |  lfs TOBIT, TMPD
   2653   |  stw TMP3, TMPD
   2654   |  lus TMP0, 0x4338			// Hiword of 2^52 + 2^51 (double)
   2655   |    li TISNIL, LJ_TNIL
   2656   |  stw TMP0, TONUM_HI
   2657   |  lfs TONUM, TMPD
   2658   |  // Modified copy of ins_next which handles function header dispatch, too.
   2659   |  lwz INS, 0(PC)
   2660   |   addi PC, PC, 4
   2661   |    // Assumes TISNIL == ~LJ_VMST_INTERP == -1.
   2662   |    stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
   2663   |  decode_OPP TMP1, INS
   2664   |   decode_RA8 RA, INS
   2665   |  lpx TMP0, DISPATCH, TMP1
   2666   |  mtctr TMP0
   2667   |  cmplwi TMP1, BC_FUNCF*4		// Function header?
   2668   |  bge >2
   2669   |   decode_RB8 RB, INS
   2670   |   decode_RD8 RD, INS
   2671   |   decode_RC8 RC, INS
   2672   |  bctr
   2673   |2:
   2674   |  cmplwi TMP1, (BC_FUNCC+2)*4	// Fast function?
   2675   |  blt >3
   2676   |  // Check frame below fast function.
   2677   |  lwz TMP1, FRAME_PC(BASE)
   2678   |  andix. TMP0, TMP1, FRAME_TYPE
   2679   |  bney >3				// Trace stitching continuation?
   2680   |  // Otherwise set KBASE for Lua function below fast function.
   2681   |  lwz TMP2, -4(TMP1)
   2682   |  decode_RA8 TMP0, TMP2
   2683   |  sub TMP1, BASE, TMP0	// Step down from BASE by the decoded RA operand.
   2684   |  lwz LFUNC:TMP2, -12(TMP1)
   2685   |  lwz TMP1, LFUNC:TMP2->pc
   2686   |  lwz KBASE, PC2PROTO(k)(TMP1)
   2687   |3:
   2688   |   subi RC, MULTRES, 8	// RC = MULTRES - 8, set up for the function header being dispatched to.
   2689   |   add RA, RA, BASE
   2690   |  bctr
   2691   |
   2692   |9:  // Rethrow error from the right C frame.
   2693   |  neg CARG2, CARG1	// Undo the negation to get the error code.
   2694   |  mr CARG1, L
   2695   |  bl extern lj_err_throw		// (lua_State *L, int errcode)
   2696   |.endif
   2697   |
   2698   |//-----------------------------------------------------------------------
   2699   |//-- Math helper functions ----------------------------------------------
   2700   |//-----------------------------------------------------------------------
   2701   |
   2702   |// NYI: Use internal implementations of floor, ceil, trunc.
   2703   |
   2704   |->vm_modi:	// Integer modulo: CARG1 = CARG1 % CARG2, result takes the sign of the divisor.
   2705   |  divwo. TMP0, CARG1, CARG2	// TMP0 = truncated quotient; overflow sets SO.
   2706   |  bso >1
   2707   |.if GPR64
   2708   |   xor CARG3, CARG1, CARG2
   2709   |   cmpwi CARG3, 0
   2710   |.else
   2711   |   xor. CARG3, CARG1, CARG2	// Negative iff the operand signs differ.
   2712   |.endif
   2713   |  mullw TMP0, TMP0, CARG2
   2714   |  sub CARG1, CARG1, TMP0	// CARG1 = remainder of the truncating division.
   2715   |   bgelr	// Same signs: the truncated remainder is the result.
   2716   |  cmpwi CARG1, 0; beqlr	// Zero remainder needs no adjustment.
   2717   |  add CARG1, CARG1, CARG2	// Signs differ: add the divisor to the remainder.
   2718   |  blr
   2719   |1:
   2720   |  cmpwi CARG2, 0
   2721   |   li CARG1, 0
   2722   |  beqlr	// Divisor is zero: return 0 without clearing SO.
   2723   |  clrso TMP0			// Clear SO for -2147483648 % -1 and return 0.
   2724   |  blr
   2725   |
   2726   |//-----------------------------------------------------------------------
   2727   |//-- Miscellaneous functions --------------------------------------------
   2728   |//-----------------------------------------------------------------------
   2729   |
   2730   |// void lj_vm_cachesync(void *start, void *end)
   2731   |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size.
   2732   |// This is a good lower bound, except for very ancient PPC models.
   2733   |->vm_cachesync:
   2734   |.if JIT or FFI
   2735   |  // Compute start of first cache line and number of cache lines.
   2736   |  rlwinm CARG1, CARG1, 0, 0, 26  // Round start down to a 32 byte boundary.
   2737   |  sub CARG2, CARG2, CARG1
   2738   |  addi CARG2, CARG2, 31
   2739   |  rlwinm. CARG2, CARG2, 27, 5, 31  // Line count = (end - aligned start + 31) >> 5.
   2740   |  beqlr  // Nothing to do for a zero line count.
   2741   |  mtctr CARG2
   2742   |  mr CARG3, CARG1  // Keep the aligned start for the I-Cache pass.
   2743   |1:  // Flush D-Cache.
   2744   |  dcbst r0, CARG1
   2745   |  addi CARG1, CARG1, 32
   2746   |  bdnz <1
   2747   |  sync  // Order the D-Cache stores before the I-Cache invalidation.
   2748   |  mtctr CARG2  // Reload the line count for the second pass.
   2749   |1:  // Invalidate I-Cache.
   2750   |  icbi r0, CARG3
   2751   |  addi CARG3, CARG3, 32
   2752   |  bdnz <1
   2753   |  isync  // Context synchronization after the invalidation.
   2754   |  blr
   2755   |.endif
   2756   |
   2757   |//-----------------------------------------------------------------------
   2758   |//-- FFI helper functions -----------------------------------------------
   2759   |//-----------------------------------------------------------------------
   2760   |
   2761   |// Handler for callback functions. Callback slot number in r11, g in r12.
   2762   |->vm_ffi_callback:
   2763   |.if FFI
   2764   |.type CTSTATE, CTState, PC  // CTSTATE is an alias for the PC register in this routine.
   2765   |  saveregs
   2766   |  lwz CTSTATE, GL:r12->ctype_state  // CTSTATE = g->ctype_state (g arrives in r12).
   2767   |   addi DISPATCH, r12, GG_G2DISP
   2768   |  stw r11, CTSTATE->cb.slot  // Record the callback slot number from r11.
   2769   |  stw r3, CTSTATE->cb.gpr[0]  // Spill r3-r10 into cb.gpr[0..7] ...
   2770   |   stfd f1, CTSTATE->cb.fpr[0]  // ... and f1-f8 into cb.fpr[0..7].
   2771   |  stw r4, CTSTATE->cb.gpr[1]
   2772   |   stfd f2, CTSTATE->cb.fpr[1]
   2773   |  stw r5, CTSTATE->cb.gpr[2]
   2774   |   stfd f3, CTSTATE->cb.fpr[2]
   2775   |  stw r6, CTSTATE->cb.gpr[3]
   2776   |   stfd f4, CTSTATE->cb.fpr[3]
   2777   |  stw r7, CTSTATE->cb.gpr[4]
   2778   |   stfd f5, CTSTATE->cb.fpr[4]
   2779   |  stw r8, CTSTATE->cb.gpr[5]
   2780   |   stfd f6, CTSTATE->cb.fpr[5]
   2781   |  stw r9, CTSTATE->cb.gpr[6]
   2782   |   stfd f7, CTSTATE->cb.fpr[6]
   2783   |  stw r10, CTSTATE->cb.gpr[7]
   2784   |   stfd f8, CTSTATE->cb.fpr[7]
   2785   |  addi TMP0, sp, CFRAME_SPACE+8  // cb.stack = sp+CFRAME_SPACE+8 (presumably the caller's stack arguments; confirm).
   2786   |  stw TMP0, CTSTATE->cb.stack
   2787   |   mr CARG1, CTSTATE
   2788   |  stw CTSTATE, SAVE_PC		// Any value outside of bytecode is ok.
   2789   |   mr CARG2, sp
   2790   |  bl extern lj_ccallback_enter	// (CTState *cts, void *cf)
   2791   |  // Returns lua_State *.
   2792   |  lp BASE, L:CRET1->base
   2793   |     li TISNUM, LJ_TISNUM		// Setup type comparison constants.
   2794   |  lp RC, L:CRET1->top
   2795   |     lus TMP3, 0x59c0		// TOBIT = 2^52 + 2^51 (float).
   2796   |     li ZERO, 0
   2797   |   mr L, CRET1
   2798   |     stw TMP3, TMPD
   2799   |     lus TMP0, 0x4338		// Hiword of 2^52 + 2^51 (double)
   2800   |  lwz LFUNC:RB, FRAME_FUNC(BASE)
   2801   |     ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
   2802   |     stw TMP0, TONUM_HI
   2803   |     li TISNIL, LJ_TNIL
   2804   |    li_vmstate INTERP
   2805   |     lfs TOBIT, TMPD
   2806   |     stw TMP3, TMPD
   2807   |  sub RC, RC, BASE  // RC = L->top - L->base in bytes.
   2808   |    st_vmstate
   2809   |     lfs TONUM, TMPD
   2810   |  ins_callt
   2811   |.endif
   2812   |
   2813   |->cont_ffi_callback:			// Return from FFI callback.
   2814   |.if FFI
   2815   |  lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH)
   2816   |   stp BASE, L->base  // Sync base and top back into the lua_State before calling C.
   2817   |   stp RB, L->top
   2818   |  stp L, CTSTATE->L
   2819   |  mr CARG1, CTSTATE
   2820   |  mr CARG2, RA  // RA is passed as the TValue *o argument.
   2821   |  bl extern lj_ccallback_leave	// (CTState *cts, TValue *o)
   2822   |  lwz CRET1, CTSTATE->cb.gpr[0]  // Load the C return registers from the callback state.
   2823   |  lfd FARG1, CTSTATE->cb.fpr[0]
   2824   |  lwz CRET2, CTSTATE->cb.gpr[1]
   2825   |  b ->vm_leave_unw
   2826   |.endif
   2827   |
   2828   |->vm_ffi_call:			// Call C function via FFI.
   2829   |  // Caveat: needs special frame unwinding, see below.
   2830   |.if FFI
   2831   |  .type CCSTATE, CCallState, CARG1
   2832   |  lwz TMP1, CCSTATE->spadj
   2833   |    mflr TMP0
   2834   |   lbz CARG2, CCSTATE->nsp
   2835   |   lbz CARG3, CCSTATE->nfpr
   2836   |  neg TMP1, TMP1  // TMP1 = -spadj.
   2837   |    stw TMP0, 4(sp)  // Save the return address at 4(sp) of the incoming frame.
   2838   |   cmpwi cr1, CARG3, 0
   2839   |  mr TMP2, sp  // TMP2 = incoming sp.
   2840   |   addic. CARG2, CARG2, -1  // CARG2 = nsp-1; negative if there are no stack words.
   2841   |  stwux sp, sp, TMP1  // Allocate spadj bytes and store the back chain.
   2842   |   crnot 4*cr1+eq, 4*cr1+eq		// For vararg calls.
   2843   |  stw r14, -4(TMP2)  // Save r14 and CCSTATE just below the incoming sp.
   2844   |  stw CCSTATE, -8(TMP2)
   2845   |  mr r14, TMP2  // r14 = incoming sp; used to find the saved values after the call.
   2846   |  la TMP1, CCSTATE->stack
   2847   |   slwi CARG2, CARG2, 2  // Byte offset of the last stack word.
   2848   |   blty >2  // No stack words to copy.
   2849   |  la TMP2, 8(sp)
   2850   |1:  // Copy CCSTATE->stack to 8(sp), last word first.
   2851   |  lwzx TMP0, TMP1, CARG2
   2852   |  stwx TMP0, TMP2, CARG2
   2853   |   addic. CARG2, CARG2, -4
   2854   |  bge <1
   2855   |2:
   2856   |  bney cr1, >3  // cr1.eq was inverted above: skip the FPR loads if nfpr == 0.
   2857   |  lfd f1, CCSTATE->fpr[0]
   2858   |  lfd f2, CCSTATE->fpr[1]
   2859   |  lfd f3, CCSTATE->fpr[2]
   2860   |  lfd f4, CCSTATE->fpr[3]
   2861   |  lfd f5, CCSTATE->fpr[4]
   2862   |  lfd f6, CCSTATE->fpr[5]
   2863   |  lfd f7, CCSTATE->fpr[6]
   2864   |  lfd f8, CCSTATE->fpr[7]
   2865   |3:
   2866   |   lp TMP0, CCSTATE->func
   2867   |  lwz CARG2, CCSTATE->gpr[1]
   2868   |  lwz CARG3, CCSTATE->gpr[2]
   2869   |  lwz CARG4, CCSTATE->gpr[3]
   2870   |  lwz CARG5, CCSTATE->gpr[4]
   2871   |   mtctr TMP0
   2872   |  lwz r8, CCSTATE->gpr[5]
   2873   |  lwz r9, CCSTATE->gpr[6]
   2874   |  lwz r10, CCSTATE->gpr[7]
   2875   |  lwz CARG1, CCSTATE->gpr[0]		// Do this last, since CCSTATE is CARG1.
   2876   |   bctrl
   2877   |  lwz CCSTATE:TMP1, -8(r14)  // Reload CCSTATE from the slot saved before the call.
   2878   |  lwz TMP2, -4(r14)  // TMP2 = caller's r14.
   2879   |   lwz TMP0, 4(r14)  // TMP0 = saved return address.
   2880   |  stw CARG1, CCSTATE:TMP1->gpr[0]  // Store the result registers back into CCSTATE.
   2881   |  stfd FARG1, CCSTATE:TMP1->fpr[0]
   2882   |  stw CARG2, CCSTATE:TMP1->gpr[1]
   2883   |   mtlr TMP0
   2884   |  stw CARG3, CCSTATE:TMP1->gpr[2]
   2885   |   mr sp, r14  // Drop the call frame.
   2886   |  stw CARG4, CCSTATE:TMP1->gpr[3]
   2887   |   mr r14, TMP2  // Restore the caller's r14.
   2888   |  blr
   2889   |.endif
   2890   |// Note: vm_ffi_call must be the last function in this object file!
   2891   |
   2892   |//-----------------------------------------------------------------------
   2893 }
   2894 
   2895 /* Generate the code for a single instruction. */
   2896 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   2897 {
   2898   int vk = 0;
   2899   |=>defop:
   2900 
   2901   switch (op) {
   2902 
   2903   /* -- Comparison ops ---------------------------------------------------- */
   2904 
   2905   /* Remember: all ops branch for a true comparison, fall through otherwise. */
   2906 
   2907   case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
   2908     |  // RA = src1*8, RD = src2*8, JMP with RD = target
   2909     |.if DUALNUM
   2910     |  lwzux TMP0, RA, BASE
   2911     |    addi PC, PC, 4  // Step over the JMP that follows this instruction.
   2912     |   lwz CARG2, 4(RA)
   2913     |  lwzux TMP1, RD, BASE
   2914     |    lwz TMP2, -4(PC)  // Fetch that JMP instruction.
   2915     |  checknum cr0, TMP0
   2916     |   lwz CARG3, 4(RD)
   2917     |    decode_RD4 TMP2, TMP2
   2918     |  checknum cr1, TMP1
   2919     |    addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   2920     |  bne cr0, >7
   2921     |  bne cr1, >8
   2922     |   cmpw CARG2, CARG3  // Both operands are integers: signed compare.
   2923     if (op == BC_ISLT) {
   2924       |  bge >2
   2925     } else if (op == BC_ISGE) {
   2926       |  blt >2
   2927     } else if (op == BC_ISLE) {
   2928       |  bgt >2
   2929     } else {
   2930       |  ble >2
   2931     }
   2932     |1:
   2933     |  add PC, PC, TMP2  // Comparison is true: take the jump.
   2934     |2:
   2935     |  ins_next
   2936     |
   2937     |7:  // RA is not an integer.
   2938     |  bgt cr0, ->vmeta_comp
   2939     |  // RA is a number.
   2940     |   lfd f0, 0(RA)
   2941     |  bgt cr1, ->vmeta_comp
   2942     |  blt cr1, >4
   2943     |  // RA is a number, RD is an integer.
   2944     |  tonum_i f1, CARG3
   2945     |  b >5
   2946     |
   2947     |8: // RA is an integer, RD is not an integer.
   2948     |  bgt cr1, ->vmeta_comp
   2949     |  // RA is an integer, RD is a number.
   2950     |  tonum_i f0, CARG2
   2951     |4:
   2952     |  lfd f1, 0(RD)
   2953     |5:
   2954     |  fcmpu cr0, f0, f1  // Compare both operands as doubles.
   2955     if (op == BC_ISLT) {
   2956       |  bge <2
   2957     } else if (op == BC_ISGE) {
   2958       |  blt <2
   2959     } else if (op == BC_ISLE) {
   2960       |  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq  // lt = lt or eq.
   2961       |  bge <2
   2962     } else {
   2963       |  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq  // lt = lt or eq.
   2964       |  blt <2
   2965     }
   2966     |  b <1
   2967     |.else
   2968     |  lwzx TMP0, BASE, RA
   2969     |    addi PC, PC, 4  // Step over the JMP that follows this instruction.
   2970     |   lfdx f0, BASE, RA
   2971     |  lwzx TMP1, BASE, RD
   2972     |  checknum cr0, TMP0
   2973     |    lwz TMP2, -4(PC)  // Fetch that JMP instruction.
   2974     |   lfdx f1, BASE, RD
   2975     |  checknum cr1, TMP1
   2976     |    decode_RD4 TMP2, TMP2
   2977     |  bge cr0, ->vmeta_comp  // src1 is not a number.
   2978     |    addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   2979     |  bge cr1, ->vmeta_comp  // src2 is not a number.
   2980     |  fcmpu cr0, f0, f1
   2981     if (op == BC_ISLT) {
   2982       |  bge >1
   2983     } else if (op == BC_ISGE) {
   2984       |  blt >1
   2985     } else if (op == BC_ISLE) {
   2986       |  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq  // lt = lt or eq.
   2987       |  bge >1
   2988     } else {
   2989       |  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq  // lt = lt or eq.
   2990       |  blt >1
   2991     }
   2992     |  add PC, PC, TMP2  // Comparison is true: take the jump.
   2993     |1:
   2994     |  ins_next
   2995     |.endif
   2996     break;
   2997 
   2998   case BC_ISEQV: case BC_ISNEV:
   2999     vk = op == BC_ISEQV;
   3000     |  // RA = src1*8, RD = src2*8, JMP with RD = target
   3001     |.if DUALNUM
   3002     |  lwzux TMP0, RA, BASE
   3003     |    addi PC, PC, 4  // Step over the JMP that follows this instruction.
   3004     |   lwz CARG2, 4(RA)
   3005     |  lwzux TMP1, RD, BASE
   3006     |  checknum cr0, TMP0
   3007     |    lwz TMP2, -4(PC)  // Fetch that JMP instruction.
   3008     |  checknum cr1, TMP1
   3009     |    decode_RD4 TMP2, TMP2
   3010     |   lwz CARG3, 4(RD)
   3011     |  cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt  // cr7.gt = gt of either type check.
   3012     |    addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   3013     if (vk) {
   3014       |  ble cr7, ->BC_ISEQN_Z  // Neither check set gt: reuse the ISEQN compare path.
   3015     } else {
   3016       |  ble cr7, ->BC_ISNEN_Z  // Neither check set gt: reuse the ISNEN compare path.
   3017     }
   3018     |.else
   3019     |  lwzux TMP0, RA, BASE
   3020     |   lwz TMP2, 0(PC)  // Fetch the JMP that follows this instruction.
   3021     |    lfd f0, 0(RA)
   3022     |   addi PC, PC, 4
   3023     |  lwzux TMP1, RD, BASE
   3024     |  checknum cr0, TMP0
   3025     |   decode_RD4 TMP2, TMP2
   3026     |    lfd f1, 0(RD)
   3027     |  checknum cr1, TMP1
   3028     |   addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   3029     |  bge cr0, >5
   3030     |  bge cr1, >5
   3031     |  fcmpu cr0, f0, f1  // Both are numbers: compare as doubles.
   3032     if (vk) {
   3033       |  bne >1
   3034       |  add PC, PC, TMP2
   3035     } else {
   3036       |  beq >1
   3037       |  add PC, PC, TMP2
   3038     }
   3039     |1:
   3040     |  ins_next
   3041     |.endif
   3042     |5:  // Either or both types are not numbers.
   3043     |.if not DUALNUM
   3044     |    lwz CARG2, 4(RA)
   3045     |    lwz CARG3, 4(RD)
   3046     |.endif
   3047     |.if FFI
   3048     |  cmpwi cr7, TMP0, LJ_TCDATA
   3049     |  cmpwi cr5, TMP1, LJ_TCDATA
   3050     |.endif
   3051     |   not TMP3, TMP0
   3052     |  cmplw TMP0, TMP1  // cr0.eq = both operands have the same type tag.
   3053     |   cmplwi cr1, TMP3, ~LJ_TISPRI		// Primitive?
   3054     |.if FFI
   3055     |  cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq  // cr7.eq = either operand is cdata.
   3056     |.endif
   3057     |   cmplwi cr6, TMP3, ~LJ_TISTABUD		// Table or userdata?
   3058     |.if FFI
   3059     |  beq cr7, ->vmeta_equal_cd
   3060     |.endif
   3061     |    cmplw cr5, CARG2, CARG3  // cr5.eq = both low words (payload) are identical.
   3062     |  crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt	// 2: Same type and primitive.
   3063     |  crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq	// 1: Same tv or different type.
   3064     |  crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq	// 0: Same type and same tv.
   3065     |   mr SAVE0, PC  // Keep the pre-jump PC; it is restored before calling vmeta_equal.
   3066     |  cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt	// 0 or 2.
   3067     |  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt	// 1 or 2.
   3068     if (vk) {
   3069       |  bne cr0, >6
   3070       |  add PC, PC, TMP2
   3071       |6:
   3072     } else {
   3073       |  beq cr0, >6
   3074       |  add PC, PC, TMP2
   3075       |6:
   3076     }
   3077     |.if DUALNUM
   3078     |  bge cr0, >2			// Done if 1 or 2.
   3079     |1:
   3080     |  ins_next
   3081     |2:
   3082     |.else
   3083     |  blt cr0, <1			// Done if 1 or 2.
   3084     |.endif
   3085     |  blt cr6, <1			// Done if not tab/ud.
   3086     |
   3087     |  // Different tables or userdatas. Need to check __eq metamethod.
   3088     |  // Field metatable must be at same offset for GCtab and GCudata!
   3089     |  lwz TAB:TMP2, TAB:CARG2->metatable
   3090     |   li CARG4, 1-vk			// ne = 0 or 1.
   3091     |  cmplwi TAB:TMP2, 0
   3092     |  beq <1				// No metatable?
   3093     |  lbz TMP2, TAB:TMP2->nomm
   3094     |  andix. TMP2, TMP2, 1<<MM_eq
   3095     |  bne <1				// Or 'no __eq' flag set?
   3096     |  mr PC, SAVE0			// Restore old PC.
   3097     |  b ->vmeta_equal			// Handle __eq metamethod.
   3098     break;
   3099 
   3100   case BC_ISEQS: case BC_ISNES:
   3101     vk = op == BC_ISEQS;
   3102     |  // RA = src*8, RD = str_const*8 (~), JMP with RD = target
   3103     |  lwzux TMP0, RA, BASE
   3104     |   srwi RD, RD, 1  // RD = str_const*4.
   3105     |  lwz STR:TMP3, 4(RA)
   3106     |    lwz TMP2, 0(PC)  // Fetch the JMP that follows this instruction.
   3107     |   subfic RD, RD, -4  // RD = -4 - str_const*4.
   3108     |    addi PC, PC, 4
   3109     |.if FFI
   3110     |  cmpwi TMP0, LJ_TCDATA
   3111     |.endif
   3112     |   lwzx STR:TMP1, KBASE, RD	// KBASE-4-str_const*4
   3113     |  .gpr64 extsw TMP0, TMP0
   3114     |  subfic TMP0, TMP0, LJ_TSTR  // TMP0 = 0 iff src has the string type tag.
   3115     |.if FFI
   3116     |  beq ->vmeta_equal_cd
   3117     |.endif
   3118     |  sub TMP1, STR:TMP1, STR:TMP3  // TMP1 = 0 iff both refer to the same string object.
   3119     |  or TMP0, TMP0, TMP1  // TMP0 = 0 iff src equals the string constant.
   3120     |    decode_RD4 TMP2, TMP2
   3121     |  subfic TMP0, TMP0, 0  // Carry is set iff TMP0 == 0.
   3122     |    addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   3123     |  subfe TMP1, TMP1, TMP1  // TMP1 = equal ? 0 : -1.
   3124     if (vk) {
   3125       |  andc TMP2, TMP2, TMP1  // Keep the jump offset only if equal.
   3126     } else {
   3127       |  and TMP2, TMP2, TMP1  // Keep the jump offset only if not equal.
   3128     }
   3129     |  add PC, PC, TMP2
   3130     |  ins_next
   3131     break;
   3132 
   3133   case BC_ISEQN: case BC_ISNEN:
   3134     vk = op == BC_ISEQN;
   3135     |  // RA = src*8, RD = num_const*8, JMP with RD = target
   3136     |.if DUALNUM
   3137     |  lwzux TMP0, RA, BASE
   3138     |    addi PC, PC, 4  // Step over the JMP that follows this instruction.
   3139     |   lwz CARG2, 4(RA)
   3140     |  lwzux TMP1, RD, KBASE  // The constant operand is read relative to KBASE.
   3141     |  checknum cr0, TMP0
   3142     |    lwz TMP2, -4(PC)  // Fetch that JMP instruction.
   3143     |  checknum cr1, TMP1
   3144     |    decode_RD4 TMP2, TMP2
   3145     |   lwz CARG3, 4(RD)
   3146     |    addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   3147     if (vk) {
   3148       |->BC_ISEQN_Z:
   3149     } else {
   3150       |->BC_ISNEN_Z:
   3151     }
   3152     |  bne cr0, >7
   3153     |  bne cr1, >8
   3154     |   cmpw CARG2, CARG3  // Both operands are integers: compare the integer words.
   3155     |4:
   3156     |.else
   3157     if (vk) {
   3158       |->BC_ISEQN_Z:  // Dummy label.
   3159     } else {
   3160       |->BC_ISNEN_Z:  // Dummy label.
   3161     }
   3162     |  lwzx TMP0, BASE, RA
   3163     |    addi PC, PC, 4  // Step over the JMP that follows this instruction.
   3164     |   lfdx f0, BASE, RA
   3165     |    lwz TMP2, -4(PC)  // Fetch that JMP instruction.
   3166     |  lfdx f1, KBASE, RD
   3167     |    decode_RD4 TMP2, TMP2
   3168     |  checknum TMP0
   3169     |    addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   3170     |  bge >3  // src is not a number.
   3171     |  fcmpu cr0, f0, f1
   3172     |.endif
   3173     if (vk) {
   3174       |  bne >1  // Not equal: do not jump.
   3175       |  add PC, PC, TMP2
   3176       |1:
   3177       |.if not FFI
   3178       |3:
   3179       |.endif
   3180     } else {
   3181       |  beq >2  // Equal: do not jump.
   3182       |1:
   3183       |.if not FFI
   3184       |3:
   3185       |.endif
   3186       |  add PC, PC, TMP2
   3187       |2:
   3188     }
   3189     |  ins_next
   3190     |.if FFI
   3191     |3:
   3192     |  cmpwi TMP0, LJ_TCDATA
   3193     |  beq ->vmeta_equal_cd
   3194     |  b <1  // Non-number, non-cdata: handled as 'not equal' at label 1.
   3195     |.endif
   3196     |.if DUALNUM
   3197     |7:  // RA is not an integer.
   3198     |  bge cr0, <3
   3199     |  // RA is a number.
   3200     |   lfd f0, 0(RA)
   3201     |  blt cr1, >1
   3202     |  // RA is a number, RD is an integer.
   3203     |  tonum_i f1, CARG3
   3204     |  b >2
   3205     |
   3206     |8: // RA is an integer, RD is a number.
   3207     |  tonum_i f0, CARG2
   3208     |1:
   3209     |  lfd f1, 0(RD)
   3210     |2:
   3211     |  fcmpu cr0, f0, f1
   3212     |  b <4  // Rejoin the shared conditional jump code.
   3213     |.endif
   3214     break;
   3215 
   3216   case BC_ISEQP: case BC_ISNEP:
   3217     vk = op == BC_ISEQP;
   3218     |  // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
   3219     |  lwzx TMP0, BASE, RA
   3220     |   srwi TMP1, RD, 3  // Undo the *8 scaling of the operand.
   3221     |    lwz TMP2, 0(PC)  // Fetch the JMP that follows this instruction.
   3222     |   not TMP1, TMP1  // Complement to get the type tag to compare against.
   3223     |    addi PC, PC, 4
   3224     |.if FFI
   3225     |  cmpwi TMP0, LJ_TCDATA
   3226     |.endif
   3227     |  sub TMP0, TMP0, TMP1  // TMP0 = 0 iff the type tags match.
   3228     |.if FFI
   3229     |  beq ->vmeta_equal_cd
   3230     |.endif
   3231     |    decode_RD4 TMP2, TMP2
   3232     |  .gpr64 extsw TMP0, TMP0
   3233     |  addic TMP0, TMP0, -1  // Carry is set iff TMP0 != 0.
   3234     |    addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   3235     |  subfe TMP1, TMP1, TMP1  // TMP1 = match ? -1 : 0.
   3236     if (vk) {
   3237       |  and TMP2, TMP2, TMP1  // Keep the jump offset only on a match.
   3238     } else {
   3239       |  andc TMP2, TMP2, TMP1  // Keep the jump offset only on a mismatch.
   3240     }
   3241     |  add PC, PC, TMP2
   3242     |  ins_next
   3243     break;
   3244 
   3245   /* -- Unary test and copy ops ------------------------------------------- */
   3246 
   3247   case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
   3248     |  // RA = dst*8 or unused, RD = src*8, JMP with RD = target
   3249     |  lwzx TMP0, BASE, RD  // TMP0 = type tag of src.
   3250     |   lwz INS, 0(PC)  // Fetch the JMP that follows this instruction.
   3251     |   addi PC, PC, 4
   3252     if (op == BC_IST || op == BC_ISF) {
   3253       |  .gpr64 extsw TMP0, TMP0
   3254       |  subfic TMP0, TMP0, LJ_TTRUE  // Carry is set iff the tag is <= LJ_TTRUE (unsigned).
   3255       |   decode_RD4 TMP2, INS
   3256       |  subfe TMP1, TMP1, TMP1  // TMP1 = carry ? 0 : -1.
   3257       |   addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias from the decoded offset.
   3258       if (op == BC_IST) {
   3259 	|  andc TMP2, TMP2, TMP1
   3260       } else {
   3261 	|  and TMP2, TMP2, TMP1
   3262       }
   3263       |  add PC, PC, TMP2
   3264     } else {
   3265       |  li TMP1, LJ_TFALSE
   3266       |   lfdx f0, BASE, RD  // Preload src for the copy below.
   3267       |  cmplw TMP0, TMP1  // Unsigned compare of the tag against LJ_TFALSE.
   3268       if (op == BC_ISTC) {
   3269 	|  bge >1
   3270       } else {
   3271 	|  blt >1
   3272       }
   3273       |  addis PC, PC, -(BCBIAS_J*4 >> 16)  // Subtract the jump bias directly from PC.
   3274       |  decode_RD4 TMP2, INS
   3275       |   stfdx f0, BASE, RA  // Copy src to dst only when the jump is taken.
   3276       |  add PC, PC, TMP2
   3277       |1:
   3278     }
   3279     |  ins_next
   3280     break;
   3281 
   3282   case BC_ISTYPE:
   3283     |  // RA = src*8, RD = -type*8
   3284     |  lwzx TMP0, BASE, RA  // TMP0 = type tag of src.
   3285     |  srwi TMP1, RD, 3  // Undo the *8 scaling of the operand.
   3286     |  ins_next1
   3287     |.if not PPE and not GPR64
   3288     |  add. TMP0, TMP0, TMP1  // Result is zero iff TMP0 == -TMP1.
   3289     |.else
   3290     |  neg TMP1, TMP1
   3291     |  cmpw TMP0, TMP1  // Same test without the record-form add.
   3292     |.endif
   3293     |  bne ->vmeta_istype  // Type mismatch.
   3294     |  ins_next2
   3295     break;
   3296   case BC_ISNUM:
   3297     |  // RA = src*8, RD = -(TISNUM-1)*8
   3298     |  lwzx TMP0, BASE, RA  // TMP0 = type tag of src.
   3299     |  ins_next1
   3300     |  checknum TMP0
   3301     |  bge ->vmeta_istype  // src is not a number.
   3302     |  ins_next2
   3303     break;
   3304 
   3305   /* -- Unary ops --------------------------------------------------------- */
   3306 
   3307   case BC_MOV:
   3308     |  // RA = dst*8, RD = src*8
   3309     |  ins_next1
   3310     |  lfdx f0, BASE, RD  // Move the whole 8 byte slot through an FPR.
   3311     |  stfdx f0, BASE, RA
   3312     |  ins_next2
   3313     break;
   3314   case BC_NOT:
   3315     |  // RA = dst*8, RD = src*8
   3316     |  ins_next1
   3317     |  lwzx TMP0, BASE, RD  // TMP0 = type tag of src.
   3318     |  .gpr64 extsw TMP0, TMP0
   3319     |  subfic TMP1, TMP0, LJ_TTRUE  // TMP1 = LJ_TTRUE - tag; carry set iff tag <= LJ_TTRUE (unsigned).
   3320     |  adde TMP0, TMP0, TMP1  // TMP0 = LJ_TTRUE + carry.
   3321     |  stwx TMP0, BASE, RA  // Only the tag word of dst is written.
   3322     |  ins_next2
   3323     break;
   3324   case BC_UNM:
   3325     |  // RA = dst*8, RD = src*8
   3326     |  lwzux TMP1, RD, BASE  // TMP1 = high word (type tag) of src; RD now points at src.
   3327     |   lwz TMP0, 4(RD)  // TMP0 = low word of src.
   3328     |  checknum TMP1
   3329     |.if DUALNUM
   3330     |  bne >5  // Not an integer.
   3331     |.if GPR64
   3332     |  lus TMP2, 0x8000
   3333     |  neg TMP0, TMP0
   3334     |  cmplw TMP0, TMP2  // Negating -2^31 yields 0x80000000 again: overflow.
   3335     |  beq >4
   3336     |.else
   3337     |  nego. TMP0, TMP0
   3338     |  bso >4
   3339     |1:
   3340     |.endif
   3341     |  ins_next1
   3342     |  stwux TISNUM, RA, BASE  // Store the integer result with the TISNUM tag.
   3343     |   stw TMP0, 4(RA)
   3344     |3:
   3345     |  ins_next2
   3346     |4:
   3347     |.if not GPR64
   3348     |  // Potential overflow.
   3349     |  checkov TMP1, <1			// Ignore unrelated overflow.
   3350     |.endif
   3351     |  lus TMP1, 0x41e0			// 2^31.
   3352     |  li TMP0, 0
   3353     |  b >7
   3354     |.endif
   3355     |5:
   3356     |  bge ->vmeta_unm  // Not a number.
   3357     |  xoris TMP1, TMP1, 0x8000  // Flip the sign bit in the high word of the double.
   3358     |7:
   3359     |  ins_next1
   3360     |  stwux TMP1, RA, BASE
   3361     |   stw TMP0, 4(RA)
   3362     |.if DUALNUM
   3363     |  b <3
   3364     |.else
   3365     |  ins_next2
   3366     |.endif
   3367     break;
   3368   case BC_LEN:
   3369     |  // RA = dst*8, RD = src*8
   3370     |  lwzux TMP0, RD, BASE  // TMP0 = type tag of src.
   3371     |   lwz CARG1, 4(RD)  // CARG1 = object reference of src.
   3372     |  checkstr TMP0; bne >2
   3373     |  lwz CRET1, STR:CARG1->len  // String: the length is read from the string header.
   3374     |1:
   3375     |.if DUALNUM
   3376     |  ins_next1
   3377     |  stwux TISNUM, RA, BASE  // Store the length as an integer.
   3378     |   stw CRET1, 4(RA)
   3379     |.else
   3380     |  tonum_u f0, CRET1		// Result is a non-negative integer.
   3381     |  ins_next1
   3382     |  stfdx f0, BASE, RA
   3383     |.endif
   3384     |  ins_next2
   3385     |2:
   3386     |  checktab TMP0; bne ->vmeta_len
   3387     |  lwz TAB:TMP2, TAB:CARG1->metatable
   3388     |  cmplwi TAB:TMP2, 0
   3389     |  bne >9  // Table has a metatable: check its __len flag first.
   3390     |3:
   3391     |->BC_LEN_Z:
   3392     |  bl extern lj_tab_len		// (GCtab *t)
   3393     |  // Returns uint32_t (but less than 2^31).
   3394     |  b <1
   3395     |9:
   3396     |  lbz TMP0, TAB:TMP2->nomm
   3397     |  andix. TMP0, TMP0, 1<<MM_len
   3398     |  bne <3				// 'no __len' flag set: done.
   3399     |  b ->vmeta_len
   3400     break;
   3401 
   3402   /* -- Binary ops -------------------------------------------------------- */
   3403 
   3404     |.macro ins_arithpre
   3405     |  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
   3406     ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
   3407     ||switch (vk) {
   3408     ||case 0:
   3409     |   lwzx TMP1, BASE, RB
   3410     |   .if DUALNUM
   3411     |     lwzx TMP2, KBASE, RC
   3412     |   .endif
   3413     |    lfdx f14, BASE, RB  // Left operand (variable) in f14.
   3414     |    lfdx f15, KBASE, RC  // Right operand (constant) in f15.
   3415     |   .if DUALNUM
   3416     |     checknum cr0, TMP1
   3417     |     checknum cr1, TMP2
   3418     |     crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt  // lt = both operands are numbers.
   3419     |     bge ->vmeta_arith_vn
   3420     |   .else
   3421     |     checknum TMP1; bge ->vmeta_arith_vn
   3422     |   .endif
   3423     ||  break;
   3424     ||case 1:
   3425     |   lwzx TMP1, BASE, RB
   3426     |   .if DUALNUM
   3427     |     lwzx TMP2, KBASE, RC
   3428     |   .endif
   3429     |    lfdx f15, BASE, RB  // Right operand (variable) in f15.
   3430     |    lfdx f14, KBASE, RC  // Left operand (constant) in f14.
   3431     |   .if DUALNUM
   3432     |     checknum cr0, TMP1
   3433     |     checknum cr1, TMP2
   3434     |     crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt  // lt = both operands are numbers.
   3435     |     bge ->vmeta_arith_nv
   3436     |   .else
   3437     |     checknum TMP1; bge ->vmeta_arith_nv
   3438     |   .endif
   3439     ||  break;
   3440     ||default:
   3441     |   lwzx TMP1, BASE, RB
   3442     |   lwzx TMP2, BASE, RC
   3443     |    lfdx f14, BASE, RB  // Both operands are variables: left in f14, right in f15.
   3444     |    lfdx f15, BASE, RC
   3445     |   checknum cr0, TMP1
   3446     |   checknum cr1, TMP2
   3447     |   crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt  // lt = both operands are numbers.
   3448     |   bge ->vmeta_arith_vv
   3449     ||  break;
   3450     ||}
   3451     |.endmacro
   3452     |
   3453     |.macro ins_arithfallback, ins  // Emits branch 'ins' to the vmeta_arith_*2 label selected by vk.
   3454     ||switch (vk) {
   3455     ||case 0:
   3456     |   ins ->vmeta_arith_vn2
   3457     ||  break;
   3458     ||case 1:
   3459     |   ins ->vmeta_arith_nv2
   3460     ||  break;
   3461     ||default:
   3462     |   ins ->vmeta_arith_vv2
   3463     ||  break;
   3464     ||}
   3465     |.endmacro
   3466     |
   3467     |.macro intmod, a, b, c
   3468     |  bl ->vm_modi  // The macro arguments are not used; vm_modi works on CARG1 and CARG2.
   3469     |.endmacro
   3470     |
   3471     |.macro fpmod, a, b, c
   3472     |->BC_MODVN_Z:  // Shared entry point; the "fpmod_" variants branch here.
   3473     |  fdiv FARG1, b, c
   3474     |  // NYI: Use internal implementation of floor.
   3475     |  blex floor			// floor(b/c)
   3476     |  fmul a, FARG1, c  // a = floor(b/c)*c
   3477     |  fsub a, b, a			// b - floor(b/c)*c
   3478     |.endmacro
   3479     |
   3480     |.macro ins_arithfp, fpins
   3481     |  ins_arithpre  // Operands end up in f14 (left) and f15 (right).
   3482     |.if "fpins" == "fpmod_"
   3483     |  b ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
   3484     |.else
   3485     |  fpins f0, f14, f15
   3486     |  ins_next1
   3487     |  stfdx f0, BASE, RA  // Store the FP result to dst.
   3488     |  ins_next2
   3489     |.endif
   3490     |.endmacro
   3491     |
   3492     |.macro ins_arithdn, intins, fpins
   3493     |  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
   3494     ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
   3495     ||switch (vk) {
   3496     ||case 0:
   3497     |   lwzux TMP1, RB, BASE
   3498     |   lwzux TMP2, RC, KBASE
   3499     |    lwz CARG1, 4(RB)  // Left operand: variable.
   3500     |   checknum cr0, TMP1
   3501     |    lwz CARG2, 4(RC)  // Right operand: constant.
   3502     ||  break;
   3503     ||case 1:
   3504     |   lwzux TMP1, RB, BASE
   3505     |   lwzux TMP2, RC, KBASE
   3506     |    lwz CARG2, 4(RB)  // Right operand: variable.
   3507     |   checknum cr0, TMP1
   3508     |    lwz CARG1, 4(RC)  // Left operand: constant.
   3509     ||  break;
   3510     ||default:
   3511     |   lwzux TMP1, RB, BASE
   3512     |   lwzux TMP2, RC, BASE
   3513     |    lwz CARG1, 4(RB)
   3514     |   checknum cr0, TMP1
   3515     |    lwz CARG2, 4(RC)
   3516     ||  break;
   3517     ||}
   3518     |  checknum cr1, TMP2
   3519     |  bne >5  // First checked operand is not an integer.
   3520     |  bne cr1, >5  // Second checked operand is not an integer.
   3521     |  intins CARG1, CARG1, CARG2
   3522     |  bso >4  // SO is set: possible integer overflow.
   3523     |1:
   3524     |  ins_next1
   3525     |  stwux TISNUM, RA, BASE  // Store the integer result with the TISNUM tag.
   3526     |  stw CARG1, 4(RA)
   3527     |2:
   3528     |  ins_next2
   3529     |4:  // Overflow.
   3530     |  checkov TMP0, <1			// Ignore unrelated overflow.
   3531     |  ins_arithfallback b
   3532     |5:  // FP variant.
   3533     ||if (vk == 1) {
   3534     |  lfd f15, 0(RB)
   3535     |   crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt  // lt = both operands are numbers.
   3536     |  lfd f14, 0(RC)
   3537     ||} else {
   3538     |  lfd f14, 0(RB)
   3539     |   crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt  // lt = both operands are numbers.
   3540     |  lfd f15, 0(RC)
   3541     ||}
   3542     |   ins_arithfallback bge
   3543     |.if "fpins" == "fpmod_"
   3544     |  b ->BC_MODVN_Z			// Avoid 3 copies. It's slow anyway.
   3545     |.else
   3546     |  fpins f0, f14, f15
   3547     |  ins_next1
   3548     |  stfdx f0, BASE, RA
   3549     |  b <2
   3550     |.endif
   3551     |.endmacro
   3552     |
   3553     |.macro ins_arith, intins, fpins  // Selects the dual-number or the FP-only arithmetic template.
   3554     |.if DUALNUM
   3555     |  ins_arithdn intins, fpins
   3556     |.else
   3557     |  ins_arithfp fpins
   3558     |.endif
   3559     |.endmacro
   3560 
   3561   case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
   3562     |.if GPR64
   3563     |.macro addo32., y, a, b
   3564     |  // Need to check overflow for (a<<32) + (b<<32).
   3565     |  rldicr TMP0, a, 32, 31
   3566     |  rldicr TMP3, b, 32, 31
   3567     |  addo. TMP0, TMP0, TMP3  // Only executed for its overflow flags.
   3568     |  add y, a, b  // The actual sum.
   3569     |.endmacro
   3570     |  ins_arith addo32., fadd
   3571     |.else
   3572     |  ins_arith addo., fadd
   3573     |.endif
   3574     break;
   3575   case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
   3576     |.if GPR64
   3577     |.macro subo32., y, a, b
   3578     |  // Need to check overflow for (a<<32) - (b<<32).
   3579     |  rldicr TMP0, a, 32, 31
   3580     |  rldicr TMP3, b, 32, 31
   3581     |  subo. TMP0, TMP0, TMP3  // Only executed for its overflow flags.
   3582     |  sub y, a, b  // The actual difference.
   3583     |.endmacro
   3584     |  ins_arith subo32., fsub
   3585     |.else
   3586     |  ins_arith subo., fsub
   3587     |.endif
   3588     break;
   3589   case BC_MULVN: case BC_MULNV: case BC_MULVV:
   3590     |  ins_arith mullwo., fmul  // Overflow-checked integer multiply or FP multiply.
   3591     break;
   3592   case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
   3593     |  ins_arithfp fdiv  // Division always uses the FP template, even with DUALNUM.
   3594     break;
   3595   case BC_MODVN:
   3596     |  ins_arith intmod, fpmod  // This expansion of fpmod defines the shared ->BC_MODVN_Z code.
   3597     break;
   3598   case BC_MODNV: case BC_MODVV:
   3599     |  ins_arith intmod, fpmod_  // "fpmod_" makes the template branch to ->BC_MODVN_Z instead of expanding fpmod.
   3600     break;
   3601   case BC_POW:
   3602     |  // NYI: (partial) integer arithmetic.
   3603     |  lwzx TMP1, BASE, RB
   3604     |   lfdx FARG1, BASE, RB  // Base operand goes directly into the first FP argument.
   3605     |  lwzx TMP2, BASE, RC
   3606     |   lfdx FARG2, BASE, RC  // Exponent operand goes into the second FP argument.
   3607     |  checknum cr0, TMP1
   3608     |  checknum cr1, TMP2
   3609     |  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt  // lt = both operands are numbers.
   3610     |  bge ->vmeta_arith_vv
   3611     |  blex pow
   3612     |  ins_next1
   3613     |  stfdx FARG1, BASE, RA  // The result is taken from FARG1.
   3614     |  ins_next2
   3615     break;
   3616 
   3617   case BC_CAT:
   3618     |  // RA = dst*8, RB = src_start*8, RC = src_end*8
   3619     |  sub CARG3, RC, RB  // (src_end - src_start)*8.
   3620     |   stp BASE, L->base
   3621     |  add CARG2, BASE, RC  // top = address of the src_end slot.
   3622     |  mr SAVE0, RB  // Keep RB in SAVE0 for use after the call.
   3623     |->BC_CAT_Z:
   3624     |   stw PC, SAVE_PC
   3625     |  mr CARG1, L
   3626     |  srwi CARG3, CARG3, 3  // left = src_end - src_start.
   3627     |  bl extern lj_meta_cat		// (lua_State *L, TValue *top, int left)
   3628     |  // Returns NULL (finished) or TValue * (metamethod).
   3629     |  cmplwi CRET1, 0
   3630     |   lp BASE, L->base  // Reload BASE from L->base after the call.
   3631     |  bne ->vmeta_binop
   3632     |  ins_next1
   3633     |  lfdx f0, BASE, SAVE0		// Copy result from RB to RA.
   3634     |  stfdx f0, BASE, RA
   3635     |  ins_next2
   3636     break;
   3637 
   3638   /* -- Constant ops ------------------------------------------------------ */
   3639 
   3640   case BC_KSTR:
   3641     |  // RA = dst*8, RD = str_const*8 (~)
   3642     |  srwi TMP1, RD, 1  // TMP1 = str_const*4.
   3643     |  subfic TMP1, TMP1, -4  // TMP1 = -4 - str_const*4.
   3644     |  ins_next1
   3645     |  lwzx TMP0, KBASE, TMP1		// KBASE-4-str_const*4
   3646     |  li TMP2, LJ_TSTR
   3647     |  stwux TMP2, RA, BASE  // dst tag word = LJ_TSTR.
   3648     |  stw TMP0, 4(RA)  // dst low word = the loaded constant.
   3649     |  ins_next2
   3650     break;
   3651   case BC_KCDATA:
   3652     |.if FFI
   3653     |  // RA = dst*8, RD = cdata_const*8 (~)
   3654     |  srwi TMP1, RD, 1  // TMP1 = cdata_const*4.
   3655     |  subfic TMP1, TMP1, -4  // TMP1 = -4 - cdata_const*4.
   3656     |  ins_next1
   3657     |  lwzx TMP0, KBASE, TMP1		// KBASE-4-cdata_const*4
   3658     |  li TMP2, LJ_TCDATA
   3659     |  stwux TMP2, RA, BASE  // dst tag word = LJ_TCDATA.
   3660     |  stw TMP0, 4(RA)  // dst low word = the loaded constant.
   3661     |  ins_next2
   3662     |.endif
   3663     break;
   3664   case BC_KSHORT:
   3665     |  // RA = dst*8, RD = int16_literal*8
   3666     |.if DUALNUM
   3667     |  slwi RD, RD, 13  // Move the 16 bit literal into the top halfword.
   3668     |  srawi RD, RD, 16  // Sign-extend it to 32 bits.
   3669     |  ins_next1
   3670     |   stwux TISNUM, RA, BASE
   3671     |   stw RD, 4(RA)
   3672     |  ins_next2
   3673     |.else
   3674     |  // The soft-float approach is faster.
   3675     |  slwi RD, RD, 13  // Move the 16 bit literal into the top halfword.
   3676     |  srawi TMP1, RD, 31  // TMP1 = sign mask (0 or -1).
   3677     |  xor TMP2, TMP1, RD
   3678     |  sub TMP2, TMP2, TMP1		// TMP2 = abs(x)
   3679     |  cntlzw TMP3, TMP2
   3680     |  subfic TMP1, TMP3, 0x40d		// TMP1 = exponent-1
   3681     |   slw TMP2, TMP2, TMP3		// TMP2 = left aligned mantissa
   3682     |    subfic TMP3, RD, 0  // Carry is set iff RD == 0.
   3683     |  slwi TMP1, TMP1, 20
   3684     |   rlwimi RD, TMP2, 21, 1, 31	// hi = sign(x) | (mantissa>>11)
   3685     |    subfe TMP0, TMP0, TMP0  // TMP0 = (RD == 0) ? 0 : -1.
   3686     |   add RD, RD, TMP1		// hi = hi + exponent-1
   3687     |    and RD, RD, TMP0		// hi = x == 0 ? 0 : hi
   3688     |  ins_next1
   3689     |    stwux RD, RA, BASE
   3690     |    stw ZERO, 4(RA)  // The low word of the double is always zero here.
   3691     |  ins_next2
   3692     |.endif
   3693     break;
   3694   case BC_KNUM:  /* Copy an 8 byte number constant from KBASE+RD to the stack slot at BASE+RA. */
   3695     |  // RA = dst*8, RD = num_const*8
   3696     |  ins_next1
   3697     |  lfdx f0, KBASE, RD	// f0 = constant at KBASE + num_const*8
   3698     |  stfdx f0, BASE, RA	// slot at BASE + dst*8 = f0
   3699     |  ins_next2
   3700     break;
   3701   case BC_KPRI:  /* Store a primitive type tag into a slot; only the tag word (offset 0) is written. */
   3702     |  // RA = dst*8, RD = primitive_type*8 (~)
   3703     |  srwi TMP1, RD, 3	// TMP1 = primitive_type
   3704     |  not TMP0, TMP1	// TMP0 = ~primitive_type (the tag value that gets stored)
   3705     |  ins_next1
   3706     |  stwx TMP0, BASE, RA	// Tag word of slot at BASE+RA = TMP0.
   3707     |  ins_next2
   3708     break;
   3709   case BC_KNIL:  /* Write the TISNIL tag into the tag word of every slot from RA up to and including RD. */
   3710     |  // RA = base*8, RD = end*8
   3711     |  stwx TISNIL, BASE, RA	// First slot is stored ahead of the loop, so at least two slots are always written.
   3712     |   addi RA, RA, 8
   3713     |1:
   3714     |  stwx TISNIL, BASE, RA
   3715     |  cmpw RA, RD	// Compare the offset just written against the end offset.
   3716     |   addi RA, RA, 8
   3717     |  blt <1	// Loop while the slot just written was below RD.
   3718     |  ins_next_
   3719     break;
   3720 
   3721   /* -- Upvalue and function ops ------------------------------------------ */
   3722 
   3723   case BC_UGET:  /* Copy the value an upvalue points to (UPVAL->v) into the stack slot at BASE+RA. */
   3724     |  // RA = dst*8, RD = uvnum*8
   3725     |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// RB = function object of the current frame.
   3726     |   srwi RD, RD, 1	// RD = uvnum*4
   3727     |   addi RD, RD, offsetof(GCfuncL, uvptr)	// RD = byte offset of uvptr[uvnum] in the function.
   3728     |  lwzx UPVAL:RB, LFUNC:RB, RD	// RB = upvalue object pointer.
   3729     |  ins_next1
   3730     |  lwz TMP1, UPVAL:RB->v	// TMP1 = pointer to the upvalue's value.
   3731     |  lfd f0, 0(TMP1)	// Load the whole 8 byte value.
   3732     |  stfdx f0, BASE, RA	// Store it to the destination slot.
   3733     |  ins_next2
   3734     break;
   3735   case BC_USETV:  /* Store a stack slot into an upvalue; calls lj_gc_barrieruv if the upvalue is black and closed and the value is a white GC object. */
   3736     |  // RA = uvnum*8, RD = src*8
   3737     |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// RB = function object of the current frame.
   3738     |    srwi RA, RA, 1	// RA = uvnum*4
   3739     |    addi RA, RA, offsetof(GCfuncL, uvptr)	// RA = byte offset of uvptr[uvnum].
   3740     |   lfdux f0, RD, BASE	// f0 = source value; update form leaves RD = BASE+RD.
   3741     |  lwzx UPVAL:RB, LFUNC:RB, RA	// RB = upvalue object pointer.
   3742     |  lbz TMP3, UPVAL:RB->marked
   3743     |   lwz CARG2, UPVAL:RB->v	// CARG2 = destination TValue (also 2nd arg for the barrier call).
   3744     |  andix. TMP3, TMP3, LJ_GC_BLACK	// isblack(uv)
   3745     |    lbz TMP0, UPVAL:RB->closed
   3746     |   lwz TMP2, 0(RD)	// TMP2 = tag word of the source value.
   3747     |   stfd f0, 0(CARG2)	// The store itself is unconditional.
   3748     |    cmplwi cr1, TMP0, 0	// cr1.eq = upvalue is not closed.
   3749     |   lwz TMP1, 4(RD)	// TMP1 = payload word of the source value (GC object pointer, if any).
   3750     |  cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq	// cr0.eq = not black OR not closed.
   3751     |   subi TMP2, TMP2, (LJ_TNUMX+1)	// Bias the tag for the unsigned range check at 2:.
   3752     |  bne >2				// Upvalue is closed and black?
   3753     |1:
   3754     |  ins_next
   3755     |
   3756     |2:  // Check if new value is collectable.
   3757     |  cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1)
   3758     |  bge <1				// tvisgcv(v)
   3759     |  lbz TMP3, GCOBJ:TMP1->gch.marked
   3760     |  andix. TMP3, TMP3, LJ_GC_WHITES	// iswhite(v)
   3761     |   la CARG1, GG_DISP2G(DISPATCH)	// CARG1 = global_State pointer derived from DISPATCH.
   3762     |  // Crossed a write barrier. Move the barrier forward.
   3763     |  beq <1	// Value is not white: no barrier needed.
   3764     |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
   3765     |  b <1
   3766     break;
   3767   case BC_USETS:  /* Store a string constant into an upvalue; calls lj_gc_barrieruv if the upvalue is black and closed and the string is white. */
   3768     |  // RA = uvnum*8, RD = str_const*8 (~)
   3769     |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// RB = function object of the current frame.
   3770     |   srwi TMP1, RD, 1	// TMP1 = str_const*4
   3771     |    srwi RA, RA, 1	// RA = uvnum*4
   3772     |   subfic TMP1, TMP1, -4	// TMP1 = -4 - str_const*4
   3773     |    addi RA, RA, offsetof(GCfuncL, uvptr)	// RA = byte offset of uvptr[uvnum].
   3774     |   lwzx STR:TMP1, KBASE, TMP1	// KBASE-4-str_const*4
   3775     |  lwzx UPVAL:RB, LFUNC:RB, RA	// RB = upvalue object pointer.
   3776     |  lbz TMP3, UPVAL:RB->marked
   3777     |   lwz CARG2, UPVAL:RB->v	// CARG2 = destination TValue (also 2nd arg for the barrier call).
   3778     |  andix. TMP3, TMP3, LJ_GC_BLACK	// isblack(uv)
   3779     |   lbz TMP3, STR:TMP1->marked	// Preload the string's mark byte for the check at 2:.
   3780     |   lbz TMP2, UPVAL:RB->closed	// Preload the closed flag for the check at 2:.
   3781     |   li TMP0, LJ_TSTR
   3782     |   stw STR:TMP1, 4(CARG2)	// Payload word = string pointer.
   3783     |   stw TMP0, 0(CARG2)	// Tag word = LJ_TSTR.
   3784     |  bne >2	// Upvalue is black: go check whether a barrier is needed.
   3785     |1:
   3786     |  ins_next
   3787     |
   3788     |2:  // Check if string is white and ensure upvalue is closed.
   3789     |  andix. TMP3, TMP3, LJ_GC_WHITES	// iswhite(str)
   3790     |   cmplwi cr1, TMP2, 0	// cr1.eq = upvalue is not closed.
   3791     |  cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq	// cr0.eq = string not white OR upvalue not closed.
   3792     |   la CARG1, GG_DISP2G(DISPATCH)	// CARG1 = global_State pointer derived from DISPATCH.
   3793     |  // Crossed a write barrier. Move the barrier forward.
   3794     |  beq <1
   3795     |  bl extern lj_gc_barrieruv	// (global_State *g, TValue *tv)
   3796     |  b <1
   3797     break;
   3798   case BC_USETN:  /* Store a number constant (KBASE+RD) into the value an upvalue points to; no GC barrier code is emitted here. */
   3799     |  // RA = uvnum*8, RD = num_const*8
   3800     |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// RB = function object of the current frame.
   3801     |   srwi RA, RA, 1	// RA = uvnum*4
   3802     |   addi RA, RA, offsetof(GCfuncL, uvptr)	// RA = byte offset of uvptr[uvnum].
   3803     |    lfdx f0, KBASE, RD	// f0 = number constant.
   3804     |  lwzx UPVAL:RB, LFUNC:RB, RA	// RB = upvalue object pointer.
   3805     |  ins_next1
   3806     |  lwz TMP1, UPVAL:RB->v	// TMP1 = pointer to the upvalue's value.
   3807     |  stfd f0, 0(TMP1)	// Overwrite the full 8 byte value.
   3808     |  ins_next2
   3809     break;
   3810   case BC_USETP:  /* Store a primitive type tag into the value an upvalue points to; only the tag word is written. */
   3811     |  // RA = uvnum*8, RD = primitive_type*8 (~)
   3812     |  lwz LFUNC:RB, FRAME_FUNC(BASE)	// RB = function object of the current frame.
   3813     |   srwi RA, RA, 1	// RA = uvnum*4
   3814     |    srwi TMP0, RD, 3	// TMP0 = primitive_type
   3815     |   addi RA, RA, offsetof(GCfuncL, uvptr)	// RA = byte offset of uvptr[uvnum].
   3816     |    not TMP0, TMP0	// TMP0 = ~primitive_type (the tag value that gets stored)
   3817     |  lwzx UPVAL:RB, LFUNC:RB, RA	// RB = upvalue object pointer.
   3818     |  ins_next1
   3819     |  lwz TMP1, UPVAL:RB->v	// TMP1 = pointer to the upvalue's value.
   3820     |  stw TMP0, 0(TMP1)	// Tag word = TMP0.
   3821     |  ins_next2
   3822     break;
   3823 
   3824   case BC_UCLO:  /* Take the branch in RD and, if L->openupval is non-NULL, call lj_func_closeuv(L, BASE+RA). */
   3825     |  // RA = level*8, RD = target
   3826     |  lwz TMP1, L->openupval
   3827     |  branch_RD			// Do this first since RD is not saved.
   3828     |   stp BASE, L->base	// Publish BASE for the C call.
   3829     |  cmplwi TMP1, 0
   3830     |   mr CARG1, L
   3831     |  beq >1	// No open upvalues: skip the call.
   3832     |   add CARG2, BASE, RA	// CARG2 = level as TValue pointer.
   3833     |  bl extern lj_func_closeuv	// (lua_State *L, TValue *level)
   3834     |  lp BASE, L->base	// Reload BASE after the call.
   3835     |1:
   3836     |  ins_next
   3837     break;
   3838 
   3839   case BC_FNEW:  /* Create a closure via lj_func_newL_gc(L, proto, parent function) and store it (tag LJ_TFUNC) at BASE+RA. */
   3840     |  // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
   3841     |  srwi TMP1, RD, 1	// TMP1 = proto_const*4
   3842     |   stp BASE, L->base	// Publish BASE for the C call.
   3843     |  subfic TMP1, TMP1, -4	// TMP1 = -4 - proto_const*4
   3844     |   stw PC, SAVE_PC	// Save PC for the C call.
   3845     |  lwzx CARG2, KBASE, TMP1		// KBASE-4-proto_const*4
   3846     |   mr CARG1, L
   3847     |  lwz CARG3, FRAME_FUNC(BASE)	// CARG3 = function object of the current frame (the parent).
   3848     |  // (lua_State *L, GCproto *pt, GCfuncL *parent)
   3849     |  bl extern lj_func_newL_gc
   3850     |  // Returns GCfuncL *.
   3851     |  lp BASE, L->base	// Reload BASE after the call.
   3852     |   li TMP0, LJ_TFUNC
   3853     |  stwux TMP0, RA, BASE	// Tag word at BASE+RA; RA becomes BASE+RA.
   3854     |  stw LFUNC:CRET1, 4(RA)	// Payload word = returned function pointer.
   3855     |  ins_next
   3856     break;
   3857 
   3858   /* -- Table ops --------------------------------------------------------- */
   3859 
   3860   case BC_TNEW:  /* Shared handler for TNEW/TDUP: runs a GC step if gc.total >= gc.threshold, then creates (lj_tab_new) or duplicates (lj_tab_dup) a table and stores it at BASE+RA. */
   3861   case BC_TDUP:
   3862     |  // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
   3863     |  lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
   3864     |   mr CARG1, L
   3865     |  lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
   3866     |   stp BASE, L->base	// Publish BASE for the C calls below.
   3867     |  cmplw TMP0, TMP1
   3868     |   stw PC, SAVE_PC	// Save PC for the C calls below.
   3869     |  bge >5	// total >= threshold: do a GC step first.
   3870     |1:
   3871     if (op == BC_TNEW) {
   3872       |  rlwinm CARG2, RD, 29, 21, 31	// CARG2 = asize = (RD>>3) & 0x7ff
   3873       |  rlwinm CARG3, RD, 18, 27, 31	// CARG3 = hbits = (RD>>14) & 0x1f
   3874       |  cmpwi CARG2, 0x7ff; beq >3	// asize field at its maximum is replaced by 0x801 at 3:.
   3875       |2:
   3876       |  bl extern lj_tab_new  // (lua_State *L, int32_t asize, uint32_t hbits)
   3877       |  // Returns Table *.
   3878     } else {
   3879       |  srwi TMP1, RD, 1	// TMP1 = tab_const*4
   3880       |  subfic TMP1, TMP1, -4	// TMP1 = -4 - tab_const*4
   3881       |  lwzx CARG2, KBASE, TMP1		// KBASE-4-tab_const*4
   3882       |  bl extern lj_tab_dup  // (lua_State *L, Table *kt)
   3883       |  // Returns Table *.
   3884     }
   3885     |  lp BASE, L->base	// Reload BASE after the call.
   3886     |   li TMP0, LJ_TTAB
   3887     |  stwux TMP0, RA, BASE	// Tag word at BASE+RA; RA becomes BASE+RA.
   3888     |  stw TAB:CRET1, 4(RA)	// Payload word = returned table pointer.
   3889     |  ins_next
   3890     if (op == BC_TNEW) {
   3891       |3:
   3892       |  li CARG2, 0x801
   3893       |  b <2
   3894     }
   3895     |5:
   3896     |  mr SAVE0, RD	// Keep RD in SAVE0 across the call.
   3897     |  bl extern lj_gc_step_fixtop  // (lua_State *L)
   3898     |  mr RD, SAVE0
   3899     |  mr CARG1, L	// Set up CARG1 = L again for the allocation call.
   3900     |  b <1
   3901     break;
   3902 
   3903   case BC_GGET:  /* Shared prologue for GGET/GSET: load the current function's env table and the string key, then jump into the TGETS/TSETS hash lookup. */
   3904     |  // RA = dst*8, RD = str_const*8 (~)
   3905   case BC_GSET:
   3906     |  // RA = src*8, RD = str_const*8 (~)
   3907     |  lwz LFUNC:TMP2, FRAME_FUNC(BASE)	// TMP2 = function object of the current frame.
   3908     |   srwi TMP1, RD, 1	// TMP1 = str_const*4
   3909     |  lwz TAB:RB, LFUNC:TMP2->env	// RB = function's environment table.
   3910     |   subfic TMP1, TMP1, -4	// TMP1 = -4 - str_const*4
   3911     |   lwzx STR:RC, KBASE, TMP1	// KBASE-4-str_const*4
   3912     if (op == BC_GGET) {
   3913       |  b ->BC_TGETS_Z
   3914     } else {
   3915       |  b ->BC_TSETS_Z
   3916     }
   3917     break;
   3918 
   3919   case BC_TGETV:  /* Table get with a variable key: fast path for integer keys inside the array part, string keys go to BC_TGETS_Z, everything else to vmeta_tgetv. */
   3920     |  // RA = dst*8, RB = table*8, RC = key*8
   3921     |  lwzux CARG1, RB, BASE	// CARG1 = tag of table slot; RB becomes its address.
   3922     |  lwzux CARG2, RC, BASE	// CARG2 = tag of key slot; RC becomes its address.
   3923     |   lwz TAB:RB, 4(RB)	// RB = payload word of the table slot.
   3924     |.if DUALNUM
   3925     |   lwz RC, 4(RC)	// RC = payload word of the key slot.
   3926     |.else
   3927     |   lfd f0, 0(RC)	// f0 = key as a double.
   3928     |.endif
   3929     |  checktab CARG1
   3930     |   checknum cr1, CARG2
   3931     |  bne ->vmeta_tgetv	// Not a table.
   3932     |.if DUALNUM
   3933     |  lwz TMP0, TAB:RB->asize
   3934     |   bne cr1, >5	// Key failed the checknum test: try string key.
   3935     |   lwz TMP1, TAB:RB->array
   3936     |  cmplw TMP0, RC	// Unsigned compare of asize against the key.
   3937     |   slwi TMP2, RC, 3	// TMP2 = key*8 = byte offset into the array part.
   3938     |.else
   3939     |   bge cr1, >5	// Key failed the checknum test: try string key.
   3940     |  // Convert number key to integer, check for integerness and range.
   3941     |  fctiwz f1, f0
   3942     |    fadd f2, f0, TOBIT
   3943     |  stfd f1, TMPD
   3944     |   lwz TMP0, TAB:RB->asize
   3945     |    fsub f2, f2, TOBIT
   3946     |  lwz TMP2, TMPD_LO	// TMP2 = converted integer key.
   3947     |   lwz TMP1, TAB:RB->array
   3948     |    fcmpu cr1, f0, f2	// cr1.eq = key survived the add/sub TOBIT round trip unchanged.
   3949     |  cmplw cr0, TMP0, TMP2
   3950     |  crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq	// cr0.gt = (asize > key) AND key is integral.
   3951     |   slwi TMP2, TMP2, 3	// TMP2 = key*8 = byte offset into the array part.
   3952     |.endif
   3953     |  ble ->vmeta_tgetv		// Integer key and in array part?
   3954     |  lwzx TMP0, TMP1, TMP2	// TMP0 = tag of array element.
   3955     |   lfdx f14, TMP1, TMP2	// f14 = whole array element.
   3956     |  checknil TMP0; beq >2
   3957     |1:
   3958     |  ins_next1
   3959     |   stfdx f14, BASE, RA	// Store result to destination slot.
   3960     |  ins_next2
   3961     |
   3962     |2:  // Check for __index if table value is nil.
   3963     |  lwz TAB:TMP2, TAB:RB->metatable
   3964     |  cmplwi TAB:TMP2, 0
   3965     |  beq <1				// No metatable: done.
   3966     |  lbz TMP0, TAB:TMP2->nomm
   3967     |  andix. TMP0, TMP0, 1<<MM_index
   3968     |  bne <1				// 'no __index' flag set: done.
   3969     |  b ->vmeta_tgetv
   3970     |
   3971     |5:
   3972     |  checkstr CARG2; bne ->vmeta_tgetv
   3973     |.if not DUALNUM
   3974     |  lwz STR:RC, 4(RC)	// RC = string pointer from the key slot (already loaded in the DUALNUM variant).
   3975     |.endif
   3976     |  b ->BC_TGETS_Z			// String key?
   3977     break;
   3978   case BC_TGETS:  /* Table get with a constant string key: walks the hash chain for the key; nil results consult the metatable's 'no __index' cache before falling back to vmeta_tgets. */
   3979     |  // RA = dst*8, RB = table*8, RC = str_const*8 (~)
   3980     |  lwzux CARG1, RB, BASE	// CARG1 = tag of table slot; RB becomes its address.
   3981     |   srwi TMP1, RC, 1	// TMP1 = str_const*4
   3982     |    lwz TAB:RB, 4(RB)	// RB = payload word of the table slot.
   3983     |   subfic TMP1, TMP1, -4	// TMP1 = -4 - str_const*4
   3984     |  checktab CARG1
   3985     |   lwzx STR:RC, KBASE, TMP1	// KBASE-4-str_const*4
   3986     |  bne ->vmeta_tgets1	// Not a table.
   3987     |->BC_TGETS_Z:
   3988     |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
   3989     |  lwz TMP0, TAB:RB->hmask
   3990     |  lwz TMP1, STR:RC->hash
   3991     |  lwz NODE:TMP2, TAB:RB->node
   3992     |  and TMP1, TMP1, TMP0		// idx = str->hash & tab->hmask
   3993     |  slwi TMP0, TMP1, 5
   3994     |  slwi TMP1, TMP1, 3
   3995     |  sub TMP1, TMP0, TMP1	// TMP1 = idx*32 - idx*8 = idx*24
   3996     |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
   3997     |1:
   3998     |  lwz CARG1, NODE:TMP2->key	// CARG1 = key tag word.
   3999     |   lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)	// TMP0 = key payload word.
   4000     |    lwz CARG2, NODE:TMP2->val	// CARG2 = value tag word.
   4001     |     lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)	// TMP1 = value payload word.
   4002     |  checkstr CARG1; bne >4
   4003     |   cmpw TMP0, STR:RC; bne >4	// Keys match only if the string pointers are identical.
   4004     |    checknil CARG2; beq >5		// Key found, but nil value?
   4005     |3:
   4006     |    stwux CARG2, RA, BASE	// Result tag word at BASE+RA; RA becomes BASE+RA.
   4007     |     stw TMP1, 4(RA)	// Result payload word.
   4008     |  ins_next
   4009     |
   4010     |4:  // Follow hash chain.
   4011     |  lwz NODE:TMP2, NODE:TMP2->next
   4012     |  cmplwi NODE:TMP2, 0
   4013     |  bne <1
   4014     |  // End of hash chain: key not found, nil result.
   4015     |   li CARG2, LJ_TNIL	// Falls through into the __index check below.
   4016     |
   4017     |5:  // Check for __index if table value is nil.
   4018     |  lwz TAB:TMP2, TAB:RB->metatable
   4019     |  cmplwi TAB:TMP2, 0
   4020     |  beq <3				// No metatable: done.
   4021     |  lbz TMP0, TAB:TMP2->nomm
   4022     |  andix. TMP0, TMP0, 1<<MM_index
   4023     |  bne <3				// 'no __index' flag set: done.
   4024     |  b ->vmeta_tgets
   4025     break;
   4026   case BC_TGETB:  /* Table get with a byte-literal index: fast path when the index is inside the array part, otherwise vmeta_tgetb. */
   4027     |  // RA = dst*8, RB = table*8, RC = index*8
   4028     |  lwzux CARG1, RB, BASE	// CARG1 = tag of table slot; RB becomes its address.
   4029     |   srwi TMP0, RC, 3	// TMP0 = index (unscaled).
   4030     |   lwz TAB:RB, 4(RB)	// RB = payload word of the table slot.
   4031     |  checktab CARG1; bne ->vmeta_tgetb
   4032     |  lwz TMP1, TAB:RB->asize
   4033     |   lwz TMP2, TAB:RB->array
   4034     |  cmplw TMP0, TMP1; bge ->vmeta_tgetb	// index >= asize (unsigned): not in array part.
   4035     |  lwzx TMP1, TMP2, RC	// TMP1 = tag of array element (RC is already index*8).
   4036     |   lfdx f0, TMP2, RC	// f0 = whole array element.
   4037     |  checknil TMP1; beq >5
   4038     |1:
   4039     |  ins_next1
   4040     |   stfdx f0, BASE, RA	// Store result to destination slot.
   4041     |  ins_next2
   4042     |
   4043     |5:  // Check for __index if table value is nil.
   4044     |  lwz TAB:TMP2, TAB:RB->metatable
   4045     |  cmplwi TAB:TMP2, 0
   4046     |  beq <1				// No metatable: done.
   4047     |  lbz TMP2, TAB:TMP2->nomm
   4048     |  andix. TMP2, TMP2, 1<<MM_index
   4049     |  bne <1				// 'no __index' flag set: done.
   4050     |  b ->vmeta_tgetb			// Caveat: preserve TMP0!
   4051     break;
   4052   case BC_TGETR:  /* Raw array get: no type checks on table or key are emitted here and no metatable lookup; out-of-range keys go to vmeta_tgetr. */
   4053     |  // RA = dst*8, RB = table*8, RC = key*8
   4054     |  add RB, BASE, RB
   4055     |  lwz TAB:CARG1, 4(RB)	// CARG1 = payload word of the table slot.
   4056     |.if DUALNUM
   4057     |  add RC, BASE, RC
   4058     |  lwz TMP0, TAB:CARG1->asize
   4059     |  lwz CARG2, 4(RC)	// CARG2 = payload word of the key slot, used as integer key.
   4060     |   lwz TMP1, TAB:CARG1->array
   4061     |.else
   4062     |  lfdx f0, BASE, RC	// f0 = key as a double.
   4063     |  lwz TMP0, TAB:CARG1->asize
   4064     |  toint CARG2, f0	// Macro defined elsewhere; presumably converts f0 to an integer in CARG2.
   4065     |   lwz TMP1, TAB:CARG1->array
   4066     |.endif
   4067     |  cmplw TMP0, CARG2	// Unsigned compare of asize against the key.
   4068     |   slwi TMP2, CARG2, 3	// TMP2 = key*8 = byte offset into the array part.
   4069     |  ble ->vmeta_tgetr		// In array part?
   4070     |   lfdx f14, TMP1, TMP2	// f14 = array element.
   4071     |->BC_TGETR_Z:
   4072     |  ins_next1
   4073     |   stfdx f14, BASE, RA	// Store f14 to destination slot.
   4074     |  ins_next2
   4075     break;
   4076 
   4077   case BC_TSETV:  /* Table set with a variable key: fast path for integer keys inside the array part, string keys go to BC_TSETS_Z, everything else to vmeta_tsetv. */
   4078     |  // RA = src*8, RB = table*8, RC = key*8
   4079     |  lwzux CARG1, RB, BASE	// CARG1 = tag of table slot; RB becomes its address.
   4080     |  lwzux CARG2, RC, BASE	// CARG2 = tag of key slot; RC becomes its address.
   4081     |   lwz TAB:RB, 4(RB)	// RB = payload word of the table slot.
   4082     |.if DUALNUM
   4083     |   lwz RC, 4(RC)	// RC = payload word of the key slot.
   4084     |.else
   4085     |   lfd f0, 0(RC)	// f0 = key as a double.
   4086     |.endif
   4087     |  checktab CARG1
   4088     |   checknum cr1, CARG2
   4089     |  bne ->vmeta_tsetv	// Not a table.
   4090     |.if DUALNUM
   4091     |  lwz TMP0, TAB:RB->asize
   4092     |   bne cr1, >5	// Key failed the checknum test: try string key.
   4093     |   lwz TMP1, TAB:RB->array
   4094     |  cmplw TMP0, RC	// Unsigned compare of asize against the key.
   4095     |   slwi TMP0, RC, 3	// TMP0 = key*8 = byte offset into the array part.
   4096     |.else
   4097     |   bge cr1, >5	// Key failed the checknum test: try string key.
   4098     |  // Convert number key to integer, check for integerness and range.
   4099     |  fctiwz f1, f0
   4100     |    fadd f2, f0, TOBIT
   4101     |  stfd f1, TMPD
   4102     |   lwz TMP0, TAB:RB->asize
   4103     |    fsub f2, f2, TOBIT
   4104     |  lwz TMP2, TMPD_LO	// TMP2 = converted integer key.
   4105     |   lwz TMP1, TAB:RB->array
   4106     |    fcmpu cr1, f0, f2	// cr1.eq = key survived the add/sub TOBIT round trip unchanged.
   4107     |  cmplw cr0, TMP0, TMP2
   4108     |  crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq	// cr0.gt = (asize > key) AND key is integral.
   4109     |   slwi TMP0, TMP2, 3	// TMP0 = key*8 = byte offset into the array part.
   4110     |.endif
   4111     |  ble ->vmeta_tsetv		// Integer key and in array part?
   4112     |   lwzx TMP2, TMP1, TMP0	// TMP2 = tag of the previous array element.
   4113     |  lbz TMP3, TAB:RB->marked
   4114     |    lfdx f14, BASE, RA	// f14 = value to store.
   4115     |   checknil TMP2; beq >3
   4116     |1:
   4117     |  andix. TMP2, TMP3, LJ_GC_BLACK	// isblack(table)
   4118     |    stfdx f14, TMP1, TMP0	// Store the value into the array element.
   4119     |  bne >7
   4120     |2:
   4121     |  ins_next
   4122     |
   4123     |3:  // Check for __newindex if previous value is nil.
   4124     |  lwz TAB:TMP2, TAB:RB->metatable
   4125     |  cmplwi TAB:TMP2, 0
   4126     |  beq <1				// No metatable: done.
   4127     |  lbz TMP2, TAB:TMP2->nomm
   4128     |  andix. TMP2, TMP2, 1<<MM_newindex
   4129     |  bne <1				// 'no __newindex' flag set: done.
   4130     |  b ->vmeta_tsetv
   4131     |
   4132     |5:
   4133     |  checkstr CARG2; bne ->vmeta_tsetv
   4134     |.if not DUALNUM
   4135     |  lwz STR:RC, 4(RC)	// RC = string pointer from the key slot (already loaded in the DUALNUM variant).
   4136     |.endif
   4137     |  b ->BC_TSETS_Z			// String key?
   4138     |
   4139     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   4140     |  barrierback TAB:RB, TMP3, TMP0
   4141     |  b <2
   4142     break;
   4143   case BC_TSETS:  /* Table set with a constant string key: walks the hash chain; missing keys are inserted via lj_tab_newkey unless a __newindex check is required (vmeta_tsets). */
   4144     |  // RA = src*8, RB = table*8, RC = str_const*8 (~)
   4145     |  lwzux CARG1, RB, BASE	// CARG1 = tag of table slot; RB becomes its address.
   4146     |   srwi TMP1, RC, 1	// TMP1 = str_const*4
   4147     |    lwz TAB:RB, 4(RB)	// RB = payload word of the table slot.
   4148     |   subfic TMP1, TMP1, -4	// TMP1 = -4 - str_const*4
   4149     |  checktab CARG1
   4150     |   lwzx STR:RC, KBASE, TMP1	// KBASE-4-str_const*4
   4151     |  bne ->vmeta_tsets1	// Not a table.
   4152     |->BC_TSETS_Z:
   4153     |  // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
   4154     |  lwz TMP0, TAB:RB->hmask
   4155     |  lwz TMP1, STR:RC->hash
   4156     |  lwz NODE:TMP2, TAB:RB->node
   4157     |    stb ZERO, TAB:RB->nomm		// Clear metamethod cache.
   4158     |  and TMP1, TMP1, TMP0		// idx = str->hash & tab->hmask
   4159     |    lfdx f14, BASE, RA	// f14 = value to store.
   4160     |  slwi TMP0, TMP1, 5
   4161     |  slwi TMP1, TMP1, 3
   4162     |  sub TMP1, TMP0, TMP1	// TMP1 = idx*32 - idx*8 = idx*24
   4163     |    lbz TMP3, TAB:RB->marked	// Preload the table's mark byte for the isblack test at 2:.
   4164     |  add NODE:TMP2, NODE:TMP2, TMP1	// node = tab->node + (idx*32-idx*8)
   4165     |1:
   4166     |  lwz CARG1, NODE:TMP2->key	// CARG1 = key tag word.
   4167     |   lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)	// TMP0 = key payload word.
   4168     |    lwz CARG2, NODE:TMP2->val	// CARG2 = previous value's tag word.
   4169     |     lwz NODE:TMP1, NODE:TMP2->next	// Preload the chain link for label 5:.
   4170     |  checkstr CARG1; bne >5
   4171     |   cmpw TMP0, STR:RC; bne >5	// Keys match only if the string pointers are identical.
   4172     |    checknil CARG2; beq >4		// Key found, but nil value?
   4173     |2:
   4174     |  andix. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
   4175     |    stfd f14, NODE:TMP2->val	// Store the value into the node.
   4176     |  bne >7
   4177     |3:
   4178     |  ins_next
   4179     |
   4180     |4:  // Check for __newindex if previous value is nil.
   4181     |  lwz TAB:TMP1, TAB:RB->metatable
   4182     |  cmplwi TAB:TMP1, 0
   4183     |  beq <2				// No metatable: done.
   4184     |  lbz TMP0, TAB:TMP1->nomm
   4185     |  andix. TMP0, TMP0, 1<<MM_newindex
   4186     |  bne <2				// 'no __newindex' flag set: done.
   4187     |  b ->vmeta_tsets
   4188     |
   4189     |5:  // Follow hash chain.
   4190     |  cmplwi NODE:TMP1, 0
   4191     |   mr NODE:TMP2, NODE:TMP1
   4192     |  bne <1
   4193     |  // End of hash chain: key not found, add a new one.
   4194     |
   4195     |  // But check for __newindex first.
   4196     |  lwz TAB:TMP1, TAB:RB->metatable
   4197     |   la CARG3, DISPATCH_GL(tmptv)(DISPATCH)	// CARG3 = address of the global tmptv, used to pass the key.
   4198     |   stw PC, SAVE_PC	// Save PC for the C call.
   4199     |   mr CARG1, L
   4200     |  cmplwi TAB:TMP1, 0
   4201     |   stp BASE, L->base	// Publish BASE for the C call.
   4202     |  beq >6				// No metatable: continue.
   4203     |  lbz TMP0, TAB:TMP1->nomm
   4204     |  andix. TMP0, TMP0, 1<<MM_newindex
   4205     |  beq ->vmeta_tsets		// 'no __newindex' flag NOT set: check.
   4206     |6:
   4207     |  li TMP0, LJ_TSTR
   4208     |   stw STR:RC, 4(CARG3)	// tmptv payload word = string key pointer.
   4209     |   mr CARG2, TAB:RB
   4210     |  stw TMP0, 0(CARG3)	// tmptv tag word = LJ_TSTR.
   4211     |  bl extern lj_tab_newkey		// (lua_State *L, GCtab *t, TValue *k)
   4212     |  // Returns TValue *.
   4213     |  lp BASE, L->base	// Reload BASE after the call.
   4214     |  stfd f14, 0(CRET1)	// Store the value into the returned slot.
   4215     |  b <3				// No 2nd write barrier needed.
   4216     |
   4217     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   4218     |  barrierback TAB:RB, TMP3, TMP0
   4219     |  b <3
   4220     break;
   4221   case BC_TSETB:  /* Table set with a byte-literal index: fast path when the index is inside the array part, otherwise vmeta_tsetb. */
   4222     |  // RA = src*8, RB = table*8, RC = index*8
   4223     |  lwzux CARG1, RB, BASE	// CARG1 = tag of table slot; RB becomes its address.
   4224     |   srwi TMP0, RC, 3	// TMP0 = index (unscaled).
   4225     |   lwz TAB:RB, 4(RB)	// RB = payload word of the table slot.
   4226     |  checktab CARG1; bne ->vmeta_tsetb
   4227     |  lwz TMP1, TAB:RB->asize
   4228     |   lwz TMP2, TAB:RB->array
   4229     |    lbz TMP3, TAB:RB->marked	// Preload the table's mark byte for the isblack test at 1:.
   4230     |  cmplw TMP0, TMP1
   4231     |   lfdx f14, BASE, RA	// f14 = value to store.
   4232     |  bge ->vmeta_tsetb	// index >= asize (unsigned): not in array part.
   4233     |  lwzx TMP1, TMP2, RC	// TMP1 = tag of the previous array element (RC is already index*8).
   4234     |  checknil TMP1; beq >5
   4235     |1:
   4236     |  andix. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
   4237     |   stfdx f14, TMP2, RC	// Store the value into the array element.
   4238     |  bne >7
   4239     |2:
   4240     |  ins_next
   4241     |
   4242     |5:  // Check for __newindex if previous value is nil.
   4243     |  lwz TAB:TMP1, TAB:RB->metatable
   4244     |  cmplwi TAB:TMP1, 0
   4245     |  beq <1				// No metatable: done.
   4246     |  lbz TMP1, TAB:TMP1->nomm
   4247     |  andix. TMP1, TMP1, 1<<MM_newindex
   4248     |  bne <1				// 'no __newindex' flag set: done.
   4249     |  b ->vmeta_tsetb			// Caveat: preserve TMP0!
   4250     |
   4251     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   4252     |  barrierback TAB:RB, TMP3, TMP0
   4253     |  b <2
   4254     break;
   4255   case BC_TSETR:  /* Raw array set: no type checks on table or key are emitted here and no metatable lookup; out-of-range keys go to vmeta_tsetr. The black-table barrier check runs before the range check. */
   4256     |  // RA = src*8, RB = table*8, RC = key*8
   4257     |  add RB, BASE, RB
   4258     |  lwz TAB:CARG2, 4(RB)	// CARG2 = payload word of the table slot.
   4259     |.if DUALNUM
   4260     |  add RC, BASE, RC
   4261     |    lbz TMP3, TAB:CARG2->marked
   4262     |  lwz TMP0, TAB:CARG2->asize
   4263     |  lwz CARG3, 4(RC)	// CARG3 = payload word of the key slot, used as integer key.
   4264     |   lwz TMP1, TAB:CARG2->array
   4265     |.else
   4266     |  lfdx f0, BASE, RC	// f0 = key as a double.
   4267     |    lbz TMP3, TAB:CARG2->marked
   4268     |  lwz TMP0, TAB:CARG2->asize
   4269     |  toint CARG3, f0	// Macro defined elsewhere; presumably converts f0 to an integer in CARG3.
   4270     |   lwz TMP1, TAB:CARG2->array
   4271     |.endif
   4272     |  andix. TMP2, TMP3, LJ_GC_BLACK	// isblack(table)
   4273     |  bne >7
   4274     |2:
   4275     |  cmplw TMP0, CARG3	// Unsigned compare of asize against the key.
   4276     |   slwi TMP2, CARG3, 3	// TMP2 = key*8 = byte offset into the array part.
   4277     |   lfdx f14, BASE, RA	// f14 = value to store.
   4278     |  ble ->vmeta_tsetr		// In array part?
   4279     |  ins_next1
   4280     |   stfdx f14, TMP1, TMP2	// Store the value into the array element.
   4281     |  ins_next2
   4282     |
   4283     |7:  // Possible table write barrier for the value. Skip valiswhite check.
   4284     |  barrierback TAB:CARG2, TMP3, TMP2
   4285     |  b <2
   4286     break;
   4287 
   4288 
   4289   case BC_TSETM:  /* Copy MULTRES-derived result slots starting at BASE+RA into the array part of the table at base-1, resizing the array part via lj_tab_reasize when needed. */
   4290     |  // RA = base*8 (table at base-1), RD = num_const*8 (start index)
   4291     |  add RA, BASE, RA
   4292     |1:
   4293     |   add TMP3, KBASE, RD	// TMP3 = address of the number constant.
   4294     |  lwz TAB:CARG2, -4(RA)		// Guaranteed to be a table.
   4295     |    addic. TMP0, MULTRES, -8	// TMP0 = MULTRES-8 = byte count of slots to copy; sets cr0.
   4296     |   lwz TMP3, 4(TMP3)		// Integer constant is in lo-word.
   4297     |    srwi CARG3, TMP0, 3	// CARG3 = number of slots to copy.
   4298     |    beq >4				// Nothing to copy?
   4299     |  add CARG3, CARG3, TMP3	// CARG3 = start index + count (also the nasize argument at 5:).
   4300     |  lwz TMP2, TAB:CARG2->asize
   4301     |   slwi TMP1, TMP3, 3	// TMP1 = start index * 8.
   4302     |    lbz TMP3, TAB:CARG2->marked
   4303     |  cmplw CARG3, TMP2
   4304     |   add TMP2, RA, TMP0	// TMP2 = end of the source slots.
   4305     |   lwz TMP0, TAB:CARG2->array
   4306     |  bgt >5	// start+count > asize: resize first.
   4307     |   add TMP1, TMP1, TMP0	// TMP1 = destination address inside the array part.
   4308     |    andix. TMP0, TMP3, LJ_GC_BLACK	// isblack(table)
   4309     |3:  // Copy result slots to table.
   4310     |   lfd f0, 0(RA)
   4311     |  addi RA, RA, 8
   4312     |  cmpw cr1, RA, TMP2	// Loop test uses cr1 so cr0 keeps the isblack result.
   4313     |   stfd f0, 0(TMP1)
   4314     |    addi TMP1, TMP1, 8
   4315     |  blt cr1, <3
   4316     |  bne >7	// cr0 from the isblack test: table is black, handle barrier.
   4317     |4:
   4318     |  ins_next
   4319     |
   4320     |5:  // Need to resize array part.
   4321     |   stp BASE, L->base	// Publish BASE for the C call.
   4322     |  mr CARG1, L
   4323     |   stw PC, SAVE_PC	// Save PC for the C call.
   4324     |  mr SAVE0, RD	// Keep RD in SAVE0 across the call.
   4325     |  bl extern lj_tab_reasize		// (lua_State *L, GCtab *t, int nasize)
   4326     |  // Must not reallocate the stack.
   4327     |  mr RD, SAVE0
   4328     |  b <1	// Retry from the top with the resized table.
   4329     |
   4330     |7:  // Possible table write barrier for any value. Skip valiswhite check.
   4331     |  barrierback TAB:CARG2, TMP3, TMP0
   4332     |  b <4
   4333     break;
   4334 
   4335   /* -- Calls and vararg handling ----------------------------------------- */
   4336 
   4337   case BC_CALLM:  /* Call with extra multi-result arguments: adds MULTRES to the argument count, then runs straight into the BC_CALL code. */
   4338     |  // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
   4339     |  add NARGS8:RC, NARGS8:RC, MULTRES	// RC = extra_nargs*8 + MULTRES
   4340     |  // Fall through. Assumes BC_CALL follows.
   4341     break;
   4342   case BC_CALL:  /* Regular call: set up the new BASE and argument byte count, then dispatch via ins_call; non-functions go to vmeta_call. */
   4343     |  // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
   4344     |  mr TMP2, BASE	// TMP2 = caller's BASE.
   4345     |  lwzux TMP0, BASE, RA	// TMP0 = tag of the callee slot; BASE becomes its address.
   4346     |   lwz LFUNC:RB, 4(BASE)	// RB = payload word of the callee slot.
   4347     |    subi NARGS8:RC, NARGS8:RC, 8	// RC = nargs*8
   4348     |   addi BASE, BASE, 8	// BASE = slot after the callee slot.
   4349     |  checkfunc TMP0; bne ->vmeta_call
   4350     |  ins_call
   4351     break;
   4352 
   4353   case BC_CALLMT:  /* Tail call with extra multi-result arguments: adds MULTRES to the argument count, then runs straight into the BC_CALLT code. */
   4354     |  // RA = base*8, (RB = 0,) RC = extra_nargs*8
   4355     |  add NARGS8:RC, NARGS8:RC, MULTRES	// RC = extra_nargs*8 + MULTRES
   4356     |  // Fall through. Assumes BC_CALLT follows.
   4357     break;
   4358   case BC_CALLT:  /* Tail call: copy callee and arguments down over the current frame (keeping its PC), then dispatch via ins_callt. */
   4359     |  // RA = base*8, (RB = 0,) RC = (nargs+1)*8
   4360     |  lwzux TMP0, RA, BASE	// TMP0 = tag of the callee slot; RA becomes its address.
   4361     |   lwz LFUNC:RB, 4(RA)	// RB = payload word of the callee slot.
   4362     |    subi NARGS8:RC, NARGS8:RC, 8	// RC = nargs*8
   4363     |    lwz TMP1, FRAME_PC(BASE)	// TMP1 = PC/frame-link word of the current frame.
   4364     |  checkfunc TMP0
   4365     |   addi RA, RA, 8	// RA = address of the first argument.
   4366     |  bne ->vmeta_callt
   4367     |->BC_CALLT_Z:
   4368     |  andix. TMP0, TMP1, FRAME_TYPE	// Caveat: preserve cr0 until the crand.
   4369     |   lbz TMP3, LFUNC:RB->ffid
   4370     |    xori TMP2, TMP1, FRAME_VARG	// TMP2 = frame word with the FRAME_VARG bits toggled (used at 7:).
   4371     |    cmplwi cr1, NARGS8:RC, 0	// cr1.eq = no arguments to copy.
   4372     |  bne >7	// FRAME_TYPE bits set: go check for a vararg frame.
   4373     |1:
   4374     |  stw LFUNC:RB, FRAME_FUNC(BASE)	// Copy function down, but keep PC.
   4375     |  li TMP2, 0	// TMP2 = byte offset of the argument being copied.
   4376     |   cmplwi cr7, TMP3, 1		// (> FF_C) Calling a fast function?
   4377     |    beq cr1, >3
   4378     |2:
   4379     |  addi TMP3, TMP2, 8
   4380     |   lfdx f0, RA, TMP2	// Load argument from its old position.
   4381     |  cmplw cr1, TMP3, NARGS8:RC
   4382     |   stfdx f0, BASE, TMP2	// Store it at the same offset from BASE.
   4383     |  mr TMP2, TMP3
   4384     |  bne cr1, <2	// Loop until all nargs*8 bytes are copied.
   4385     |3:
   4386     |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt	// cr0.eq = (FRAME_TYPE bits clear) AND (ffid > 1).
   4387     |  beq >5
   4388     |4:
   4389     |  ins_callt
   4390     |
   4391     |5:  // Tailcall to a fast function with a Lua frame below.
   4392     |  lwz INS, -4(TMP1)	// INS = instruction word just before the saved PC.
   4393     |  decode_RA8 RA, INS
   4394     |  sub TMP1, BASE, RA
   4395     |  lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)	// TMP1 = function object of the frame below.
   4396     |  lwz TMP1, LFUNC:TMP1->pc
   4397     |  lwz KBASE, PC2PROTO(k)(TMP1)	// Need to prepare KBASE.
   4398     |  b <4
   4399     |
   4400     |7:  // Tailcall from a vararg function.
   4401     |  andix. TMP0, TMP2, FRAME_TYPEP
   4402     |  bne <1				// Vararg frame below?
   4403     |  sub BASE, BASE, TMP2		// Relocate BASE down.
   4404     |  lwz TMP1, FRAME_PC(BASE)	// Reload the frame word from the relocated frame.
   4405     |  andix. TMP0, TMP1, FRAME_TYPE	// Recompute cr0 for the crand at 3:.
   4406     |  b <1
   4407     break;
   4408 
   4409   case BC_ITERC:  /* Generic iterator call: copy callable, state and control var from the three slots below base into the call frame, then call with 2 arguments. */
   4410     |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
   4411     |  mr TMP2, BASE	// TMP2 = caller's BASE.
   4412     |  add BASE, BASE, RA
   4413     |  lwz TMP1, -24(BASE)	// TMP1 = tag word of the callable (slot base-3).
   4414     |   lwz LFUNC:RB, -20(BASE)	// RB = payload word of the callable.
   4415     |    lfd f1, -8(BASE)	// f1 = slot base-1 (control var).
   4416     |    lfd f0, -16(BASE)	// f0 = slot base-2 (state).
   4417     |  stw TMP1, 0(BASE)		// Copy callable.
   4418     |   stw LFUNC:RB, 4(BASE)
   4419     |  checkfunc TMP1
   4420     |    stfd f1, 16(BASE)		// Copy control var.
   4421     |     li NARGS8:RC, 16		// Iterators get 2 arguments.
   4422     |    stfdu f0, 8(BASE)		// Copy state.
   4423     |  bne ->vmeta_call
   4424     |  ins_call
   4425     break;
   4426 
   4427   case BC_ITERN:  /* Specialized table traversal: walk the array part, then the hash part, skipping nil values; on a hit store key/value at RA and take the branch of the following instruction. */
   4428     |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
   4429     |.if JIT
   4430     |  // NYI: add hotloop, record BC_ITERN.
   4431     |.endif
   4432     |  add RA, BASE, RA
   4433     |  lwz TAB:RB, -12(RA)	// RB = payload word of slot base-2 (the table).
   4434     |  lwz RC, -4(RA)			// Get index from control var.
   4435     |  lwz TMP0, TAB:RB->asize
   4436     |  lwz TMP1, TAB:RB->array
   4437     |   addi PC, PC, 4	// Advance PC past the next instruction word.
   4438     |1:  // Traverse array part.
   4439     |  cmplw RC, TMP0
   4440     |   slwi TMP3, RC, 3	// TMP3 = index*8 = byte offset into the array part.
   4441     |  bge >5				// Index points after array part?
   4442     |  lwzx TMP2, TMP1, TMP3	// TMP2 = tag of array element.
   4443     |   lfdx f0, TMP1, TMP3	// f0 = whole array element.
   4444     |  checknil TMP2
   4445     |     lwz INS, -4(PC)	// INS = the instruction word that was just skipped.
   4446     |  beq >4
   4447     |.if DUALNUM
   4448     |   stw RC, 4(RA)	// Key = integer index: payload word.
   4449     |   stw TISNUM, 0(RA)	// Key tag word = TISNUM.
   4450     |.else
   4451     |   tonum_u f1, RC	// Macro defined elsewhere; presumably converts the unsigned index to a double in f1.
   4452     |.endif
   4453     |    addi RC, RC, 1
   4454     |     addis TMP3, PC, -(BCBIAS_J*4 >> 16)
   4455     |  stfd f0, 8(RA)	// Value goes to the slot after the key.
   4456     |     decode_RD4 TMP1, INS
   4457     |    stw RC, -4(RA)			// Update control var.
   4458     |     add PC, TMP1, TMP3	// PC = biased jump target decoded from INS.
   4459     |.if not DUALNUM
   4460     |   stfd f1, 0(RA)	// Key = index as a double.
   4461     |.endif
   4462     |3:
   4463     |  ins_next
   4464     |
   4465     |4:  // Skip holes in array part.
   4466     |  addi RC, RC, 1
   4467     |  b <1
   4468     |
   4469     |5:  // Traverse hash part.
   4470     |  lwz TMP1, TAB:RB->hmask
   4471     |  sub RC, RC, TMP0	// RC = index relative to the start of the hash part.
   4472     |   lwz TMP2, TAB:RB->node
   4473     |6:
   4474     |  cmplw RC, TMP1			// End of iteration? Branch to ITERL+1.
   4475     |   slwi TMP3, RC, 5
   4476     |  bgty <3
   4477     |   slwi RB, RC, 3
   4478     |   sub TMP3, TMP3, RB	// TMP3 = idx*32 - idx*8 = idx*24 (byte offset of the node).
   4479     |  lwzx RB, TMP2, TMP3	// RB = first word of the node, tested for nil below.
   4480     |  lfdx f0, TMP2, TMP3	// f0 = first 8 bytes of the node, stored as the value below.
   4481     |   add NODE:TMP3, TMP2, TMP3	// TMP3 = node address.
   4482     |  checknil RB
   4483     |     lwz INS, -4(PC)	// INS = the instruction word that was skipped.
   4484     |  beq >7
   4485     |   lfd f1, NODE:TMP3->key	// f1 = node key.
   4486     |     addis TMP2, PC, -(BCBIAS_J*4 >> 16)
   4487     |  stfd f0, 8(RA)	// Value goes to the slot after the key.
   4488     |    add RC, RC, TMP0	// Convert back to a combined array+hash index.
   4489     |     decode_RD4 TMP1, INS
   4490     |   stfd f1, 0(RA)	// Key slot.
   4491     |    addi RC, RC, 1
   4492     |     add PC, TMP1, TMP2	// PC = biased jump target decoded from INS.
   4493     |    stw RC, -4(RA)			// Update control var.
   4494     |  b <3
   4495     |
   4496     |7:  // Skip holes in hash part.
   4497     |  addi RC, RC, 1
   4498     |  b <6
   4499     break;
   4500 
   4501   case BC_ISNEXT:  /* Check that the iterator triple is (function with ffid FF_next_N, table, nil); if so initialize the control var and jump, otherwise patch the bytecode to JMP/ITERC. */
   4502     |  // RA = base*8, RD = target (points to ITERN)
   4503     |  add RA, BASE, RA
   4504     |  lwz TMP0, -24(RA)	// TMP0 = tag word of slot base-3 (iterator function).
   4505     |  lwz CFUNC:TMP1, -20(RA)	// TMP1 = payload word of slot base-3.
   4506     |   lwz TMP2, -16(RA)	// TMP2 = tag word of slot base-2 (state).
   4507     |    lwz TMP3, -8(RA)	// TMP3 = tag word of slot base-1 (control var).
   4508     |   cmpwi cr0, TMP2, LJ_TTAB
   4509     |  cmpwi cr1, TMP0, LJ_TFUNC
   4510     |    cmpwi cr6, TMP3, LJ_TNIL
   4511     |  bne cr1, >5	// Not a function: despecialize.
   4512     |  lbz TMP1, CFUNC:TMP1->ffid
   4513     |   crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq	// cr0.eq = state is a table AND control var is nil.
   4514     |  cmpwi cr7, TMP1, FF_next_N
   4515     |    srwi TMP0, RD, 1
   4516     |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq	// ... AND ffid == FF_next_N.
   4517     |    add TMP3, PC, TMP0	// TMP3 = PC + (RD>>1); the jump bias is subtracted below.
   4518     |  bne cr0, >5
   4519     |  lus TMP1, 0xfffe
   4520     |  ori TMP1, TMP1, 0x7fff	// TMP1 = 0xfffe7fff
   4521     |  stw ZERO, -4(RA)			// Initialize control var.
   4522     |  stw TMP1, -8(RA)	// Control var tag word = 0xfffe7fff.
   4523     |    addis PC, TMP3, -(BCBIAS_J*4 >> 16)	// PC = jump target.
   4524     |1:
   4525     |  ins_next
   4526     |5:  // Despecialize bytecode if any of the checks fail.
   4527     |  li TMP0, BC_JMP
   4528     |   li TMP1, BC_ITERC
   4529     |  stb TMP0, -1(PC)	// Patch the byte just before PC (presumably this instruction's opcode) to BC_JMP.
   4530     |    addis PC, TMP3, -(BCBIAS_J*4 >> 16)	// PC = jump target.
   4531     |   stb TMP1, 3(PC)	// Patch byte 3 of the target instruction word (presumably its opcode) to BC_ITERC.
   4532     |  b <1
   4533     break;
   4534 
   4535   case BC_VARG:  /* Copy vararg slots to destination slots: either a fixed number of results (nil-padded) or all of them (setting MULTRES and growing the stack if needed). */
   4536     |  // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
   4537     |  lwz TMP0, FRAME_PC(BASE)	// TMP0 = PC/frame-link word of the current frame.
   4538     |  add RC, BASE, RC
   4539     |   add RA, BASE, RA	// RA = first destination slot.
   4540     |  addi RC, RC, FRAME_VARG
   4541     |   add TMP2, RA, RB
   4542     |  subi TMP3, BASE, 8		// TMP3 = vtop
   4543     |  sub RC, RC, TMP0			// RC = vbase
   4544     |  // Note: RC may now be even _above_ BASE if nargs was < numparams.
   4545     |  cmplwi cr1, RB, 0	// cr1.eq = RB is 0, i.e. copy all varargs.
   4546     |.if PPE
   4547     |   sub TMP1, TMP3, RC
   4548     |   cmpwi TMP1, 0
   4549     |.else
   4550     |   sub. TMP1, TMP3, RC	// TMP1 = vtop - vbase = byte size of the vararg area; sets cr0.
   4551     |.endif
   4552     |  beq cr1, >5			// Copy all varargs?
   4553     |   subi TMP2, TMP2, 16	// TMP2 = address of the last destination slot.
   4554     |   ble >2				// No vararg slots?
   4555     |1:  // Copy vararg slots to destination slots.
   4556     |  lfd f0, 0(RC)
   4557     |   addi RC, RC, 8
   4558     |  stfd f0, 0(RA)
   4559     |  cmplw RA, TMP2
   4560     |   cmplw cr1, RC, TMP3
   4561     |  bge >3				// All destination slots filled?
   4562     |    addi RA, RA, 8
   4563     |   blt cr1, <1			// More vararg slots?
   4564     |2:  // Fill up remainder with nil.
   4565     |  stw TISNIL, 0(RA)	// Only the tag word is written.
   4566     |  cmplw RA, TMP2
   4567     |   addi RA, RA, 8
   4568     |  blt <2
   4569     |3:
   4570     |  ins_next
   4571     |
   4572     |5:  // Copy all varargs.
   4573     |  lwz TMP0, L->maxstack
   4574     |   li MULTRES, 8			// MULTRES = (0+1)*8
   4575     |  bley <3				// No vararg slots?
   4576     |  add TMP2, RA, TMP1	// TMP2 = end of the destination area.
   4577     |  cmplw TMP2, TMP0
   4578     |   addi MULTRES, TMP1, 8	// MULTRES = vararg byte size + 8.
   4579     |  bgt >7	// Destination would exceed L->maxstack: grow the stack.
   4580     |6:
   4581     |  lfd f0, 0(RC)
   4582     |   addi RC, RC, 8
   4583     |  stfd f0, 0(RA)
   4584     |  cmplw RC, TMP3
   4585     |   addi RA, RA, 8
   4586     |  blt <6				// More vararg slots?
   4587     |  b <3
   4588     |
   4589     |7:  // Grow stack for varargs.
   4590     |  mr CARG1, L
   4591     |   stp RA, L->top
   4592     |  sub SAVE0, RC, BASE		// Need delta, because BASE may change.
   4593     |   stp BASE, L->base	// Publish BASE for the C call.
   4594     |  sub RA, RA, BASE	// Keep RA as an offset from BASE across the call.
   4595     |   stw PC, SAVE_PC	// Save PC for the C call.
   4596     |  srwi CARG2, TMP1, 3	// CARG2 = number of vararg slots.
   4597     |  bl extern lj_state_growstack	// (lua_State *L, int n)
   4598     |  lp BASE, L->base	// Reload BASE after the call.
   4599     |  add RA, BASE, RA	// Rebuild RA from the new BASE.
   4600     |  add RC, BASE, SAVE0	// Rebuild RC (vbase) from the new BASE.
   4601     |  subi TMP3, BASE, 8	// Rebuild TMP3 (vtop) from the new BASE.
   4602     |  b <6
   4603     break;
   4604 
   4605   /* -- Returns ----------------------------------------------------------- */
   4606 
   4607   case BC_RETM:
   4608     |  // RA = results*8, RD = extra_nresults*8
   4609     |  add RD, RD, MULTRES		// MULTRES >= 8, so RD >= 8.
   4610     |  // Fall through. Assumes BC_RET follows.
   4611     break;
   4612 
   4613   case BC_RET:
   4614     |  // RA = results*8, RD = (nresults+1)*8
   4615     |  lwz PC, FRAME_PC(BASE)
   4616     |   add RA, BASE, RA
   4617     |    mr MULTRES, RD
   4618     |1:
   4619     |  andix. TMP0, PC, FRAME_TYPE
   4620     |   xori TMP1, PC, FRAME_VARG
   4621     |  bne ->BC_RETV_Z
   4622     |
   4623     |->BC_RET_Z:
   4624     |  // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
   4625     |   lwz INS, -4(PC)
   4626     |  cmpwi RD, 8
   4627     |   subi TMP2, BASE, 8
   4628     |   subi RC, RD, 8
   4629     |   decode_RB8 RB, INS
   4630     |  beq >3
   4631     |   li TMP1, 0
   4632     |2:
   4633     |  addi TMP3, TMP1, 8
   4634     |   lfdx f0, RA, TMP1
   4635     |  cmpw TMP3, RC
   4636     |   stfdx f0, TMP2, TMP1
   4637     |  beq >3
   4638     |  addi TMP1, TMP3, 8
   4639     |   lfdx f1, RA, TMP3
   4640     |  cmpw TMP1, RC
   4641     |   stfdx f1, TMP2, TMP3
   4642     |  bne <2
   4643     |3:
   4644     |5:
   4645     |  cmplw RB, RD
   4646     |   decode_RA8 RA, INS
   4647     |  bgt >6
   4648     |   sub BASE, TMP2, RA
   4649     |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
   4650     |  ins_next1
   4651     |  lwz TMP1, LFUNC:TMP1->pc
   4652     |  lwz KBASE, PC2PROTO(k)(TMP1)
   4653     |  ins_next2
   4654     |
   4655     |6:  // Fill up results with nil.
   4656     |  subi TMP1, RD, 8
   4657     |   addi RD, RD, 8
   4658     |  stwx TISNIL, TMP2, TMP1
   4659     |  b <5
   4660     |
   4661     |->BC_RETV_Z:  // Non-standard return case.
   4662     |  andix. TMP2, TMP1, FRAME_TYPEP
   4663     |  bne ->vm_return
   4664     |  // Return from vararg function: relocate BASE down.
   4665     |  sub BASE, BASE, TMP1
   4666     |  lwz PC, FRAME_PC(BASE)
   4667     |  b <1
   4668     break;
   4669 
   4670   case BC_RET0: case BC_RET1:
   4671     |  // RA = results*8, RD = (nresults+1)*8
   4672     |  lwz PC, FRAME_PC(BASE)
   4673     |   add RA, BASE, RA
   4674     |    mr MULTRES, RD
   4675     |  andix. TMP0, PC, FRAME_TYPE
   4676     |   xori TMP1, PC, FRAME_VARG
   4677     |  bney ->BC_RETV_Z
   4678     |
   4679     |  lwz INS, -4(PC)
   4680     |   subi TMP2, BASE, 8
   4681     |  decode_RB8 RB, INS
   4682     if (op == BC_RET1) {
   4683       |  lfd f0, 0(RA)
   4684       |  stfd f0, 0(TMP2)
   4685     }
   4686     |5:
   4687     |  cmplw RB, RD
   4688     |   decode_RA8 RA, INS
   4689     |  bgt >6
   4690     |   sub BASE, TMP2, RA
   4691     |  lwz LFUNC:TMP1, FRAME_FUNC(BASE)
   4692     |  ins_next1
   4693     |  lwz TMP1, LFUNC:TMP1->pc
   4694     |  lwz KBASE, PC2PROTO(k)(TMP1)
   4695     |  ins_next2
   4696     |
   4697     |6:  // Fill up results with nil.
   4698     |  subi TMP1, RD, 8
   4699     |   addi RD, RD, 8
   4700     |  stwx TISNIL, TMP2, TMP1
   4701     |  b <5
   4702     break;
   4703 
   4704   /* -- Loops and branches ------------------------------------------------ */
   4705 
   4706   case BC_FORL:
   4707     |.if JIT
   4708     |  hotloop
   4709     |.endif
   4710     |  // Fall through. Assumes BC_IFORL follows.
   4711     break;
   4712 
   4713   case BC_JFORI:
   4714   case BC_JFORL:
   4715 #if !LJ_HASJIT
   4716     break;
   4717 #endif
   4718   case BC_FORI:
   4719   case BC_IFORL:
   4720     |  // RA = base*8, RD = target (after end of loop or start of loop)
   4721     vk = (op == BC_IFORL || op == BC_JFORL);
   4722     |.if DUALNUM
   4723     |  // Integer loop.
   4724     |  lwzux TMP1, RA, BASE
   4725     |   lwz CARG1, FORL_IDX*8+4(RA)
   4726     |  cmplw cr0, TMP1, TISNUM
   4727     if (vk) {
   4728       |   lwz CARG3, FORL_STEP*8+4(RA)
   4729       |  bne >9
   4730       |.if GPR64
   4731       |  // Need to check overflow for (a<<32) + (b<<32).
   4732       |  rldicr TMP0, CARG1, 32, 31
   4733       |  rldicr TMP2, CARG3, 32, 31
   4734       |  add CARG1, CARG1, CARG3
   4735       |  addo. TMP0, TMP0, TMP2
   4736       |.else
   4737       |  addo. CARG1, CARG1, CARG3
   4738       |.endif
   4739       |    cmpwi cr6, CARG3, 0
   4740       |   lwz CARG2, FORL_STOP*8+4(RA)
   4741       |  bso >6
   4742       |4:
   4743       |  stw CARG1, FORL_IDX*8+4(RA)
   4744     } else {
   4745       |  lwz TMP3, FORL_STEP*8(RA)
   4746       |   lwz CARG3, FORL_STEP*8+4(RA)
   4747       |  lwz TMP2, FORL_STOP*8(RA)
   4748       |   lwz CARG2, FORL_STOP*8+4(RA)
   4749       |  cmplw cr7, TMP3, TISNUM
   4750       |  cmplw cr1, TMP2, TISNUM
   4751       |  crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
   4752       |  crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
   4753       |    cmpwi cr6, CARG3, 0
   4754       |  bne >9
   4755     }
   4756     |    blt cr6, >5
   4757     |  cmpw CARG1, CARG2
   4758     |1:
   4759     |   stw TISNUM, FORL_EXT*8(RA)
   4760     if (op != BC_JFORL) {
   4761       |  srwi RD, RD, 1
   4762     }
   4763     |   stw CARG1, FORL_EXT*8+4(RA)
   4764     if (op != BC_JFORL) {
   4765       |  add RD, PC, RD
   4766     }
   4767     if (op == BC_FORI) {
   4768       |  bgt >3  // See FP loop below.
   4769     } else if (op == BC_JFORI) {
   4770       |  addis PC, RD, -(BCBIAS_J*4 >> 16)
   4771       |  bley >7
   4772     } else if (op == BC_IFORL) {
   4773       |  bgt >2
   4774       |  addis PC, RD, -(BCBIAS_J*4 >> 16)
   4775     } else {
   4776       |  bley =>BC_JLOOP
   4777     }
   4778     |2:
   4779     |  ins_next
   4780     |5:  // Invert check for negative step.
   4781     |  cmpw CARG2, CARG1
   4782     |  b <1
   4783     if (vk) {
   4784       |6:  // Potential overflow.
   4785       |  checkov TMP0, <4		// Ignore unrelated overflow.
   4786       |  b <2
   4787     }
   4788     |.endif
   4789     if (vk) {
   4790       |.if DUALNUM
   4791       |9:  // FP loop.
   4792       |  lfd f1, FORL_IDX*8(RA)
   4793       |.else
   4794       |  lfdux f1, RA, BASE
   4795       |.endif
   4796       |  lfd f3, FORL_STEP*8(RA)
   4797       |  lfd f2, FORL_STOP*8(RA)
   4798       |   lwz TMP3, FORL_STEP*8(RA)
   4799       |  fadd f1, f1, f3
   4800       |  stfd f1, FORL_IDX*8(RA)
   4801     } else {
   4802       |.if DUALNUM
   4803       |9:  // FP loop.
   4804       |.else
   4805       |  lwzux TMP1, RA, BASE
   4806       |  lwz TMP3, FORL_STEP*8(RA)
   4807       |  lwz TMP2, FORL_STOP*8(RA)
   4808       |  cmplw cr0, TMP1, TISNUM
   4809       |  cmplw cr7, TMP3, TISNUM
   4810       |  cmplw cr1, TMP2, TISNUM
   4811       |.endif
   4812       |   lfd f1, FORL_IDX*8(RA)
   4813       |  crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
   4814       |  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
   4815       |   lfd f2, FORL_STOP*8(RA)
   4816       |  bge ->vmeta_for
   4817     }
   4818     |  cmpwi cr6, TMP3, 0
   4819     if (op != BC_JFORL) {
   4820       |  srwi RD, RD, 1
   4821     }
   4822     |   stfd f1, FORL_EXT*8(RA)
   4823     if (op != BC_JFORL) {
   4824       |  add RD, PC, RD
   4825     }
   4826     |  fcmpu cr0, f1, f2
   4827     if (op == BC_JFORI) {
   4828       |  addis PC, RD, -(BCBIAS_J*4 >> 16)
   4829     }
   4830     |  blt cr6, >5
   4831     if (op == BC_FORI) {
   4832       |  bgt >3
   4833     } else if (op == BC_IFORL) {
   4834       |.if DUALNUM
   4835       |  bgty <2
   4836       |.else
   4837       |  bgt >2
   4838       |.endif
   4839       |1:
   4840       |  addis PC, RD, -(BCBIAS_J*4 >> 16)
   4841     } else if (op == BC_JFORI) {
   4842       |  bley >7
   4843     } else {
   4844       |  bley =>BC_JLOOP
   4845     }
   4846     |.if DUALNUM
   4847     |  b <2
   4848     |.else
   4849     |2:
   4850     |  ins_next
   4851     |.endif
   4852     |5:  // Negative step.
   4853     if (op == BC_FORI) {
   4854       |  bge <2
   4855       |3:  // Used by integer loop, too.
   4856       |  addis PC, RD, -(BCBIAS_J*4 >> 16)
   4857     } else if (op == BC_IFORL) {
   4858       |  bgey <1
   4859     } else if (op == BC_JFORI) {
   4860       |  bgey >7
   4861     } else {
   4862       |  bgey =>BC_JLOOP
   4863     }
   4864     |  b <2
   4865     if (op == BC_JFORI) {
   4866       |7:
   4867       |  lwz INS, -4(PC)
   4868       |  decode_RD8 RD, INS
   4869       |  b =>BC_JLOOP
   4870     }
   4871     break;
   4872 
   4873   case BC_ITERL:
   4874     |.if JIT
   4875     |  hotloop
   4876     |.endif
   4877     |  // Fall through. Assumes BC_IITERL follows.
   4878     break;
   4879 
   4880   case BC_JITERL:
   4881 #if !LJ_HASJIT
   4882     break;
   4883 #endif
   4884   case BC_IITERL:
   4885     |  // RA = base*8, RD = target
   4886     |  lwzux TMP1, RA, BASE
   4887     |   lwz TMP2, 4(RA)
   4888     |  checknil TMP1; beq >1		// Stop if iterator returned nil.
   4889     if (op == BC_JITERL) {
   4890       |  stw TMP1, -8(RA)
   4891       |   stw TMP2, -4(RA)
   4892       |  b =>BC_JLOOP
   4893     } else {
   4894       |  branch_RD			// Otherwise save control var + branch.
   4895       |  stw TMP1, -8(RA)
   4896       |   stw TMP2, -4(RA)
   4897     }
   4898     |1:
   4899     |  ins_next
   4900     break;
   4901 
   4902   case BC_LOOP:
   4903     |  // RA = base*8, RD = target (loop extent)
   4904     |  // Note: RA/RD is only used by trace recorder to determine scope/extent
   4905     |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
   4906     |.if JIT
   4907     |  hotloop
   4908     |.endif
   4909     |  // Fall through. Assumes BC_ILOOP follows.
   4910     break;
   4911 
   4912   case BC_ILOOP:
   4913     |  // RA = base*8, RD = target (loop extent)
   4914     |  ins_next
   4915     break;
   4916 
   4917   case BC_JLOOP:
   4918     |.if JIT
   4919     |  // RA = base*8 (ignored), RD = traceno*8
   4920     |  lwz TMP1, DISPATCH_J(trace)(DISPATCH)
   4921     |  srwi RD, RD, 1
   4922     |  // Traces on PPC don't store the trace number, so use 0.
   4923     |   stw ZERO, DISPATCH_GL(vmstate)(DISPATCH)
   4924     |  lwzx TRACE:TMP2, TMP1, RD
   4925     |  clrso TMP1
   4926     |  lp TMP2, TRACE:TMP2->mcode
   4927     |   stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
   4928     |  mtctr TMP2
   4929     |   addi JGL, DISPATCH, GG_DISP2G+32768
   4930     |   stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
   4931     |  bctr
   4932     |.endif
   4933     break;
   4934 
   4935   case BC_JMP:
   4936     |  // RA = base*8 (only used by trace recorder), RD = target
   4937     |  branch_RD
   4938     |  ins_next
   4939     break;
   4940 
   4941   /* -- Function headers -------------------------------------------------- */
   4942 
   4943   case BC_FUNCF:
   4944     |.if JIT
   4945     |  hotcall
   4946     |.endif
   4947   case BC_FUNCV:  /* NYI: compiled vararg functions. */
   4948     |  // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
   4949     break;
   4950 
   4951   case BC_JFUNCF:
   4952 #if !LJ_HASJIT
   4953     break;
   4954 #endif
   4955   case BC_IFUNCF:
   4956     |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
   4957     |  lwz TMP2, L->maxstack
   4958     |   lbz TMP1, -4+PC2PROTO(numparams)(PC)
   4959     |    lwz KBASE, -4+PC2PROTO(k)(PC)
   4960     |  cmplw RA, TMP2
   4961     |   slwi TMP1, TMP1, 3
   4962     |  bgt ->vm_growstack_l
   4963     if (op != BC_JFUNCF) {
   4964       |  ins_next1
   4965     }
   4966     |2:
   4967     |  cmplw NARGS8:RC, TMP1		// Check for missing parameters.
   4968     |  blt >3
   4969     if (op == BC_JFUNCF) {
   4970       |  decode_RD8 RD, INS
   4971       |  b =>BC_JLOOP
   4972     } else {
   4973       |  ins_next2
   4974     }
   4975     |
   4976     |3:  // Clear missing parameters.
   4977     |  stwx TISNIL, BASE, NARGS8:RC
   4978     |  addi NARGS8:RC, NARGS8:RC, 8
   4979     |  b <2
   4980     break;
   4981 
   4982   case BC_JFUNCV:
   4983 #if !LJ_HASJIT
   4984     break;
   4985 #endif
   4986     |  NYI  // NYI: compiled vararg functions
   4987     break;  /* NYI: compiled vararg functions. */
   4988 
   4989   case BC_IFUNCV:
   4990     |  // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
   4991     |  lwz TMP2, L->maxstack
   4992     |   add TMP1, BASE, RC
   4993     |  add TMP0, RA, RC
   4994     |   stw LFUNC:RB, 4(TMP1)		// Store copy of LFUNC.
   4995     |   addi TMP3, RC, 8+FRAME_VARG
   4996     |    lwz KBASE, -4+PC2PROTO(k)(PC)
   4997     |  cmplw TMP0, TMP2
   4998     |   stw TMP3, 0(TMP1)		// Store delta + FRAME_VARG.
   4999     |  bge ->vm_growstack_l
   5000     |  lbz TMP2, -4+PC2PROTO(numparams)(PC)
   5001     |   mr RA, BASE
   5002     |   mr RC, TMP1
   5003     |  ins_next1
   5004     |  cmpwi TMP2, 0
   5005     |   addi BASE, TMP1, 8
   5006     |  beq >3
   5007     |1:
   5008     |  cmplw RA, RC			// Less args than parameters?
   5009     |   lwz TMP0, 0(RA)
   5010     |   lwz TMP3, 4(RA)
   5011     |  bge >4
   5012     |    stw TISNIL, 0(RA)		// Clear old fixarg slot (help the GC).
   5013     |    addi RA, RA, 8
   5014     |2:
   5015     |  addic. TMP2, TMP2, -1
   5016     |   stw TMP0, 8(TMP1)
   5017     |   stw TMP3, 12(TMP1)
   5018     |    addi TMP1, TMP1, 8
   5019     |  bne <1
   5020     |3:
   5021     |  ins_next2
   5022     |
   5023     |4:  // Clear missing parameters.
   5024     |  li TMP0, LJ_TNIL
   5025     |  b <2
   5026     break;
   5027 
   5028   case BC_FUNCC:
   5029   case BC_FUNCCW:
   5030     |  // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
   5031     if (op == BC_FUNCC) {
   5032       |  lp RD, CFUNC:RB->f
   5033     } else {
   5034       |  lp RD, DISPATCH_GL(wrapf)(DISPATCH)
   5035     }
   5036     |   add TMP1, RA, NARGS8:RC
   5037     |   lwz TMP2, L->maxstack
   5038     |  .toc lp TMP3, 0(RD)
   5039     |    add RC, BASE, NARGS8:RC
   5040     |   stp BASE, L->base
   5041     |   cmplw TMP1, TMP2
   5042     |    stp RC, L->top
   5043     |     li_vmstate C
   5044     |.if TOC
   5045     |  mtctr TMP3
   5046     |.else
   5047     |  mtctr RD
   5048     |.endif
   5049     if (op == BC_FUNCCW) {
   5050       |  lp CARG2, CFUNC:RB->f
   5051     }
   5052     |  mr CARG1, L
   5053     |   bgt ->vm_growstack_c		// Need to grow stack.
   5054     |  .toc lp TOCREG, TOC_OFS(RD)
   5055     |  .tocenv lp ENVREG, ENV_OFS(RD)
   5056     |     st_vmstate
   5057     |  bctrl				// (lua_State *L [, lua_CFunction f])
   5058     |  // Returns nresults.
   5059     |  lp BASE, L->base
   5060     |  .toc ld TOCREG, SAVE_TOC
   5061     |   slwi RD, CRET1, 3
   5062     |  lp TMP1, L->top
   5063     |    li_vmstate INTERP
   5064     |  lwz PC, FRAME_PC(BASE)		// Fetch PC of caller.
   5065     |    stw L, DISPATCH_GL(cur_L)(DISPATCH)
   5066     |   sub RA, TMP1, RD		// RA = L->top - nresults*8
   5067     |    st_vmstate
   5068     |  b ->vm_returnc
   5069     break;
   5070 
   5071   /* ---------------------------------------------------------------------- */
   5072 
   5073   default:
   5074     fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
   5075     exit(2);
   5076     break;
   5077   }
   5078 }
   5079 
   5080 static int build_backend(BuildCtx *ctx)
   5081 {
   5082   int op;
   5083 
   5084   dasm_growpc(Dst, BC__MAX);
   5085 
   5086   build_subroutines(ctx);
   5087 
   5088   |.code_op
   5089   for (op = 0; op < BC__MAX; op++)
   5090     build_ins(ctx, (BCOp)op, op);
   5091 
   5092   return BC__MAX;
   5093 }
   5094 
   5095 /* Emit pseudo frame-info for all assembler functions.
        **
        ** Writes hand-encoded DWARF call frame information (CIEs and FDEs) as
        ** assembler text to ctx->fp. Output is only produced when ctx->mode is
        ** BUILD_elfasm; every other build mode emits nothing.
        **
        ** Sections written:
        **   .debug_frame  always.
        **   .eh_frame     unless LJ_NO_UNWIND is set. Its first CIE names
        **                 lj_err_unwind_dwarf as the personality routine.
        ** In each section the first FDE starts at .Lbegin and spans fcofs
        ** bytes, i.e. the code in front of lj_vm_ffi_call. With LJ_HASFFI a
        ** second FDE starts at lj_vm_ffi_call and spans ctx->codesz - fcofs
        ** bytes, i.e. everything from there to the end of the code.
        **
        ** The raw .byte values are DWARF CFA opcodes:
        **   0xc  DW_CFA_def_cfa           0xd  DW_CFA_def_cfa_register
        **   0xe  DW_CFA_def_cfa_offset    0x5  DW_CFA_offset_extended
        **   0x11 DW_CFA_offset_extended_sf
        **   0x80+n DW_CFA_offset for register column n
        ** Save-slot offsets are factored by the CIE data alignment (-4).
        ** NOTE(review): columns 65 and 70 are presumably LR and a CR field in
        ** the PPC DWARF register numbering -- confirm against the ABI.
        */
   5096 static void emit_asm_debug(BuildCtx *ctx)
   5097 {
          /* Byte offset of lj_vm_ffi_call from the start of the generated code. */
   5098   int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
   5099   int i;
   5100   switch (ctx->mode) {
   5101   case BUILD_elfasm:
   5102     fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
            /* CIE 0: version 1, no augmentation, code alignment 1, data
            ** alignment -4, return address column 65. Initial rule: CFA is
            ** register column 1 plus offset 0.
            */
   5103     fprintf(ctx->fp,
   5104 	".Lframe0:\n"
   5105 	"\t.long .LECIE0-.LSCIE0\n"
   5106 	".LSCIE0:\n"
   5107 	"\t.long 0xffffffff\n"
   5108 	"\t.byte 0x1\n"
   5109 	"\t.string \"\"\n"
   5110 	"\t.uleb128 0x1\n"
   5111 	"\t.sleb128 -4\n"
   5112 	"\t.byte 65\n"
   5113 	"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
   5114 	"\t.align 2\n"
   5115 	".LECIE0:\n\n");
            /* FDE 0: fcofs bytes starting at .Lbegin. Sets the CFA offset to
            ** CFRAME_SIZE, then records the save slots of columns 65 and 70.
            */
   5116     fprintf(ctx->fp,
   5117 	".LSFDE0:\n"
   5118 	"\t.long .LEFDE0-.LASFDE0\n"
   5119 	".LASFDE0:\n"
   5120 	"\t.long .Lframe0\n"
   5121 	"\t.long .Lbegin\n"
   5122 	"\t.long %d\n"
   5123 	"\t.byte 0xe\n\t.uleb128 %d\n"
   5124 	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
   5125 	"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
   5126 	fcofs, CFRAME_SIZE);
            /* Two DW_CFA_offset records per iteration: column i and column
            ** 32+i (presumably GPR r<i> and FPR f<i>; confirm against the PPC
            ** DWARF register numbering), for i = 14..31.
            */
   5127     for (i = 14; i <= 31; i++)
   5128       fprintf(ctx->fp,
   5129 	"\t.byte %d\n\t.uleb128 %d\n"
   5130 	"\t.byte %d\n\t.uleb128 %d\n",
   5131 	0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
   5132     fprintf(ctx->fp,
   5133 	"\t.align 2\n"
   5134 	".LEFDE0:\n\n");
   5135 #if LJ_HASFFI
            /* FDE 1: lj_vm_ffi_call up to the end of the code. Records the
            ** save slots of columns 65 and 14 (0x8e = DW_CFA_offset, column
            ** 14), then makes column 14 the CFA register. The PS3 build
            ** references the symbol with a leading dot.
            */
   5136     fprintf(ctx->fp,
   5137 	".LSFDE1:\n"
   5138 	"\t.long .LEFDE1-.LASFDE1\n"
   5139 	".LASFDE1:\n"
   5140 	"\t.long .Lframe0\n"
   5141 #if LJ_TARGET_PS3
   5142 	"\t.long .lj_vm_ffi_call\n"
   5143 #else
   5144 	"\t.long lj_vm_ffi_call\n"
   5145 #endif
   5146 	"\t.long %d\n"
   5147 	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
   5148 	"\t.byte 0x8e\n\t.uleb128 2\n"
   5149 	"\t.byte 0xd\n\t.uleb128 0xe\n"
   5150 	"\t.align 2\n"
   5151 	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
   5152 #endif
   5153 #if !LJ_NO_UNWIND
   5154     fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
            /* CIE 1 (.eh_frame): augmentation "zPR" with 6 bytes of data --
            ** the personality pointer encoding, a 4-byte pc-relative pointer
            ** to lj_err_unwind_dwarf and the FDE pointer encoding.
            */
   5155     fprintf(ctx->fp,
   5156 	".Lframe1:\n"
   5157 	"\t.long .LECIE1-.LSCIE1\n"
   5158 	".LSCIE1:\n"
   5159 	"\t.long 0\n"
   5160 	"\t.byte 0x1\n"
   5161 	"\t.string \"zPR\"\n"
   5162 	"\t.uleb128 0x1\n"
   5163 	"\t.sleb128 -4\n"
   5164 	"\t.byte 65\n"
   5165 	"\t.uleb128 6\n"			/* augmentation length */
   5166 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
   5167 	"\t.long lj_err_unwind_dwarf-.\n"
   5168 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
   5169 	"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
   5170 	"\t.align 2\n"
   5171 	".LECIE1:\n\n");
            /* FDE 2: same rules as FDE 0, but the CIE pointer is the distance
            ** back to .Lframe1 and the start address is pc-relative.
            */
   5172     fprintf(ctx->fp,
   5173 	".LSFDE2:\n"
   5174 	"\t.long .LEFDE2-.LASFDE2\n"
   5175 	".LASFDE2:\n"
   5176 	"\t.long .LASFDE2-.Lframe1\n"
   5177 	"\t.long .Lbegin-.\n"
   5178 	"\t.long %d\n"
   5179 	"\t.uleb128 0\n"			/* augmentation length */
   5180 	"\t.byte 0xe\n\t.uleb128 %d\n"
   5181 	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
   5182 	"\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
   5183 	fcofs, CFRAME_SIZE);
            /* Same per-register save records as in the .debug_frame FDE. */
   5184     for (i = 14; i <= 31; i++)
   5185       fprintf(ctx->fp,
   5186 	"\t.byte %d\n\t.uleb128 %d\n"
   5187 	"\t.byte %d\n\t.uleb128 %d\n",
   5188 	0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
   5189     fprintf(ctx->fp,
   5190 	"\t.align 2\n"
   5191 	".LEFDE2:\n\n");
   5192 #if LJ_HASFFI
            /* CIE 2 (.eh_frame): augmentation "zR" only, i.e. an FDE pointer
            ** encoding but no personality routine. Used by the FDE for
            ** lj_vm_ffi_call below.
            */
   5193     fprintf(ctx->fp,
   5194 	".Lframe2:\n"
   5195 	"\t.long .LECIE2-.LSCIE2\n"
   5196 	".LSCIE2:\n"
   5197 	"\t.long 0\n"
   5198 	"\t.byte 0x1\n"
   5199 	"\t.string \"zR\"\n"
   5200 	"\t.uleb128 0x1\n"
   5201 	"\t.sleb128 -4\n"
   5202 	"\t.byte 65\n"
   5203 	"\t.uleb128 1\n"			/* augmentation length */
   5204 	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
   5205 	"\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
   5206 	"\t.align 2\n"
   5207 	".LECIE2:\n\n");
            /* FDE 3: same rules as FDE 1, with a pc-relative start address. */
   5208     fprintf(ctx->fp,
   5209 	".LSFDE3:\n"
   5210 	"\t.long .LEFDE3-.LASFDE3\n"
   5211 	".LASFDE3:\n"
   5212 	"\t.long .LASFDE3-.Lframe2\n"
   5213 	"\t.long lj_vm_ffi_call-.\n"
   5214 	"\t.long %d\n"
   5215 	"\t.uleb128 0\n"			/* augmentation length */
   5216 	"\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
   5217 	"\t.byte 0x8e\n\t.uleb128 2\n"
   5218 	"\t.byte 0xd\n\t.uleb128 0xe\n"
   5219 	"\t.align 2\n"
   5220 	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
   5221 #endif
   5222 #endif
   5223     break;
          /* All other build modes: no frame info is emitted. */
   5224   default:
   5225     break;
   5226   }
   5227 }
   5228