vm_ppc.dasc (140359B)
|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
|
|.arch ppc
|.section code_op, code_sub
|
|.actionlist build_actionlist
|.globals GLOB_
|.globalnames globnames
|.externnames extnames
|
|// Note: The ragged indentation of the instructions is intentional.
|//       The starting columns indicate data dependencies.
|
|//-----------------------------------------------------------------------
|
|// DynASM defines used by the PPC port:
|//
|// P64     64 bit pointers (only for GPR64 testing).
|//         Note: see vm_ppc64.dasc for a full PPC64 _LP64 port.
|// GPR64   64 bit registers (but possibly 32 bit pointers, e.g. PS3).
|//         Affects reg saves, stack layout, carry/overflow/dot flags etc.
|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
|// TOC     Need table of contents (64 bit or 32 bit variant, e.g. PS3).
|//         Function pointers are really a struct: code, TOC, env (optional).
|// TOCENV  Function pointers have an environment pointer, too (not on PS3).
|// PPE     Power Processor Element of Cell (PS3) or Xenon (Xbox 360).
|//         Must avoid (slow) micro-coded instructions.
|
|// Pointer-sized memory access macros:
|//   lpx a, b, c   indexed pointer load  (ldx with P64, lwzx otherwise)
|//   lp  a, b      pointer load          (ld  with P64, lwz  otherwise)
|//   stp a, b      pointer store         (std with P64, stw  otherwise)
|// decode_OPP is the opcode decoder used for dispatch: decode_OP8 with P64,
|// decode_OP4 otherwise. P64 also forces TOC and TOCENV on.
|.if P64
|.define TOC, 1
|.define TOCENV, 1
|.macro lpx, a, b, c; ldx a, b, c; .endmacro
|.macro lp, a, b; ld a, b; .endmacro
|.macro stp, a, b; std a, b; .endmacro
|.define decode_OPP, decode_OP8
|.if FFI
|// Missing: Calling conventions, 64 bit regs, TOC.
|.error lib_ffi not yet implemented for PPC64
|.endif
|.else
|.macro lpx, a, b, c; lwzx a, b, c; .endmacro
|.macro lp, a, b; lwz a, b; .endmacro
|.macro stp, a, b; stw a, b; .endmacro
|.define decode_OPP, decode_OP4
|.endif
|
|// Convenience macros for TOC handling.
|//   blex target   call an external function through its PLT entry
|//                 (followed by a nop when TOC is defined).
|//   .toc a, b     emit the instruction "a, b" only when TOC is defined.
|//   .tocenv a, b  emit the instruction "a, b" only when TOCENV is set.
|//   .gpr64 a, b   emit the instruction "a, b" only when GPR64 is set.
|.if TOC
|// Linker needs a TOC patch area for every external call relocation.
|.macro blex, target; bl extern target@plt; nop; .endmacro
|.macro .toc, a, b; a, b; .endmacro
|.if P64
|.define TOC_OFS, 8
|.define ENV_OFS, 16
|.else
|.define TOC_OFS, 4
|.define ENV_OFS, 8
|.endif
|.else  // No TOC.
|.macro blex, target; bl extern target@plt; .endmacro
|.macro .toc, a, b; .endmacro
|.endif
|.macro .tocenv, a, b; .if TOCENV; a, b; .endif; .endmacro
|
|.macro .gpr64, a, b; .if GPR64; a, b; .endif; .endmacro
|
|// andix. y, a, i: y = a & i, with the result compared against zero in cr0.
|// The PPE variant replaces andi. by rlwinm + cmpwi. Its mask runs from the
|// highest to the lowest set bit of i (lj_fls/lj_ffs).
|// NOTE(review): that presumably makes the PPE form equivalent to andi. only
|// for masks whose set bits are contiguous -- confirm for new callers.
|.macro andix., y, a, i
|.if PPE
|  rlwinm y, a, 0, 31-lj_fls(i), 31-lj_ffs(i)
|  cmpwi y, 0
|.else
|  andi. y, a, i
|.endif
|.endmacro
|
|// clrso reg: clear the sticky overflow state in XER.
|// PPE: writes zero to XER through reg (reg is clobbered).
|// Otherwise: mcrxr moves the XER flags to cr0 (cr0 is clobbered, reg unused).
|.macro clrso, reg
|.if PPE
|  li reg, 0
|  mtxer reg
|.else
|  mcrxr cr0
|.endif
|.endmacro
|
|// checkov reg, noov: branch to noov if the preceding overflow-recording
|// instruction did not overflow; falls through on overflow.
|// PPE: reads XER into reg, doubles it so the bit below the top bit lands in
|// the sign position, compares against zero, then clears XER via reg.
|// Otherwise: mcrxr cr0 + bley. Both variants overwrite cr0.
|.macro checkov, reg, noov
|.if PPE
|  mfxer reg
|  add reg, reg, reg
|  cmpwi reg, 0
|  li reg, 0
|  mtxer reg
|  bgey noov
|.else
|  mcrxr cr0
|  bley noov
|.endif
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Fixed register assignments for the interpreter.
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE,      r14  // Base of current Lua stack frame.
|.define KBASE,     r15  // Constants of current Lua function.
|.define PC,        r16  // Next PC.
|.define DISPATCH,  r17  // Opcode dispatch table.
|.define LREG,      r18  // Register holding lua_State (also in SAVE_L).
|.define MULTRES,   r19  // Size of multi-result: (nresults+1)*8.
|.define JGL,       r31  // On-trace: global_State + 32768.
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
|// These are (re)loaded by the VM entry points and by vm_unwind_ff_eh.
|.define TISNUM,    r22
|.define TISNIL,    r23
|.define ZERO,      r24
|.define TOBIT,     f30  // 2^52 + 2^51.
|.define TONUM,     f31  // 2^52 + 2^51 + 2^31.
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA,        r20  // Callee-save.
|.define RB,        r10
|.define RC,        r11
|.define RD,        r12
|.define INS,       r7   // Overlaps CARG5.
|
|.define TMP0,      r0
|.define TMP1,      r8
|.define TMP2,      r9
|.define TMP3,      r6   // Overlaps CARG4.
|
|// Saved temporaries.
|.define SAVE0,     r21
|
|// Calling conventions.
|.define CARG1,     r3
|.define CARG2,     r4
|.define CARG3,     r5
|.define CARG4,     r6   // Overlaps TMP3.
|.define CARG5,     r7   // Overlaps INS.
|
|.define FARG1,     f1
|.define FARG2,     f2
|
|.define CRET1,     r3
|.define CRET2,     r4
|
|.define TOCREG,    r2   // TOC register (only used by C code).
|.define ENVREG,    r11  // Environment pointer (nested C functions).
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|// Three mutually exclusive layouts follow:
|//   GPR64 + FRAME32, GPR64 without FRAME32, and plain 32 bit.
|// Each one defines the SAVE_* slots, CFRAME_SPACE, the GPR/FPR save area
|// bases and the TMPD/TONUM scratch doublewords.
|.if GPR64
|.if FRAME32
|
|//                      456(sp) // \ 32/64 bit C frame info
|.define TONUM_LO,       452(sp) // |
|.define TONUM_HI,       448(sp) // |
|.define TMPD_LO,        444(sp) // |
|.define TMPD_HI,        440(sp) // |
|.define SAVE_CR,        432(sp) // | 64 bit CR save.
|.define SAVE_ERRF,      424(sp) //  > Parameter save area.
|.define SAVE_NRES,      420(sp) // |
|.define SAVE_L,         416(sp) // |
|.define SAVE_PC,        412(sp) // |
|.define SAVE_MULTRES,   408(sp) // |
|.define SAVE_CFRAME,    400(sp) // / 64 bit C frame chain.
|//                      392(sp) // Reserved.
|.define CFRAME_SPACE,   384     // Delta for sp.
|// Back chain for sp:   384(sp)    <-- sp entering interpreter
|.define SAVE_LR,        376(sp) // 32 bit LR stored in hi-part.
|.define SAVE_GPR_,      232     // .. 232+18*8: 64 bit GPR saves.
|.define SAVE_FPR_,      88      // .. 88+18*8: 64 bit FPR saves.
|//                      80(sp)  // Needed for 16 byte stack frame alignment.
|//                      16(sp)  // Callee parameter save area (ABI mandated).
|//                      8(sp)   // Reserved
|// Back chain for sp:   0(sp)      <-- sp while in interpreter
|// 32 bit sp stored in hi-part of 0(sp).
|
|.define TMPD_BLO,       447(sp) // Last byte of TMPD (TMPD_LO+3).
|.define TMPD,           TMPD_HI
|.define TONUM_D,        TONUM_HI
|
|.else
|
|//                      508(sp) // \ 32 bit C frame info.
|.define SAVE_ERRF,      472(sp) // |
|.define SAVE_NRES,      468(sp) // |
|.define SAVE_L,         464(sp) //  > Parameter save area.
|.define SAVE_PC,        460(sp) // |
|.define SAVE_MULTRES,   456(sp) // |
|.define SAVE_CFRAME,    448(sp) // / 64 bit C frame chain.
|.define SAVE_LR,        416(sp)
|.define CFRAME_SPACE,   400     // Delta for sp.
|// Back chain for sp:   400(sp)    <-- sp entering interpreter
|.define SAVE_FPR_,      256     // .. 256+18*8: 64 bit FPR saves.
|.define SAVE_GPR_,      112     // .. 112+18*8: 64 bit GPR saves.
|//                      48(sp)  // Callee parameter save area (ABI mandated).
|.define SAVE_TOC,       40(sp)  // TOC save area.
|.define TMPD_LO,        36(sp)  // \ Link editor temp (ABI mandated).
|.define TMPD_HI,        32(sp)  // /
|.define TONUM_LO,       28(sp)  // \ Compiler temp (ABI mandated).
|.define TONUM_HI,       24(sp)  // /
|// Next frame lr:       16(sp)
|.define SAVE_CR,        8(sp)   // 64 bit CR save.
|// Back chain for sp:   0(sp)      <-- sp while in interpreter
|
|.define TMPD_BLO,       39(sp)  // Last byte of TMPD (TMPD_LO+3).
|.define TMPD,           TMPD_HI
|.define TONUM_D,        TONUM_HI
|
|.endif
|.else
|
|.define SAVE_LR,        276(sp)
|.define CFRAME_SPACE,   272     // Delta for sp.
|// Back chain for sp:   272(sp)    <-- sp entering interpreter
|.define SAVE_FPR_,      128     // .. 128+18*8: 64 bit FPR saves.
|.define SAVE_GPR_,      56      // .. 56+18*4: 32 bit GPR saves.
|.define SAVE_CR,        52(sp)  // 32 bit CR save.
|.define SAVE_ERRF,      48(sp)  // 32 bit C frame info.
|.define SAVE_NRES,      44(sp)
|.define SAVE_CFRAME,    40(sp)
|.define SAVE_L,         36(sp)
|.define SAVE_PC,        32(sp)
|.define SAVE_MULTRES,   28(sp)
|.define UNUSED1,        24(sp)
|.define TMPD_LO,        20(sp)
|.define TMPD_HI,        16(sp)
|.define TONUM_LO,       12(sp)
|.define TONUM_HI,       8(sp)
|// Next frame lr:       4(sp)
|// Back chain for sp:   0(sp)      <-- sp while in interpreter
|
|.define TMPD_BLO,       23(sp)  // Last byte of TMPD (TMPD_LO+3).
|.define TMPD,           TMPD_HI
|.define TONUM_D,        TONUM_HI
|
|.endif
|
|// save_ reg: store callee-save GPR r<reg> and FPR f<reg> into their save
|// slots. reg is a literal register number in 14..31; the slot index is
|// (reg-14), scaled by 8 (GPR64) or 4 for GPRs and by 8 for FPRs.
|.macro save_, reg
|.if GPR64
|  std r..reg, SAVE_GPR_+(reg-14)*8(sp)
|.else
|  stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
|  stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|// rest_ reg: reload r<reg> and f<reg> from the slots written by save_.
|.macro rest_, reg
|.if GPR64
|  ld r..reg, SAVE_GPR_+(reg-14)*8(sp)
|.else
|  lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
|  lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|
|// saveregs: C function prologue for the VM entry points.
|// Allocates CFRAME_SPACE bytes (storing the back chain), saves r14-r31 and
|// f14-f31, LR (to SAVE_LR) and CR (to SAVE_CR). Clobbers r0.
|// NOTE(review): the trailing ".toc std TOCREG, SAVE_TOC" needs SAVE_TOC,
|// which only the GPR64/non-FRAME32 layout above defines -- confirm that TOC
|// is never enabled together with the other two layouts.
|.macro saveregs
|.if GPR64 and not FRAME32
|  stdu sp, -CFRAME_SPACE(sp)
|.else
|  stwu sp, -CFRAME_SPACE(sp)
|.endif
|  save_ 14; save_ 15; save_ 16
|  mflr r0
|  save_ 17; save_ 18; save_ 19; save_ 20; save_ 21; save_ 22
|.if GPR64 and not FRAME32
|  std r0, SAVE_LR
|.else
|  stw r0, SAVE_LR
|.endif
|  save_ 23; save_ 24; save_ 25
|  mfcr r0
|  save_ 26; save_ 27; save_ 28; save_ 29; save_ 30; save_ 31
|.if GPR64
|  std r0, SAVE_CR
|.else
|  stw r0, SAVE_CR
|.endif
|  .toc std TOCREG, SAVE_TOC
|.endmacro
|
|// restoreregs: inverse of saveregs (does not return by itself).
|// Reloads LR and r14-r31/f14-f31 and pops the frame. Only the CR fields
|// selected by mask 0x38 are restored from SAVE_CR: one mtcrf normally,
|// three single-field mtocrf (0x20, 0x10, 0x08) on PPE. Clobbers r0 and r12.
|.macro restoreregs
|.if GPR64 and not FRAME32
|  ld r0, SAVE_LR
|.else
|  lwz r0, SAVE_LR
|.endif
|.if GPR64
|  ld r12, SAVE_CR
|.else
|  lwz r12, SAVE_CR
|.endif
|  rest_ 14; rest_ 15; rest_ 16; rest_ 17; rest_ 18; rest_ 19
|  mtlr r0;
|.if PPE; mtocrf 0x20, r12; .else; mtcrf 0x38, r12; .endif
|  rest_ 20; rest_ 21; rest_ 22; rest_ 23; rest_ 24; rest_ 25
|.if PPE; mtocrf 0x10, r12; .endif
|  rest_ 26; rest_ 27; rest_ 28; rest_ 29; rest_ 30; rest_ 31
|.if PPE; mtocrf 0x08, r12; .endif
|  addi sp, sp, CFRAME_SPACE
|.endmacro
|
|// Type definitions. Some of these are only used for documentation.
|.type L,       lua_State, LREG
|.type GL,      global_State
|.type TVALUE,  TValue
|.type GCOBJ,   GCobj
|.type STR,     GCstr
|.type TAB,     GCtab
|.type LFUNC,   GCfuncL
|.type CFUNC,   GCfuncC
|.type PROTO,   GCproto
|.type UPVAL,   GCupval
|.type NODE,    Node
|.type NARGS8,  int
|.type TRACE,   GCtrace
|.type SBUF,    SBuf
|
|//-----------------------------------------------------------------------
|
|// Trap for not-yet-implemented parts.
|.macro NYI; tw 4, sp, sp; .endmacro
|
|// int/FP conversions.
|// All of them go through memory: the integer is stored into the low word
|// of a scratch doubleword and combined with a bias constant.
|// tonum_i/tonum_u rely on TONUM_HI having been preset by the VM entry code
|// (it stores 0x4338 << 16 there) and on the TONUM/TOBIT registers.
|
|// tonum_i freg, reg: freg = (double)(int32_t)reg. Clobbers reg (its sign
|// bit is flipped) and the TONUM_LO slot.
|.macro tonum_i, freg, reg
|  xoris reg, reg, 0x8000
|  stw reg, TONUM_LO
|  lfd freg, TONUM_D
|  fsub freg, freg, TONUM
|.endmacro
|
|// tonum_u freg, reg: freg = (double)(uint32_t)reg. reg is preserved;
|// clobbers the TONUM_LO slot.
|.macro tonum_u, freg, reg
|  stw reg, TONUM_LO
|  lfd freg, TONUM_D
|  fsub freg, freg, TOBIT
|.endmacro
|
|// toint reg, freg, tmpfreg: reg = freg converted to a 32 bit integer with
|// fctiwz (round toward zero). Clobbers tmpfreg and the TMPD slot.
|.macro toint, reg, freg, tmpfreg
|  fctiwz tmpfreg, freg
|  stfd tmpfreg, TMPD
|  lwz reg, TMPD_LO
|.endmacro
|
|// Two-operand form: uses freg itself as the temporary, i.e. freg is
|// overwritten with the fctiwz result.
|.macro toint, reg, freg
|  toint reg, freg, freg
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Access to frame relative to BASE.
|.define FRAME_PC,   -8
|.define FRAME_FUNC, -4
|
|// Instruction decode.
|// Each decoder extracts one field of the 32 bit bytecode instruction with a
|// single rotate-and-mask and pre-scales it:
|//   decode_OP4  dst = (ins & 0xff) * 4           opcode, 4 byte table slots
|//   decode_OP8  dst = (ins & 0xff) * 8           opcode, 8 byte table slots
|//   decode_RA8  dst = ((ins >> 8) & 0xff) * 8    operand A as slot offset
|//   decode_RB8  dst = ((ins >> 24) & 0xff) * 8   operand B as slot offset
|//   decode_RC8  dst = ((ins >> 16) & 0xff) * 8   operand C as slot offset
|//   decode_RD8  dst = ((ins >> 16) & 0xffff) * 8 operand D as slot offset
|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
|.macro decode_OP8, dst, ins; rlwinm dst, ins, 3, 21, 28; .endmacro
|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
|
|//   decode_OP1  dst = ins & 0xff                 unscaled opcode
|//   decode_RD4  dst = ((ins >> 16) & 0xffff) * 4 operand D scaled by 4
|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
|
|// Instruction fetch.
|// Loads the instruction at PC into INS and advances PC past it.
|.macro ins_NEXT1
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|.endmacro
|// Instruction decode+dispatch. Note: optimized for e300!
|// Looks up the handler for the opcode in INS via DISPATCH and jumps to it
|// with RA, RB, RC and RD predecoded (all scaled by 8).
|// Clobbers TMP0, TMP1 and CTR.
|.macro ins_NEXT2
|  decode_OPP TMP1, INS
|  lpx TMP0, DISPATCH, TMP1
|  mtctr TMP0
|  decode_RB8 RB, INS
|  decode_RD8 RD, INS
|  decode_RA8 RA, INS
|  decode_RC8 RC, INS
|  bctr
|.endmacro
|// Fetch + decode + dispatch in one go.
|.macro ins_NEXT
|  ins_NEXT1
|  ins_NEXT2
|.endmacro
|
|// Instruction footer.
|// The ".if 1" below hard-selects replicated dispatch; the ".else" branch is
|// the alternative with a single shared dispatch site at ->ins_next.
|.if 1
|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
|  .define ins_next, ins_NEXT
|  .define ins_next_, ins_NEXT
|  .define ins_next1, ins_NEXT1
|  .define ins_next2, ins_NEXT2
|.else
|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
|  // Affects only certain kinds of benchmarks (and only with -j off).
|  .macro ins_next
|    b ->ins_next
|  .endmacro
|  .macro ins_next1
|  .endmacro
|  .macro ins_next2
|    b ->ins_next
|  .endmacro
|  .macro ins_next_
|  ->ins_next:
|    ins_NEXT
|  .endmacro
|.endif
|
|// Call decode and dispatch.
|// ins_callt: enter the function in RB. Loads PC from the function object,
|// fetches its first instruction and dispatches it with RA = BASE + A*8.
|// Unlike ins_NEXT2 it does not predecode RB/RC/RD.
|// Clobbers TMP0, TMP1, INS and CTR.
|.macro ins_callt
|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
|  lwz PC, LFUNC:RB->pc
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|  decode_OPP TMP1, INS
|  decode_RA8 RA, INS
|  lpx TMP0, DISPATCH, TMP1
|  add RA, RA, BASE
|  mtctr TMP0
|  bctr
|.endmacro
|
|// ins_call: like ins_callt, but first stores the caller PC into the new
|// frame at FRAME_PC(BASE).
|.macro ins_call
|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
|  stw PC, FRAME_PC(BASE)
|  ins_callt
|.endmacro
|
|//-----------------------------------------------------------------------
|
|// Macros to test operand types.
|// Each one only performs the compare; the caller supplies the branch.
|// checknum is an unsigned compare against the TISNUM register, the others
|// are signed compares against a type tag immediate. Without an explicit
|// cr operand the result lands in cr0.
|.macro checknum, reg; cmplw reg, TISNUM; .endmacro
|.macro checknum, cr, reg; cmplw cr, reg, TISNUM; .endmacro
|.macro checkstr, reg; cmpwi reg, LJ_TSTR; .endmacro
|.macro checktab, reg; cmpwi reg, LJ_TTAB; .endmacro
|.macro checkfunc, reg; cmpwi reg, LJ_TFUNC; .endmacro
|.macro checknil, reg; cmpwi reg, LJ_TNIL; .endmacro
|
|// branch_RD: PC += (RD >> 1) - BCBIAS_J*4.
|// RD holds the D operand scaled by 8, so RD >> 1 is the jump target in
|// bytes (4 per instruction); the bias is subtracted via the addis.
|// Clobbers TMP0.
|.macro branch_RD
|  srwi TMP0, RD, 1
|  addis PC, PC, -(BCBIAS_J*4 >> 16)
|  add PC, PC, TMP0
|.endmacro
|
|// Assumes DISPATCH is relative to GL.
#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
|
#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
|
|// hotcheck delta, target: decrement a 16 bit hot counter and branch to
|// target when the decremented value is negative.
|// The counter is selected by (PC >> 1) & 0x7e, i.e. a halfword index taken
|// from PC bits, in the table at DISPATCH + GG_DISP2HOT.
|// Clobbers TMP1, TMP2, cr0 and the carry bit (addic.).
|.macro hotcheck, delta, target
|  rlwinm TMP1, PC, 31, 25, 30
|  addi TMP1, TMP1, GG_DISP2HOT
|  lhzx TMP2, DISPATCH, TMP1
|  addic. TMP2, TMP2, -delta
|  sthx TMP2, DISPATCH, TMP1
|  blt target
|.endmacro
|
|// Hot counter check for loops: branches to ->vm_hotloop.
|.macro hotloop
|  hotcheck HOTCOUNT_LOOP, ->vm_hotloop
|.endmacro
|
|// Hot counter check for calls: branches to ->vm_hotcall.
|.macro hotcall
|  hotcheck HOTCOUNT_CALL, ->vm_hotcall
|.endmacro
|
|// Set current VM state. Uses TMP0.
|// li_vmstate loads the complemented state constant ~LJ_VMST_<st> into TMP0;
|// st_vmstate stores TMP0 to the vmstate field reached through DISPATCH.
|// They are separate macros so that other instructions can be scheduled
|// in between -- TMP0 must stay untouched from one to the other.
|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
|
|// Move table write barrier back. Overwrites mark and tmp.
|// Pushes tab onto the gc.grayagain list: tab->gclist receives the old list
|// head, and the 0x04 bit is cleared in tab->marked.
|.macro barrierback, tab, mark, tmp
|  lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
|  // Assumes LJ_GC_BLACK is 0x04.
|  rlwinm mark, mark, 0, 30, 28		// black2gray(tab)
|  stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
|  stb mark, tab->marked
|  stw tmp, tab->gclist
|.endmacro
|
|//-----------------------------------------------------------------------

/* Generate subroutines used by opcodes and other parts of the VM. */
/* The .code_sub section should be last to help static branch prediction. */
static void build_subroutines(BuildCtx *ctx)
{
  |.code_sub
  |
  |//-----------------------------------------------------------------------
  |//-- Return handling ----------------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |// Reached from vm_return for frames that are not FRAME_C.
  |// Frames without the FRAME_P bit go to cont_dispatch; otherwise this is a
  |// return from a pcall/xpcall frame and falls through to vm_returnc.
  |->vm_returnp:
  |  // See vm_return. Also: TMP2 = previous base.
  |  andix. TMP0, PC, FRAME_P
  |  li TMP1, LJ_TTRUE
  |  beq ->cont_dispatch
  |
  |  // Return from pcall or xpcall fast func.
  |  lwz PC, FRAME_PC(TMP2)		// Fetch PC of previous frame.
  |  mr BASE, TMP2			// Restore caller base.
  |  // Prepending may overwrite the pcall frame, so do it at the end.
  |  stwu TMP1, FRAME_PC(RA)		// Prepend true to results.
  |
  |// Entry: RD = nresults*8 (one slot is added here for the prepended value).
  |// If RD is zero after the adjustment, control leaves through
  |// vm_unwind_c_eh with CRET1 = LUA_YIELD. Otherwise a return to a Lua frame
  |// goes to BC_RET_Z and everything else falls through to vm_return.
  |->vm_returnc:
  |  addi RD, RD, 8			// RD = (nresults+1)*8.
  |  andix. TMP0, PC, FRAME_TYPE
  |  cmpwi cr1, RD, 0
  |  li CRET1, LUA_YIELD
  |  beq cr1, ->vm_unwind_c_eh
  |  mr MULTRES, RD
  |  beq ->BC_RET_Z			// Handle regular return to Lua.
  |
  |->vm_return:
  |  // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
  |  // TMP0 = PC & FRAME_TYPE
  |  cmpwi TMP0, FRAME_C
  |  rlwinm TMP2, PC, 0, 0, 28
  |  li_vmstate C
  |  sub TMP2, BASE, TMP2		// TMP2 = previous base.
  |  bney ->vm_returnp
  |
  |  // Return to C: move the results down so they start at BASE-8.
  |  addic. TMP1, RD, -8
  |  stp TMP2, L->base
  |  lwz TMP2, SAVE_NRES
  |  subi BASE, BASE, 8
  |  st_vmstate
  |  slwi TMP2, TMP2, 3
  |  beq >2
  |1:  // Copy loop: one 8 byte slot per iteration, TMP1 = bytes remaining.
  |  addic. TMP1, TMP1, -8
  |  lfd f0, 0(RA)
  |  addi RA, RA, 8
  |  stfd f0, 0(BASE)
  |  addi BASE, BASE, 8
  |  bney <1
  |
  |2:  // TMP2 = SAVE_NRES*8 (wanted), RD = (nresults+1)*8 (available).
  |  cmpw TMP2, RD			// More/less results wanted?
  |  bne >6
  |3:
  |  stp BASE, L->top			// Store new top.
  |
  |->vm_leave_cp:
  |  lp TMP0, SAVE_CFRAME		// Restore previous C frame.
  |  li CRET1, 0			// Ok return status for vm_pcall.
  |  stp TMP0, L->cframe
  |
  |// Common exit: pop the C frame and return to the C caller with the status
  |// already in CRET1.
  |->vm_leave_unw:
  |  restoreregs
  |  blr
  |
  |6:
  |  ble >7				// Less results wanted?
  |  // More results wanted. Check stack size and fill up results with nil.
  |  lwz TMP1, L->maxstack
  |  cmplw BASE, TMP1
  |  bge >8
  |  stw TISNIL, 0(BASE)
  |  addi RD, RD, 8
  |  addi BASE, BASE, 8
  |  b <2
  |
  |7:  // Less results wanted.
  |  // The subfic/subfe pair turns "TMP2 == 0" into a 0/-1 mask via the carry.
  |  subfic TMP3, TMP2, 0		// LUA_MULTRET+1 case?
  |  sub TMP0, RD, TMP2
  |  subfe TMP1, TMP1, TMP1		// TMP1 = TMP2 == 0 ? 0 : -1
  |  and TMP0, TMP0, TMP1
  |  sub BASE, BASE, TMP0		// Either keep top or shrink it.
  |  b <3
  |
  |8:  // Corner case: need to grow stack for filling up results.
  |  // This can happen if:
  |  // - A C function grows the stack (a lot).
  |  // - The GC shrinks the stack in between.
  |  // - A return back from a lua_call() with (high) nresults adjustment.
  |  stp BASE, L->top			// Save current top held in BASE (yes).
  |  mr SAVE0, RD			// RD is not preserved across the C call.
  |  srwi CARG2, TMP2, 3
  |  mr CARG1, L
  |  bl extern lj_state_growstack	// (lua_State *L, int n)
  |  lwz TMP2, SAVE_NRES
  |  mr RD, SAVE0
  |  slwi TMP2, TMP2, 3
  |  lp BASE, L->top			// Need the (realloced) L->top in BASE.
  |  b <2
  |
  |->vm_unwind_c:			// Unwind C stack, return from vm_pcall.
  |  // (void *cframe, int errcode)
  |  mr sp, CARG1
  |  mr CRET1, CARG2
  |->vm_unwind_c_eh:			// Landing pad for external unwinder.
  |  // Expects sp = C frame and CRET1 = status to return.
  |  lwz L, SAVE_L
  |  .toc ld TOCREG, SAVE_TOC
  |  li TMP0, ~LJ_VMST_C
  |  lwz GL:TMP1, L->glref
  |  stw TMP0, GL:TMP1->vmstate
  |  b ->vm_leave_unw
  |
  |->vm_unwind_ff:			// Unwind C stack, return from ff pcall.
  |  // (void *cframe)
  |  // The low two bits of the cframe argument are masked off to get sp.
  |.if GPR64
  |  rldicr sp, CARG1, 0, 61
  |.else
  |  rlwinm sp, CARG1, 0, 0, 29
  |.endif
  |->vm_unwind_ff_eh:			// Landing pad for external unwinder.
  |  // Re-establishes the interpreter register state (constants, DISPATCH,
  |  // BASE, PC) and returns (false, error message) through vm_returnc.
  |  lwz L, SAVE_L
  |  .toc ld TOCREG, SAVE_TOC
  |  li TISNUM, LJ_TISNUM		// Setup type comparison constants.
  |  lp BASE, L->base
  |  lus TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
  |  lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
  |  li ZERO, 0
  |  stw TMP3, TMPD
  |  li TMP1, LJ_TFALSE
  |  ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
  |  li TISNIL, LJ_TNIL
  |  li_vmstate INTERP
  |  lfs TOBIT, TMPD
  |  lwz PC, FRAME_PC(BASE)		// Fetch PC of previous frame.
  |  la RA, -8(BASE)			// Results start at BASE-8.
  |  stw TMP3, TMPD
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  stw TMP1, 0(RA)			// Prepend false to error message.
  |  li RD, 16				// 2 results: false + error message.
  |  st_vmstate
  |  lfs TONUM, TMPD
  |  b ->vm_returnc
  |
  |//-----------------------------------------------------------------------
  |//-- Grow stack for calls -----------------------------------------------
  |//-----------------------------------------------------------------------
  |
  |->vm_growstack_c:			// Grow stack for C function.
  |  li CARG2, LUA_MINSTACK
  |  b >2
  |
  |->vm_growstack_l:			// Grow stack for Lua function.
  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
  |  add RC, BASE, RC
  |  sub RA, RA, BASE
  |  stp BASE, L->base
  |  addi PC, PC, 4			// Must point after first instruction.
  |  stp RC, L->top
  |  srwi CARG2, RA, 3			// n = framesize in slots.
  |2:
  |  // L->base = new base, L->top = top
  |  stw PC, SAVE_PC
  |  mr CARG1, L
  |  bl extern lj_state_growstack	// (lua_State *L, int n)
  |  // Reload everything that points into the stack after the C call.
  |  lp BASE, L->base
  |  lp RC, L->top
  |  lwz LFUNC:RB, FRAME_FUNC(BASE)
  |  sub RC, RC, BASE
  |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
  |  ins_callt				// Just retry the call.
  |
  |//-----------------------------------------------------------------------
  |//-- Entry points into the assembler VM ---------------------------------
  |//-----------------------------------------------------------------------
  |
  |->vm_resume:				// Setup C frame and resume thread.
  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
  |  // CARG3 is stored as nres, errf, previous cframe and (below) as the new
  |  // L->status, i.e. this code relies on the caller passing nres1 = 0.
  |  saveregs
  |  mr L, CARG1
  |  lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
  |  mr BASE, CARG2
  |  lbz TMP1, L->status
  |  stw L, SAVE_L
  |  li PC, FRAME_CP
  |  addi TMP0, sp, CFRAME_RESUME
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  stw CARG3, SAVE_NRES
  |  cmplwi TMP1, 0
  |  stw CARG3, SAVE_ERRF
  |  stp CARG3, SAVE_CFRAME
  |  stw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
  |  stp TMP0, L->cframe
  |  beq >3				// L->status == 0: start via the call path.
  |
  |  // Resume after yield (like a return).
  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
  |  mr RA, BASE
  |  lp BASE, L->base
  |  li TISNUM, LJ_TISNUM		// Setup type comparison constants.
  |  lp TMP1, L->top
  |  lwz PC, FRAME_PC(BASE)
  |  lus TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
  |  stb CARG3, L->status
  |  stw TMP3, TMPD
  |  ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
  |  lfs TOBIT, TMPD
  |  sub RD, TMP1, BASE
  |  stw TMP3, TMPD
  |  lus TMP0, 0x4338			// Hiword of 2^52 + 2^51 (double)
  |  addi RD, RD, 8
  |  stw TMP0, TONUM_HI
  |  li_vmstate INTERP
  |  li ZERO, 0
  |  st_vmstate
  |  andix. TMP0, PC, FRAME_TYPE
  |  mr MULTRES, RD
  |  lfs TONUM, TMPD
  |  li TISNIL, LJ_TNIL
  |  beq ->BC_RET_Z
  |  b ->vm_return
  |
  |->vm_pcall:				// Setup protected C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
  |  saveregs
  |  li PC, FRAME_CP
  |  stw CARG4, SAVE_ERRF
  |  b >1
  |
  |->vm_call:				// Setup C frame and enter VM.
  |  // (lua_State *L, TValue *base, int nres1)
  |  saveregs
  |  li PC, FRAME_C
  |
  |1:  // Entry point for vm_pcall above (PC = ftype).
  |  lp TMP1, L:CARG1->cframe
  |  mr L, CARG1
  |  stw CARG3, SAVE_NRES
  |  lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
  |  stw CARG1, SAVE_L
  |  mr BASE, CARG2
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  stw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
  |  stp TMP1, SAVE_CFRAME
  |  stp sp, L->cframe			// Add our C frame to cframe chain.
  |
  |3:  // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
  |  // Loads the interpreter constants (TISNUM, TISNIL, ZERO, TOBIT, TONUM,
  |  // TONUM_HI), sets the VM state to INTERP and computes PC and RC for
  |  // vm_call_dispatch.
  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
  |  lp TMP2, L->base			// TMP2 = old base (used in vmeta_call).
  |  li TISNUM, LJ_TISNUM		// Setup type comparison constants.
  |  lp TMP1, L->top
  |  lus TMP3, 0x59c0			// TOBIT = 2^52 + 2^51 (float).
  |  add PC, PC, BASE
  |  stw TMP3, TMPD
  |  li ZERO, 0
  |  ori TMP3, TMP3, 0x0004		// TONUM = 2^52 + 2^51 + 2^31 (float).
  |  lfs TOBIT, TMPD
  |  sub PC, PC, TMP2			// PC = frame delta + frame type
  |  stw TMP3, TMPD
  |  lus TMP0, 0x4338			// Hiword of 2^52 + 2^51 (double)
  |  sub NARGS8:RC, TMP1, BASE
  |  stw TMP0, TONUM_HI
  |  li_vmstate INTERP
  |  lfs TONUM, TMPD
  |  li TISNIL, LJ_TNIL
  |  st_vmstate
  |
  |->vm_call_dispatch:
  |  // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
  |  // The word at FRAME_PC(BASE) is tested as the type tag of the called
  |  // object; anything but a function goes through the __call metamethod.
  |  lwz TMP0, FRAME_PC(BASE)
  |  lwz LFUNC:RB, FRAME_FUNC(BASE)
  |  checkfunc TMP0; bne ->vmeta_call
  |
  |->vm_call_dispatch_f:
  |  ins_call
  |  // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
  |
  |->vm_cpcall:				// Setup protected C frame, call C.
  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
  |  // Calls cp(L, func, ud). A non-zero result is taken as the new base and
  |  // entered via label 3 above; a zero result just removes the C frame.
  |  saveregs
  |  mr L, CARG1
  |  lwz TMP0, L:CARG1->stack
  |  stw CARG1, SAVE_L
  |  lp TMP1, L->top
  |  lwz DISPATCH, L->glref		// Setup pointer to dispatch table.
  |  stw CARG1, SAVE_PC			// Any value outside of bytecode is ok.
  |  sub TMP0, TMP0, TMP1		// Compute -savestack(L, L->top).
  |  lp TMP1, L->cframe
  |  addi DISPATCH, DISPATCH, GG_G2DISP
  |  .toc lp CARG4, 0(CARG4)
  |  li TMP2, 0
  |  stw TMP0, SAVE_NRES		// Neg. delta means cframe w/o frame.
  |  stw TMP2, SAVE_ERRF		// No error function.
  |  stp TMP1, SAVE_CFRAME
  |  stp sp, L->cframe			// Add our C frame to cframe chain.
  |  stw L, DISPATCH_GL(cur_L)(DISPATCH)
  |  mtctr CARG4
  |  bctrl			// (lua_State *L, lua_CFunction func, void *ud)
  |.if PPE
  |  mr BASE, CRET1
  |  cmpwi CRET1, 0
  |.else
  |  mr. BASE, CRET1
  |.endif
  |  li PC, FRAME_CP
  |  bne <3				// Else continue with the call.
  |  b ->vm_leave_cp			// No base? Just remove C frame.
798 | 799 |//----------------------------------------------------------------------- 800 |//-- Metamethod handling ------------------------------------------------ 801 |//----------------------------------------------------------------------- 802 | 803 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the 804 |// stack, so BASE doesn't need to be reloaded across these calls. 805 | 806 |//-- Continuation dispatch ---------------------------------------------- 807 | 808 |->cont_dispatch: 809 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 810 | lwz TMP0, -12(BASE) // Continuation. 811 | mr RB, BASE 812 | mr BASE, TMP2 // Restore caller BASE. 813 | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) 814 |.if FFI 815 | cmplwi TMP0, 1 816 |.endif 817 | lwz PC, -16(RB) // Restore PC from [cont|PC]. 818 | subi TMP2, RD, 8 819 | lwz TMP1, LFUNC:TMP1->pc 820 | stwx TISNIL, RA, TMP2 // Ensure one valid arg. 821 |.if FFI 822 | ble >1 823 |.endif 824 | lwz KBASE, PC2PROTO(k)(TMP1) 825 | // BASE = base, RA = resultptr, RB = meta base 826 | mtctr TMP0 827 | bctr // Jump to continuation. 828 | 829 |.if FFI 830 |1: 831 | beq ->cont_ffi_callback // cont = 1: return from FFI callback. 832 | // cont = 0: tailcall from C function. 
833 | subi TMP1, RB, 16 834 | sub RC, TMP1, BASE 835 | b ->vm_call_tail 836 |.endif 837 | 838 |->cont_cat: // RA = resultptr, RB = meta base 839 | lwz INS, -4(PC) 840 | subi CARG2, RB, 16 841 | decode_RB8 SAVE0, INS 842 | lfd f0, 0(RA) 843 | add TMP1, BASE, SAVE0 844 | stp BASE, L->base 845 | cmplw TMP1, CARG2 846 | sub CARG3, CARG2, TMP1 847 | decode_RA8 RA, INS 848 | stfd f0, 0(CARG2) 849 | bney ->BC_CAT_Z 850 | stfdx f0, BASE, RA 851 | b ->cont_nop 852 | 853 |//-- Table indexing metamethods ----------------------------------------- 854 | 855 |->vmeta_tgets1: 856 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 857 | li TMP0, LJ_TSTR 858 | decode_RB8 RB, INS 859 | stw STR:RC, 4(CARG3) 860 | add CARG2, BASE, RB 861 | stw TMP0, 0(CARG3) 862 | b >1 863 | 864 |->vmeta_tgets: 865 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 866 | li TMP0, LJ_TTAB 867 | stw TAB:RB, 4(CARG2) 868 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) 869 | stw TMP0, 0(CARG2) 870 | li TMP1, LJ_TSTR 871 | stw STR:RC, 4(CARG3) 872 | stw TMP1, 0(CARG3) 873 | b >1 874 | 875 |->vmeta_tgetb: // TMP0 = index 876 |.if not DUALNUM 877 | tonum_u f0, TMP0 878 |.endif 879 | decode_RB8 RB, INS 880 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 881 | add CARG2, BASE, RB 882 |.if DUALNUM 883 | stw TISNUM, 0(CARG3) 884 | stw TMP0, 4(CARG3) 885 |.else 886 | stfd f0, 0(CARG3) 887 |.endif 888 | b >1 889 | 890 |->vmeta_tgetv: 891 | decode_RB8 RB, INS 892 | decode_RC8 RC, INS 893 | add CARG2, BASE, RB 894 | add CARG3, BASE, RC 895 |1: 896 | stp BASE, L->base 897 | mr CARG1, L 898 | stw PC, SAVE_PC 899 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) 900 | // Returns TValue * (finished) or NULL (metamethod). 901 | cmplwi CRET1, 0 902 | beq >3 903 | lfd f0, 0(CRET1) 904 | ins_next1 905 | stfdx f0, BASE, RA 906 | ins_next2 907 | 908 |3: // Call __index metamethod. 
909 | // BASE = base, L->top = new base, stack = cont/func/t/k 910 | subfic TMP1, BASE, FRAME_CONT 911 | lp BASE, L->top 912 | stw PC, -16(BASE) // [cont|PC] 913 | add PC, TMP1, BASE 914 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 915 | li NARGS8:RC, 16 // 2 args for func(t, k). 916 | b ->vm_call_dispatch_f 917 | 918 |->vmeta_tgetr: 919 | bl extern lj_tab_getinth // (GCtab *t, int32_t key) 920 | // Returns cTValue * or NULL. 921 | cmplwi CRET1, 0 922 | beq >1 923 | lfd f14, 0(CRET1) 924 | b ->BC_TGETR_Z 925 |1: 926 | stwx TISNIL, BASE, RA 927 | b ->cont_nop 928 | 929 |//----------------------------------------------------------------------- 930 | 931 |->vmeta_tsets1: 932 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 933 | li TMP0, LJ_TSTR 934 | decode_RB8 RB, INS 935 | stw STR:RC, 4(CARG3) 936 | add CARG2, BASE, RB 937 | stw TMP0, 0(CARG3) 938 | b >1 939 | 940 |->vmeta_tsets: 941 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 942 | li TMP0, LJ_TTAB 943 | stw TAB:RB, 4(CARG2) 944 | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) 945 | stw TMP0, 0(CARG2) 946 | li TMP1, LJ_TSTR 947 | stw STR:RC, 4(CARG3) 948 | stw TMP1, 0(CARG3) 949 | b >1 950 | 951 |->vmeta_tsetb: // TMP0 = index 952 |.if not DUALNUM 953 | tonum_u f0, TMP0 954 |.endif 955 | decode_RB8 RB, INS 956 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 957 | add CARG2, BASE, RB 958 |.if DUALNUM 959 | stw TISNUM, 0(CARG3) 960 | stw TMP0, 4(CARG3) 961 |.else 962 | stfd f0, 0(CARG3) 963 |.endif 964 | b >1 965 | 966 |->vmeta_tsetv: 967 | decode_RB8 RB, INS 968 | decode_RC8 RC, INS 969 | add CARG2, BASE, RB 970 | add CARG3, BASE, RC 971 |1: 972 | stp BASE, L->base 973 | mr CARG1, L 974 | stw PC, SAVE_PC 975 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 976 | // Returns TValue * (finished) or NULL (metamethod). 977 | cmplwi CRET1, 0 978 | lfdx f0, BASE, RA 979 | beq >3 980 | // NOBARRIER: lj_meta_tset ensures the table is not black. 
981 | ins_next1 982 | stfd f0, 0(CRET1) 983 | ins_next2 984 | 985 |3: // Call __newindex metamethod. 986 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) 987 | subfic TMP1, BASE, FRAME_CONT 988 | lp BASE, L->top 989 | stw PC, -16(BASE) // [cont|PC] 990 | add PC, TMP1, BASE 991 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 992 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 993 | stfd f0, 16(BASE) // Copy value to third argument. 994 | b ->vm_call_dispatch_f 995 | 996 |->vmeta_tsetr: 997 | stp BASE, L->base 998 | stw PC, SAVE_PC 999 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) 1000 | // Returns TValue *. 1001 | stfd f14, 0(CRET1) 1002 | b ->cont_nop 1003 | 1004 |//-- Comparison metamethods --------------------------------------------- 1005 | 1006 |->vmeta_comp: 1007 | mr CARG1, L 1008 | subi PC, PC, 4 1009 |.if DUALNUM 1010 | mr CARG2, RA 1011 |.else 1012 | add CARG2, BASE, RA 1013 |.endif 1014 | stw PC, SAVE_PC 1015 |.if DUALNUM 1016 | mr CARG3, RD 1017 |.else 1018 | add CARG3, BASE, RD 1019 |.endif 1020 | stp BASE, L->base 1021 | decode_OP1 CARG4, INS 1022 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) 1023 | // Returns 0/1 or TValue * (metamethod). 1024 |3: 1025 | cmplwi CRET1, 1 1026 | bgt ->vmeta_binop 1027 | subfic CRET1, CRET1, 0 1028 |4: 1029 | lwz INS, 0(PC) 1030 | addi PC, PC, 4 1031 | decode_RD4 TMP2, INS 1032 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 1033 | and TMP2, TMP2, CRET1 1034 | add PC, PC, TMP2 1035 |->cont_nop: 1036 | ins_next 1037 | 1038 |->cont_ra: // RA = resultptr 1039 | lwz INS, -4(PC) 1040 | lfd f0, 0(RA) 1041 | decode_RA8 TMP1, INS 1042 | stfdx f0, BASE, TMP1 1043 | b ->cont_nop 1044 | 1045 |->cont_condt: // RA = resultptr 1046 | lwz TMP0, 0(RA) 1047 | .gpr64 extsw TMP0, TMP0 1048 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is true. 
1049 | subfe CRET1, CRET1, CRET1 // CRET1 = 0 or -1, depending on the carry left by subfic.
1050 | not CRET1, CRET1 // Invert the mask for the branch-if-true variant.
1051 | b <4
1052 |
1053 |->cont_condf: // RA = resultptr
1054 | lwz TMP0, 0(RA)
1055 | .gpr64 extsw TMP0, TMP0
1056 | subfic TMP0, TMP0, LJ_TTRUE // Branch if result is false.
1057 | subfe CRET1, CRET1, CRET1 // Same carry-derived mask as in cont_condt, but not inverted.
1058 | b <4
1059 |
1060 |->vmeta_equal:
1061 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
1062 | subi PC, PC, 4
1063 | stp BASE, L->base
1064 | mr CARG1, L
1065 | stw PC, SAVE_PC
1066 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
1067 | // Returns 0/1 or TValue * (metamethod).
1068 | b <3 // Reuse the result dispatch at 3: in vmeta_comp.
1069 |
1070 |->vmeta_equal_cd: // The label always exists; the body is only assembled with FFI.
1071 |.if FFI
1072 | mr CARG2, INS
1073 | subi PC, PC, 4
1074 | stp BASE, L->base
1075 | mr CARG1, L
1076 | stw PC, SAVE_PC
1077 | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
1078 | // Returns 0/1 or TValue * (metamethod).
1079 | b <3
1080 |.endif
1081 |
1082 |->vmeta_istype: // Calls lj_meta_istype(L, RA>>3, RD>>3), then continues with the next instruction.
1083 | subi PC, PC, 4
1084 | stp BASE, L->base
1085 | srwi CARG2, RA, 3
1086 | mr CARG1, L
1087 | srwi CARG3, RD, 3
1088 | stw PC, SAVE_PC
1089 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1090 | b ->cont_nop
1091 |
1092 |//-- Arithmetic metamethods ---------------------------------------------
1093 |
1094 |->vmeta_arith_nv: // Each vmeta_arith_* entry sets CARG3/CARG4 to the operand addresses, then joins 1: below.
1095 | add CARG3, KBASE, RC
1096 | add CARG4, BASE, RB
1097 | b >1
1098 |->vmeta_arith_nv2: // Has a body only with DUALNUM; otherwise the label falls through into vmeta_unm.
1099 |.if DUALNUM
1100 | mr CARG3, RC
1101 | mr CARG4, RB
1102 | b >1
1103 |.endif
1104 |
1105 |->vmeta_unm: // Unary case: the same operand (RD) is passed for both CARG3 and CARG4.
1106 | mr CARG3, RD
1107 | mr CARG4, RD
1108 | b >1
1109 |
1110 |->vmeta_arith_vn:
1111 | add CARG3, BASE, RB
1112 | add CARG4, KBASE, RC
1113 | b >1
1114 |
1115 |->vmeta_arith_vv:
1116 | add CARG3, BASE, RB
1117 | add CARG4, BASE, RC
1118 |.if DUALNUM
1119 | b >1 // Skip the register moves of the *2 variants below.
1120 |.endif
1121 |->vmeta_arith_vn2:
1122 |->vmeta_arith_vv2:
1123 |.if DUALNUM
1124 | mr CARG3, RB
1125 | mr CARG4, RC
1126 |.endif
1127 |1: // Common tail: CARG2 = BASE+RA, CARG5 = opcode.
1128 | add CARG2, BASE, RA
1129 | stp BASE, L->base
1130 | mr CARG1, L
1131 | stw PC, SAVE_PC
1132 | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS.
1133 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1134 | // Returns NULL (finished) or TValue * (metamethod).
1135 | cmplwi CRET1, 0
1136 | beq ->cont_nop
1137 |
1138 | // Call metamethod for binary op.
1139 |->vmeta_binop:
1140 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
1141 | sub TMP1, CRET1, BASE // TMP1 = new base - old base.
1142 | stw PC, -16(CRET1) // [cont|PC]
1143 | mr TMP2, BASE // Keep the old base in TMP2 for vm_call_dispatch.
1144 | addi PC, TMP1, FRAME_CONT
1145 | mr BASE, CRET1
1146 | li NARGS8:RC, 16 // 2 args for func(o1, o2).
1147 | b ->vm_call_dispatch
1148 |
1149 |->vmeta_len: // Calls lj_meta_len; the incoming CARG1 is kept in SAVE0 for the retry path.
1150 | mr SAVE0, CARG1
1151 | mr CARG2, RD
1152 | stp BASE, L->base
1153 | mr CARG1, L
1154 | stw PC, SAVE_PC
1155 | bl extern lj_meta_len // (lua_State *L, TValue *o)
1156 | // Returns NULL (retry) or TValue * (metamethod base).
1157 | cmplwi CRET1, 0
1158 | bne ->vmeta_binop // Binop call for compatibility.
1159 | mr CARG1, SAVE0 // NULL: restore CARG1 and retry at BC_LEN_Z.
1160 | b ->BC_LEN_Z
1161 |
1162 |//-- Call metamethod ----------------------------------------------------
1163 |
1164 |->vmeta_call: // Resolve and call __call metamethod.
1165 | // TMP2 = old base, BASE = new base, RC = nargs*8
1166 | mr CARG1, L
1167 | stp TMP2, L->base // This is the callers base!
1168 | subi CARG2, BASE, 8 // CARG2 = address of the slot just below the new base (func argument).
1169 | stw PC, SAVE_PC
1170 | add CARG3, BASE, RC // CARG3 = BASE + nargs*8 (top argument).
1171 | mr SAVE0, NARGS8:RC // Keep nargs*8 across the C call.
1172 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1173 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
1174 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
1175 | ins_call
1176 |
1177 |->vmeta_callt: // Resolve __call for BC_CALLT.
1178 | // BASE = old base, RA = new base, RC = nargs*8
1179 | mr CARG1, L
1180 | stp BASE, L->base
1181 | subi CARG2, RA, 8
1182 | stw PC, SAVE_PC
1183 | add CARG3, RA, RC
1184 | mr SAVE0, NARGS8:RC // Keep nargs*8 across the C call.
1185 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1186 | lwz TMP1, FRAME_PC(BASE)
1187 | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
1188 | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
1189 | b ->BC_CALLT_Z
1190 |
1191 |//-- Argument coercion for 'for' statement ------------------------------
1192 |
1193 |->vmeta_for: // Calls lj_meta_for(L, RA), then re-decodes the saved instruction and re-enters the loop opcode.
1194 | mr CARG1, L
1195 | stp BASE, L->base
1196 | mr CARG2, RA
1197 | stw PC, SAVE_PC
1198 | mr SAVE0, INS // Keep the instruction word across the C call.
1199 | bl extern lj_meta_for // (lua_State *L, TValue *base)
1200 |.if JIT
1201 | decode_OP1 TMP0, SAVE0
1202 |.endif
1203 | decode_RA8 RA, SAVE0
1204 |.if JIT
1205 | cmpwi TMP0, BC_JFORI
1206 |.endif
1207 | decode_RD8 RD, SAVE0
1208 |.if JIT
1209 | beqy =>BC_JFORI // With JIT: the opcode was BC_JFORI, so continue there.
1210 |.endif
1211 | b =>BC_FORI
1212 |
1213 |//-----------------------------------------------------------------------
1214 |//-- Fast functions -----------------------------------------------------
1215 |//-----------------------------------------------------------------------
1216 |
1217 |.macro .ffunc, name // Emits only the ->ff_<name> entry label.
1218 |->ff_ .. name:
1219 |.endmacro
1220 |
1221 |.macro .ffunc_1, name // Entry label + at-least-1-arg check; CARG3/CARG1 = words at 0/4 of arg 1.
1222 |->ff_ .. name:
1223 | cmplwi NARGS8:RC, 8 // Callers may still test cr0 from this compare (eq = exactly 1 arg).
1224 | lwz CARG3, 0(BASE)
1225 | lwz CARG1, 4(BASE)
1226 | blt ->fff_fallback
1227 |.endmacro
1228 |
1229 |.macro .ffunc_2, name // Entry label + at-least-2-args check; CARG3/CARG1 = arg 1 words, CARG4/CARG2 = arg 2 words.
1230 |->ff_ .. name:
1231 | cmplwi NARGS8:RC, 16
1232 | lwz CARG3, 0(BASE)
1233 | lwz CARG4, 8(BASE)
1234 | lwz CARG1, 4(BASE)
1235 | lwz CARG2, 12(BASE)
1236 | blt ->fff_fallback
1237 |.endmacro
1238 |
1239 |.macro .ffunc_n, name // Entry label + 1 arg loaded as a double in FARG1; falls back if checknum yields ge.
1240 |->ff_ .. name:
1241 | cmplwi NARGS8:RC, 8
1242 | lwz CARG3, 0(BASE)
1243 | lfd FARG1, 0(BASE)
1244 | blt ->fff_fallback
1245 | checknum CARG3; bge ->fff_fallback
1246 |.endmacro
1247 |
1248 |.macro .ffunc_nn, name // Same as .ffunc_n, but for two arguments (FARG1, FARG2).
1249 |->ff_ .. name:
1250 | cmplwi NARGS8:RC, 16
1251 | lwz CARG3, 0(BASE)
1252 | lfd FARG1, 0(BASE)
1253 | lwz CARG4, 8(BASE)
1254 | lfd FARG2, 8(BASE)
1255 | blt ->fff_fallback
1256 | checknum CARG3; bge ->fff_fallback
1257 | checknum CARG4; bge ->fff_fallback
1258 |.endmacro
1259 |
1260 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
1261 |.macro ffgccheck
1262 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
1263 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
1264 | cmplw TMP0, TMP1
1265 | bgel ->fff_gcstep // Branch-and-link to the GC step when gc.total >= gc.threshold (unsigned).
1266 |.endmacro
1267 |
1268 |//-- Base library: checks -----------------------------------------------
1269 |
1270 |.ffunc_1 assert // Falls back when the tag of arg 1 is >= LJ_TFALSE (unsigned); otherwise returns all arguments.
1271 | li TMP1, LJ_TFALSE
1272 | la RA, -8(BASE) // Results start one slot below BASE.
1273 | cmplw cr1, CARG3, TMP1
1274 | lwz PC, FRAME_PC(BASE)
1275 | bge cr1, ->fff_fallback
1276 | stw CARG3, 0(RA)
1277 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1278 | stw CARG1, 4(RA)
1279 | beq ->fff_res // Done if exactly 1 argument.
1280 | li TMP1, 8
1281 | subi RC, RC, 8
1282 |1: // Copy the remaining arguments from BASE+TMP1 to RA+TMP1 (one slot down).
1283 | cmplw TMP1, RC
1284 | lfdx f0, BASE, TMP1
1285 | stfdx f0, RA, TMP1
1286 | addi TMP1, TMP1, 8
1287 | bney <1 // Uses the compare made before TMP1 was advanced.
1288 | b ->fff_res
1289 |
1290 |.ffunc type // Loads the result from the CFUNC upvalue array, indexed by a value derived from the tag of arg 1.
1291 | cmplwi NARGS8:RC, 8
1292 | lwz CARG1, 0(BASE)
1293 | blt ->fff_fallback
1294 | .gpr64 extsw CARG1, CARG1
1295 | subfc TMP0, TISNUM, CARG1 // TMP0 = tag - TISNUM; carry set if tag >= TISNUM (unsigned).
1296 | subfe TMP2, CARG1, CARG1 // TMP2 = 0 if carry set, else -1.
1297 | orc TMP1, TMP2, TMP0 // ~(tag - TISNUM), or -1 for every tag below TISNUM.
1298 | addi TMP1, TMP1, ~LJ_TISNUM+1
1299 | slwi TMP1, TMP1, 3 // Scale by 8 bytes per upvalue slot.
1300 | la TMP2, CFUNC:RB->upvalue
1301 | lfdx FARG1, TMP2, TMP1
1302 | b ->fff_resn
1303 |
1304 |//-- Base library: getters and setters ---------------------------------
1305 |
1306 |.ffunc_1 getmetatable
1307 | checktab CARG3; bne >6
1308 |1: // Field metatable must be at same offset for GCtab and GCudata!
1309 | lwz TAB:CARG1, TAB:CARG1->metatable
1310 |2:
1311 | li CARG3, LJ_TNIL
1312 | cmplwi TAB:CARG1, 0
1313 | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1314 | beq ->fff_restv // No metatable: return nil.
1315 | lwz TMP0, TAB:CARG1->hmask
1316 | li CARG3, LJ_TTAB // Use metatable as default result.
1317 | lwz TMP1, STR:RC->hash
1318 | lwz NODE:TMP2, TAB:CARG1->node
1319 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
1320 | slwi TMP0, TMP1, 5
1321 | slwi TMP1, TMP1, 3
1322 | sub TMP1, TMP0, TMP1
1323 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
1324 |3: // Rearranged logic, because we expect _not_ to find the key.
1325 | lwz CARG4, NODE:TMP2->key
1326 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2)
1327 | lwz CARG2, NODE:TMP2->val
1328 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2)
1329 | checkstr CARG4; bne >4
1330 | cmpw TMP0, STR:RC; beq >5 // Key holds the same string object as RC?
1331 |4:
1332 | lwz NODE:TMP2, NODE:TMP2->next // Follow the node chain.
1333 | cmplwi NODE:TMP2, 0
1334 | beq ->fff_restv // Not found, keep default result.
1335 | b <3
1336 |5:
1337 | checknil CARG2
1338 | beq ->fff_restv // Ditto for nil value.
1339 | mr CARG3, CARG2 // Return value of mt.__metatable.
1340 | mr CARG1, TMP1
1341 | b ->fff_restv
1342 |
1343 |6: // Not a table: userdata rejoins at 1:, other tags index gcroot[GCROOT_BASEMT].
1344 | cmpwi CARG3, LJ_TUDATA; beq <1
1345 | .gpr64 extsw CARG3, CARG3
1346 | subfc TMP0, TISNUM, CARG3 // Same tag-to-index sequence as in ff_type, ...
1347 | subfe TMP2, CARG3, CARG3
1348 | orc TMP1, TMP2, TMP0
1349 | addi TMP1, TMP1, ~LJ_TISNUM+1
1350 | slwi TMP1, TMP1, 2 // ... but scaled by 4 bytes per entry here.
1351 | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
1352 | lwzx TAB:CARG1, TMP2, TMP1
1353 | b <2
1354 |
1355 |.ffunc_2 setmetatable
1356 | // Fast path: no mt for table yet and not clearing the mt.
1357 | checktab CARG3; bne ->fff_fallback
1358 | lwz TAB:TMP1, TAB:CARG1->metatable
1359 | checktab CARG4; bne ->fff_fallback
1360 | cmplwi TAB:TMP1, 0
1361 | lbz TMP3, TAB:CARG1->marked
1362 | bne ->fff_fallback // Table already has a metatable.
1363 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
1364 | stw TAB:CARG2, TAB:CARG1->metatable
1365 | beq ->fff_restv // Not black: no barrier needed; CARG3/CARG1 still hold arg 1 as the result.
1366 | barrierback TAB:CARG1, TMP3, TMP0
1367 | b ->fff_restv
1368 |
1369 |.ffunc rawget // Table check on arg 1, then lj_tab_get(L, t, address of arg 2).
1370 | cmplwi NARGS8:RC, 16
1371 | lwz CARG4, 0(BASE)
1372 | lwz TAB:CARG2, 4(BASE)
1373 | blt ->fff_fallback
1374 | checktab CARG4; bne ->fff_fallback
1375 | la CARG3, 8(BASE)
1376 | mr CARG1, L
1377 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1378 | // Returns cTValue *.
1379 | lfd FARG1, 0(CRET1) // Copy the 8-byte value; fff_resn stores it as the single result.
1380 | b ->fff_resn
1381 |
1382 |//-- Base library: conversions ------------------------------------------
1383 |
1384 |.ffunc tonumber
1385 | // Only handles the number case inline (without a base argument).
1386 | cmplwi NARGS8:RC, 8
1387 | lwz CARG1, 0(BASE)
1388 | lfd FARG1, 0(BASE)
1389 | bne ->fff_fallback // Exactly one argument.
1390 | checknum CARG1; bgt ->fff_fallback
1391 | b ->fff_resn
1392 |
1393 |.ffunc_1 tostring
1394 | // Only handles the string or number case inline.
1395 | checkstr CARG3
1396 | // A __tostring method in the string base metatable is ignored.
1397 | beq ->fff_restv // String key?
1398 | // Handle numbers inline, unless a number base metatable is present.
1399 | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1400 | checknum CARG3
1401 | cmplwi cr1, TMP0, 0
1402 | stp BASE, L->base // Add frame since C call can throw.
1403 | crorc 4*cr0+eq, 4*cr0+gt, 4*cr1+eq // cr0.eq = cr0.gt || !cr1.eq (cr1.eq = no number base metatable).
1404 | stw PC, SAVE_PC // Redundant (but a defined value).
1405 | beq ->fff_fallback
1406 | ffgccheck
1407 | mr CARG1, L
1408 | mr CARG2, BASE // Address of arg 1.
1409 |.if DUALNUM
1410 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1411 |.else
1412 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1413 |.endif
1414 | // Returns GCstr *.
1415 | li CARG3, LJ_TSTR
1416 | b ->fff_restv
1417 |
1418 |//-- Base library: iterators -------------------------------------------
1419 |
1420 |.ffunc next
1421 | cmplwi NARGS8:RC, 8
1422 | lwz CARG1, 0(BASE)
1423 | lwz TAB:CARG2, 4(BASE)
1424 | blt ->fff_fallback
1425 | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
1426 | checktab CARG1
1427 | lwz PC, FRAME_PC(BASE)
1428 | bne ->fff_fallback
1429 | stp BASE, L->base // Add frame since C call can throw.
1430 | mr CARG1, L
1431 | stp BASE, L->top // Dummy frame length is ok.
1432 | la CARG3, 8(BASE) // Key = address of arg 2; key and value are read back from 8/16(BASE) below.
1433 | stw PC, SAVE_PC
1434 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1435 | // Returns 0 at end of traversal.
1436 | cmplwi CRET1, 0
1437 | li CARG3, LJ_TNIL
1438 | beq ->fff_restv // End of traversal: return nil.
1439 | lfd f0, 8(BASE) // Copy key and value to results.
1440 | la RA, -8(BASE)
1441 | lfd f1, 16(BASE)
1442 | stfd f0, 0(RA)
1443 | li RD, (2+1)*8
1444 | stfd f1, 8(RA)
1445 | b ->fff_res
1446 |
1447 |.ffunc_1 pairs // Inline only for a table without metatable: returns upvalue[0], the table, nil.
1448 | checktab CARG3
1449 | lwz PC, FRAME_PC(BASE)
1450 | bne ->fff_fallback
1451 | lwz TAB:TMP2, TAB:CARG1->metatable
1452 | lfd f0, CFUNC:RB->upvalue[0]
1453 | cmplwi TAB:TMP2, 0
1454 | la RA, -8(BASE)
1455 | bne ->fff_fallback // Table has a metatable.
1456 | stw TISNIL, 8(BASE) // Third result: nil. The table itself is already at 0(BASE).
1457 | li RD, (3+1)*8
1458 | stfd f0, 0(RA)
1459 | b ->fff_res
1460 |
1461 |.ffunc ipairs_aux // (t, i): returns i+1 and t[i+1], or no results when that value is nil/absent.
1462 | cmplwi NARGS8:RC, 16
1463 | lwz CARG3, 0(BASE)
1464 | lwz TAB:CARG1, 4(BASE)
1465 | lwz CARG4, 8(BASE)
1466 |.if DUALNUM
1467 | lwz TMP2, 12(BASE)
1468 |.else
1469 | lfd FARG2, 8(BASE)
1470 |.endif
1471 | blt ->fff_fallback
1472 | checktab CARG3
1473 | checknum cr1, CARG4
1474 | lwz PC, FRAME_PC(BASE)
1475 |.if DUALNUM
1476 | bne ->fff_fallback
1477 | bne cr1, ->fff_fallback
1478 |.else
1479 | lus TMP0, 0x3ff0 // High word of the double 1.0; the low word is ZERO.
1480 | stw ZERO, TMPD_LO
1481 | bne ->fff_fallback
1482 | stw TMP0, TMPD_HI
1483 | bge cr1, ->fff_fallback
1484 | lfd FARG1, TMPD // FARG1 = 1.0
1485 | toint TMP2, FARG2, f0
1486 |.endif
1487 | lwz TMP0, TAB:CARG1->asize
1488 | lwz TMP1, TAB:CARG1->array
1489 |.if not DUALNUM
1490 | fadd FARG2, FARG2, FARG1 // FP copy of the index, incremented as well.
1491 |.endif
1492 | addi TMP2, TMP2, 1 // Next integer index.
1493 | la RA, -8(BASE)
1494 | cmplw TMP0, TMP2 // asize vs. index (unsigned).
1495 |.if DUALNUM
1496 | stw TISNUM, 0(RA)
1497 | slwi TMP3, TMP2, 3
1498 | stw TMP2, 4(RA)
1499 |.else
1500 | slwi TMP3, TMP2, 3
1501 | stfd FARG2, 0(RA)
1502 |.endif
1503 | ble >2 // Not in array part?
1504 | lwzx TMP2, TMP1, TMP3 // Word at offset 0 of array[index] (tested by checknil below) ...
1505 | lfdx f0, TMP1, TMP3 // ... and the whole 8-byte value.
1506 |1:
1507 | checknil TMP2
1508 | li RD, (0+1)*8
1509 | beq ->fff_res // End of iteration, return 0 results.
1510 | li RD, (2+1)*8
1511 | stfd f0, 8(RA)
1512 | b ->fff_res
1513 |2: // Check for empty hash part first. Otherwise call C function.
1514 | lwz TMP0, TAB:CARG1->hmask
1515 | cmplwi TMP0, 0
1516 | li RD, (0+1)*8
1517 | beq ->fff_res
1518 | mr CARG2, TMP2
1519 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1520 | // Returns cTValue * or NULL.
1521 | cmplwi CRET1, 0
1522 | li RD, (0+1)*8
1523 | beq ->fff_res
1524 | lwz TMP2, 0(CRET1)
1525 | lfd f0, 0(CRET1)
1526 | b <1
1527 |
1528 |.ffunc_1 ipairs // Inline only for a table without metatable: returns upvalue[0], the table, 0.
1529 | checktab CARG3
1530 | lwz PC, FRAME_PC(BASE)
1531 | bne ->fff_fallback
1532 | lwz TAB:TMP2, TAB:CARG1->metatable
1533 | lfd f0, CFUNC:RB->upvalue[0]
1534 | cmplwi TAB:TMP2, 0
1535 | la RA, -8(BASE)
1536 | bne ->fff_fallback
1537 |.if DUALNUM
1538 | stw TISNUM, 8(BASE) // Integer 0: TISNUM in the first word, 0 in the second.
1539 |.else
1540 | stw ZERO, 8(BASE) // Both words zero, i.e. the double 0.0.
1541 |.endif
1542 | stw ZERO, 12(BASE)
1543 | li RD, (3+1)*8
1544 | stfd f0, 0(RA)
1545 | b ->fff_res
1546 |
1547 |//-- Base library: catch errors ----------------------------------------
1548 |
1549 |.ffunc pcall
1550 | cmplwi NARGS8:RC, 8
1551 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1552 | blt ->fff_fallback
1553 | mr TMP2, BASE // Keep the old base in TMP2 for vm_call_dispatch.
1554 | la BASE, 8(BASE) // New base is one slot up, so arg 1 sits in the slot just below it.
1555 | // Remember active hook before pcall.
1556 | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 // TMP3 = (hookmask >> HOOK_ACTIVE_SHIFT) & 1.
1557 | subi NARGS8:RC, NARGS8:RC, 8 // One argument (the called function) consumed.
1558 | addi PC, TMP3, 8+FRAME_PCALL // Frame link: delta 8 + FRAME_PCALL, plus the hook bit.
1559 | b ->vm_call_dispatch
1560 |
1561 |.ffunc xpcall
1562 | cmplwi NARGS8:RC, 16
1563 | lwz CARG4, 8(BASE)
1564 | lfd FARG2, 8(BASE)
1565 | lfd FARG1, 0(BASE)
1566 | blt ->fff_fallback
1567 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1568 | mr TMP2, BASE // Keep the old base in TMP2 for vm_call_dispatch.
1569 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
1570 | la BASE, 16(BASE)
1571 | // Remember active hook before pcall.
1572 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1573 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1574 | subi NARGS8:RC, NARGS8:RC, 16
1575 | stfd FARG1, 8(TMP2)
1576 | addi PC, TMP1, 16+FRAME_PCALL
1577 | b ->vm_call_dispatch
1578 |
1579 |//-- Coroutine library --------------------------------------------------
1580 |
1581 |.macro coroutine_resume_wrap, resume // Expands to ff_coroutine_resume (resume != 0) or ff_coroutine_wrap_aux.
1582 |.if resume
1583 |.ffunc_1 coroutine_resume
1584 | cmpwi CARG3, LJ_TTHREAD; bne ->fff_fallback
1585 |.else
1586 |.ffunc coroutine_wrap_aux
1587 | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr // The coroutine comes from upvalue 0 instead of arg 1.
1588 |.endif
1589 | lbz TMP0, L:CARG1->status
1590 | lp TMP1, L:CARG1->cframe
1591 | lp CARG2, L:CARG1->top
1592 | cmplwi cr0, TMP0, LUA_YIELD
1593 | lp TMP2, L:CARG1->base
1594 | cmplwi cr1, TMP1, 0
1595 | lwz TMP0, L:CARG1->maxstack
1596 | cmplw cr7, CARG2, TMP2
1597 | lwz PC, FRAME_PC(BASE)
1598 | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0
1599 | add TMP2, CARG2, NARGS8:RC // TMP2 = co->top + nargs*8 (new top of the coroutine).
1600 | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD
1601 | cmplw cr1, TMP2, TMP0 // New top beyond co->maxstack?
1602 | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
1603 | stw PC, SAVE_PC
1604 | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov
1605 | stp BASE, L->base
1606 | blt cr6, ->fff_fallback
1607 |1:
1608 |.if resume
1609 | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
1610 | subi NARGS8:RC, NARGS8:RC, 8
1611 | subi TMP2, TMP2, 8 // One argument less to move, so the new coroutine top is one slot lower.
1612 |.endif
1613 | stp TMP2, L:CARG1->top
1614 | li TMP1, 0
1615 | stp BASE, L->top
1616 |2: // Move args to coroutine.
1617 | cmpw TMP1, NARGS8:RC
1618 | lfdx f0, BASE, TMP1
1619 | beq >3
1620 | stfdx f0, CARG2, TMP1
1621 | addi TMP1, TMP1, 8
1622 | b <2
1623 |3:
1624 | li CARG3, 0
1625 | mr L:SAVE0, L:CARG1 // Keep the coroutine pointer in SAVE0 across the call.
1626 | li CARG4, 0
1627 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1628 | // Returns thread status.
1629 |4:
1630 | lp TMP2, L:SAVE0->base
1631 | cmplwi CRET1, LUA_YIELD
1632 | lp TMP3, L:SAVE0->top
1633 | li_vmstate INTERP
1634 | lp BASE, L->base
1635 | stw L, DISPATCH_GL(cur_L)(DISPATCH) // The calling thread becomes the current one again.
1636 | st_vmstate
1637 | bgt >8 // Status above LUA_YIELD (unsigned): error.
1638 | sub RD, TMP3, TMP2 // RD = co->top - co->base = size of the results in bytes.
1639 | lwz TMP0, L->maxstack
1640 | cmplwi RD, 0
1641 | add TMP1, BASE, RD
1642 | beq >6 // No results?
1643 | cmplw TMP1, TMP0
1644 | li TMP1, 0
1645 | bgt >9 // Need to grow stack?
1646 |
1647 | subi TMP3, RD, 8 // Offset of the last result.
1648 | stp TMP2, L:SAVE0->top // Clear coroutine stack.
1649 |5: // Move results from coroutine.
1650 | cmplw TMP1, TMP3
1651 | lfdx f0, TMP2, TMP1
1652 | stfdx f0, BASE, TMP1
1653 | addi TMP1, TMP1, 8
1654 | bne <5
1655 |6:
1656 | andix. TMP0, PC, FRAME_TYPE // cr0 is consumed by the beq at 7: below.
1657 |.if resume
1658 | li TMP1, LJ_TTRUE
1659 | la RA, -8(BASE)
1660 | stw TMP1, -8(BASE) // Prepend true to results.
1661 | addi RD, RD, 16
1662 |.else
1663 | mr RA, BASE
1664 | addi RD, RD, 8
1665 |.endif
1666 |7:
1667 | stw PC, SAVE_PC
1668 | mr MULTRES, RD
1669 | beq ->BC_RET_Z // (PC & FRAME_TYPE) == 0.
1670 | b ->vm_return
1671 |
1672 |8: // Coroutine returned with error (at co->top-1).
1673 |.if resume
1674 | andix. TMP0, PC, FRAME_TYPE
1675 | la TMP3, -8(TMP3)
1676 | li TMP1, LJ_TFALSE
1677 | lfd f0, 0(TMP3)
1678 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
1679 | li RD, (2+1)*8
1680 | stw TMP1, -8(BASE) // Prepend false to results.
1681 | la RA, -8(BASE)
1682 | stfd f0, 0(BASE) // Copy error message.
1683 | b <7
1684 |.else
1685 | mr CARG1, L
1686 | mr CARG2, L:SAVE0
1687 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) NOTE(review): nothing follows this call in the .else arm, so it presumably never returns -- confirm.
1688 |.endif
1689 |
1690 |9: // Handle stack expansion on return from yield.
1691 | mr CARG1, L
1692 | srwi CARG2, RD, 3 // Bytes -> number of 8-byte slots.
1693 | bl extern lj_state_growstack // (lua_State *L, int n)
1694 | li CRET1, 0 // Status 0, then redo the result handling at 4:.
1695 | b <4
1696 |.endmacro
1697 |
1698 | coroutine_resume_wrap 1 // coroutine.resume
1699 | coroutine_resume_wrap 0 // coroutine.wrap
1700 |
1701 |.ffunc coroutine_yield // Falls back unless L->cframe has CFRAME_RESUME set; otherwise sets status LUA_YIELD and leaves via vm_leave_unw.
1702 | lp TMP0, L->cframe
1703 | add TMP1, BASE, NARGS8:RC
1704 | stp BASE, L->base
1705 | andix. TMP0, TMP0, CFRAME_RESUME
1706 | stp TMP1, L->top
1707 | li CRET1, LUA_YIELD
1708 | beq ->fff_fallback
1709 | stp ZERO, L->cframe
1710 | stb CRET1, L->status
1711 | b ->vm_leave_unw
1712 |
1713 |//-- Math library -------------------------------------------------------
1714 |
1715 |.ffunc_1 math_abs
1716 | checknum CARG3
1717 |.if DUALNUM
1718 | bne >2
1719 | srawi TMP1, CARG1, 31 // TMP1 = sign mask (0 or -1).
1720 | xor TMP2, TMP1, CARG1
1721 |.if GPR64
1722 | lus TMP0, 0x8000
1723 | sub CARG1, TMP2, TMP1 // |x| = (x ^ mask) - mask.
1724 | cmplw CARG1, TMP0
1725 | beq >1 // Result is 0x80000000: not representable as int32.
1726 |.else
1727 | sub. CARG1, TMP2, TMP1 // |x| = (x ^ mask) - mask.
1728 | blt >1 // Still negative: not representable as int32.
1729 |.endif
1730 |->fff_resi: // Return the integer in CRET1 as the single result (DUALNUM variant).
1731 | lwz PC, FRAME_PC(BASE)
1732 | la RA, -8(BASE)
1733 | stw TISNUM, -8(BASE)
1734 | stw CRET1, -4(BASE)
1735 | b ->fff_res1
1736 |1:
1737 | lus CARG3, 0x41e0 // 2^31.
1738 | li CARG1, 0
1739 | b ->fff_restv
1740 |2:
1741 |.endif
1742 | bge ->fff_fallback
1743 | rlwinm CARG3, CARG3, 0, 1, 31 // Clear the top (sign) bit of the first word.
1744 | // Fallthrough.
1745 |
1746 |->fff_restv:
1747 | // CARG3/CARG1 = TValue result.
1748 | lwz PC, FRAME_PC(BASE)
1749 | stw CARG3, -8(BASE)
1750 | la RA, -8(BASE)
1751 | stw CARG1, -4(BASE)
1752 |->fff_res1:
1753 | // RA = results, PC = return.
1754 | li RD, (1+1)*8
1755 |->fff_res:
1756 | // RA = results, RD = (nresults+1)*8, PC = return.
1757 | andix. TMP0, PC, FRAME_TYPE
1758 | mr MULTRES, RD
1759 | bney ->vm_return // (PC & FRAME_TYPE) != 0: leave through vm_return.
1760 | lwz INS, -4(PC) // Instruction before the return PC.
1761 | decode_RB8 RB, INS
1762 |5:
1763 | cmplw RB, RD // More results expected?
1764 | decode_RA8 TMP0, INS
1765 | bgt >6
1766 | ins_next1
1767 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1768 | sub BASE, RA, TMP0
1769 | ins_next2
1770 |
1771 |6: // Fill up results with nil.
1772 | subi TMP1, RD, 8
1773 | addi RD, RD, 8
1774 | stwx TISNIL, RA, TMP1
1775 | b <5
1776 |
1777 |.macro math_extern, func // ff_math_<func>: one number argument, result from the external function <func>.
1778 | .ffunc_n math_ .. func
1779 | blex func
1780 | b ->fff_resn
1781 |.endmacro
1782 |
1783 |.macro math_extern2, func // Same for two number arguments.
1784 | .ffunc_nn math_ .. func
1785 | blex func
1786 | b ->fff_resn
1787 |.endmacro
1788 |
1789 |.macro math_round, func // Inline floor/ceil on the two words of a double (CARG3 = first word, CARG1 = second word).
1790 | .ffunc_1 math_ .. func
1791 | checknum CARG3; beqy ->fff_restv
1792 | rlwinm TMP2, CARG3, 12, 21, 31 // Extract the 11-bit biased exponent.
1793 | bge ->fff_fallback
1794 | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
1795 | cmplwi cr1, TMP2, 31 // 0 <= exp < 31?
1796 | subfic TMP0, TMP2, 31
1797 | blt >3
1798 | slwi TMP1, CARG3, 11
1799 | srwi TMP3, CARG1, 21
1800 | oris TMP1, TMP1, 0x8000 // Set the top bit (presumably the implicit mantissa bit).
1801 | addi TMP2, TMP2, 1
1802 | or TMP1, TMP1, TMP3
1803 | slwi CARG2, CARG1, 11
1804 | bge cr1, >4
1805 | slw TMP3, TMP1, TMP2
1806 | srw RD, TMP1, TMP0
1807 | or TMP3, TMP3, CARG2
1808 | srawi TMP2, CARG3, 31 // TMP2 = sign mask (0 or -1).
1809 |.if "func" == "floor"
1810 | and TMP1, TMP3, TMP2
1811 | addic TMP0, TMP1, -1
1812 | subfe TMP1, TMP0, TMP1 // TMP1 = (TMP1 != 0) via the carry of addic.
1813 | add CARG1, RD, TMP1
1814 | xor CARG1, CARG1, TMP2
1815 | sub CARG1, CARG1, TMP2 // Apply the sign: (v ^ mask) - mask.
1816 | b ->fff_resi
1817 |.else
1818 | andc TMP1, TMP3, TMP2
1819 | addic TMP0, TMP1, -1
1820 | subfe TMP1, TMP0, TMP1 // TMP1 = (TMP1 != 0) via the carry of addic.
1821 | add CARG1, RD, TMP1
1822 | cmpw CARG1, RD
1823 | xor CARG1, CARG1, TMP2
1824 | sub CARG1, CARG1, TMP2
1825 | bge ->fff_resi
1826 | // Overflow to 2^31.
1827 | lus CARG3, 0x41e0 // 2^31.
1828 | li CARG1, 0
1829 | b ->fff_restv
1830 |.endif
1831 |3: // |x| < 1
1832 | slwi TMP2, CARG3, 1
1833 | srawi TMP1, CARG3, 31
1834 | or TMP2, CARG1, TMP2 // ztest = (hi+hi) | lo
1835 |.if "func" == "floor"
1836 | and TMP1, TMP2, TMP1 // (ztest & sign) == 0 ? 0 : -1
1837 | subfic TMP2, TMP1, 0
1838 | subfe CARG1, CARG1, CARG1
1839 |.else
1840 | andc TMP1, TMP2, TMP1 // (ztest & ~sign) == 0 ? 0 : 1
1841 | addic TMP2, TMP1, -1
1842 | subfe CARG1, TMP2, TMP1
1843 |.endif
1844 | b ->fff_resi
1845 |4: // exp >= 31. Check for -(2^31).
1846 | xoris TMP1, TMP1, 0x8000
1847 | srawi TMP2, CARG3, 31
1848 |.if "func" == "floor"
1849 | or TMP1, TMP1, CARG2
1850 |.endif
1851 |.if PPE
1852 | orc TMP1, TMP1, TMP2 // PPE: separate compare instead of the dot form.
1853 | cmpwi TMP1, 0
1854 |.else
1855 | orc. TMP1, TMP1, TMP2
1856 |.endif
1857 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq // Additionally require cr1.eq from the exp vs. 31 compare.
1858 | lus CARG1, 0x8000 // -(2^31).
1859 | beqy ->fff_resi
1860 |5: // Everything else: reload the argument as a double and call the external function.
1861 | lfd FARG1, 0(BASE)
1862 | blex func
1863 | b ->fff_resn
1864 |.endmacro
1865 |
1866 |.if DUALNUM
1867 | math_round floor
1868 | math_round ceil
1869 |.else
1870 | // NYI: use internal implementation.
1871 | math_extern floor
1872 | math_extern ceil
1873 |.endif
1874 |
1875 |.if SQRT
1876 |.ffunc_n math_sqrt // With SQRT defined: use the fsqrt instruction directly.
1877 | fsqrt FARG1, FARG1
1878 | b ->fff_resn
1879 |.else
1880 | math_extern sqrt
1881 |.endif
1882 |
1883 |.ffunc math_log // Unlike math_extern, this requires exactly one argument.
1884 | cmplwi NARGS8:RC, 8
1885 | lwz CARG3, 0(BASE)
1886 | lfd FARG1, 0(BASE)
1887 | bne ->fff_fallback // Need exactly 1 argument.
1888 | checknum CARG3; bge ->fff_fallback
1889 | blex log
1890 | b ->fff_resn
1891 |
1892 | math_extern log10
1893 | math_extern exp
1894 | math_extern sin
1895 | math_extern cos
1896 | math_extern tan
1897 | math_extern asin
1898 | math_extern acos
1899 | math_extern atan
1900 | math_extern sinh
1901 | math_extern cosh
1902 | math_extern tanh
1903 | math_extern2 pow
1904 | math_extern2 atan2
1905 | math_extern2 fmod
1906 |
1907 |.if DUALNUM
1908 |.ffunc math_ldexp // DUALNUM: arg 1 must be a non-integer number, arg 2 must hit the checknum "eq" case.
1909 | cmplwi NARGS8:RC, 16
1910 | lwz CARG3, 0(BASE)
1911 | lfd FARG1, 0(BASE)
1912 | lwz CARG4, 8(BASE)
1913 |.if GPR64
1914 | lwz CARG2, 12(BASE) // The integer argument register differs between GPR64 and 32 bit builds.
1915 |.else
1916 | lwz CARG1, 12(BASE)
1917 |.endif
1918 | blt ->fff_fallback
1919 | checknum CARG3; bge ->fff_fallback
1920 | checknum CARG4; bne ->fff_fallback
1921 |.else
1922 |.ffunc_nn math_ldexp
1923 |.if GPR64
1924 | toint CARG2, FARG2
1925 |.else
1926 | toint CARG1, FARG2
1927 |.endif
1928 |.endif
1929 | blex ldexp
1930 | b ->fff_resn
1931 |
1932 |.ffunc_n math_frexp // Returns two results: the value returned by frexp() and the integer it stores in tmptv.
1933 |.if GPR64
1934 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1935 |.else
1936 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
1937 |.endif
1938 | lwz PC, FRAME_PC(BASE)
1939 | blex frexp
1940 | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) // Read back the int written through the pointer passed above.
1941 | la RA, -8(BASE)
1942 |.if not DUALNUM
1943 | tonum_i FARG2, TMP1
1944 |.endif
1945 | stfd FARG1, 0(RA)
1946 | li RD, (2+1)*8
1947 |.if DUALNUM
1948 | stw TISNUM, 8(RA)
1949 | stw TMP1, 12(RA)
1950 |.else
1951 | stfd FARG2, 8(RA)
1952 |.endif
1953 | b ->fff_res
1954 |
1955 |.ffunc_n math_modf // modf() writes through the pointer to -8(BASE), i.e. straight into the first result slot.
1956 |.if GPR64
1957 | la CARG2, -8(BASE)
1958 |.else
1959 | la CARG1, -8(BASE)
1960 |.endif
1961 | lwz PC, FRAME_PC(BASE)
1962 | blex modf
1963 | la RA, -8(BASE)
1964 | stfd FARG1, 0(BASE) // The returned value becomes the second result.
1965 | li RD, (2+1)*8
1966 | b ->fff_res
1967 |
1968 |.macro math_minmax, name, ismax // Folds all arguments into one min (ismax == 0) or max result.
1969 |.if DUALNUM
1970 | .ffunc_1 name
1971 | checknum CARG3
1972 | addi TMP1, BASE, 8 // TMP1 = address of the next argument.
1973 | add TMP2, BASE, NARGS8:RC // TMP2 = end of the arguments.
1974 | bne >4
1975 |1: // Handle integers.
1976 | lwz CARG4, 0(TMP1)
1977 | cmplw cr1, TMP1, TMP2
1978 | lwz CARG2, 4(TMP1)
1979 | bge cr1, ->fff_resi // All arguments consumed: return the integer in CARG1.
1980 | checknum CARG4
1981 | xoris TMP0, CARG1, 0x8000 // Flip the sign bits so the unsigned carry below reflects a signed compare.
1982 | xoris TMP3, CARG2, 0x8000
1983 | bne >3
1984 | subfc TMP3, TMP3, TMP0 // TMP3 = CARG1 - CARG2 (mod 2^32), carry = no borrow.
1985 | subfe TMP0, TMP0, TMP0 // TMP0 = 0 or -1 from the carry.
1986 |.if ismax
1987 | andc TMP3, TMP3, TMP0
1988 |.else
1989 | and TMP3, TMP3, TMP0
1990 |.endif
1991 | add CARG1, TMP3, CARG2 // CARG2 + (difference or 0) selects one of the two inputs.
1992 |.if GPR64
1993 | rldicl CARG1, CARG1, 0, 32
1994 |.endif
1995 | addi TMP1, TMP1, 8
1996 | b <1
1997 |3:
1998 | bge ->fff_fallback
1999 | // Convert intermediate result to number and continue below.
2000 | tonum_i FARG1, CARG1
2001 | lfd FARG2, 0(TMP1)
2002 | b >6
2003 |4:
2004 | lfd FARG1, 0(BASE)
2005 | bge ->fff_fallback
2006 |5: // Handle numbers.
2007 | lwz CARG4, 0(TMP1)
2008 | cmplw cr1, TMP1, TMP2
2009 | lfd FARG2, 0(TMP1)
2010 | bge cr1, ->fff_resn // All arguments consumed: return the number in FARG1.
2011 | checknum CARG4; bge >7
2012 |6:
2013 | fsub f0, FARG1, FARG2
2014 | addi TMP1, TMP1, 8
2015 |.if ismax
2016 | fsel FARG1, f0, FARG1, FARG2 // f0 >= 0 ? FARG1 : FARG2
2017 |.else
2018 | fsel FARG1, f0, FARG2, FARG1 // f0 >= 0 ? FARG2 : FARG1
2019 |.endif
2020 | b <5
2021 |7: // Convert integer to number and continue above.
2022 | lwz CARG2, 4(TMP1)
2023 | bne ->fff_fallback
2024 | tonum_i FARG2, CARG2
2025 | b <6
2026 |.else
2027 | .ffunc_n name
2028 | li TMP1, 8 // Byte offset of the next argument.
2029 |1:
2030 | lwzx CARG2, BASE, TMP1
2031 | lfdx FARG2, BASE, TMP1
2032 | cmplw cr1, TMP1, NARGS8:RC
2033 | checknum CARG2
2034 | bge cr1, ->fff_resn // All arguments consumed.
2035 | bge ->fff_fallback
2036 | fsub f0, FARG1, FARG2
2037 | addi TMP1, TMP1, 8
2038 |.if ismax
2039 | fsel FARG1, f0, FARG1, FARG2 // f0 >= 0 ? FARG1 : FARG2
2040 |.else
2041 | fsel FARG1, f0, FARG2, FARG1 // f0 >= 0 ? FARG2 : FARG1
2042 |.endif
2043 | b <1
2044 |.endif
2045 |.endmacro
2046 |
2047 | math_minmax math_min, 0
2048 | math_minmax math_max, 1
2049 |
2050 |//-- String library -----------------------------------------------------
2051 |
2052 |.ffunc string_byte // Only handle the 1-arg case here.
2053 | cmplwi NARGS8:RC, 8
2054 | lwz CARG3, 0(BASE)
2055 | lwz STR:CARG1, 4(BASE)
2056 | bne ->fff_fallback // Need exactly 1 argument.
2057 | checkstr CARG3
2058 | bne ->fff_fallback
2059 | lwz TMP0, STR:CARG1->len
2060 |.if DUALNUM
2061 | lbz CARG1, STR:CARG1[1] // Access is always ok (NUL at end).
2062 | li RD, (0+1)*8
2063 | lwz PC, FRAME_PC(BASE)
2064 | cmplwi TMP0, 0
2065 | la RA, -8(BASE)
2066 | beqy ->fff_res // Empty string: return no results.
2067 | b ->fff_resi
2068 |.else
2069 | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
2070 | addic TMP3, TMP0, -1 // RD = ((str->len != 0)+1)*8
2071 | subfe RD, TMP3, TMP0
2072 | stw TMP1, TONUM_LO // Inlined tonum_u f0, TMP1.
2073 | addi RD, RD, 1
2074 | lfd f0, TONUM_D
2075 | la RA, -8(BASE)
2076 | lwz PC, FRAME_PC(BASE)
2077 | fsub f0, f0, TOBIT
2078 | slwi RD, RD, 3
2079 | stfd f0, 0(RA)
2080 | b ->fff_res
2081 |.endif
2082 |
2083 |.ffunc string_char // Only handle the 1-arg case here.
2084 | ffgccheck
2085 | cmplwi NARGS8:RC, 8
2086 | lwz CARG3, 0(BASE)
2087 |.if DUALNUM
2088 | lwz TMP0, 4(BASE)
2089 | bne ->fff_fallback // Exactly 1 argument.
2090 | checknum CARG3; bne ->fff_fallback
2091 | la CARG2, 7(BASE) // Address of the last byte of the argument slot (the low byte of the integer, assuming big-endian layout).
2092 |.else
2093 | lfd FARG1, 0(BASE)
2094 | bne ->fff_fallback // Exactly 1 argument.
2095 | checknum CARG3; bge ->fff_fallback
2096 | toint TMP0, FARG1
2097 | la CARG2, TMPD_BLO
2098 |.endif
2099 | li CARG3, 1 // String length 1.
2100 | cmplwi TMP0, 255; bgt ->fff_fallback // Unsigned compare also rejects negative values.
2101 |->fff_newstr: // CARG2 = char *str, CARG3 = length; creates the string and returns it.
2102 | mr CARG1, L
2103 | stp BASE, L->base
2104 | stw PC, SAVE_PC
2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2106 |->fff_resstr:
2107 | // Returns GCstr *.
2108 | lp BASE, L->base // Reload BASE from L->base after the C call.
2109 | li CARG3, LJ_TSTR
2110 | b ->fff_restv
2111 |
2112 |.ffunc string_sub // (str, start [, end]): a missing end defaults to -1.
2113 | ffgccheck
2114 | cmplwi NARGS8:RC, 16
2115 | lwz CARG3, 16(BASE)
2116 |.if not DUALNUM
2117 | lfd f0, 16(BASE)
2118 |.endif
2119 | lwz TMP0, 0(BASE)
2120 | lwz STR:CARG1, 4(BASE)
2121 | blt ->fff_fallback
2122 | lwz CARG2, 8(BASE)
2123 |.if DUALNUM
2124 | lwz TMP1, 12(BASE)
2125 |.else
2126 | lfd f1, 8(BASE)
2127 |.endif
2128 | li TMP2, -1 // Default end.
2129 | beq >1 // Exactly 2 arguments: skip the checks on arg 3.
2130 |.if DUALNUM
2131 | checknum CARG3
2132 | lwz TMP2, 20(BASE)
2133 | bne ->fff_fallback
2134 |1:
2135 | checknum CARG2; bne ->fff_fallback
2136 |.else
2137 | checknum CARG3; bge ->fff_fallback
2138 | toint TMP2, f0
2139 |1:
2140 | checknum CARG2; bge ->fff_fallback
2141 |.endif
2142 | checkstr TMP0; bne ->fff_fallback
2143 |.if not DUALNUM
2144 | toint TMP1, f1
2145 |.endif
2146 | lwz TMP0, STR:CARG1->len
2147 | cmplw TMP0, TMP2 // len < end? (unsigned compare)
2148 | addi TMP3, TMP2, 1
2149 | blt >5
2150 |2:
2151 | cmpwi TMP1, 0 // start <= 0?
2152 | add TMP3, TMP1, TMP0
2153 | ble >7
2154 |3:
2155 | sub CARG3, TMP2, TMP1
2156 | addi CARG2, STR:CARG1, #STR-1
2157 | srawi TMP0, CARG3, 31 // Mask is -1 if end - start is negative.
2158 | addi CARG3, CARG3, 1 // Length = end - start + 1 ...
2159 | add CARG2, CARG2, TMP1
2160 | andc CARG3, CARG3, TMP0 // ... forced to 0 when end - start was negative.
2161 |.if GPR64
2162 | rldicl CARG2, CARG2, 0, 32
2163 | rldicl CARG3, CARG3, 0, 32
2164 |.endif
2165 | b ->fff_newstr
2166 |
2167 |5: // Negative end or overflow.
2168 | cmpw TMP0, TMP2 // len >= end? (signed compare)
2169 | add TMP2, TMP0, TMP3 // Negative end: end = end+len+1.
2170 | bge <2
2171 | mr TMP2, TMP0 // Overflow: end = len.
2172 | b <2
2173 |
2174 |7: // Negative start or underflow.
2175 | .gpr64 extsw TMP1, TMP1
2176 | addic CARG3, TMP1, -1
2177 | subfe CARG3, CARG3, CARG3
2178 | srawi CARG2, TMP3, 31 // Note: modifies carry.
2179 | andc TMP3, TMP3, CARG3
2180 | andc TMP1, TMP3, CARG2
2181 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2182 | b <3
2183 |
2184 |.macro ffstring_op, name // ff_string_<name>: runs lj_buf_putstr_<name> on the global tmpbuf, then lj_buf_tostr.
2185 | .ffunc string_ .. name
2186 | ffgccheck
2187 | cmplwi NARGS8:RC, 8
2188 | lwz CARG3, 0(BASE)
2189 | lwz STR:CARG2, 4(BASE)
2190 | blt ->fff_fallback
2191 | checkstr CARG3
2192 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2193 | bne ->fff_fallback
2194 | lwz TMP0, SBUF:CARG1->b
2195 | stw L, SBUF:CARG1->L
2196 | stp BASE, L->base
2197 | stw PC, SAVE_PC
2198 | stw TMP0, SBUF:CARG1->p // Set sbuf->p = sbuf->b before the call.
2199 | bl extern lj_buf_putstr_ .. name
2200 | bl extern lj_buf_tostr // No argument setup in between: CARG1 is whatever the previous call returned.
2201 | b ->fff_resstr
2202 |.endmacro
2203 |
2204 |ffstring_op reverse
2205 |ffstring_op lower
2206 |ffstring_op upper
2207 |
2208 |//-- Bit library --------------------------------------------------------
2209 |
2210 |.macro .ffunc_bit, name // Entry for bit.<name>: leaves the first argument as a 32 bit integer in CARG1.
2211 |.if DUALNUM
2212 | .ffunc_1 bit_..name
2213 | checknum CARG3; bnel ->fff_tobit_fb // Not the integer case: convert out of line and return here.
2214 |.else
2215 | .ffunc_n bit_..name
2216 | fadd FARG1, FARG1, TOBIT // Add the TOBIT bias (2^52 + 2^51), then pick up the low word of the sum.
2217 | stfd FARG1, TMPD
2218 | lwz CARG1, TMPD_LO
2219 |.endif
2220 |.endmacro
2221 |
2222 |.macro .ffunc_bit_op, name, ins // Folds all arguments into CARG1 with the instruction `ins`.
2223 | .ffunc_bit name
2224 | addi TMP1, BASE, 8 // TMP1 = address of the next argument.
2225 | add TMP2, BASE, NARGS8:RC // TMP2 = end of the arguments.
2226 |1:
2227 | lwz CARG4, 0(TMP1)
2228 | cmplw cr1, TMP1, TMP2
2229 |.if DUALNUM
2230 | lwz CARG2, 4(TMP1)
2231 |.else
2232 | lfd FARG1, 0(TMP1)
2233 |.endif
2234 | bgey cr1, ->fff_resi // All arguments consumed.
2235 | checknum CARG4
2236 |.if DUALNUM
2237 | bnel ->fff_bitop_fb
2238 |.else
2239 | fadd FARG1, FARG1, TOBIT
2240 | bge ->fff_fallback
2241 | stfd FARG1, TMPD
2242 | lwz CARG2, TMPD_LO
2243 |.endif
2244 | ins CARG1, CARG1, CARG2
2245 | addi TMP1, TMP1, 8
2246 | b <1
2247 |.endmacro
2248 |
2249 |.ffunc_bit_op band, and
2250 |.ffunc_bit_op bor, or
2251 |.ffunc_bit_op bxor, xor
2252 |
2253 |.ffunc_bit bswap // Byte-reverses CARG1 with one rotate and two rotate-and-insert steps.
2254 | rotlwi TMP0, CARG1, 8
2255 | rlwimi TMP0, CARG1, 24, 0, 7
2256 | rlwimi TMP0, CARG1, 24, 16, 23
2257 | mr CRET1, TMP0
2258 | b ->fff_resi
2259 |
2260 |.ffunc_bit bnot
2261 | not CRET1, CARG1
2262 | b ->fff_resi
2263 |
2264 |.macro .ffunc_bit_sh, name, ins, shmod // shmod: 1 = mask the count to 5 bits, 2 = negate the count, other = use as-is.
2265 |.if DUALNUM
2266 | .ffunc_2 bit_..name
2267 | checknum CARG3; bnel ->fff_tobit_fb
2268 | // Note: no inline conversion from number for 2nd argument!
2269 | checknum CARG4; bne ->fff_fallback
2270 |.else
2271 | .ffunc_nn bit_..name
2272 | fadd FARG1, FARG1, TOBIT
2273 | fadd FARG2, FARG2, TOBIT
2274 | stfd FARG1, TMPD
2275 | lwz CARG1, TMPD_LO
2276 | stfd FARG2, TMPD
2277 | lwz CARG2, TMPD_LO
2278 |.endif
2279 |.if shmod == 1
2280 | rlwinm CARG2, CARG2, 0, 27, 31 // count &= 31
2281 |.elif shmod == 2
2282 | neg CARG2, CARG2 // Used for ror: rotating left by -n rotates right by n.
2283 |.endif
2284 | ins CRET1, CARG1, CARG2
2285 | b ->fff_resi
2286 |.endmacro
2287 |
2288 |.ffunc_bit_sh lshift, slw, 1
2289 |.ffunc_bit_sh rshift, srw, 1
2290 |.ffunc_bit_sh arshift, sraw, 1
2291 |.ffunc_bit_sh rol, rotlw, 0
2292 |.ffunc_bit_sh ror, rotlw, 2
2293 |
2294 |.ffunc_bit tobit
2295 |.if DUALNUM
2296 | b ->fff_resi
2297 |.else
2298 |->fff_resi: // Non-DUALNUM variant: convert the integer in CRET1 to a number and fall into fff_resn.
2299 | tonum_i FARG1, CRET1
2300 |.endif
2301 |->fff_resn: // Return the number in FARG1 as the single result.
2302 | lwz PC, FRAME_PC(BASE)
2303 | la RA, -8(BASE)
2304 | stfd FARG1, -8(BASE)
2305 | b ->fff_res1
2306 |
2307 |// Fallback FP number to bit conversion.
2308 |->fff_tobit_fb: // Subroutine (entered with bnel, returns with blr): first argument -> CARG1.
2309 |.if DUALNUM
2310 | lfd FARG1, 0(BASE)
2311 | bgt ->fff_fallback // cr0 is presumably still from the caller's checknum -- confirm.
2312 | fadd FARG1, FARG1, TOBIT
2313 | stfd FARG1, TMPD
2314 | lwz CARG1, TMPD_LO
2315 | blr
2316 |.endif
2317 |->fff_bitop_fb: // Same conversion for the argument at 0(TMP1), result in CARG2.
2318 |.if DUALNUM
2319 | lfd FARG1, 0(TMP1)
2320 | bgt ->fff_fallback
2321 | fadd FARG1, FARG1, TOBIT
2322 | stfd FARG1, TMPD
2323 | lwz CARG2, TMPD_LO
2324 | blr
2325 |.endif
2326 |
2327 |//-----------------------------------------------------------------------
2328 |
2329 |->fff_fallback: // Call fast function fallback handler.
2330 | // BASE = new base, RB = CFUNC, RC = nargs*8
2331 | lp TMP3, CFUNC:RB->f
2332 | add TMP1, BASE, NARGS8:RC // TMP1 = end of the arguments (stored to L->top below).
2333 | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
2334 | addi TMP0, TMP1, 8*LUA_MINSTACK
2335 | lwz TMP2, L->maxstack
2336 | stw PC, SAVE_PC // Redundant (but a defined value).
|  // Continuation of ->fff_fallback. TMP3 = CFUNC->f, TMP1 = BASE + nargs*8,
|  // TMP0 = TMP1 + 8*LUA_MINSTACK, TMP2 = L->maxstack (all loaded just above).
|  // TOC builds: the function pointer refers to a struct (code, TOC, env),
|  // so fetch the code address from its first word.
|  .toc lp TMP3, 0(TMP3)
|  cmplw TMP0, TMP2
|  stp BASE, L->base
|  stp TMP1, L->top
|  mr CARG1, L
|  bgt >5  // Need to grow stack.
|  mtctr TMP3
|  bctrl  // (lua_State *L)
|  // Either throws an error, or recovers and returns -1, 0 or nresults+1.
|  lp BASE, L->base
|  cmpwi CRET1, 0
|  slwi RD, CRET1, 3
|  la RA, -8(BASE)
|  bgt ->fff_res  // Returned nresults+1?
|1:  // Returned 0 or -1: retry fast path.
|  // None of the next three instructions touches cr0, so the bne below still
|  // tests the cmpwi above (or the forced 'eq' from the stack-grow path at 5:).
|  lp TMP0, L->top
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  sub NARGS8:RC, TMP0, BASE
|  bne ->vm_call_tail  // Returned -1?
|  ins_callt  // Returned 0: retry fast path.
|
|// Reconstruct previous base for vmeta_call during tailcall.
|->vm_call_tail:
|  // Test the frame type bits of PC; TMP1 = PC with its low 3 bits cleared.
|  // NOTE(review): PC is expected to hold the FRAME_PC word of the current
|  // frame here (fff_fallback loads it before falling into this path).
|  andix. TMP0, PC, FRAME_TYPE
|  rlwinm TMP1, PC, 0, 0, 28
|  bne >3
|  // Type bits are zero: derive the delta from the RA operand of the
|  // instruction preceding PC instead (TMP1 = RA*8 + 8).
|  lwz INS, -4(PC)
|  decode_RA8 TMP1, INS
|  addi TMP1, TMP1, 8
|3:
|  sub TMP2, BASE, TMP1
|  b ->vm_call_dispatch  // Resolve again for tailcall.
|
|5:  // Grow stack for fallback handler.
|  li CARG2, LUA_MINSTACK
|  bl extern lj_state_growstack  // (lua_State *L, int n)
|  lp BASE, L->base
|  // Comparing a register with itself always yields 'eq', so the bne at
|  // label 1 falls through to ins_callt and the fast function is retried.
|  cmpw TMP0, TMP0  // Set 4*cr0+eq to force retry.
|  b <1
|
|->fff_gcstep:  // Call GC step function.
|  // BASE = new base, RC = nargs*8
|  // Called with bl: keep the return address in SAVE0 across the C call below.
|  mflr SAVE0
|  stp BASE, L->base
|  add TMP0, BASE, NARGS8:RC
|  stw PC, SAVE_PC  // Redundant (but a defined value).
2383 | stp TMP0, L->top 2384 | mr CARG1, L 2385 | bl extern lj_gc_step // (lua_State *L) 2386 | lp BASE, L->base 2387 | mtlr SAVE0 2388 | lp TMP0, L->top 2389 | sub NARGS8:RC, TMP0, BASE 2390 | lwz CFUNC:RB, FRAME_FUNC(BASE) 2391 | blr 2392 | 2393 |//----------------------------------------------------------------------- 2394 |//-- Special dispatch targets ------------------------------------------- 2395 |//----------------------------------------------------------------------- 2396 | 2397 |->vm_record: // Dispatch target for recording phase. 2398 |.if JIT 2399 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2400 | andix. TMP0, TMP3, HOOK_VMEVENT // No recording while in vmevent. 2401 | bne >5 2402 | // Decrement the hookcount for consistency, but always do the call. 2403 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2404 | andix. TMP0, TMP3, HOOK_ACTIVE 2405 | bne >1 2406 | subi TMP2, TMP2, 1 2407 | andi. TMP0, TMP3, LUA_MASKLINE|LUA_MASKCOUNT 2408 | beqy >1 2409 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2410 | b >1 2411 |.endif 2412 | 2413 |->vm_rethook: // Dispatch target for return hooks. 2414 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2415 | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? 2416 | beq >1 2417 |5: // Re-dispatch to static ins. 2418 | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OPP TMP1, INS. 2419 | lpx TMP0, DISPATCH, TMP1 2420 | mtctr TMP0 2421 | bctr 2422 | 2423 |->vm_inshook: // Dispatch target for instr/line hooks. 2424 | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) 2425 | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2426 | andix. TMP0, TMP3, HOOK_ACTIVE // Hook already active? 2427 | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 2428 | bne <5 2429 | 2430 | cmpwi cr1, TMP0, 0 2431 | addic. 
TMP2, TMP2, -1 2432 | beq cr1, <5 2433 | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) 2434 | beq >1 2435 | bge cr1, <5 2436 |1: 2437 | mr CARG1, L 2438 | stw MULTRES, SAVE_MULTRES 2439 | mr CARG2, PC 2440 | stp BASE, L->base 2441 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2442 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) 2443 |3: 2444 | lp BASE, L->base 2445 |4: // Re-dispatch to static ins. 2446 | lwz INS, -4(PC) 2447 | decode_OPP TMP1, INS 2448 | decode_RB8 RB, INS 2449 | addi TMP1, TMP1, GG_DISP2STATIC 2450 | decode_RD8 RD, INS 2451 | lpx TMP0, DISPATCH, TMP1 2452 | decode_RA8 RA, INS 2453 | decode_RC8 RC, INS 2454 | mtctr TMP0 2455 | bctr 2456 | 2457 |->cont_hook: // Continue from hook yield. 2458 | addi PC, PC, 4 2459 | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. 2460 | b <4 2461 | 2462 |->vm_hotloop: // Hot loop counter underflow. 2463 |.if JIT 2464 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 2465 | addi CARG1, DISPATCH, GG_DISP2J 2466 | stw PC, SAVE_PC 2467 | lwz TMP1, LFUNC:TMP1->pc 2468 | mr CARG2, PC 2469 | stw L, DISPATCH_J(L)(DISPATCH) 2470 | lbz TMP1, PC2PROTO(framesize)(TMP1) 2471 | stp BASE, L->base 2472 | slwi TMP1, TMP1, 3 2473 | add TMP1, BASE, TMP1 2474 | stp TMP1, L->top 2475 | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) 2476 | b <3 2477 |.endif 2478 | 2479 |->vm_callhook: // Dispatch target for call hooks. 2480 | mr CARG2, PC 2481 |.if JIT 2482 | b >1 2483 |.endif 2484 | 2485 |->vm_hotcall: // Hot call counter underflow. 2486 |.if JIT 2487 | ori CARG2, PC, 1 2488 |1: 2489 |.endif 2490 | add TMP0, BASE, RC 2491 | stw PC, SAVE_PC 2492 | mr CARG1, L 2493 | stp BASE, L->base 2494 | sub RA, RA, BASE 2495 | stp TMP0, L->top 2496 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) 2497 | // Returns ASMFunction. 2498 | lp BASE, L->base 2499 | lp TMP0, L->top 2500 | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. 
|  // Tail of the shared call-hook/hot-call path: lj_dispatch_call has returned
|  // the ASMFunction to continue with in CRET1. Recompute nargs*8 from the
|  // reloaded base/top and turn RA (saved as an offset from BASE before the
|  // call) back into a pointer.
|  sub NARGS8:RC, TMP0, BASE
|  add RA, BASE, RA
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  lwz INS, -4(PC)
|  mtctr CRET1
|  bctr
|
|->cont_stitch:  // Trace stitching.
|.if JIT
|  // RA = resultptr, RB = meta base
|  lwz INS, -4(PC)
|  lwz TRACE:TMP2, -20(RB)  // Save previous trace.
|  // MULTRES = (nresults+1)*8, so TMP1 = nresults*8; zero means nothing to copy.
|  addic. TMP1, MULTRES, -8
|  decode_RA8 RC, INS  // Call base.
|  beq >2
|1:  // Move results down.
|  // Copy one 8 byte slot per iteration from RA to BASE+RC.
|  lfd f0, 0(RA)
|  addic. TMP1, TMP1, -8
|  addi RA, RA, 8
|  stfdx f0, BASE, RC
|  addi RC, RC, 8
|  bne <1
|2:
|  // RA = RA*8 + RB*8 taken from the call instruction; compared against RC,
|  // the offset just past the last slot written so far.
|  decode_RA8 RA, INS
|  decode_RB8 RB, INS
|  add RA, RA, RB
|3:
|  cmplw RA, RC
|  bgt >9  // More results wanted?
|
|  // cr0: link == traceno, cr1: link != 0. Both compares are issued before
|  // either branch so the two decisions use independent CR fields.
|  lhz TMP3, TRACE:TMP2->traceno
|  lhz RD, TRACE:TMP2->link
|  cmpw RD, TMP3
|  cmpwi cr1, RD, 0
|  beq ->cont_nop  // Blacklisted.
|  slwi RD, RD, 3
|  bne cr1, =>BC_JLOOP  // Jump to stitched trace.
|
|  // Stitch a new trace to the previous trace.
|  stw TMP3, DISPATCH_J(exitno)(DISPATCH)
|  stp L, DISPATCH_J(L)(DISPATCH)
|  stp BASE, L->base
|  addi CARG1, DISPATCH, GG_DISP2J
|  mr CARG2, PC
|  bl extern lj_dispatch_stitch  // (jit_State *J, const BCIns *pc)
|  lp BASE, L->base
|  b ->cont_nop
|
|9:
|  // Pad a missing result: store TISNIL into the first word of the slot at
|  // BASE+RC, then re-check at 3:.
|  stwx TISNIL, BASE, RC
|  addi RC, RC, 8
|  b <3
|.endif
|
|->vm_profhook:  // Dispatch target for profiler hook.
#if LJ_HASPROFILE
|  // MULTRES is saved to its stack slot and BASE is published to L before
|  // calling into C.
|  mr CARG1, L
|  stw MULTRES, SAVE_MULTRES
|  mr CARG2, PC
|  stp BASE, L->base
|  bl extern lj_dispatch_profile  // (lua_State *L, const BCIns *pc)
|  // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
|  // Tail of ->vm_profhook: reload BASE after the C call and step PC back by
|  // one instruction before continuing at cont_nop.
|  lp BASE, L->base
|  subi PC, PC, 4
|  b ->cont_nop
#endif
|
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|// Store FPRs f<a>, f<b>, f<c>, f<d> into their 8 byte slots at 16+n*8(sp).
|.macro savex_, a, b, c, d
|  stfd f..a, 16+a*8(sp)
|  stfd f..b, 16+b*8(sp)
|  stfd f..c, 16+c*8(sp)
|  stfd f..d, 16+d*8(sp)
|.endmacro
|
|->vm_exit_handler:
|.if JIT
|  // Frame built here: 16 bytes, then 32 FPR slots of 8 bytes starting at
|  // 16(sp), then 32 GPR slots of 4 bytes starting at 16+32*8(sp).
|  // The address of the FPR area (sp+16) is what gets passed as ExitState.
|  addi sp, sp, -(16+32*8+32*4)
|  stmw r2, 16+32*8+2*4(sp)  // Stores r2..r31 into GPR slots 2..31.
|  addi DISPATCH, JGL, -GG_DISP2G-32768
|  li CARG2, ~LJ_VMST_EXIT
|  lwz CARG1, 16+32*8+32*4(sp)  // Get stack chain.
|  stw CARG2, DISPATCH_GL(vmstate)(DISPATCH)
|  savex_ 0,1,2,3
|  stw CARG1, 0(sp)  // Store extended stack chain.
|  clrso TMP1
|  savex_ 4,5,6,7
|  addi CARG2, sp, 16+32*8+32*4  // Recompute original value of sp.
|  savex_ 8,9,10,11
|  stw CARG2, 16+32*8+1*4(sp)  // Store sp in RID_SP.
|  savex_ 12,13,14,15
|  mflr CARG3
|  li TMP1, 0
|  savex_ 16,17,18,19
|  stw TMP1, 16+32*8+0*4(sp)  // Clear RID_TMP.
|  savex_ 20,21,22,23
|  lhz CARG4, 2(CARG3)  // Load trace number.
|  savex_ 24,25,26,27
|  lwz L, DISPATCH_GL(cur_L)(DISPATCH)
|  savex_ 28,29,30,31
|  // exitno = (TMP0 - LR)/4 - 2.
|  // NOTE(review): TMP0 is not set in this routine; presumably the exit stub
|  // that branches here provides it -- confirm against the stub generator.
|  sub CARG3, TMP0, CARG3  // Compute exit number.
|  lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
|  srwi CARG3, CARG3, 2
|  stp L, DISPATCH_J(L)(DISPATCH)
|  subi CARG3, CARG3, 2
|  stp BASE, L->base
|  stw CARG4, DISPATCH_J(parent)(DISPATCH)
|  stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)  // TMP1 is 0: clear jit_base.
|  addi CARG1, DISPATCH, GG_DISP2J
|  stw CARG3, DISPATCH_J(exitno)(DISPATCH)
|  addi CARG2, sp, 16
|  bl extern lj_trace_exit  // (jit_State *J, ExitState *ex)
|  // Returns MULTRES (unscaled) or negated error code.
|  // Tail of ->vm_exit_handler: drop the exit frame by resetting sp to
|  // L->cframe with its two low bits cleared, and carry over the stack chain
|  // word that was kept at 0(sp) of the exit frame.
|  lp TMP1, L->cframe
|  lwz TMP2, 0(sp)
|  lp BASE, L->base
|.if GPR64
|  rldicr sp, TMP1, 0, 61
|.else
|  rlwinm sp, TMP1, 0, 0, 29
|.endif
|  lwz PC, SAVE_PC  // Get SAVE_PC.
|  stw TMP2, 0(sp)
|  stw L, SAVE_L  // Set SAVE_L (on-trace resume/yield).
|  b >1  // Enter vm_exit_interp below, skipping its own setup.
|.endif
|->vm_exit_interp:
|.if JIT
|  // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
|  lwz L, SAVE_L
|  addi DISPATCH, JGL, -GG_DISP2G-32768
|  stp BASE, L->base
|1:
|  cmpwi CARG1, 0
|  blt >9  // Check for error from exit.
|  lwz LFUNC:RB, FRAME_FUNC(BASE)
|  slwi MULTRES, CARG1, 3  // Scale the unscaled MULTRES by 8.
|  li TMP2, 0
|  stw MULTRES, SAVE_MULTRES
|  lwz TMP1, LFUNC:RB->pc
|  stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
|  lwz KBASE, PC2PROTO(k)(TMP1)
|  // Setup type comparison constants.
|  // The FP constants are built as single precision bit patterns in TMPD and
|  // loaded with lfs.
|  li TISNUM, LJ_TISNUM
|  lus TMP3, 0x59c0  // TOBIT = 2^52 + 2^51 (float).
|  stw TMP3, TMPD
|  li ZERO, 0
|  ori TMP3, TMP3, 0x0004  // TONUM = 2^52 + 2^51 + 2^31 (float).
|  lfs TOBIT, TMPD
|  stw TMP3, TMPD
|  lus TMP0, 0x4338  // Hiword of 2^52 + 2^51 (double)
|  li TISNIL, LJ_TNIL
|  stw TMP0, TONUM_HI
|  lfs TONUM, TMPD
|  // Modified copy of ins_next which handles function header dispatch, too.
|  lwz INS, 0(PC)
|  addi PC, PC, 4
|  // Assumes TISNIL == ~LJ_VMST_INTERP == -1.
|  stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
|  decode_OPP TMP1, INS
|  decode_RA8 RA, INS
|  lpx TMP0, DISPATCH, TMP1
|  mtctr TMP0
|  // NOTE(review): the *4 scaling in the two opcode compares below matches
|  // decode_OPP == decode_OP4; under P64 decode_OPP is decode_OP8 -- confirm
|  // this path is never built with P64.
|  cmplwi TMP1, BC_FUNCF*4  // Function header?
|  bge >2
|  // Ordinary instruction: decode the remaining operands and dispatch.
|  decode_RB8 RB, INS
|  decode_RD8 RD, INS
|  decode_RC8 RC, INS
|  bctr
|2:
|  cmplwi TMP1, (BC_FUNCC+2)*4  // Fast function?
|  blt >3
|  // Check frame below fast function.
|  lwz TMP1, FRAME_PC(BASE)
|  andix. TMP0, TMP1, FRAME_TYPE
|  bney >3  // Trace stitching continuation?
2680 | // Otherwise set KBASE for Lua function below fast function. 2681 | lwz TMP2, -4(TMP1) 2682 | decode_RA8 TMP0, TMP2 2683 | sub TMP1, BASE, TMP0 2684 | lwz LFUNC:TMP2, -12(TMP1) 2685 | lwz TMP1, LFUNC:TMP2->pc 2686 | lwz KBASE, PC2PROTO(k)(TMP1) 2687 |3: 2688 | subi RC, MULTRES, 8 2689 | add RA, RA, BASE 2690 | bctr 2691 | 2692 |9: // Rethrow error from the right C frame. 2693 | neg CARG2, CARG1 2694 | mr CARG1, L 2695 | bl extern lj_err_throw // (lua_State *L, int errcode) 2696 |.endif 2697 | 2698 |//----------------------------------------------------------------------- 2699 |//-- Math helper functions ---------------------------------------------- 2700 |//----------------------------------------------------------------------- 2701 | 2702 |// NYI: Use internal implementations of floor, ceil, trunc. 2703 | 2704 |->vm_modi: 2705 | divwo. TMP0, CARG1, CARG2 2706 | bso >1 2707 |.if GPR64 2708 | xor CARG3, CARG1, CARG2 2709 | cmpwi CARG3, 0 2710 |.else 2711 | xor. CARG3, CARG1, CARG2 2712 |.endif 2713 | mullw TMP0, TMP0, CARG2 2714 | sub CARG1, CARG1, TMP0 2715 | bgelr 2716 | cmpwi CARG1, 0; beqlr 2717 | add CARG1, CARG1, CARG2 2718 | blr 2719 |1: 2720 | cmpwi CARG2, 0 2721 | li CARG1, 0 2722 | beqlr 2723 | clrso TMP0 // Clear SO for -2147483648 % -1 and return 0. 2724 | blr 2725 | 2726 |//----------------------------------------------------------------------- 2727 |//-- Miscellaneous functions -------------------------------------------- 2728 |//----------------------------------------------------------------------- 2729 | 2730 |// void lj_vm_cachesync(void *start, void *end) 2731 |// Flush D-Cache and invalidate I-Cache. Assumes 32 byte cache line size. 2732 |// This is a good lower bound, except for very ancient PPC models. 2733 |->vm_cachesync: 2734 |.if JIT or FFI 2735 | // Compute start of first cache line and number of cache lines. 2736 | rlwinm CARG1, CARG1, 0, 0, 26 2737 | sub CARG2, CARG2, CARG1 2738 | addi CARG2, CARG2, 31 2739 | rlwinm. 
CARG2, CARG2, 27, 5, 31 2740 | beqlr 2741 | mtctr CARG2 2742 | mr CARG3, CARG1 2743 |1: // Flush D-Cache. 2744 | dcbst r0, CARG1 2745 | addi CARG1, CARG1, 32 2746 | bdnz <1 2747 | sync 2748 | mtctr CARG2 2749 |1: // Invalidate I-Cache. 2750 | icbi r0, CARG3 2751 | addi CARG3, CARG3, 32 2752 | bdnz <1 2753 | isync 2754 | blr 2755 |.endif 2756 | 2757 |//----------------------------------------------------------------------- 2758 |//-- FFI helper functions ----------------------------------------------- 2759 |//----------------------------------------------------------------------- 2760 | 2761 |// Handler for callback functions. Callback slot number in r11, g in r12. 2762 |->vm_ffi_callback: 2763 |.if FFI 2764 |.type CTSTATE, CTState, PC 2765 | saveregs 2766 | lwz CTSTATE, GL:r12->ctype_state 2767 | addi DISPATCH, r12, GG_G2DISP 2768 | stw r11, CTSTATE->cb.slot 2769 | stw r3, CTSTATE->cb.gpr[0] 2770 | stfd f1, CTSTATE->cb.fpr[0] 2771 | stw r4, CTSTATE->cb.gpr[1] 2772 | stfd f2, CTSTATE->cb.fpr[1] 2773 | stw r5, CTSTATE->cb.gpr[2] 2774 | stfd f3, CTSTATE->cb.fpr[2] 2775 | stw r6, CTSTATE->cb.gpr[3] 2776 | stfd f4, CTSTATE->cb.fpr[3] 2777 | stw r7, CTSTATE->cb.gpr[4] 2778 | stfd f5, CTSTATE->cb.fpr[4] 2779 | stw r8, CTSTATE->cb.gpr[5] 2780 | stfd f6, CTSTATE->cb.fpr[5] 2781 | stw r9, CTSTATE->cb.gpr[6] 2782 | stfd f7, CTSTATE->cb.fpr[6] 2783 | stw r10, CTSTATE->cb.gpr[7] 2784 | stfd f8, CTSTATE->cb.fpr[7] 2785 | addi TMP0, sp, CFRAME_SPACE+8 2786 | stw TMP0, CTSTATE->cb.stack 2787 | mr CARG1, CTSTATE 2788 | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. 2789 | mr CARG2, sp 2790 | bl extern lj_ccallback_enter // (CTState *cts, void *cf) 2791 | // Returns lua_State *. 2792 | lp BASE, L:CRET1->base 2793 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 2794 | lp RC, L:CRET1->top 2795 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 
2796 | li ZERO, 0 2797 | mr L, CRET1 2798 | stw TMP3, TMPD 2799 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 2800 | lwz LFUNC:RB, FRAME_FUNC(BASE) 2801 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 2802 | stw TMP0, TONUM_HI 2803 | li TISNIL, LJ_TNIL 2804 | li_vmstate INTERP 2805 | lfs TOBIT, TMPD 2806 | stw TMP3, TMPD 2807 | sub RC, RC, BASE 2808 | st_vmstate 2809 | lfs TONUM, TMPD 2810 | ins_callt 2811 |.endif 2812 | 2813 |->cont_ffi_callback: // Return from FFI callback. 2814 |.if FFI 2815 | lwz CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) 2816 | stp BASE, L->base 2817 | stp RB, L->top 2818 | stp L, CTSTATE->L 2819 | mr CARG1, CTSTATE 2820 | mr CARG2, RA 2821 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 2822 | lwz CRET1, CTSTATE->cb.gpr[0] 2823 | lfd FARG1, CTSTATE->cb.fpr[0] 2824 | lwz CRET2, CTSTATE->cb.gpr[1] 2825 | b ->vm_leave_unw 2826 |.endif 2827 | 2828 |->vm_ffi_call: // Call C function via FFI. 2829 | // Caveat: needs special frame unwinding, see below. 2830 |.if FFI 2831 | .type CCSTATE, CCallState, CARG1 2832 | lwz TMP1, CCSTATE->spadj 2833 | mflr TMP0 2834 | lbz CARG2, CCSTATE->nsp 2835 | lbz CARG3, CCSTATE->nfpr 2836 | neg TMP1, TMP1 2837 | stw TMP0, 4(sp) 2838 | cmpwi cr1, CARG3, 0 2839 | mr TMP2, sp 2840 | addic. CARG2, CARG2, -1 2841 | stwux sp, sp, TMP1 2842 | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. 2843 | stw r14, -4(TMP2) 2844 | stw CCSTATE, -8(TMP2) 2845 | mr r14, TMP2 2846 | la TMP1, CCSTATE->stack 2847 | slwi CARG2, CARG2, 2 2848 | blty >2 2849 | la TMP2, 8(sp) 2850 |1: 2851 | lwzx TMP0, TMP1, CARG2 2852 | stwx TMP0, TMP2, CARG2 2853 | addic. 
CARG2, CARG2, -4 2854 | bge <1 2855 |2: 2856 | bney cr1, >3 2857 | lfd f1, CCSTATE->fpr[0] 2858 | lfd f2, CCSTATE->fpr[1] 2859 | lfd f3, CCSTATE->fpr[2] 2860 | lfd f4, CCSTATE->fpr[3] 2861 | lfd f5, CCSTATE->fpr[4] 2862 | lfd f6, CCSTATE->fpr[5] 2863 | lfd f7, CCSTATE->fpr[6] 2864 | lfd f8, CCSTATE->fpr[7] 2865 |3: 2866 | lp TMP0, CCSTATE->func 2867 | lwz CARG2, CCSTATE->gpr[1] 2868 | lwz CARG3, CCSTATE->gpr[2] 2869 | lwz CARG4, CCSTATE->gpr[3] 2870 | lwz CARG5, CCSTATE->gpr[4] 2871 | mtctr TMP0 2872 | lwz r8, CCSTATE->gpr[5] 2873 | lwz r9, CCSTATE->gpr[6] 2874 | lwz r10, CCSTATE->gpr[7] 2875 | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2876 | bctrl 2877 | lwz CCSTATE:TMP1, -8(r14) 2878 | lwz TMP2, -4(r14) 2879 | lwz TMP0, 4(r14) 2880 | stw CARG1, CCSTATE:TMP1->gpr[0] 2881 | stfd FARG1, CCSTATE:TMP1->fpr[0] 2882 | stw CARG2, CCSTATE:TMP1->gpr[1] 2883 | mtlr TMP0 2884 | stw CARG3, CCSTATE:TMP1->gpr[2] 2885 | mr sp, r14 2886 | stw CARG4, CCSTATE:TMP1->gpr[3] 2887 | mr r14, TMP2 2888 | blr 2889 |.endif 2890 |// Note: vm_ffi_call must be the last function in this object file! 2891 | 2892 |//----------------------------------------------------------------------- 2893 } 2894 2895 /* Generate the code for a single instruction. */ 2896 static void build_ins(BuildCtx *ctx, BCOp op, int defop) 2897 { 2898 int vk = 0; 2899 |=>defop: 2900 2901 switch (op) { 2902 2903 /* -- Comparison ops ---------------------------------------------------- */ 2904 2905 /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ 2906 2907 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2908 | // RA = src1*8, RD = src2*8, JMP with RD = target 2909 |.if DUALNUM 2910 | lwzux TMP0, RA, BASE 2911 | addi PC, PC, 4 2912 | lwz CARG2, 4(RA) 2913 | lwzux TMP1, RD, BASE 2914 | lwz TMP2, -4(PC) 2915 | checknum cr0, TMP0 2916 | lwz CARG3, 4(RD) 2917 | decode_RD4 TMP2, TMP2 2918 | checknum cr1, TMP1 2919 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 2920 | bne cr0, >7 2921 | bne cr1, >8 2922 | cmpw CARG2, CARG3 2923 if (op == BC_ISLT) { 2924 | bge >2 2925 } else if (op == BC_ISGE) { 2926 | blt >2 2927 } else if (op == BC_ISLE) { 2928 | bgt >2 2929 } else { 2930 | ble >2 2931 } 2932 |1: 2933 | add PC, PC, TMP2 2934 |2: 2935 | ins_next 2936 | 2937 |7: // RA is not an integer. 2938 | bgt cr0, ->vmeta_comp 2939 | // RA is a number. 2940 | lfd f0, 0(RA) 2941 | bgt cr1, ->vmeta_comp 2942 | blt cr1, >4 2943 | // RA is a number, RD is an integer. 2944 | tonum_i f1, CARG3 2945 | b >5 2946 | 2947 |8: // RA is an integer, RD is not an integer. 2948 | bgt cr1, ->vmeta_comp 2949 | // RA is an integer, RD is a number. 
|  // Tail of the DUALNUM mixed int/number compare for ISLT/ISGE/ISLE/ISGT:
|  // convert the integer RA operand, load the RD number, compare as doubles.
|  tonum_i f0, CARG2
|4:
|  lfd f1, 0(RD)
|5:
|  fcmpu cr0, f0, f1
  /* Each variant branches back to 2: (no jump) when its condition is false;
  ** otherwise it falls through to 'b <1', which applies the jump offset.
  ** For ISLE/ISGT the cror first folds 'eq' into the 'lt' bit.
  */
  if (op == BC_ISLT) {
|  bge <2
  } else if (op == BC_ISGE) {
|  blt <2
  } else if (op == BC_ISLE) {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  bge <2
  } else {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  blt <2
  }
|  b <1
|.else
|  // Non-DUALNUM: load the tag word and the double of both operands, fetch
|  // the jump offset from the following instruction, and leave for
|  // vmeta_comp unless both tags pass checknum.
|  lwzx TMP0, BASE, RA
|  addi PC, PC, 4
|  lfdx f0, BASE, RA
|  lwzx TMP1, BASE, RD
|  checknum cr0, TMP0
|  lwz TMP2, -4(PC)
|  lfdx f1, BASE, RD
|  checknum cr1, TMP1
|  decode_RD4 TMP2, TMP2
|  bge cr0, ->vmeta_comp
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  bge cr1, ->vmeta_comp
|  fcmpu cr0, f0, f1
  /* Skip the 'add PC' below when the comparison is false. */
  if (op == BC_ISLT) {
|  bge >1
  } else if (op == BC_ISGE) {
|  blt >1
  } else if (op == BC_ISLE) {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  bge >1
  } else {
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+eq
|  blt >1
  }
|  add PC, PC, TMP2
|1:
|  ins_next
|.endif
    break;

  case BC_ISEQV: case BC_ISNEV:
    vk = op == BC_ISEQV;
|  // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
|  // Loads tags into TMP0/TMP1 and the second words into CARG2/CARG3, and
|  // leaves the jump offset in TMP2 -- the same state the ISEQN/ISNEN code
|  // has at its *_Z label.
|  lwzux TMP0, RA, BASE
|  addi PC, PC, 4
|  lwz CARG2, 4(RA)
|  lwzux TMP1, RD, BASE
|  checknum cr0, TMP0
|  lwz TMP2, -4(PC)
|  checknum cr1, TMP1
|  decode_RD4 TMP2, TMP2
|  lwz CARG3, 4(RD)
|  // cr7.gt = 'gt' was set for either tag; if it was set for neither,
|  // reuse the number comparison code of ISEQN/ISNEN.
|  cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
  if (vk) {
|  ble cr7, ->BC_ISEQN_Z
  } else {
|  ble cr7, ->BC_ISNEN_Z
  }
|.else
|  lwzux TMP0, RA, BASE
|  lwz TMP2, 0(PC)
|  lfd f0, 0(RA)
|  addi PC, PC, 4
|  lwzux TMP1, RD, BASE
|  checknum cr0, TMP0
|  decode_RD4 TMP2, TMP2
|  lfd f1, 0(RD)
|  checknum cr1, TMP1
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  // Either tag failing checknum: take the generic path at 5:.
|  bge cr0, >5
|  bge cr1, >5
|  fcmpu cr0, f0, f1
  if (vk) {
|  bne >1
|  add PC, PC, TMP2
  } else {
|  beq >1
|  add PC, PC, TMP2
  }
|1:
|  ins_next
|.endif
|5:  // Either or both types are not numbers.
|  // TMP0/TMP1 = tags of the two operands, CARG2/CARG3 = their second words.
|.if not DUALNUM
|  lwz CARG2, 4(RA)
|  lwz CARG3, 4(RD)
|.endif
|.if FFI
|  cmpwi cr7, TMP0, LJ_TCDATA
|  cmpwi cr5, TMP1, LJ_TCDATA
|.endif
|  not TMP3, TMP0
|  cmplw TMP0, TMP1  // cr0.eq = same tag.
|  cmplwi cr1, TMP3, ~LJ_TISPRI  // Primitive?
|.if FFI
|  cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq  // Either operand is cdata.
|.endif
|  cmplwi cr6, TMP3, ~LJ_TISTABUD  // Table or userdata?
|.if FFI
|  beq cr7, ->vmeta_equal_cd
|.endif
|  cmplw cr5, CARG2, CARG3  // cr5.eq = same second word.
|  // The three outcomes are encoded into cr0 with CR logic instead of
|  // branches; the numbers in the comments below name those outcomes.
|  crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt  // 2: Same type and primitive.
|  crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq  // 1: Same tv or different type.
|  crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq  // 0: Same type and same tv.
|  mr SAVE0, PC  // Keep the un-jumped PC for the metamethod path.
|  cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt  // 0 or 2.
|  cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt  // 1 or 2.
  /* cr0.eq now means "equal": apply the jump offset for ISEQV when it is
  ** set, for ISNEV when it is clear.
  */
  if (vk) {
|  bne cr0, >6
|  add PC, PC, TMP2
|6:
  } else {
|  beq cr0, >6
|  add PC, PC, TMP2
|6:
  }
|.if DUALNUM
|  // No 1: label exists yet in this build, so define the ins_next exit here
|  // and branch around it when further checks are needed.
|  bge cr0, >2  // Done if 1 or 2.
|1:
|  ins_next
|2:
|.else
|  blt cr0, <1  // Done if 1 or 2.
|.endif
|  blt cr6, <1  // Done if not tab/ud.
|
|  // Different tables or userdatas. Need to check __eq metamethod.
|  // Field metatable must be at same offset for GCtab and GCudata!
|  lwz TAB:TMP2, TAB:CARG2->metatable
|  li CARG4, 1-vk  // ne = 0 or 1.
|  cmplwi TAB:TMP2, 0
|  beq <1  // No metatable?
|  lbz TMP2, TAB:TMP2->nomm
|  andix. TMP2, TMP2, 1<<MM_eq
|  bne <1  // Or 'no __eq' flag set?
|  mr PC, SAVE0  // Restore old PC.
|  b ->vmeta_equal  // Handle __eq metamethod.
break;

  case BC_ISEQS: case BC_ISNES:
    vk = op == BC_ISEQS;
|  // RA = src*8, RD = str_const*8 (~), JMP with RD = target
|  // Branch-free: the jump offset in TMP2 is kept or zeroed with a mask
|  // derived from the carry bit, then added to PC unconditionally.
|  lwzux TMP0, RA, BASE
|  srwi RD, RD, 1
|  lwz STR:TMP3, 4(RA)
|  lwz TMP2, 0(PC)
|  subfic RD, RD, -4  // RD = -4 - str_const*4.
|  addi PC, PC, 4
|.if FFI
|  cmpwi TMP0, LJ_TCDATA  // cr0 is consumed by the beq below.
|.endif
|  lwzx STR:TMP1, KBASE, RD  // KBASE-4-str_const*4
|  .gpr64 extsw TMP0, TMP0
|  subfic TMP0, TMP0, LJ_TSTR  // Zero iff the tag is LJ_TSTR.
|.if FFI
|  beq ->vmeta_equal_cd
|.endif
|  sub TMP1, STR:TMP1, STR:TMP3  // Zero iff both refer to the same string.
|  or TMP0, TMP0, TMP1  // Zero iff tag and pointer both match.
|  decode_RD4 TMP2, TMP2
|  subfic TMP0, TMP0, 0  // 0 - TMP0: carry is set iff TMP0 was zero.
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
|  subfe TMP1, TMP1, TMP1  // TMP1 = carry - 1: 0 if equal, -1 otherwise.
  /* ISEQS keeps the offset when equal, ISNES keeps it when not equal. */
  if (vk) {
|  andc TMP2, TMP2, TMP1
  } else {
|  and TMP2, TMP2, TMP1
  }
|  add PC, PC, TMP2
|  ins_next
    break;

  case BC_ISEQN: case BC_ISNEN:
    vk = op == BC_ISEQN;
|  // RA = src*8, RD = num_const*8, JMP with RD = target
|.if DUALNUM
|  lwzux TMP0, RA, BASE
|  addi PC, PC, 4
|  lwz CARG2, 4(RA)
|  lwzux TMP1, RD, KBASE
|  checknum cr0, TMP0
|  lwz TMP2, -4(PC)
|  checknum cr1, TMP1
|  decode_RD4 TMP2, TMP2
|  lwz CARG3, 4(RD)
|  addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
  /* Secondary entry point used by BC_ISEQV/BC_ISNEV. State expected here:
  ** cr0/cr1 = checknum results for the two tags, CARG2/CARG3 = second words
  ** of the operands, TMP2 = jump offset, RA/RD = operand addresses.
  */
  if (vk) {
|->BC_ISEQN_Z:
  } else {
|->BC_ISNEN_Z:
  }
|  // Both tags 'eq' under checknum: compare the two words as integers.
|  bne cr0, >7
|  bne cr1, >8
|  cmpw CARG2, CARG3
|4:
|.else
  /* Non-DUALNUM builds still define the label so that references to it
  ** resolve.
  */
  if (vk) {
|->BC_ISEQN_Z:  // Dummy label.
  } else {
|->BC_ISNEN_Z:  // Dummy label.
3161 } 3162 | lwzx TMP0, BASE, RA 3163 | addi PC, PC, 4 3164 | lfdx f0, BASE, RA 3165 | lwz TMP2, -4(PC) 3166 | lfdx f1, KBASE, RD 3167 | decode_RD4 TMP2, TMP2 3168 | checknum TMP0 3169 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3170 | bge >3 3171 | fcmpu cr0, f0, f1 3172 |.endif 3173 if (vk) { 3174 | bne >1 3175 | add PC, PC, TMP2 3176 |1: 3177 |.if not FFI 3178 |3: 3179 |.endif 3180 } else { 3181 | beq >2 3182 |1: 3183 |.if not FFI 3184 |3: 3185 |.endif 3186 | add PC, PC, TMP2 3187 |2: 3188 } 3189 | ins_next 3190 |.if FFI 3191 |3: 3192 | cmpwi TMP0, LJ_TCDATA 3193 | beq ->vmeta_equal_cd 3194 | b <1 3195 |.endif 3196 |.if DUALNUM 3197 |7: // RA is not an integer. 3198 | bge cr0, <3 3199 | // RA is a number. 3200 | lfd f0, 0(RA) 3201 | blt cr1, >1 3202 | // RA is a number, RD is an integer. 3203 | tonum_i f1, CARG3 3204 | b >2 3205 | 3206 |8: // RA is an integer, RD is a number. 3207 | tonum_i f0, CARG2 3208 |1: 3209 | lfd f1, 0(RD) 3210 |2: 3211 | fcmpu cr0, f0, f1 3212 | b <4 3213 |.endif 3214 break; 3215 3216 case BC_ISEQP: case BC_ISNEP: 3217 vk = op == BC_ISEQP; 3218 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target 3219 | lwzx TMP0, BASE, RA 3220 | srwi TMP1, RD, 3 3221 | lwz TMP2, 0(PC) 3222 | not TMP1, TMP1 3223 | addi PC, PC, 4 3224 |.if FFI 3225 | cmpwi TMP0, LJ_TCDATA 3226 |.endif 3227 | sub TMP0, TMP0, TMP1 3228 |.if FFI 3229 | beq ->vmeta_equal_cd 3230 |.endif 3231 | decode_RD4 TMP2, TMP2 3232 | .gpr64 extsw TMP0, TMP0 3233 | addic TMP0, TMP0, -1 3234 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3235 | subfe TMP1, TMP1, TMP1 3236 if (vk) { 3237 | and TMP2, TMP2, TMP1 3238 } else { 3239 | andc TMP2, TMP2, TMP1 3240 } 3241 | add PC, PC, TMP2 3242 | ins_next 3243 break; 3244 3245 /* -- Unary test and copy ops ------------------------------------------- */ 3246 3247 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: 3248 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target 3249 | lwzx TMP0, BASE, RD 3250 | lwz INS, 0(PC) 3251 | addi PC, 
PC, 4 3252 if (op == BC_IST || op == BC_ISF) { 3253 | .gpr64 extsw TMP0, TMP0 3254 | subfic TMP0, TMP0, LJ_TTRUE 3255 | decode_RD4 TMP2, INS 3256 | subfe TMP1, TMP1, TMP1 3257 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3258 if (op == BC_IST) { 3259 | andc TMP2, TMP2, TMP1 3260 } else { 3261 | and TMP2, TMP2, TMP1 3262 } 3263 | add PC, PC, TMP2 3264 } else { 3265 | li TMP1, LJ_TFALSE 3266 | lfdx f0, BASE, RD 3267 | cmplw TMP0, TMP1 3268 if (op == BC_ISTC) { 3269 | bge >1 3270 } else { 3271 | blt >1 3272 } 3273 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3274 | decode_RD4 TMP2, INS 3275 | stfdx f0, BASE, RA 3276 | add PC, PC, TMP2 3277 |1: 3278 } 3279 | ins_next 3280 break; 3281 3282 case BC_ISTYPE: 3283 | // RA = src*8, RD = -type*8 3284 | lwzx TMP0, BASE, RA 3285 | srwi TMP1, RD, 3 3286 | ins_next1 3287 |.if not PPE and not GPR64 3288 | add. TMP0, TMP0, TMP1 3289 |.else 3290 | neg TMP1, TMP1 3291 | cmpw TMP0, TMP1 3292 |.endif 3293 | bne ->vmeta_istype 3294 | ins_next2 3295 break; 3296 case BC_ISNUM: 3297 | // RA = src*8, RD = -(TISNUM-1)*8 3298 | lwzx TMP0, BASE, RA 3299 | ins_next1 3300 | checknum TMP0 3301 | bge ->vmeta_istype 3302 | ins_next2 3303 break; 3304 3305 /* -- Unary ops --------------------------------------------------------- */ 3306 3307 case BC_MOV: 3308 | // RA = dst*8, RD = src*8 3309 | ins_next1 3310 | lfdx f0, BASE, RD 3311 | stfdx f0, BASE, RA 3312 | ins_next2 3313 break; 3314 case BC_NOT: 3315 | // RA = dst*8, RD = src*8 3316 | ins_next1 3317 | lwzx TMP0, BASE, RD 3318 | .gpr64 extsw TMP0, TMP0 3319 | subfic TMP1, TMP0, LJ_TTRUE 3320 | adde TMP0, TMP0, TMP1 3321 | stwx TMP0, BASE, RA 3322 | ins_next2 3323 break; 3324 case BC_UNM: 3325 | // RA = dst*8, RD = src*8 3326 | lwzux TMP1, RD, BASE 3327 | lwz TMP0, 4(RD) 3328 | checknum TMP1 3329 |.if DUALNUM 3330 | bne >5 3331 |.if GPR64 3332 | lus TMP2, 0x8000 3333 | neg TMP0, TMP0 3334 | cmplw TMP0, TMP2 3335 | beq >4 3336 |.else 3337 | nego. 
TMP0, TMP0 3338 | bso >4 3339 |1: 3340 |.endif 3341 | ins_next1 3342 | stwux TISNUM, RA, BASE 3343 | stw TMP0, 4(RA) 3344 |3: 3345 | ins_next2 3346 |4: 3347 |.if not GPR64 3348 | // Potential overflow. 3349 | checkov TMP1, <1 // Ignore unrelated overflow. 3350 |.endif 3351 | lus TMP1, 0x41e0 // 2^31. 3352 | li TMP0, 0 3353 | b >7 3354 |.endif 3355 |5: 3356 | bge ->vmeta_unm 3357 | xoris TMP1, TMP1, 0x8000 3358 |7: 3359 | ins_next1 3360 | stwux TMP1, RA, BASE 3361 | stw TMP0, 4(RA) 3362 |.if DUALNUM 3363 | b <3 3364 |.else 3365 | ins_next2 3366 |.endif 3367 break; 3368 case BC_LEN: 3369 | // RA = dst*8, RD = src*8 3370 | lwzux TMP0, RD, BASE 3371 | lwz CARG1, 4(RD) 3372 | checkstr TMP0; bne >2 3373 | lwz CRET1, STR:CARG1->len 3374 |1: 3375 |.if DUALNUM 3376 | ins_next1 3377 | stwux TISNUM, RA, BASE 3378 | stw CRET1, 4(RA) 3379 |.else 3380 | tonum_u f0, CRET1 // Result is a non-negative integer. 3381 | ins_next1 3382 | stfdx f0, BASE, RA 3383 |.endif 3384 | ins_next2 3385 |2: 3386 | checktab TMP0; bne ->vmeta_len 3387 | lwz TAB:TMP2, TAB:CARG1->metatable 3388 | cmplwi TAB:TMP2, 0 3389 | bne >9 3390 |3: 3391 |->BC_LEN_Z: 3392 | bl extern lj_tab_len // (GCtab *t) 3393 | // Returns uint32_t (but less than 2^31). 3394 | b <1 3395 |9: 3396 | lbz TMP0, TAB:TMP2->nomm 3397 | andix. TMP0, TMP0, 1<<MM_len 3398 | bne <3 // 'no __len' flag set: done. 
|  b ->vmeta_len
    break;

  /* -- Binary ops -------------------------------------------------------- */

|// Operand fetch and type check for the FP arithmetic path.
|// Sets the C variable vk from the opcode (0: VN, 1: NV, other: VV), loads
|// the first operand into f14 and the second into f15, and branches to the
|// matching vmeta_arith_* handler when a checked tag fails checknum.
|// For vk == 1 the constant from KBASE is the first operand.
|.macro ins_arithpre
|  // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
|  lwzx TMP1, BASE, RB
|  .if DUALNUM
|  lwzx TMP2, KBASE, RC
|  .endif
|  lfdx f14, BASE, RB
|  lfdx f15, KBASE, RC
|  .if DUALNUM
|  // Merge both 'lt' results into cr0.lt so one branch covers both operands.
|  checknum cr0, TMP1
|  checknum cr1, TMP2
|  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|  bge ->vmeta_arith_vn
|  .else
|  checknum TMP1; bge ->vmeta_arith_vn
|  .endif
||  break;
||case 1:
|  lwzx TMP1, BASE, RB
|  .if DUALNUM
|  lwzx TMP2, KBASE, RC
|  .endif
|  lfdx f15, BASE, RB
|  lfdx f14, KBASE, RC
|  .if DUALNUM
|  checknum cr0, TMP1
|  checknum cr1, TMP2
|  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|  bge ->vmeta_arith_nv
|  .else
|  checknum TMP1; bge ->vmeta_arith_nv
|  .endif
||  break;
||default:
|  lwzx TMP1, BASE, RB
|  lwzx TMP2, BASE, RC
|  lfdx f14, BASE, RB
|  lfdx f15, BASE, RC
|  checknum cr0, TMP1
|  checknum cr1, TMP2
|  crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
|  bge ->vmeta_arith_vv
||  break;
||}
|.endmacro
|
|// Emit branch instruction 'ins' to the vmeta_arith_*2 handler selected by
|// the vk value computed earlier in the same instruction.
|.macro ins_arithfallback, ins
||switch (vk) {
||case 0:
|  ins ->vmeta_arith_vn2
||  break;
||case 1:
|  ins ->vmeta_arith_nv2
||  break;
||default:
|  ins ->vmeta_arith_vv2
||  break;
||}
|.endmacro
|
|// Integer modulo: the operands a, b, c are ignored; the work is done by
|// ->vm_modi on CARG1/CARG2.
|.macro intmod, a, b, c
|  bl ->vm_modi
|.endmacro
|
|// FP modulo: a = b - floor(b/c)*c. Also defines the shared entry point
|// ->BC_MODVN_Z that the "fpmod_" variants branch to.
|.macro fpmod, a, b, c
|->BC_MODVN_Z:
|  fdiv FARG1, b, c
|  // NYI: Use internal implementation of floor.
3475 | blex floor // floor(b/c) 3476 | fmul a, FARG1, c 3477 | fsub a, b, a // b - floor(b/c)*c 3478 |.endmacro 3479 | 3480 |.macro ins_arithfp, fpins 3481 | ins_arithpre 3482 |.if "fpins" == "fpmod_" 3483 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3484 |.else 3485 | fpins f0, f14, f15 3486 | ins_next1 3487 | stfdx f0, BASE, RA 3488 | ins_next2 3489 |.endif 3490 |.endmacro 3491 | 3492 |.macro ins_arithdn, intins, fpins 3493 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8. Dual-number variant: tries intins on the lo-words first and uses the FP path at label 5 when either checknum compare comes back not-equal. 3494 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3495 ||switch (vk) { 3496 ||case 0: 3497 | lwzux TMP1, RB, BASE 3498 | lwzux TMP2, RC, KBASE 3499 | lwz CARG1, 4(RB) 3500 | checknum cr0, TMP1 3501 | lwz CARG2, 4(RC) 3502 || break; 3503 ||case 1: 3504 | lwzux TMP1, RB, BASE 3505 | lwzux TMP2, RC, KBASE 3506 | lwz CARG2, 4(RB) 3507 | checknum cr0, TMP1 3508 | lwz CARG1, 4(RC) 3509 || break; 3510 ||default: 3511 | lwzux TMP1, RB, BASE 3512 | lwzux TMP2, RC, BASE 3513 | lwz CARG1, 4(RB) 3514 | checknum cr0, TMP1 3515 | lwz CARG2, 4(RC) 3516 || break; 3517 ||} 3518 | checknum cr1, TMP2 3519 | bne >5 3520 | bne cr1, >5 3521 | intins CARG1, CARG1, CARG2 3522 | bso >4 3523 |1: 3524 | ins_next1 3525 | stwux TISNUM, RA, BASE 3526 | stw CARG1, 4(RA) 3527 |2: 3528 | ins_next2 3529 |4: // Overflow: unless checkov finds it unrelated, branch to the vmeta_arith_*2 fallback. 3530 | checkov TMP0, <1 // Ignore unrelated overflow. 3531 | ins_arithfallback b 3532 |5: // FP variant. 3533 ||if (vk == 1) { 3534 | lfd f15, 0(RB) 3535 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3536 | lfd f14, 0(RC) 3537 ||} else { 3538 | lfd f14, 0(RB) 3539 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3540 | lfd f15, 0(RC) 3541 ||} 3542 | ins_arithfallback bge 3543 |.if "fpins" == "fpmod_" 3544 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3545 |.else 3546 | fpins f0, f14, f15 3547 | ins_next1 3548 | stfdx f0, BASE, RA 3549 | b <2 3550 |.endif 3551 |.endmacro 3552 | 3553 |.macro ins_arith, intins, fpins 3554 |.if DUALNUM 3555 | ins_arithdn intins, fpins 3556 |.else 3557 | ins_arithfp fpins 3558 |.endif 3559 |.endmacro 3560 3561 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3562 |.if GPR64 3563 |.macro addo32., y, a, b 3564 | // Need to check overflow for (a<<32) + (b<<32). The shifted sum in TMP0 only sets the flags; the result y comes from the plain add below. 3565 | rldicr TMP0, a, 32, 31 3566 | rldicr TMP3, b, 32, 31 3567 | addo. TMP0, TMP0, TMP3 3568 | add y, a, b 3569 |.endmacro 3570 | ins_arith addo32., fadd 3571 |.else 3572 | ins_arith addo., fadd 3573 |.endif 3574 break; 3575 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3576 |.if GPR64 3577 |.macro subo32., y, a, b 3578 | // Need to check overflow for (a<<32) - (b<<32). The shifted difference in TMP0 only sets the flags; the result y comes from the plain sub below. 3579 | rldicr TMP0, a, 32, 31 3580 | rldicr TMP3, b, 32, 31 3581 | subo. TMP0, TMP0, TMP3 3582 | sub y, a, b 3583 |.endmacro 3584 | ins_arith subo32., fsub 3585 |.else 3586 | ins_arith subo., fsub 3587 |.endif 3588 break; 3589 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3590 | ins_arith mullwo., fmul 3591 break; 3592 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3593 | ins_arithfp fdiv 3594 break; 3595 case BC_MODVN: 3596 | ins_arith intmod, fpmod 3597 break; 3598 case BC_MODNV: case BC_MODVV: 3599 | ins_arith intmod, fpmod_ 3600 break; 3601 case BC_POW: 3602 | // NYI: (partial) integer arithmetic.
3603 | lwzx TMP1, BASE, RB 3604 | lfdx FARG1, BASE, RB 3605 | lwzx TMP2, BASE, RC 3606 | lfdx FARG2, BASE, RC 3607 | checknum cr0, TMP1 3608 | checknum cr1, TMP2 3609 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3610 | bge ->vmeta_arith_vv 3611 | blex pow 3612 | ins_next1 3613 | stfdx FARG1, BASE, RA 3614 | ins_next2 3615 break; 3616 3617 case BC_CAT: 3618 | // RA = dst*8, RB = src_start*8, RC = src_end*8. Calls lj_meta_cat with top = BASE+RC and left = (RC-RB)/8; RB is kept in SAVE0 across the call. 3619 | sub CARG3, RC, RB 3620 | stp BASE, L->base 3621 | add CARG2, BASE, RC 3622 | mr SAVE0, RB 3623 |->BC_CAT_Z: 3624 | stw PC, SAVE_PC 3625 | mr CARG1, L 3626 | srwi CARG3, CARG3, 3 3627 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) 3628 | // Returns NULL (finished) or TValue * (metamethod). 3629 | cmplwi CRET1, 0 3630 | lp BASE, L->base 3631 | bne ->vmeta_binop 3632 | ins_next1 3633 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 3634 | stfdx f0, BASE, RA 3635 | ins_next2 3636 break; 3637 3638 /* -- Constant ops ------------------------------------------------------ */ 3639 3640 case BC_KSTR: 3641 | // RA = dst*8, RD = str_const*8 (~). Stores the LJ_TSTR tag and the constant's pointer into slot RA. 3642 | srwi TMP1, RD, 1 3643 | subfic TMP1, TMP1, -4 3644 | ins_next1 3645 | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 3646 | li TMP2, LJ_TSTR 3647 | stwux TMP2, RA, BASE 3648 | stw TMP0, 4(RA) 3649 | ins_next2 3650 break; 3651 case BC_KCDATA: 3652 |.if FFI 3653 | // RA = dst*8, RD = cdata_const*8 (~). Same sequence as BC_KSTR, but with the LJ_TCDATA tag. 3654 | srwi TMP1, RD, 1 3655 | subfic TMP1, TMP1, -4 3656 | ins_next1 3657 | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 3658 | li TMP2, LJ_TCDATA 3659 | stwux TMP2, RA, BASE 3660 | stw TMP0, 4(RA) 3661 | ins_next2 3662 |.endif 3663 break; 3664 case BC_KSHORT: 3665 | // RA = dst*8, RD = int16_literal*8 3666 |.if DUALNUM 3667 | slwi RD, RD, 13 3668 | srawi RD, RD, 16 3669 | ins_next1 3670 | stwux TISNUM, RA, BASE 3671 | stw RD, 4(RA) 3672 | ins_next2 3673 |.else 3674 | // The soft-float approach is faster.
3675 | slwi RD, RD, 13 3676 | srawi TMP1, RD, 31 3677 | xor TMP2, TMP1, RD 3678 | sub TMP2, TMP2, TMP1 // TMP2 = abs(x) 3679 | cntlzw TMP3, TMP2 3680 | subfic TMP1, TMP3, 0x40d // TMP1 = exponent-1 3681 | slw TMP2, TMP2, TMP3 // TMP2 = left aligned mantissa 3682 | subfic TMP3, RD, 0 3683 | slwi TMP1, TMP1, 20 3684 | rlwimi RD, TMP2, 21, 1, 31 // hi = sign(x) | (mantissa>>11) 3685 | subfe TMP0, TMP0, TMP0 3686 | add RD, RD, TMP1 // hi = hi + exponent-1 3687 | and RD, RD, TMP0 // hi = x == 0 ? 0 : hi 3688 | ins_next1 3689 | stwux RD, RA, BASE 3690 | stw ZERO, 4(RA) 3691 | ins_next2 3692 |.endif 3693 break; 3694 case BC_KNUM: 3695 | // RA = dst*8, RD = num_const*8. Copies the 8 byte constant at KBASE+RD into slot RA. 3696 | ins_next1 3697 | lfdx f0, KBASE, RD 3698 | stfdx f0, BASE, RA 3699 | ins_next2 3700 break; 3701 case BC_KPRI: 3702 | // RA = dst*8, RD = primitive_type*8 (~). Only the tag word ~(RD>>3) is stored. 3703 | srwi TMP1, RD, 3 3704 | not TMP0, TMP1 3705 | ins_next1 3706 | stwx TMP0, BASE, RA 3707 | ins_next2 3708 break; 3709 case BC_KNIL: 3710 | // RA = base*8, RD = end*8. Stores the nil tag into slots base..end; the first two stores are unconditional. 3711 | stwx TISNIL, BASE, RA 3712 | addi RA, RA, 8 3713 |1: 3714 | stwx TISNIL, BASE, RA 3715 | cmpw RA, RD 3716 | addi RA, RA, 8 3717 | blt <1 3718 | ins_next_ 3719 break; 3720 3721 /* -- Upvalue and function ops ------------------------------------------ */ 3722 3723 case BC_UGET: 3724 | // RA = dst*8, RD = uvnum*8. Copies the value that uvptr[uvnum]->v points to into slot RA. 3725 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3726 | srwi RD, RD, 1 3727 | addi RD, RD, offsetof(GCfuncL, uvptr) 3728 | lwzx UPVAL:RB, LFUNC:RB, RD 3729 | ins_next1 3730 | lwz TMP1, UPVAL:RB->v 3731 | lfd f0, 0(TMP1) 3732 | stfdx f0, BASE, RA 3733 | ins_next2 3734 break; 3735 case BC_USETV: 3736 | // RA = uvnum*8, RD = src*8 3737 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3738 | srwi RA, RA, 1 3739 | addi RA, RA, offsetof(GCfuncL, uvptr) 3740 | lfdux f0, RD, BASE 3741 | lwzx UPVAL:RB, LFUNC:RB, RA 3742 | lbz TMP3, UPVAL:RB->marked 3743 | lwz CARG2, UPVAL:RB->v 3744 | andix.
TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3745 | lbz TMP0, UPVAL:RB->closed 3746 | lwz TMP2, 0(RD) 3747 | stfd f0, 0(CARG2) 3748 | cmplwi cr1, TMP0, 0 3749 | lwz TMP1, 4(RD) 3750 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3751 | subi TMP2, TMP2, (LJ_TNUMX+1) 3752 | bne >2 // Upvalue is closed and black? 3753 |1: 3754 | ins_next 3755 | 3756 |2: // Check if new value is collectable. Values failing the tvisgcv range test skip the barrier. 3757 | cmplwi TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3758 | bge <1 // tvisgcv(v) 3759 | lbz TMP3, GCOBJ:TMP1->gch.marked 3760 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3761 | la CARG1, GG_DISP2G(DISPATCH) 3762 | // Crossed a write barrier. Move the barrier forward. 3763 | beq <1 3764 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3765 | b <1 3766 break; 3767 case BC_USETS: 3768 | // RA = uvnum*8, RD = str_const*8 (~). Stores the LJ_TSTR tag and the string constant through uv->v, then checks whether lj_gc_barrieruv is needed. 3769 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3770 | srwi TMP1, RD, 1 3771 | srwi RA, RA, 1 3772 | subfic TMP1, TMP1, -4 3773 | addi RA, RA, offsetof(GCfuncL, uvptr) 3774 | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 3775 | lwzx UPVAL:RB, LFUNC:RB, RA 3776 | lbz TMP3, UPVAL:RB->marked 3777 | lwz CARG2, UPVAL:RB->v 3778 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3779 | lbz TMP3, STR:TMP1->marked 3780 | lbz TMP2, UPVAL:RB->closed 3781 | li TMP0, LJ_TSTR 3782 | stw STR:TMP1, 4(CARG2) 3783 | stw TMP0, 0(CARG2) 3784 | bne >2 3785 |1: 3786 | ins_next 3787 | 3788 |2: // Check if string is white and ensure upvalue is closed. 3789 | andix. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) 3790 | cmplwi cr1, TMP2, 0 3791 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 3792 | la CARG1, GG_DISP2G(DISPATCH) 3793 | // Crossed a write barrier. Move the barrier forward.
3794 | beq <1 3795 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) 3796 | b <1 3797 break; 3798 case BC_USETN: 3799 | // RA = uvnum*8, RD = num_const*8. Copies the 8 byte constant at KBASE+RD through uv->v; no GC barrier code follows. 3800 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3801 | srwi RA, RA, 1 3802 | addi RA, RA, offsetof(GCfuncL, uvptr) 3803 | lfdx f0, KBASE, RD 3804 | lwzx UPVAL:RB, LFUNC:RB, RA 3805 | ins_next1 3806 | lwz TMP1, UPVAL:RB->v 3807 | stfd f0, 0(TMP1) 3808 | ins_next2 3809 break; 3810 case BC_USETP: 3811 | // RA = uvnum*8, RD = primitive_type*8 (~). Only the tag word ~(RD>>3) is stored through uv->v. 3812 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3813 | srwi RA, RA, 1 3814 | srwi TMP0, RD, 3 3815 | addi RA, RA, offsetof(GCfuncL, uvptr) 3816 | not TMP0, TMP0 3817 | lwzx UPVAL:RB, LFUNC:RB, RA 3818 | ins_next1 3819 | lwz TMP1, UPVAL:RB->v 3820 | stw TMP0, 0(TMP1) 3821 | ins_next2 3822 break; 3823 3824 case BC_UCLO: 3825 | // RA = level*8, RD = target. Calls lj_func_closeuv(L, BASE+RA) only when L->openupval is non-NULL; PC is already at the branch target by then. 3826 | lwz TMP1, L->openupval 3827 | branch_RD // Do this first since RD is not saved. 3828 | stp BASE, L->base 3829 | cmplwi TMP1, 0 3830 | mr CARG1, L 3831 | beq >1 3832 | add CARG2, BASE, RA 3833 | bl extern lj_func_closeuv // (lua_State *L, TValue *level) 3834 | lp BASE, L->base 3835 |1: 3836 | ins_next 3837 break; 3838 3839 case BC_FNEW: 3840 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) 3841 | srwi TMP1, RD, 1 3842 | stp BASE, L->base 3843 | subfic TMP1, TMP1, -4 3844 | stw PC, SAVE_PC 3845 | lwzx CARG2, KBASE, TMP1 // KBASE-4-proto_const*4 3846 | mr CARG1, L 3847 | lwz CARG3, FRAME_FUNC(BASE) 3848 | // (lua_State *L, GCproto *pt, GCfuncL *parent) 3849 | bl extern lj_func_newL_gc 3850 | // Returns GCfuncL *.
3851 | lp BASE, L->base 3852 | li TMP0, LJ_TFUNC 3853 | stwux TMP0, RA, BASE 3854 | stw LFUNC:CRET1, 4(RA) 3855 | ins_next 3856 break; 3857 3858 /* -- Table ops --------------------------------------------------------- */ 3859 3860 case BC_TNEW: 3861 case BC_TDUP: 3862 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~). When gc.total >= gc.threshold, lj_gc_step_fixtop runs first (label 5) with RD kept in SAVE0. 3863 | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) 3864 | mr CARG1, L 3865 | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) 3866 | stp BASE, L->base 3867 | cmplw TMP0, TMP1 3868 | stw PC, SAVE_PC 3869 | bge >5 3870 |1: 3871 if (op == BC_TNEW) { 3872 | rlwinm CARG2, RD, 29, 21, 31 3873 | rlwinm CARG3, RD, 18, 27, 31 3874 | cmpwi CARG2, 0x7ff; beq >3 3875 |2: 3876 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) 3877 | // Returns Table *. 3878 } else { 3879 | srwi TMP1, RD, 1 3880 | subfic TMP1, TMP1, -4 3881 | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 3882 | bl extern lj_tab_dup // (lua_State *L, Table *kt) 3883 | // Returns Table *. 3884 } 3885 | lp BASE, L->base 3886 | li TMP0, LJ_TTAB 3887 | stwux TMP0, RA, BASE 3888 | stw TAB:CRET1, 4(RA) 3889 | ins_next 3890 if (op == BC_TNEW) { 3891 |3: 3892 | li CARG2, 0x801 3893 | b <2 3894 } 3895 |5: 3896 | mr SAVE0, RD 3897 | bl extern lj_gc_step_fixtop // (lua_State *L) 3898 | mr RD, SAVE0 3899 | mr CARG1, L 3900 | b <1 3901 break; 3902 3903 case BC_GGET: 3904 | // RA = dst*8, RD = str_const*8 (~) 3905 case BC_GSET: 3906 | // RA = src*8, RD = str_const*8 (~). Both ops load the function's env table into TAB:RB and the key into STR:RC, then jump into BC_TGETS_Z or BC_TSETS_Z. 3907 | lwz LFUNC:TMP2, FRAME_FUNC(BASE) 3908 | srwi TMP1, RD, 1 3909 | lwz TAB:RB, LFUNC:TMP2->env 3910 | subfic TMP1, TMP1, -4 3911 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 3912 if (op == BC_GGET) { 3913 | b ->BC_TGETS_Z 3914 } else { 3915 | b ->BC_TSETS_Z 3916 } 3917 break; 3918 3919 case BC_TGETV: 3920 | // RA = dst*8, RB = table*8, RC = key*8 3921 | lwzux CARG1, RB, BASE 3922 | lwzux CARG2, RC, BASE 3923 | lwz TAB:RB, 4(RB) 3924 |.if DUALNUM 3925 | lwz RC, 4(RC) 3926 |.else 3927 | lfd f0, 0(RC) 3928 |.endif
3929 | checktab CARG1 3930 | checknum cr1, CARG2 3931 | bne ->vmeta_tgetv 3932 |.if DUALNUM 3933 | lwz TMP0, TAB:RB->asize 3934 | bne cr1, >5 3935 | lwz TMP1, TAB:RB->array 3936 | cmplw TMP0, RC 3937 | slwi TMP2, RC, 3 3938 |.else 3939 | bge cr1, >5 3940 | // Convert number key to integer, check for integerness and range. The fctiwz result is compared against asize and the TOBIT round trip against the original key. 3941 | fctiwz f1, f0 3942 | fadd f2, f0, TOBIT 3943 | stfd f1, TMPD 3944 | lwz TMP0, TAB:RB->asize 3945 | fsub f2, f2, TOBIT 3946 | lwz TMP2, TMPD_LO 3947 | lwz TMP1, TAB:RB->array 3948 | fcmpu cr1, f0, f2 3949 | cmplw cr0, TMP0, TMP2 3950 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 3951 | slwi TMP2, TMP2, 3 3952 |.endif 3953 | ble ->vmeta_tgetv // Integer key and in array part? 3954 | lwzx TMP0, TMP1, TMP2 3955 | lfdx f14, TMP1, TMP2 3956 | checknil TMP0; beq >2 3957 |1: 3958 | ins_next1 3959 | stfdx f14, BASE, RA 3960 | ins_next2 3961 | 3962 |2: // Check for __index if table value is nil. 3963 | lwz TAB:TMP2, TAB:RB->metatable 3964 | cmplwi TAB:TMP2, 0 3965 | beq <1 // No metatable: done. 3966 | lbz TMP0, TAB:TMP2->nomm 3967 | andix. TMP0, TMP0, 1<<MM_index 3968 | bne <1 // 'no __index' flag set: done. 3969 | b ->vmeta_tgetv 3970 | 3971 |5: 3972 | checkstr CARG2; bne ->vmeta_tgetv 3973 |.if not DUALNUM 3974 | lwz STR:RC, 4(RC) 3975 |.endif 3976 | b ->BC_TGETS_Z // String key? Continue with the hash lookup shared with BC_TGETS.
3977 break; 3978 case BC_TGETS: 3979 | // RA = dst*8, RB = table*8, RC = str_const*8 (~). Walks the hash chain for the string key; a nil or missing value consults __index before nil is stored. 3980 | lwzux CARG1, RB, BASE 3981 | srwi TMP1, RC, 1 3982 | lwz TAB:RB, 4(RB) 3983 | subfic TMP1, TMP1, -4 3984 | checktab CARG1 3985 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 3986 | bne ->vmeta_tgets1 3987 |->BC_TGETS_Z: 3988 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 3989 | lwz TMP0, TAB:RB->hmask 3990 | lwz TMP1, STR:RC->hash 3991 | lwz NODE:TMP2, TAB:RB->node 3992 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 3993 | slwi TMP0, TMP1, 5 3994 | slwi TMP1, TMP1, 3 3995 | sub TMP1, TMP0, TMP1 3996 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 3997 |1: 3998 | lwz CARG1, NODE:TMP2->key 3999 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 4000 | lwz CARG2, NODE:TMP2->val 4001 | lwz TMP1, 4+offsetof(Node, val)(NODE:TMP2) 4002 | checkstr CARG1; bne >4 4003 | cmpw TMP0, STR:RC; bne >4 4004 | checknil CARG2; beq >5 // Key found, but nil value? 4005 |3: 4006 | stwux CARG2, RA, BASE 4007 | stw TMP1, 4(RA) 4008 | ins_next 4009 | 4010 |4: // Follow hash chain. 4011 | lwz NODE:TMP2, NODE:TMP2->next 4012 | cmplwi NODE:TMP2, 0 4013 | bne <1 4014 | // End of hash chain: key not found, nil result. 4015 | li CARG2, LJ_TNIL 4016 | 4017 |5: // Check for __index if table value is nil. 4018 | lwz TAB:TMP2, TAB:RB->metatable 4019 | cmplwi TAB:TMP2, 0 4020 | beq <3 // No metatable: done. 4021 | lbz TMP0, TAB:TMP2->nomm 4022 | andix. TMP0, TMP0, 1<<MM_index 4023 | bne <3 // 'no __index' flag set: done.
4024 | b ->vmeta_tgets 4025 break; 4026 case BC_TGETB: 4027 | // RA = dst*8, RB = table*8, RC = index*8. TMP0 = RC>>3 is the array index; non-tables and indexes >= asize go to vmeta_tgetb. 4028 | lwzux CARG1, RB, BASE 4029 | srwi TMP0, RC, 3 4030 | lwz TAB:RB, 4(RB) 4031 | checktab CARG1; bne ->vmeta_tgetb 4032 | lwz TMP1, TAB:RB->asize 4033 | lwz TMP2, TAB:RB->array 4034 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4035 | lwzx TMP1, TMP2, RC 4036 | lfdx f0, TMP2, RC 4037 | checknil TMP1; beq >5 4038 |1: 4039 | ins_next1 4040 | stfdx f0, BASE, RA 4041 | ins_next2 4042 | 4043 |5: // Check for __index if table value is nil. 4044 | lwz TAB:TMP2, TAB:RB->metatable 4045 | cmplwi TAB:TMP2, 0 4046 | beq <1 // No metatable: done. 4047 | lbz TMP2, TAB:TMP2->nomm 4048 | andix. TMP2, TMP2, 1<<MM_index 4049 | bne <1 // 'no __index' flag set: done. 4050 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4051 break; 4052 case BC_TGETR: 4053 | // RA = dst*8, RB = table*8, RC = key*8. No type, nil or metatable checks are done here; only the array bound is tested. 4054 | add RB, BASE, RB 4055 | lwz TAB:CARG1, 4(RB) 4056 |.if DUALNUM 4057 | add RC, BASE, RC 4058 | lwz TMP0, TAB:CARG1->asize 4059 | lwz CARG2, 4(RC) 4060 | lwz TMP1, TAB:CARG1->array 4061 |.else 4062 | lfdx f0, BASE, RC 4063 | lwz TMP0, TAB:CARG1->asize 4064 | toint CARG2, f0 4065 | lwz TMP1, TAB:CARG1->array 4066 |.endif 4067 | cmplw TMP0, CARG2 4068 | slwi TMP2, CARG2, 3 4069 | ble ->vmeta_tgetr // In array part? 4070 | lfdx f14, TMP1, TMP2 4071 |->BC_TGETR_Z: 4072 | ins_next1 4073 | stfdx f14, BASE, RA 4074 | ins_next2 4075 break; 4076 4077 case BC_TSETV: 4078 | // RA = src*8, RB = table*8, RC = key*8 4079 | lwzux CARG1, RB, BASE 4080 | lwzux CARG2, RC, BASE 4081 | lwz TAB:RB, 4(RB) 4082 |.if DUALNUM 4083 | lwz RC, 4(RC) 4084 |.else 4085 | lfd f0, 0(RC) 4086 |.endif 4087 | checktab CARG1 4088 | checknum cr1, CARG2 4089 | bne ->vmeta_tsetv 4090 |.if DUALNUM 4091 | lwz TMP0, TAB:RB->asize 4092 | bne cr1, >5 4093 | lwz TMP1, TAB:RB->array 4094 | cmplw TMP0, RC 4095 | slwi TMP0, RC, 3 4096 |.else 4097 | bge cr1, >5 4098 | // Convert number key to integer, check for integerness and range.
4099 | fctiwz f1, f0 4100 | fadd f2, f0, TOBIT 4101 | stfd f1, TMPD 4102 | lwz TMP0, TAB:RB->asize 4103 | fsub f2, f2, TOBIT 4104 | lwz TMP2, TMPD_LO 4105 | lwz TMP1, TAB:RB->array 4106 | fcmpu cr1, f0, f2 4107 | cmplw cr0, TMP0, TMP2 4108 | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+eq 4109 | slwi TMP0, TMP2, 3 4110 |.endif 4111 | ble ->vmeta_tsetv // Integer key and in array part? 4112 | lwzx TMP2, TMP1, TMP0 4113 | lbz TMP3, TAB:RB->marked 4114 | lfdx f14, BASE, RA 4115 | checknil TMP2; beq >3 4116 |1: 4117 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4118 | stfdx f14, TMP1, TMP0 4119 | bne >7 4120 |2: 4121 | ins_next 4122 | 4123 |3: // Check for __newindex if previous value is nil. 4124 | lwz TAB:TMP2, TAB:RB->metatable 4125 | cmplwi TAB:TMP2, 0 4126 | beq <1 // No metatable: done. 4127 | lbz TMP2, TAB:TMP2->nomm 4128 | andix. TMP2, TMP2, 1<<MM_newindex 4129 | bne <1 // 'no __newindex' flag set: done. 4130 | b ->vmeta_tsetv 4131 | 4132 |5: 4133 | checkstr CARG2; bne ->vmeta_tsetv 4134 |.if not DUALNUM 4135 | lwz STR:RC, 4(RC) 4136 |.endif 4137 | b ->BC_TSETS_Z // String key? 4138 | 4139 |7: // Possible table write barrier for the value. Skip valiswhite check. 4140 | barrierback TAB:RB, TMP3, TMP0 4141 | b <2 4142 break; 4143 case BC_TSETS: 4144 | // RA = src*8, RB = table*8, RC = str_const*8 (~). Overwrites an existing key in place or adds one via lj_tab_newkey; __newindex is consulted when the old value is nil or the key is missing. 4145 | lwzux CARG1, RB, BASE 4146 | srwi TMP1, RC, 1 4147 | lwz TAB:RB, 4(RB) 4148 | subfic TMP1, TMP1, -4 4149 | checktab CARG1 4150 | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 4151 | bne ->vmeta_tsets1 4152 |->BC_TSETS_Z: 4153 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 4154 | lwz TMP0, TAB:RB->hmask 4155 | lwz TMP1, STR:RC->hash 4156 | lwz NODE:TMP2, TAB:RB->node 4157 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
4158 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4159 | lfdx f14, BASE, RA 4160 | slwi TMP0, TMP1, 5 4161 | slwi TMP1, TMP1, 3 4162 | sub TMP1, TMP0, TMP1 4163 | lbz TMP3, TAB:RB->marked 4164 | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4165 |1: 4166 | lwz CARG1, NODE:TMP2->key 4167 | lwz TMP0, 4+offsetof(Node, key)(NODE:TMP2) 4168 | lwz CARG2, NODE:TMP2->val 4169 | lwz NODE:TMP1, NODE:TMP2->next 4170 | checkstr CARG1; bne >5 4171 | cmpw TMP0, STR:RC; bne >5 4172 | checknil CARG2; beq >4 // Key found, but nil value? 4173 |2: 4174 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4175 | stfd f14, NODE:TMP2->val 4176 | bne >7 4177 |3: 4178 | ins_next 4179 | 4180 |4: // Check for __newindex if previous value is nil. 4181 | lwz TAB:TMP1, TAB:RB->metatable 4182 | cmplwi TAB:TMP1, 0 4183 | beq <2 // No metatable: done. 4184 | lbz TMP0, TAB:TMP1->nomm 4185 | andix. TMP0, TMP0, 1<<MM_newindex 4186 | bne <2 // 'no __newindex' flag set: done. 4187 | b ->vmeta_tsets 4188 | 4189 |5: // Follow hash chain. The next pointer was already loaded into NODE:TMP1 at label 1. 4190 | cmplwi NODE:TMP1, 0 4191 | mr NODE:TMP2, NODE:TMP1 4192 | bne <1 4193 | // End of hash chain: key not found, add a new one. 4194 | 4195 | // But check for __newindex first. 4196 | lwz TAB:TMP1, TAB:RB->metatable 4197 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) 4198 | stw PC, SAVE_PC 4199 | mr CARG1, L 4200 | cmplwi TAB:TMP1, 0 4201 | stp BASE, L->base 4202 | beq >6 // No metatable: continue. 4203 | lbz TMP0, TAB:TMP1->nomm 4204 | andix. TMP0, TMP0, 1<<MM_newindex 4205 | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. 4206 |6: 4207 | li TMP0, LJ_TSTR 4208 | stw STR:RC, 4(CARG3) 4209 | mr CARG2, TAB:RB 4210 | stw TMP0, 0(CARG3) 4211 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4212 | // Returns TValue *. 4213 | lp BASE, L->base 4214 | stfd f14, 0(CRET1) 4215 | b <3 // No 2nd write barrier needed. 4216 | 4217 |7: // Possible table write barrier for the value. Skip valiswhite check.
4218 | barrierback TAB:RB, TMP3, TMP0 4219 | b <3 4220 break; 4221 case BC_TSETB: 4222 | // RA = src*8, RB = table*8, RC = index*8. TMP0 = RC>>3 is the array index; non-tables and indexes >= asize go to vmeta_tsetb. 4223 | lwzux CARG1, RB, BASE 4224 | srwi TMP0, RC, 3 4225 | lwz TAB:RB, 4(RB) 4226 | checktab CARG1; bne ->vmeta_tsetb 4227 | lwz TMP1, TAB:RB->asize 4228 | lwz TMP2, TAB:RB->array 4229 | lbz TMP3, TAB:RB->marked 4230 | cmplw TMP0, TMP1 4231 | lfdx f14, BASE, RA 4232 | bge ->vmeta_tsetb 4233 | lwzx TMP1, TMP2, RC 4234 | checknil TMP1; beq >5 4235 |1: 4236 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4237 | stfdx f14, TMP2, RC 4238 | bne >7 4239 |2: 4240 | ins_next 4241 | 4242 |5: // Check for __newindex if previous value is nil. 4243 | lwz TAB:TMP1, TAB:RB->metatable 4244 | cmplwi TAB:TMP1, 0 4245 | beq <1 // No metatable: done. 4246 | lbz TMP1, TAB:TMP1->nomm 4247 | andix. TMP1, TMP1, 1<<MM_newindex 4248 | bne <1 // 'no __newindex' flag set: done. 4249 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4250 | 4251 |7: // Possible table write barrier for the value. Skip valiswhite check. 4252 | barrierback TAB:RB, TMP3, TMP0 4253 | b <2 4254 break; 4255 case BC_TSETR: 4256 | // RA = src*8, RB = table*8, RC = key*8. The value in slot RA is stored into the array part; no type or metatable checks are done here. 4257 | add RB, BASE, RB 4258 | lwz TAB:CARG2, 4(RB) 4259 |.if DUALNUM 4260 | add RC, BASE, RC 4261 | lbz TMP3, TAB:CARG2->marked 4262 | lwz TMP0, TAB:CARG2->asize 4263 | lwz CARG3, 4(RC) 4264 | lwz TMP1, TAB:CARG2->array 4265 |.else 4266 | lfdx f0, BASE, RC 4267 | lbz TMP3, TAB:CARG2->marked 4268 | lwz TMP0, TAB:CARG2->asize 4269 | toint CARG3, f0 4270 | lwz TMP1, TAB:CARG2->array 4271 |.endif 4272 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4273 | bne >7 4274 |2: 4275 | cmplw TMP0, CARG3 4276 | slwi TMP2, CARG3, 3 4277 | lfdx f14, BASE, RA 4278 | ble ->vmeta_tsetr // In array part? 4279 | ins_next1 4280 | stfdx f14, TMP1, TMP2 4281 | ins_next2 4282 | 4283 |7: // Possible table write barrier for the value. Skip valiswhite check.
4284 | barrierback TAB:CARG2, TMP3, TMP2 4285 | b <2 4286 break; 4287 4288 4289 case BC_TSETM: 4290 | // RA = base*8 (table at base-1), RD = num_const*8 (start index). Copies MULTRES-8 bytes of slots starting at BASE+RA into the array part, growing it via lj_tab_reasize when needed. 4291 | add RA, BASE, RA 4292 |1: 4293 | add TMP3, KBASE, RD 4294 | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. 4295 | addic. TMP0, MULTRES, -8 4296 | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. 4297 | srwi CARG3, TMP0, 3 4298 | beq >4 // Nothing to copy? 4299 | add CARG3, CARG3, TMP3 4300 | lwz TMP2, TAB:CARG2->asize 4301 | slwi TMP1, TMP3, 3 4302 | lbz TMP3, TAB:CARG2->marked 4303 | cmplw CARG3, TMP2 4304 | add TMP2, RA, TMP0 4305 | lwz TMP0, TAB:CARG2->array 4306 | bgt >5 4307 | add TMP1, TMP1, TMP0 4308 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4309 |3: // Copy result slots to table. The loop compares in cr1, so the isblack result in cr0 is still valid for the bne after it. 4310 | lfd f0, 0(RA) 4311 | addi RA, RA, 8 4312 | cmpw cr1, RA, TMP2 4313 | stfd f0, 0(TMP1) 4314 | addi TMP1, TMP1, 8 4315 | blt cr1, <3 4316 | bne >7 4317 |4: 4318 | ins_next 4319 | 4320 |5: // Need to resize array part. RD is kept in SAVE0 and the whole sequence restarts at label 1. 4321 | stp BASE, L->base 4322 | mr CARG1, L 4323 | stw PC, SAVE_PC 4324 | mr SAVE0, RD 4325 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) 4326 | // Must not reallocate the stack. 4327 | mr RD, SAVE0 4328 | b <1 4329 | 4330 |7: // Possible table write barrier for any value. Skip valiswhite check. 4331 | barrierback TAB:CARG2, TMP3, TMP0 4332 | b <4 4333 break; 4334 4335 /* -- Calls and vararg handling ----------------------------------------- */ 4336 4337 case BC_CALLM: 4338 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 4339 | add NARGS8:RC, NARGS8:RC, MULTRES 4340 | // Fall through. Assumes BC_CALL follows.
4341 break; 4342 case BC_CALL: 4343 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8. The old BASE is kept in TMP2; a non-function callee goes to vmeta_call. 4344 | mr TMP2, BASE 4345 | lwzux TMP0, BASE, RA 4346 | lwz LFUNC:RB, 4(BASE) 4347 | subi NARGS8:RC, NARGS8:RC, 8 4348 | addi BASE, BASE, 8 4349 | checkfunc TMP0; bne ->vmeta_call 4350 | ins_call 4351 break; 4352 4353 case BC_CALLMT: 4354 | // RA = base*8, (RB = 0,) RC = extra_nargs*8 4355 | add NARGS8:RC, NARGS8:RC, MULTRES 4356 | // Fall through. Assumes BC_CALLT follows. 4357 break; 4358 case BC_CALLT: 4359 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8. The function and its arguments are copied down to BASE before ins_callt. 4360 | lwzux TMP0, RA, BASE 4361 | lwz LFUNC:RB, 4(RA) 4362 | subi NARGS8:RC, NARGS8:RC, 8 4363 | lwz TMP1, FRAME_PC(BASE) 4364 | checkfunc TMP0 4365 | addi RA, RA, 8 4366 | bne ->vmeta_callt 4367 |->BC_CALLT_Z: 4368 | andix. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. 4369 | lbz TMP3, LFUNC:RB->ffid 4370 | xori TMP2, TMP1, FRAME_VARG 4371 | cmplwi cr1, NARGS8:RC, 0 4372 | bne >7 4373 |1: 4374 | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. 4375 | li TMP2, 0 4376 | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? 4377 | beq cr1, >3 4378 |2: 4379 | addi TMP3, TMP2, 8 4380 | lfdx f0, RA, TMP2 4381 | cmplw cr1, TMP3, NARGS8:RC 4382 | stfdx f0, BASE, TMP2 4383 | mr TMP2, TMP3 4384 | bne cr1, <2 4385 |3: 4386 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt 4387 | beq >5 4388 |4: 4389 | ins_callt 4390 | 4391 |5: // Tailcall to a fast function with a Lua frame below. 4392 | lwz INS, -4(TMP1) 4393 | decode_RA8 RA, INS 4394 | sub TMP1, BASE, RA 4395 | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) 4396 | lwz TMP1, LFUNC:TMP1->pc 4397 | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. 4398 | b <4 4399 | 4400 |7: // Tailcall from a vararg function. 4401 | andix. TMP0, TMP2, FRAME_TYPEP 4402 | bne <1 // Vararg frame below? 4403 | sub BASE, BASE, TMP2 // Relocate BASE down. 4404 | lwz TMP1, FRAME_PC(BASE) 4405 | andix.
TMP0, TMP1, FRAME_TYPE 4406 | b <1 4407 break; 4408 4409 case BC_ITERC: 4410 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)). Copies the callable, state and control var from the three slots below BASE+RA, then calls with two arguments. 4411 | mr TMP2, BASE 4412 | add BASE, BASE, RA 4413 | lwz TMP1, -24(BASE) 4414 | lwz LFUNC:RB, -20(BASE) 4415 | lfd f1, -8(BASE) 4416 | lfd f0, -16(BASE) 4417 | stw TMP1, 0(BASE) // Copy callable. 4418 | stw LFUNC:RB, 4(BASE) 4419 | checkfunc TMP1 4420 | stfd f1, 16(BASE) // Copy control var. 4421 | li NARGS8:RC, 16 // Iterators get 2 arguments. 4422 | stfdu f0, 8(BASE) // Copy state. 4423 | bne ->vmeta_call 4424 | ins_call 4425 break; 4426 4427 case BC_ITERN: 4428 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)). The table is read from the lo-word of the state slot and the running index from the lo-word of the control var. 4429 |.if JIT 4430 | // NYI: add hotloop, record BC_ITERN. 4431 |.endif 4432 | add RA, BASE, RA 4433 | lwz TAB:RB, -12(RA) 4434 | lwz RC, -4(RA) // Get index from control var. 4435 | lwz TMP0, TAB:RB->asize 4436 | lwz TMP1, TAB:RB->array 4437 | addi PC, PC, 4 4438 |1: // Traverse array part. 4439 | cmplw RC, TMP0 4440 | slwi TMP3, RC, 3 4441 | bge >5 // Index points after array part? 4442 | lwzx TMP2, TMP1, TMP3 4443 | lfdx f0, TMP1, TMP3 4444 | checknil TMP2 4445 | lwz INS, -4(PC) 4446 | beq >4 4447 |.if DUALNUM 4448 | stw RC, 4(RA) 4449 | stw TISNUM, 0(RA) 4450 |.else 4451 | tonum_u f1, RC 4452 |.endif 4453 | addi RC, RC, 1 4454 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 4455 | stfd f0, 8(RA) 4456 | decode_RD4 TMP1, INS 4457 | stw RC, -4(RA) // Update control var. 4458 | add PC, TMP1, TMP3 4459 |.if not DUALNUM 4460 | stfd f1, 0(RA) 4461 |.endif 4462 |3: 4463 | ins_next 4464 | 4465 |4: // Skip holes in array part. 4466 | addi RC, RC, 1 4467 | b <1 4468 | 4469 |5: // Traverse hash part. 4470 | lwz TMP1, TAB:RB->hmask 4471 | sub RC, RC, TMP0 4472 | lwz TMP2, TAB:RB->node 4473 |6: 4474 | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1.
4475 | slwi TMP3, RC, 5 4476 | bgty <3 4477 | slwi RB, RC, 3 4478 | sub TMP3, TMP3, RB 4479 | lwzx RB, TMP2, TMP3 4480 | lfdx f0, TMP2, TMP3 4481 | add NODE:TMP3, TMP2, TMP3 4482 | checknil RB 4483 | lwz INS, -4(PC) 4484 | beq >7 4485 | lfd f1, NODE:TMP3->key 4486 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 4487 | stfd f0, 8(RA) 4488 | add RC, RC, TMP0 4489 | decode_RD4 TMP1, INS 4490 | stfd f1, 0(RA) 4491 | addi RC, RC, 1 4492 | add PC, TMP1, TMP2 4493 | stw RC, -4(RA) // Update control var. The stored index is the hash index plus asize plus one. 4494 | b <3 4495 | 4496 |7: // Skip holes in hash part. 4497 | addi RC, RC, 1 4498 | b <6 4499 break; 4500 4501 case BC_ISNEXT: 4502 | // RA = base*8, RD = target (points to ITERN). Needs a function with ffid FF_next_N at -24, a table tag at -16 and a nil tag at -8; any failed check despecializes the bytecode at label 5. 4503 | add RA, BASE, RA 4504 | lwz TMP0, -24(RA) 4505 | lwz CFUNC:TMP1, -20(RA) 4506 | lwz TMP2, -16(RA) 4507 | lwz TMP3, -8(RA) 4508 | cmpwi cr0, TMP2, LJ_TTAB 4509 | cmpwi cr1, TMP0, LJ_TFUNC 4510 | cmpwi cr6, TMP3, LJ_TNIL 4511 | bne cr1, >5 4512 | lbz TMP1, CFUNC:TMP1->ffid 4513 | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq 4514 | cmpwi cr7, TMP1, FF_next_N 4515 | srwi TMP0, RD, 1 4516 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 4517 | add TMP3, PC, TMP0 4518 | bne cr0, >5 4519 | lus TMP1, 0xfffe 4520 | ori TMP1, TMP1, 0x7fff 4521 | stw ZERO, -4(RA) // Initialize control var. 4522 | stw TMP1, -8(RA) 4523 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) 4524 |1: 4525 | ins_next 4526 |5: // Despecialize bytecode if any of the checks fail. BC_JMP is written relative to the current PC, BC_ITERC relative to the branch target. 4527 | li TMP0, BC_JMP 4528 | li TMP1, BC_ITERC 4529 | stb TMP0, -1(PC) 4530 | addis PC, TMP3, -(BCBIAS_J*4 >> 16) 4531 | stb TMP1, 3(PC) 4532 | b <1 4533 break; 4534 4535 case BC_VARG: 4536 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 4537 | lwz TMP0, FRAME_PC(BASE) 4538 | add RC, BASE, RC 4539 | add RA, BASE, RA 4540 | addi RC, RC, FRAME_VARG 4541 | add TMP2, RA, RB 4542 | subi TMP3, BASE, 8 // TMP3 = vtop 4543 | sub RC, RC, TMP0 // RC = vbase 4544 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
4545 | cmplwi cr1, RB, 0 4546 |.if PPE 4547 | sub TMP1, TMP3, RC 4548 | cmpwi TMP1, 0 4549 |.else 4550 | sub. TMP1, TMP3, RC 4551 |.endif 4552 | beq cr1, >5 // RB == 0: copy all varargs? 4553 | subi TMP2, TMP2, 16 4554 | ble >2 // No vararg slots? 4555 |1: // Copy vararg slots to destination slots. 4556 | lfd f0, 0(RC) 4557 | addi RC, RC, 8 4558 | stfd f0, 0(RA) 4559 | cmplw RA, TMP2 4560 | cmplw cr1, RC, TMP3 4561 | bge >3 // All destination slots filled? 4562 | addi RA, RA, 8 4563 | blt cr1, <1 // More vararg slots? 4564 |2: // Fill up remainder with nil. 4565 | stw TISNIL, 0(RA) 4566 | cmplw RA, TMP2 4567 | addi RA, RA, 8 4568 | blt <2 4569 |3: 4570 | ins_next 4571 | 4572 |5: // Copy all varargs. MULTRES becomes TMP1+8, where TMP1 = vtop - vbase in bytes. 4573 | lwz TMP0, L->maxstack 4574 | li MULTRES, 8 // MULTRES = (0+1)*8 4575 | bley <3 // No vararg slots? 4576 | add TMP2, RA, TMP1 4577 | cmplw TMP2, TMP0 4578 | addi MULTRES, TMP1, 8 4579 | bgt >7 4580 |6: 4581 | lfd f0, 0(RC) 4582 | addi RC, RC, 8 4583 | stfd f0, 0(RA) 4584 | cmplw RC, TMP3 4585 | addi RA, RA, 8 4586 | blt <6 // More vararg slots? 4587 | b <3 4588 | 4589 |7: // Grow stack for varargs. RA and RC are turned into BASE-relative offsets across the call and rebuilt afterwards. 4590 | mr CARG1, L 4591 | stp RA, L->top 4592 | sub SAVE0, RC, BASE // Need delta, because BASE may change. 4593 | stp BASE, L->base 4594 | sub RA, RA, BASE 4595 | stw PC, SAVE_PC 4596 | srwi CARG2, TMP1, 3 4597 | bl extern lj_state_growstack // (lua_State *L, int n) 4598 | lp BASE, L->base 4599 | add RA, BASE, RA 4600 | add RC, BASE, SAVE0 4601 | subi TMP3, BASE, 8 4602 | b <6 4603 break; 4604 4605 /* -- Returns ----------------------------------------------------------- */ 4606 4607 case BC_RETM: 4608 | // RA = results*8, RD = extra_nresults*8 4609 | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. 4610 | // Fall through. Assumes BC_RET follows. 4611 break; 4612 4613 case BC_RET: 4614 | // RA = results*8, RD = (nresults+1)*8 4615 | lwz PC, FRAME_PC(BASE) 4616 | add RA, BASE, RA 4617 | mr MULTRES, RD 4618 |1: 4619 | andix.
TMP0, PC, FRAME_TYPE 4620 | xori TMP1, PC, FRAME_VARG 4621 | bne ->BC_RETV_Z 4622 | 4623 |->BC_RET_Z: 4624 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return. The copy loop at label 2 is unrolled to move two slots per iteration. 4625 | lwz INS, -4(PC) 4626 | cmpwi RD, 8 4627 | subi TMP2, BASE, 8 4628 | subi RC, RD, 8 4629 | decode_RB8 RB, INS 4630 | beq >3 4631 | li TMP1, 0 4632 |2: 4633 | addi TMP3, TMP1, 8 4634 | lfdx f0, RA, TMP1 4635 | cmpw TMP3, RC 4636 | stfdx f0, TMP2, TMP1 4637 | beq >3 4638 | addi TMP1, TMP3, 8 4639 | lfdx f1, RA, TMP3 4640 | cmpw TMP1, RC 4641 | stfdx f1, TMP2, TMP3 4642 | bne <2 4643 |3: 4644 |5: 4645 | cmplw RB, RD 4646 | decode_RA8 RA, INS 4647 | bgt >6 4648 | sub BASE, TMP2, RA 4649 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 4650 | ins_next1 4651 | lwz TMP1, LFUNC:TMP1->pc 4652 | lwz KBASE, PC2PROTO(k)(TMP1) 4653 | ins_next2 4654 | 4655 |6: // Fill up results with nil. One slot is added per pass; RD grows by 8 until RB <= RD. 4656 | subi TMP1, RD, 8 4657 | addi RD, RD, 8 4658 | stwx TISNIL, TMP2, TMP1 4659 | b <5 4660 | 4661 |->BC_RETV_Z: // Non-standard return case. 4662 | andix. TMP2, TMP1, FRAME_TYPEP 4663 | bne ->vm_return 4664 | // Return from vararg function: relocate BASE down. 4665 | sub BASE, BASE, TMP1 4666 | lwz PC, FRAME_PC(BASE) 4667 | b <1 4668 break; 4669 4670 case BC_RET0: case BC_RET1: 4671 | // RA = results*8, RD = (nresults+1)*8. Same epilogue as BC_RET_Z, but without a copy loop: BC_RET1 moves exactly one slot. 4672 | lwz PC, FRAME_PC(BASE) 4673 | add RA, BASE, RA 4674 | mr MULTRES, RD 4675 | andix. TMP0, PC, FRAME_TYPE 4676 | xori TMP1, PC, FRAME_VARG 4677 | bney ->BC_RETV_Z 4678 | 4679 | lwz INS, -4(PC) 4680 | subi TMP2, BASE, 8 4681 | decode_RB8 RB, INS 4682 if (op == BC_RET1) { 4683 | lfd f0, 0(RA) 4684 | stfd f0, 0(TMP2) 4685 } 4686 |5: 4687 | cmplw RB, RD 4688 | decode_RA8 RA, INS 4689 | bgt >6 4690 | sub BASE, TMP2, RA 4691 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 4692 | ins_next1 4693 | lwz TMP1, LFUNC:TMP1->pc 4694 | lwz KBASE, PC2PROTO(k)(TMP1) 4695 | ins_next2 4696 | 4697 |6: // Fill up results with nil.
4698 | subi TMP1, RD, 8 4699 | addi RD, RD, 8 4700 | stwx TISNIL, TMP2, TMP1 4701 | b <5 4702 break; 4703 4704 /* -- Loops and branches ------------------------------------------------ */ 4705 4706 case BC_FORL: 4707 |.if JIT 4708 | hotloop 4709 |.endif 4710 | // Fall through. Assumes BC_IFORL follows. 4711 break; 4712 4713 case BC_JFORI: 4714 case BC_JFORL: 4715 #if !LJ_HASJIT 4716 break; 4717 #endif 4718 case BC_FORI: 4719 case BC_IFORL: 4720 | // RA = base*8, RD = target (after end of loop or start of loop) 4721 vk = (op == BC_IFORL || op == BC_JFORL); 4722 |.if DUALNUM 4723 | // Integer loop. 4724 | lwzux TMP1, RA, BASE 4725 | lwz CARG1, FORL_IDX*8+4(RA) 4726 | cmplw cr0, TMP1, TISNUM 4727 if (vk) { 4728 | lwz CARG3, FORL_STEP*8+4(RA) 4729 | bne >9 4730 |.if GPR64 4731 | // Need to check overflow for (a<<32) + (b<<32). 4732 | rldicr TMP0, CARG1, 32, 31 4733 | rldicr TMP2, CARG3, 32, 31 4734 | add CARG1, CARG1, CARG3 4735 | addo. TMP0, TMP0, TMP2 4736 |.else 4737 | addo. CARG1, CARG1, CARG3 4738 |.endif 4739 | cmpwi cr6, CARG3, 0 4740 | lwz CARG2, FORL_STOP*8+4(RA) 4741 | bso >6 4742 |4: 4743 | stw CARG1, FORL_IDX*8+4(RA) 4744 } else { 4745 | lwz TMP3, FORL_STEP*8(RA) 4746 | lwz CARG3, FORL_STEP*8+4(RA) 4747 | lwz TMP2, FORL_STOP*8(RA) 4748 | lwz CARG2, FORL_STOP*8+4(RA) 4749 | cmplw cr7, TMP3, TISNUM 4750 | cmplw cr1, TMP2, TISNUM 4751 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 4752 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4753 | cmpwi cr6, CARG3, 0 4754 | bne >9 4755 } 4756 | blt cr6, >5 4757 | cmpw CARG1, CARG2 4758 |1: 4759 | stw TISNUM, FORL_EXT*8(RA) 4760 if (op != BC_JFORL) { 4761 | srwi RD, RD, 1 4762 } 4763 | stw CARG1, FORL_EXT*8+4(RA) 4764 if (op != BC_JFORL) { 4765 | add RD, PC, RD 4766 } 4767 if (op == BC_FORI) { 4768 | bgt >3 // See FP loop below. 
4769 } else if (op == BC_JFORI) { 4770 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4771 | bley >7 4772 } else if (op == BC_IFORL) { 4773 | bgt >2 4774 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4775 } else { 4776 | bley =>BC_JLOOP 4777 } 4778 |2: 4779 | ins_next 4780 |5: // Invert check for negative step. 4781 | cmpw CARG2, CARG1 4782 | b <1 4783 if (vk) { 4784 |6: // Potential overflow. 4785 | checkov TMP0, <4 // Ignore unrelated overflow. 4786 | b <2 4787 } 4788 |.endif 4789 if (vk) { 4790 |.if DUALNUM 4791 |9: // FP loop. 4792 | lfd f1, FORL_IDX*8(RA) 4793 |.else 4794 | lfdux f1, RA, BASE 4795 |.endif 4796 | lfd f3, FORL_STEP*8(RA) 4797 | lfd f2, FORL_STOP*8(RA) 4798 | lwz TMP3, FORL_STEP*8(RA) 4799 | fadd f1, f1, f3 4800 | stfd f1, FORL_IDX*8(RA) 4801 } else { 4802 |.if DUALNUM 4803 |9: // FP loop. 4804 |.else 4805 | lwzux TMP1, RA, BASE 4806 | lwz TMP3, FORL_STEP*8(RA) 4807 | lwz TMP2, FORL_STOP*8(RA) 4808 | cmplw cr0, TMP1, TISNUM 4809 | cmplw cr7, TMP3, TISNUM 4810 | cmplw cr1, TMP2, TISNUM 4811 |.endif 4812 | lfd f1, FORL_IDX*8(RA) 4813 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 4814 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4815 | lfd f2, FORL_STOP*8(RA) 4816 | bge ->vmeta_for 4817 } 4818 | cmpwi cr6, TMP3, 0 4819 if (op != BC_JFORL) { 4820 | srwi RD, RD, 1 4821 } 4822 | stfd f1, FORL_EXT*8(RA) 4823 if (op != BC_JFORL) { 4824 | add RD, PC, RD 4825 } 4826 | fcmpu cr0, f1, f2 4827 if (op == BC_JFORI) { 4828 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4829 } 4830 | blt cr6, >5 4831 if (op == BC_FORI) { 4832 | bgt >3 4833 } else if (op == BC_IFORL) { 4834 |.if DUALNUM 4835 | bgty <2 4836 |.else 4837 | bgt >2 4838 |.endif 4839 |1: 4840 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4841 } else if (op == BC_JFORI) { 4842 | bley >7 4843 } else { 4844 | bley =>BC_JLOOP 4845 } 4846 |.if DUALNUM 4847 | b <2 4848 |.else 4849 |2: 4850 | ins_next 4851 |.endif 4852 |5: // Negative step. 4853 if (op == BC_FORI) { 4854 | bge <2 4855 |3: // Used by integer loop, too. 
4856 | addis PC, RD, -(BCBIAS_J*4 >> 16) 4857 } else if (op == BC_IFORL) { 4858 | bgey <1 4859 } else if (op == BC_JFORI) { 4860 | bgey >7 4861 } else { 4862 | bgey =>BC_JLOOP 4863 } 4864 | b <2 4865 if (op == BC_JFORI) { 4866 |7: 4867 | lwz INS, -4(PC) 4868 | decode_RD8 RD, INS 4869 | b =>BC_JLOOP 4870 } 4871 break; 4872 4873 case BC_ITERL: 4874 |.if JIT 4875 | hotloop 4876 |.endif 4877 | // Fall through. Assumes BC_IITERL follows. 4878 break; 4879 4880 case BC_JITERL: 4881 #if !LJ_HASJIT 4882 break; 4883 #endif 4884 case BC_IITERL: 4885 | // RA = base*8, RD = target 4886 | lwzux TMP1, RA, BASE 4887 | lwz TMP2, 4(RA) 4888 | checknil TMP1; beq >1 // Stop if iterator returned nil. 4889 if (op == BC_JITERL) { 4890 | stw TMP1, -8(RA) 4891 | stw TMP2, -4(RA) 4892 | b =>BC_JLOOP 4893 } else { 4894 | branch_RD // Otherwise save control var + branch. 4895 | stw TMP1, -8(RA) 4896 | stw TMP2, -4(RA) 4897 } 4898 |1: 4899 | ins_next 4900 break; 4901 4902 case BC_LOOP: 4903 | // RA = base*8, RD = target (loop extent) 4904 | // Note: RA/RD is only used by trace recorder to determine scope/extent 4905 | // This opcode does NOT jump, it's only purpose is to detect a hot loop. 4906 |.if JIT 4907 | hotloop 4908 |.endif 4909 | // Fall through. Assumes BC_ILOOP follows. 4910 break; 4911 4912 case BC_ILOOP: 4913 | // RA = base*8, RD = target (loop extent) 4914 | ins_next 4915 break; 4916 4917 case BC_JLOOP: 4918 |.if JIT 4919 | // RA = base*8 (ignored), RD = traceno*8 4920 | lwz TMP1, DISPATCH_J(trace)(DISPATCH) 4921 | srwi RD, RD, 1 4922 | // Traces on PPC don't store the trace number, so use 0. 
4923 | stw ZERO, DISPATCH_GL(vmstate)(DISPATCH) 4924 | lwzx TRACE:TMP2, TMP1, RD 4925 | clrso TMP1 4926 | lp TMP2, TRACE:TMP2->mcode 4927 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4928 | mtctr TMP2 4929 | addi JGL, DISPATCH, GG_DISP2G+32768 4930 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) 4931 | bctr 4932 |.endif 4933 break; 4934 4935 case BC_JMP: 4936 | // RA = base*8 (only used by trace recorder), RD = target 4937 | branch_RD 4938 | ins_next 4939 break; 4940 4941 /* -- Function headers -------------------------------------------------- */ 4942 4943 case BC_FUNCF: 4944 |.if JIT 4945 | hotcall 4946 |.endif 4947 case BC_FUNCV: /* NYI: compiled vararg functions. */ 4948 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. 4949 break; 4950 4951 case BC_JFUNCF: 4952 #if !LJ_HASJIT 4953 break; 4954 #endif 4955 case BC_IFUNCF: 4956 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4957 | lwz TMP2, L->maxstack 4958 | lbz TMP1, -4+PC2PROTO(numparams)(PC) 4959 | lwz KBASE, -4+PC2PROTO(k)(PC) 4960 | cmplw RA, TMP2 4961 | slwi TMP1, TMP1, 3 4962 | bgt ->vm_growstack_l 4963 if (op != BC_JFUNCF) { 4964 | ins_next1 4965 } 4966 |2: 4967 | cmplw NARGS8:RC, TMP1 // Check for missing parameters. 4968 | blt >3 4969 if (op == BC_JFUNCF) { 4970 | decode_RD8 RD, INS 4971 | b =>BC_JLOOP 4972 } else { 4973 | ins_next2 4974 } 4975 | 4976 |3: // Clear missing parameters. 4977 | stwx TISNIL, BASE, NARGS8:RC 4978 | addi NARGS8:RC, NARGS8:RC, 8 4979 | b <2 4980 break; 4981 4982 case BC_JFUNCV: 4983 #if !LJ_HASJIT 4984 break; 4985 #endif 4986 | NYI // NYI: compiled vararg functions 4987 break; /* NYI: compiled vararg functions. */ 4988 4989 case BC_IFUNCV: 4990 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 4991 | lwz TMP2, L->maxstack 4992 | add TMP1, BASE, RC 4993 | add TMP0, RA, RC 4994 | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. 
4995 | addi TMP3, RC, 8+FRAME_VARG 4996 | lwz KBASE, -4+PC2PROTO(k)(PC) 4997 | cmplw TMP0, TMP2 4998 | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. 4999 | bge ->vm_growstack_l 5000 | lbz TMP2, -4+PC2PROTO(numparams)(PC) 5001 | mr RA, BASE 5002 | mr RC, TMP1 5003 | ins_next1 5004 | cmpwi TMP2, 0 5005 | addi BASE, TMP1, 8 5006 | beq >3 5007 |1: 5008 | cmplw RA, RC // Less args than parameters? 5009 | lwz TMP0, 0(RA) 5010 | lwz TMP3, 4(RA) 5011 | bge >4 5012 | stw TISNIL, 0(RA) // Clear old fixarg slot (help the GC). 5013 | addi RA, RA, 8 5014 |2: 5015 | addic. TMP2, TMP2, -1 5016 | stw TMP0, 8(TMP1) 5017 | stw TMP3, 12(TMP1) 5018 | addi TMP1, TMP1, 8 5019 | bne <1 5020 |3: 5021 | ins_next2 5022 | 5023 |4: // Clear missing parameters. 5024 | li TMP0, LJ_TNIL 5025 | b <2 5026 break; 5027 5028 case BC_FUNCC: 5029 case BC_FUNCCW: 5030 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 5031 if (op == BC_FUNCC) { 5032 | lp RD, CFUNC:RB->f 5033 } else { 5034 | lp RD, DISPATCH_GL(wrapf)(DISPATCH) 5035 } 5036 | add TMP1, RA, NARGS8:RC 5037 | lwz TMP2, L->maxstack 5038 | .toc lp TMP3, 0(RD) 5039 | add RC, BASE, NARGS8:RC 5040 | stp BASE, L->base 5041 | cmplw TMP1, TMP2 5042 | stp RC, L->top 5043 | li_vmstate C 5044 |.if TOC 5045 | mtctr TMP3 5046 |.else 5047 | mtctr RD 5048 |.endif 5049 if (op == BC_FUNCCW) { 5050 | lp CARG2, CFUNC:RB->f 5051 } 5052 | mr CARG1, L 5053 | bgt ->vm_growstack_c // Need to grow stack. 5054 | .toc lp TOCREG, TOC_OFS(RD) 5055 | .tocenv lp ENVREG, ENV_OFS(RD) 5056 | st_vmstate 5057 | bctrl // (lua_State *L [, lua_CFunction f]) 5058 | // Returns nresults. 5059 | lp BASE, L->base 5060 | .toc ld TOCREG, SAVE_TOC 5061 | slwi RD, CRET1, 3 5062 | lp TMP1, L->top 5063 | li_vmstate INTERP 5064 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 
|  stw L, DISPATCH_GL(cur_L)(DISPATCH)
|  sub RA, TMP1, RD                     // RA = L->top - nresults*8
|  st_vmstate
|  b ->vm_returnc
    break;

  /* ---------------------------------------------------------------------- */

  default:
    /* No case above handled this opcode: report it and exit with status 2. */
    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
    exit(2);
    break;
  }
}

/*
** Assemble the whole backend for the given build context.
**
** Steps, in order: grow the DynASM pc-label table to BC__MAX entries, emit
** the helper subroutines via build_subroutines(), switch to the code_op
** section and call build_ins() once for every opcode number 0..BC__MAX-1.
**
** Returns BC__MAX.
*/
static int build_backend(BuildCtx *ctx)
{
  int op;

  /* Reserve BC__MAX pc labels (presumably one per opcode, matching the
  ** =>BC_* branch targets used by build_ins -- TODO confirm).
  */
  dasm_growpc(Dst, BC__MAX);

  build_subroutines(ctx);

  /* Everything emitted from here on goes into the code_op section. */
  |.code_op
  for (op = 0; op < BC__MAX; op++)
    build_ins(ctx, (BCOp)op, op);

  return BC__MAX;
}

/*
** Emit pseudo frame-info for all assembler functions.
**
** Only BUILD_elfasm mode produces output; every other mode is a no-op.
** The text written to ctx->fp is assembler source describing:
**   .debug_frame: CIE0 + FDE0 (fcofs bytes of code starting at .Lbegin),
**                 plus FDE1 for lj_vm_ffi_call when LJ_HASFFI.
**   .eh_frame:    (unless LJ_NO_UNWIND) CIE1 + FDE2 for the same range, with
**                 lj_err_unwind_dwarf as personality routine, plus
**                 CIE2 + FDE3 for lj_vm_ffi_call when LJ_HASFFI.
**
** The .byte/.uleb128/.sleb128 sequences are hand-encoded DWARF call frame
** instructions. Opcode names in the comments below follow the DWARF spec.
** Offsets are factored by the data alignment factor of -4, so a factored
** offset N means "saved at CFA - 4*N".
**
** NOTE(review): the comments assume column 65 is LR, column 70 is a CR field
** and columns 32+n are the FPRs in the PPC unwind numbering -- confirm.
*/
static void emit_asm_debug(BuildCtx *ctx)
{
  /* Byte offset of the vm_ffi_call global from the start of the code.
  ** Used as the length of the main FDEs; the FFI FDEs cover the remaining
  ** (codesz - fcofs) bytes, i.e. from vm_ffi_call to the end of the code.
  */
  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
  int i;
  switch (ctx->mode) {
  case BUILD_elfasm:
    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
    /* CIE0: common information entry for .debug_frame. */
    fprintf(ctx->fp,
        ".Lframe0:\n"
        "\t.long .LECIE0-.LSCIE0\n"             /* CIE length */
        ".LSCIE0:\n"
        "\t.long 0xffffffff\n"                  /* CIE id for .debug_frame */
        "\t.byte 0x1\n"                         /* CIE version */
        "\t.string \"\"\n"                      /* no augmentation */
        "\t.uleb128 0x1\n"                      /* code alignment factor */
        "\t.sleb128 -4\n"                       /* data alignment factor */
        "\t.byte 65\n"                          /* return address column */
        "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa r1, 0 */
        "\t.align 2\n"
        ".LECIE0:\n\n");
    /* FDE0: covers fcofs bytes of code starting at .Lbegin. */
    fprintf(ctx->fp,
        ".LSFDE0:\n"
        "\t.long .LEFDE0-.LASFDE0\n"            /* FDE length */
        ".LASFDE0:\n"
        "\t.long .Lframe0\n"                    /* owning CIE */
        "\t.long .Lbegin\n"                     /* start address */
        "\t.long %d\n"                          /* address range = fcofs */
        /* DW_CFA_def_cfa_offset CFRAME_SIZE */
        "\t.byte 0xe\n\t.uleb128 %d\n"
        /* DW_CFA_offset_extended_sf column 65, factored -1 (CFA+4) */
        "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
        /* DW_CFA_offset_extended column 70, factored 55 (CFA-220) */
        "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
        fcofs, CFRAME_SIZE);
    /* Two DW_CFA_offset (0x80|column) records per iteration:
    ** column i (r14..r31) at factored offset 37+(31-i), 4 bytes apart, and
    ** column 32+i at factored offset 2+2*(31-i), 8 bytes apart.
    */
    for (i = 14; i <= 31; i++)
      fprintf(ctx->fp,
        "\t.byte %d\n\t.uleb128 %d\n"
        "\t.byte %d\n\t.uleb128 %d\n",
        0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
    fprintf(ctx->fp,
        "\t.align 2\n"
        ".LEFDE0:\n\n");
#if LJ_HASFFI
    /* FDE1: covers lj_vm_ffi_call, (codesz - fcofs) bytes long. */
    fprintf(ctx->fp,
        ".LSFDE1:\n"
        "\t.long .LEFDE1-.LASFDE1\n"
        ".LASFDE1:\n"
        "\t.long .Lframe0\n"
        /* PS3 builds reference the dot-prefixed symbol instead.
        ** NOTE(review): presumably the code entry point rather than a
        ** function descriptor -- confirm.
        */
#if LJ_TARGET_PS3
        "\t.long .lj_vm_ffi_call\n"
#else
        "\t.long lj_vm_ffi_call\n"
#endif
        "\t.long %d\n"                          /* address range */
        /* DW_CFA_offset_extended_sf column 65, factored -1 (CFA+4) */
        "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
        /* DW_CFA_offset r14, factored 2 (CFA-8) */
        "\t.byte 0x8e\n\t.uleb128 2\n"
        /* DW_CFA_def_cfa_register r14 */
        "\t.byte 0xd\n\t.uleb128 0xe\n"
        "\t.align 2\n"
        ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
#endif
#if !LJ_NO_UNWIND
    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
    /* CIE1: .eh_frame CIE with personality routine ("zPR" augmentation). */
    fprintf(ctx->fp,
        ".Lframe1:\n"
        "\t.long .LECIE1-.LSCIE1\n"             /* CIE length */
        ".LSCIE1:\n"
        "\t.long 0\n"                           /* CIE id for .eh_frame */
        "\t.byte 0x1\n"                         /* CIE version */
        "\t.string \"zPR\"\n"                   /* augmentation string */
        "\t.uleb128 0x1\n"                      /* code alignment factor */
        "\t.sleb128 -4\n"                       /* data alignment factor */
        "\t.byte 65\n"                          /* return address column */
        "\t.uleb128 6\n"                        /* augmentation length */
        "\t.byte 0x1b\n"                        /* pcrel|sdata4 */
        "\t.long lj_err_unwind_dwarf-.\n"       /* personality routine */
        "\t.byte 0x1b\n"                        /* pcrel|sdata4 */
        "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa r1, 0 */
        "\t.align 2\n"
        ".LECIE1:\n\n");
    /* FDE2: same range and same CFA program as FDE0, in .eh_frame form
    ** (CIE pointer and start address are relative, not absolute).
    */
    fprintf(ctx->fp,
        ".LSFDE2:\n"
        "\t.long .LEFDE2-.LASFDE2\n"            /* FDE length */
        ".LASFDE2:\n"
        "\t.long .LASFDE2-.Lframe1\n"           /* distance back to CIE1 */
        "\t.long .Lbegin-.\n"                   /* pc-relative start */
        "\t.long %d\n"                          /* address range = fcofs */
        "\t.uleb128 0\n"                        /* augmentation length */
        /* DW_CFA_def_cfa_offset CFRAME_SIZE */
        "\t.byte 0xe\n\t.uleb128 %d\n"
        /* DW_CFA_offset_extended_sf column 65, factored -1 (CFA+4) */
        "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
        /* DW_CFA_offset_extended column 70, factored 55 (CFA-220) */
        "\t.byte 0x5\n\t.uleb128 70\n\t.uleb128 55\n",
        fcofs, CFRAME_SIZE);
    /* Same per-register save records as emitted for FDE0. */
    for (i = 14; i <= 31; i++)
      fprintf(ctx->fp,
        "\t.byte %d\n\t.uleb128 %d\n"
        "\t.byte %d\n\t.uleb128 %d\n",
        0x80+i, 37+(31-i), 0x80+32+i, 2+2*(31-i));
    fprintf(ctx->fp,
        "\t.align 2\n"
        ".LEFDE2:\n\n");
#if LJ_HASFFI
    /* CIE2: like CIE1, but "zR" only, i.e. no personality routine. */
    fprintf(ctx->fp,
        ".Lframe2:\n"
        "\t.long .LECIE2-.LSCIE2\n"             /* CIE length */
        ".LSCIE2:\n"
        "\t.long 0\n"                           /* CIE id for .eh_frame */
        "\t.byte 0x1\n"                         /* CIE version */
        "\t.string \"zR\"\n"                    /* augmentation string */
        "\t.uleb128 0x1\n"                      /* code alignment factor */
        "\t.sleb128 -4\n"                       /* data alignment factor */
        "\t.byte 65\n"                          /* return address column */
        "\t.uleb128 1\n"                        /* augmentation length */
        "\t.byte 0x1b\n"                        /* pcrel|sdata4 */
        "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" /* DW_CFA_def_cfa r1, 0 */
        "\t.align 2\n"
        ".LECIE2:\n\n");
    /* FDE3: .eh_frame counterpart of FDE1 for lj_vm_ffi_call. */
    fprintf(ctx->fp,
        ".LSFDE3:\n"
        "\t.long .LEFDE3-.LASFDE3\n"            /* FDE length */
        ".LASFDE3:\n"
        "\t.long .LASFDE3-.Lframe2\n"           /* distance back to CIE2 */
        "\t.long lj_vm_ffi_call-.\n"            /* pc-relative start */
        "\t.long %d\n"                          /* address range */
        "\t.uleb128 0\n"                        /* augmentation length */
        /* DW_CFA_offset_extended_sf column 65, factored -1 (CFA+4) */
        "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
        /* DW_CFA_offset r14, factored 2 (CFA-8) */
        "\t.byte 0x8e\n\t.uleb128 2\n"
        /* DW_CFA_def_cfa_register r14 */
        "\t.byte 0xd\n\t.uleb128 0xe\n"
        "\t.align 2\n"
        ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
#endif
#endif
    break;
  default:
    /* No frame info is generated for any other build mode. */
    break;
  }
}