ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git

lj_target.h (6004B)


/*
** Definitions for target CPU.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_TARGET_H
#define _LJ_TARGET_H

#include "lj_def.h"
#include "lj_arch.h"

/* -- Registers and spill slots ------------------------------------------- */

/* Register type (uint8_t in ir->r). */
typedef uint32_t Reg;

/* The hi-bit is NOT set for an allocated register. This means the value
** can be directly used without masking. The hi-bit is set for a register
** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
*/
#define RID_NONE		0x80
#define RID_MASK		0x7f
#define RID_INIT		(RID_NONE|RID_MASK)
#define RID_SINK		(RID_INIT-1)
#define RID_SUNK		(RID_INIT-2)

#define ra_noreg(r)		((r) & RID_NONE)
#define ra_hasreg(r)		(!((r) & RID_NONE))

/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
#define ra_hashint(r)		((r) < RID_SUNK)
#define ra_gethint(r)		((Reg)((r) & RID_MASK))
#define ra_sethint(rr, r)	rr = (uint8_t)((r)|RID_NONE)
#define ra_samehint(r1, r2)	(ra_gethint((r1)^(r2)) == 0)
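
/* Illustrative sketch (not part of the upstream header): RID_INIT reads as
** "no register allocated, no hint", and ra_sethint() keeps the hi-bit set so
** the slot still counts as unallocated while remembering a preference.
** Register id 5 is an arbitrary example value.
*/
static LJ_AINLINE int ra_hint_example(void)
{
  uint8_t rr = RID_INIT;	/* No register allocated, no hint recorded. */
  ra_sethint(rr, 5);		/* Prefer register 5 for a later allocation. */
  return ra_noreg(rr) && ra_hashint(rr) && ra_gethint(rr) == 5;
}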

/* Spill slot 0 means no spill slot has been allocated. */
#define SPS_NONE		0

#define ra_hasspill(s)		((s) != SPS_NONE)

/* Combined register and spill slot (uint16_t in ir->prev). */
typedef uint32_t RegSP;

#define REGSP(r, s)		((r) + ((s) << 8))
#define REGSP_HINT(r)		((r)|RID_NONE)
#define REGSP_INIT		REGSP(RID_INIT, 0)

#define regsp_reg(rs)		((rs) & 255)
#define regsp_spill(rs)		((rs) >> 8)
#define regsp_used(rs) \
  (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0))
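
/* Illustrative sketch (not part of the upstream header): the register id
** lives in the low byte and the spill slot in the bits above it. Register 3
** with spill slot 2 is used purely as an example.
*/
static LJ_AINLINE int regsp_example(void)
{
  RegSP rs = REGSP(3, 2);	/* 0x0203: spill slot 2 above, register 3 below. */
  return regsp_reg(rs) == 3 && regsp_spill(rs) == 2 && regsp_used(rs);
}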

/* -- Register sets ------------------------------------------------------- */

/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
typedef uint64_t RegSet;
#else
typedef uint32_t RegSet;
#endif

#define RID2RSET(r)		(((RegSet)1) << (r))
#define RSET_EMPTY		((RegSet)0)
#define RSET_RANGE(lo, hi)	((RID2RSET((hi)-(lo))-1) << (lo))

#define rset_test(rs, r)	((int)((rs) >> (r)) & 1)
#define rset_set(rs, r)		(rs |= RID2RSET(r))
#define rset_clear(rs, r)	(rs &= ~RID2RSET(r))
#define rset_exclude(rs, r)	(rs & ~RID2RSET(r))
#if LJ_TARGET_PPC || LJ_TARGET_MIPS
#define rset_picktop(rs)	((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot(rs)	((Reg)__builtin_ctzll(rs))
#else
#define rset_picktop(rs)	((Reg)lj_fls(rs))
#define rset_pickbot(rs)	((Reg)lj_ffs(rs))
#endif
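
/* Illustrative sketch (not part of the upstream header): build a set holding
** registers 2..5 and drain it from the bottom up, the way a caller would walk
** a set of candidate registers. The range 2..5 is an arbitrary example.
*/
static LJ_AINLINE Reg rset_example(void)
{
  RegSet work = RSET_RANGE(2, 6);	/* Bits set for registers 2,3,4,5. */
  Reg r = 0;
  while (work != RSET_EMPTY) {
    r = rset_pickbot(work);		/* Lowest-numbered register in the set. */
    rset_clear(work, r);		/* Remove it and continue. */
  }
  return r;				/* Last register visited: 5. */
}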

/* -- Register allocation cost -------------------------------------------- */

/* The register allocation heuristic keeps track of the cost for allocating
** a specific register:
**
** A free register (obviously) has a cost of 0 and a 1-bit in the free mask.
**
** An already allocated register has the (non-zero) IR reference in the lowest
** bits and the result of a blended cost-model in the higher bits.
**
** The allocator first checks the free mask for a hit. Otherwise an (unrolled)
** linear search for the minimum cost is used. The search doesn't need to
** keep track of the position of the minimum, which makes it very fast.
** The lowest bits of the minimum cost show the desired IR reference whose
** register is the one to evict.
**
** Without the cost-model this degenerates to the standard heuristics for
** (reverse) linear-scan register allocation. Since code generation is done
** in reverse, a live interval extends from the last use to the first def.
** For an SSA IR the IR reference is the first (and only) def and thus
** trivially marks the end of the interval. The LSRA heuristic says to pick
** the register whose live interval has the furthest extent, i.e. the lowest
** IR reference in our case.
**
** A cost-model should take into account other factors, like spill-cost and
** restore- or rematerialization-cost, which depend on the kind of instruction.
** E.g. constants have zero spill costs, variant instructions have higher
** costs than invariants and PHIs should preferably never be spilled.
**
** Here's a first cut at a simple, but effective blended cost-model for R-LSRA:
** - Due to careful design of the IR, constants already have lower IR
**   references than invariants and invariants have lower IR references
**   than variants.
** - The cost in the upper 16 bits is the sum of the IR reference and a
**   weighted score. The score currently only takes into account whether
**   the IRT_ISPHI bit is set in the instruction type.
** - The PHI weight is the minimum distance (in IR instructions) a PHI
**   reference has to be further apart from a non-PHI reference to be spilled.
** - It should be a power of two (for speed) and must be between 2 and 32768.
**   Good values for the PHI weight seem to be between 40 and 150.
** - Further study is required.
*/
#define REGCOST_PHI_WEIGHT	64

/* Cost for allocating a specific register. */
typedef uint32_t RegCost;

/* Note: assumes 16 bit IRRef1. */
#define REGCOST(cost, ref)	((RegCost)(ref) + ((RegCost)(cost) << 16))
#define regcost_ref(rc)		((IRRef1)(rc))

#define REGCOST_T(t) \
  ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI))
#define REGCOST_REF_T(ref, t)	(REGCOST((ref), (ref)) + REGCOST_T((t)))
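
/* Illustrative sketch (not part of the upstream header): a cost packs the
** IR reference into the low 16 bits and the weighted score above it, so the
** low half of the minimum cost directly names the eviction candidate.
** The reference value 100 is arbitrary; regcost_ref() extracts the low half
** (it needs IRRef1 from lj_ir.h, so plain masking is used here instead).
*/
static LJ_AINLINE int regcost_example(void)
{
  RegCost rc = REGCOST(REGCOST_PHI_WEIGHT, 100);  /* Score 64 above, ref 100 below. */
  return (rc & 0xffff) == 100 && (rc >> 16) == REGCOST_PHI_WEIGHT;
}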

/* -- Target-specific definitions ----------------------------------------- */

#if LJ_TARGET_X86ORX64
#include "lj_target_x86.h"
#elif LJ_TARGET_ARM
#include "lj_target_arm.h"
#elif LJ_TARGET_ARM64
#include "lj_target_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
#else
#error "Missing include for target CPU"
#endif

#ifdef EXITSTUBS_PER_GROUP
/* Return the address of an exit stub. */
static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
{
  lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL);
  return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
	 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
#define exitstub_addr(J, exitno) \
  ((MCode *)exitstub_addr_((char **)((J)->exitstubgroup), (exitno)))
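
/* Illustrative sketch (not part of the upstream header): exit number 'exitno'
** lives in group exitno/EXITSTUBS_PER_GROUP, EXITSTUB_SPACING bytes per stub
** past that group's base. Assumes the group array is populated and that
** EXITSTUBS_PER_GROUP exceeds the small offset used here.
*/
static LJ_AINLINE char *exitstub_addr_example(char **group)
{
  uint32_t exitno = EXITSTUBS_PER_GROUP + 3;	/* 4th stub of the 2nd group. */
  return exitstub_addr_(group, exitno);		/* group[1] + EXITSTUB_SPACING*3 */
}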
#endif

#endif