ljx

FORK: LuaJIT with native 5.2 and 5.3 support
git clone https://git.neptards.moe/neptards/ljx.git
Log | Files | Refs | README

lj_target_x86.h (10649B)


      1 /*
      2 ** Definitions for x86 and x64 CPUs.
      3 ** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
      4 */
      5 
      6 #ifndef _LJ_TARGET_X86_H
      7 #define _LJ_TARGET_X86_H
      8 
      9 /* -- Register IDs -------------------------------------------------------- */
     10 
     11 #if LJ_64	/* x64: 16 GPR names and 16 FPR names. */
     12 #define GPRDEF(_) \
     13   _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \
     14   _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D)
     15 #define FPRDEF(_) \
     16   _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
     17   _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
     18 #else		/* x86: 8 GPR names and 8 FPR names. */
     19 #define GPRDEF(_) \
     20   _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI)
     21 #define FPRDEF(_) \
     22   _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
     23 #endif
     24 #define VRIDDEF(_) \
     25   _(MRM) _(RIP)
     26 /* Each *DEF(_) list applies _ to every register name; RIDENUM turns one name into the enumerator text RID_<name> plus a comma. */
     27 #define RIDENUM(name)	RID_##name,
     28 
     29 enum {				/* Register ids: GPRs count up from 0, FPRs follow directly. */
     30   GPRDEF(RIDENUM)		/* General-purpose registers (GPRs). */
     31   FPRDEF(RIDENUM)		/* Floating-point registers (FPRs). */
     32   RID_MAX,			/* Number of real registers; the pseudo-ids start here. */
     33   RID_MRM = RID_MAX,		/* Pseudo-id for ModRM operand. */
     34   RID_RIP = RID_MAX+1,		/* Pseudo-id for RIP (x64 only). */
     35 
     36   /* Calling conventions. */
     37   RID_SP = RID_ESP,		/* Stack pointer. */
     38   RID_RET = RID_EAX,		/* NOTE(review): presumably the integer return register -- confirm. */
     39 #if LJ_64
     40   RID_FPRET = RID_XMM0,		/* NOTE(review): presumably the FP return register -- confirm. */
     41 #else
     42   RID_RETLO = RID_EAX,		/* NOTE(review): presumably low/high halves of a 64 bit result -- confirm. */
     43   RID_RETHI = RID_EDX,
     44 #endif
     45 
     46   /* These definitions must match with the *.dasc file(s): */
     47   RID_BASE = RID_EDX,		/* Interpreter BASE. */
     48 #if LJ_64 && !LJ_ABI_WIN	/* x64 with a non-Windows ABI. */
     49   RID_LPC = RID_EBX,		/* Interpreter PC. */
     50   RID_DISPATCH = RID_R14D,	/* Interpreter DISPATCH table. */
     51 #else				/* x86, or x64 with the Windows ABI. */
     52   RID_LPC = RID_ESI,		/* Interpreter PC. */
     53   RID_DISPATCH = RID_EBX,	/* Interpreter DISPATCH table. */
     54 #endif
     55 
     56   /* Register ranges [min, max) and number of registers. */
     57   RID_MIN_GPR = RID_EAX,
     58   RID_MIN_FPR = RID_XMM0,
     59   RID_MAX_GPR = RID_MIN_FPR,	/* The GPR range ends where the FPR range begins. */
     60   RID_MAX_FPR = RID_MAX,
     61   RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
     62   RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
     63 };
     64 
     65 /* -- Register sets ------------------------------------------------------- */
     66 
     67 /* Make use of all registers, except the stack pointer (and DISPATCH when LJ_GC64 is 1). */
     68 #define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
     69 			 - RID2RSET(RID_ESP) \
     70 			 - LJ_GC64*RID2RSET(RID_DISPATCH))
     71 #define RSET_FPR	(RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))	/* Every FPR id. */
     72 #define RSET_ALL	(RSET_GPR|RSET_FPR)	/* Union of the two sets above. */
     73 #define RSET_INIT	RSET_ALL	/* Alias for RSET_ALL. */
     74 
     75 #if LJ_64
     76 /* Note: this requires the use of FORCE_REX! */
     77 #define RSET_GPR8	RSET_GPR
     78 #else
     79 #define RSET_GPR8	(RSET_RANGE(RID_EAX, RID_EBX+1))	/* EAX, ECX, EDX, EBX (RSET_RANGE taken as half-open, as the +1 suggests). */
     80 #endif
     81 
     82 /* ABI-specific register sets. REGARG_GPRS packs register ids into 5 bit fields, with the first listed id in the low bits. */
     83 #define RSET_ACD	(RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))
     84 #if LJ_64
     85 #if LJ_ABI_WIN
     86 /* Windows x64 ABI. */
     87 #define RSET_SCRATCH \
     88   (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
     89 #define REGARG_GPRS \
     90   (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5))
     91 #define REGARG_NUMGPR	4	/* Matches the four ids packed into REGARG_GPRS. */
     92 #define REGARG_NUMFPR	4	/* XMM0..XMM3, see FIRSTFPR/LASTFPR below. */
     93 #define REGARG_FIRSTFPR	RID_XMM0
     94 #define REGARG_LASTFPR	RID_XMM3
     95 #define STACKARG_OFS	(4*8)	/* NOTE(review): presumably the 32 byte Win64 shadow space -- confirm. */
     96 #else
     97 /* The rest of the civilized x64 world has a common ABI. */
     98 #define RSET_SCRATCH \
     99   (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
    100 #define REGARG_GPRS \
    101   (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \
    102    <<5))<<5))<<5))<<5))<<5))
    103 #define REGARG_NUMGPR	6	/* Matches the six ids packed into REGARG_GPRS. */
    104 #define REGARG_NUMFPR	8	/* XMM0..XMM7, see FIRSTFPR/LASTFPR below. */
    105 #define REGARG_FIRSTFPR	RID_XMM0
    106 #define REGARG_LASTFPR	RID_XMM7
    107 #define STACKARG_OFS	0
    108 #endif
    109 #else
    110 /* Common x86 ABI. */
    111 #define RSET_SCRATCH	(RSET_ACD|RSET_FPR)
    112 #define REGARG_GPRS	(RID_ECX|(RID_EDX<<5))  /* Fastcall only. */
    113 #define REGARG_NUMGPR	2  /* Fastcall only. */
    114 #define REGARG_NUMFPR	0
    115 #define STACKARG_OFS	0
    116 #endif
    117 
    118 #if LJ_64
    119 /* Prefer the low 8 regs of each type to reduce REX prefixes. */
    120 #undef rset_picktop	/* NOTE(review): presumably overrides a generic definition from another header -- confirm. */
    121 #define rset_picktop(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)	/* NOTE(review): assumes lj_bswap is a 32 bit byte swap and lj_fls yields the index of the top set bit; ^ 0x18 then maps that index back to the unswapped byte. */
    122 #endif
    123 
    124 /* -- Spill slots --------------------------------------------------------- */
    125 
    126 /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
    127 **
    128 ** SPS_FIXED: Available fixed spill slots in interpreter frame.
    129 ** This definition must match with the *.dasc file(s).
    130 **
    131 ** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
    132 */
    133 #if LJ_64
    134 #if LJ_ABI_WIN
    135 #define SPS_FIXED	(4*2)
    136 #define SPS_FIRST	(4*2)	/* Don't use callee register save area. */
    137 #else		/* x64 with a non-Windows ABI. */
    138 #if LJ_GC64
    139 #define SPS_FIXED	2
    140 #else
    141 #define SPS_FIXED	4
    142 #endif
    143 #define SPS_FIRST	2
    144 #endif
    145 #else		/* x86. */
    146 #define SPS_FIXED	6
    147 #define SPS_FIRST	2
    148 #endif
    149 
    150 #define SPOFS_TMP	0	/* NOTE(review): presumably the offset of a temporary slot -- confirm against users. */
    151 
    152 #define sps_scale(slot)		(4 * (int32_t)(slot))	/* Slot index to byte offset (4 bytes per slot). */
    153 #define sps_align(slot)		(((slot) - SPS_FIXED + 3) & ~3)	/* Rounds (slot - SPS_FIXED) up to a multiple of 4. */
    154 
    155 /* -- Exit state ---------------------------------------------------------- */
    156 
    157 /* Exit state, laid out as FPRs, then GPRs, then 256 spill slots. This definition must match with the *.dasc file(s). */
    158 typedef struct {
    159   lua_Number fpr[RID_NUM_FPR];	/* Floating-point registers. */
    160   intptr_t gpr[RID_NUM_GPR];	/* General-purpose registers. */
    161   int32_t spill[256];		/* Spill slots. */
    162 } ExitState;
    163 
    164 /* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */
    165 #define EXITSTUB_SPACING	(2+2)	/* NOTE(review): presumably bytes per stub, written as two 2 byte instructions -- confirm. */
    166 #define EXITSTUBS_PER_GROUP	32	/* The 32 in the formula above. */
    167 
    168 /* -- x86 ModRM operand encoding ------------------------------------------ */
    169 
    170 typedef enum {	/* All values occupy the top two bits of a byte (see XM_MASK). */
    171   XM_OFS0 = 0x00, XM_OFS8 = 0x40, XM_OFS32 = 0x80, XM_REG = 0xc0,	/* ModRM mod field. */
    172   XM_SCALE1 = 0x00, XM_SCALE2 = 0x40, XM_SCALE4 = 0x80, XM_SCALE8 = 0xc0,	/* SIB scale field. */
    173   XM_MASK = 0xc0
    174 } x86Mode;
    175 
    176 /* Structure to hold variable ModRM operand: an offset plus optional base and scaled index registers. */
    177 typedef struct {
    178   int32_t ofs;		/* Offset. */
    179   uint8_t base;		/* Base register or RID_NONE. */
    180   uint8_t idx;		/* Index register or RID_NONE. */
    181   uint8_t scale;	/* Index scale (XM_SCALE1 .. XM_SCALE8). */
    182 } x86ModRM;
    183 
    184 /* -- Opcodes ------------------------------------------------------------- */
    185 
    186 /* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB. The opcode byte is cast to uint32_t before <<24 so that bytes >= 0x80 do not overflow a signed int. */
    187 #define XO_(o)		((uint32_t)(0x0000fe + ((uint32_t)0x##o<<24)))
    188 #define XO_FPU(a,b)	((uint32_t)(0x00fd + (0x##a<<16)+((uint32_t)0x##b<<24)))
    189 #define XO_0f(o)	((uint32_t)(0x0f00fd + ((uint32_t)0x##o<<24)))
    190 #define XO_66(o)	((uint32_t)(0x6600fd + ((uint32_t)0x##o<<24)))
    191 #define XO_660f(o)	((uint32_t)(0x0f66fc + ((uint32_t)0x##o<<24)))
    192 #define XO_f20f(o)	((uint32_t)(0x0ff2fc + ((uint32_t)0x##o<<24)))
    193 #define XO_f30f(o)	((uint32_t)(0x0ff3fc + ((uint32_t)0x##o<<24)))
    194 /* VEX forms: LSB is the c4 prefix byte, then the map byte (e2: 0f38, e3: 0f3a) and the pp byte (79: 66, 7a: f3, 7b: f2); opcode in the MSB. */
    195 #define XV_660f38(o)	((uint32_t)(0x79e2c4 + ((uint32_t)0x##o<<24)))
    196 #define XV_f20f38(o)	((uint32_t)(0x7be2c4 + ((uint32_t)0x##o<<24)))
    197 #define XV_f20f3a(o)	((uint32_t)(0x7be3c4 + ((uint32_t)0x##o<<24)))
    198 #define XV_f30f38(o)	((uint32_t)(0x7ae2c4 + ((uint32_t)0x##o<<24)))
    199 
    200 /* This list of x86 opcodes is not intended to be complete. Opcodes are only
    201 ** included when needed. Take a look at DynASM or jit.dis_x86 to see the
    202 ** whole mess.
    203 */
    204 typedef enum {
    205   /* Fixed length opcodes. XI_* prefix. */
    206   XI_O16 =	0x66,
    207   XI_NOP =	0x90,
    208   XI_XCHGa =	0x90, /* Really 90+r; shares the byte of XI_NOP. */
    209   XI_CALL =	0xe8,
    210   XI_JMP =	0xe9,
    211   XI_JMPs =	0xeb,
    212   XI_PUSH =	0x50, /* Really 50+r. */
    213   XI_JCCs =	0x70, /* Really 7x. */
    214   XI_JCCn =	0x80, /* Really 0f8x. */
    215   XI_LEA =	0x8d,
    216   XI_MOVrib =	0xb0, /* Really b0+r. */
    217   XI_MOVri =	0xb8, /* Really b8+r. */
    218   XI_ARITHib =	0x80,
    219   XI_ARITHi =	0x81,
    220   XI_ARITHi8 =	0x83,
    221   XI_PUSHi8 =	0x6a,
    222   XI_TESTb =	0x84,
    223   XI_TEST =	0x85,
    224   XI_INT3 =	0xcc,
    225   XI_MOVmi =	0xc7,
    226   XI_GROUP5 =	0xff,
    227 
    228   /* Note: little-endian byte-order! The first opcode byte is the low byte, e.g. XI_FLDZ 0xeed9 stands for d9 ee. */
    229   XI_FLDZ =	0xeed9,
    230   XI_FLD1 =	0xe8d9,
    231   XI_FLDLG2 =	0xecd9,
    232   XI_FLDLN2 =	0xedd9,
    233   XI_FDUP =	0xc0d9,  /* Really fld st0. */
    234   XI_FPOP =	0xd8dd,  /* Really fstp st0. */
    235   XI_FPOP1 =	0xd9dd,  /* Really fstp st1. */
    236   XI_FRNDINT =	0xfcd9,
    237   XI_FSIN =	0xfed9,
    238   XI_FCOS =	0xffd9,
    239   XI_FPTAN =	0xf2d9,
    240   XI_FPATAN =	0xf3d9,
    241   XI_FSCALE =	0xfdd9,
    242   XI_FYL2X =	0xf1d9,
    243 
    244   /* VEX-encoded instructions. XV_* prefix. */
    245   XV_RORX =	XV_f20f3a(f0),
    246   XV_SARX =	XV_f30f38(f7),
    247   XV_SHLX =	XV_660f38(f7),
    248   XV_SHRX =	XV_f20f38(f7),
    249 
    250   /* Variable-length opcodes. XO_* prefix. */
    251   XO_OR =	XO_(0b),
    252   XO_MOV =	XO_(8b),
    253   XO_MOVto =	XO_(89),
    254   XO_MOVtow =	XO_66(89),
    255   XO_MOVtob =	XO_(88),
    256   XO_MOVmi =	XO_(c7),
    257   XO_MOVmib =	XO_(c6),
    258   XO_LEA =	XO_(8d),
    259   XO_ARITHib =	XO_(80),
    260   XO_ARITHi =	XO_(81),
    261   XO_ARITHi8 =	XO_(83),
    262   XO_ARITHiw8 =	XO_66(83),
    263   XO_SHIFTi =	XO_(c1),
    264   XO_SHIFT1 =	XO_(d1),
    265   XO_SHIFTcl =	XO_(d3),
    266   XO_IMUL =	XO_0f(af),
    267   XO_IMULi =	XO_(69),
    268   XO_IMULi8 =	XO_(6b),
    269   XO_CMP =	XO_(3b),
    270   XO_TESTb =	XO_(84),
    271   XO_TEST =	XO_(85),
    272   XO_GROUP3b =	XO_(f6),
    273   XO_GROUP3 =	XO_(f7),
    274   XO_GROUP5b =	XO_(fe),
    275   XO_GROUP5 =	XO_(ff),
    276   XO_MOVZXb =	XO_0f(b6),
    277   XO_MOVZXw =	XO_0f(b7),
    278   XO_MOVSXb =	XO_0f(be),
    279   XO_MOVSXw =	XO_0f(bf),
    280   XO_MOVSXd =	XO_(63),
    281   XO_BSWAP =	XO_0f(c8),
    282   XO_CMOV =	XO_0f(40),
    283   /* SSE/SSE2 opcodes (ROUNDSD is SSE4.1). */
    284   XO_MOVSD =	XO_f20f(10),
    285   XO_MOVSDto =	XO_f20f(11),
    286   XO_MOVSS =	XO_f30f(10),
    287   XO_MOVSSto =	XO_f30f(11),
    288   XO_MOVLPD =	XO_660f(12),
    289   XO_MOVAPS =	XO_0f(28),
    290   XO_XORPS =	XO_0f(57),
    291   XO_ANDPS =	XO_0f(54),
    292   XO_ADDSD =	XO_f20f(58),
    293   XO_SUBSD =	XO_f20f(5c),
    294   XO_MULSD =	XO_f20f(59),
    295   XO_DIVSD =	XO_f20f(5e),
    296   XO_SQRTSD =	XO_f20f(51),
    297   XO_MINSD =	XO_f20f(5d),
    298   XO_MAXSD =	XO_f20f(5f),
    299   XO_ROUNDSD =	0x0b3a0ffc,  /* Really 66 0f 3a 0b. See asm_fpmath. */
    300   XO_UCOMISD =	XO_660f(2e),
    301   XO_CVTSI2SD =	XO_f20f(2a),
    302   XO_CVTTSD2SI=	XO_f20f(2c),
    303   XO_CVTSI2SS =	XO_f30f(2a),
    304   XO_CVTTSS2SI=	XO_f30f(2c),
    305   XO_CVTSS2SD =	XO_f30f(5a),
    306   XO_CVTSD2SS =	XO_f20f(5a),
    307   XO_ADDSS =	XO_f30f(58),
    308   XO_MOVD =	XO_660f(6e),
    309   XO_MOVDto =	XO_660f(7e),
    310   /* x87 memory forms: XO_* is the opcode byte, the paired XOg_* is the ModRM reg-field extension (/digit). */
    311   XO_FLDd =	XO_(d9), XOg_FLDd = 0,
    312   XO_FLDq =	XO_(dd), XOg_FLDq = 0,
    313   XO_FILDd =	XO_(db), XOg_FILDd = 0,
    314   XO_FILDq =	XO_(df), XOg_FILDq = 5,
    315   XO_FSTPd =	XO_(d9), XOg_FSTPd = 3,
    316   XO_FSTPq =	XO_(dd), XOg_FSTPq = 3,
    317   XO_FISTPq =	XO_(df), XOg_FISTPq = 7,
    318   XO_FISTTPq =	XO_(dd), XOg_FISTTPq = 1,
    319   XO_FADDq =	XO_(dc), XOg_FADDq = 0,
    320   XO_FLDCW =	XO_(d9), XOg_FLDCW = 5,
    321   XO_FNSTCW =	XO_(d9), XOg_FNSTCW = 7
    322 } x86Op;
    323 
    324 /* x86 opcode groups. */
    325 typedef uint32_t x86Group;
    326 /* XG_ packs i8 into bits 16-23 and i into bits 8-15, then adds g. XG_TOXOi/XG_TOXOi8 move the i/i8 byte into the MSB of a one-byte (0xfe) opcode value. */
    327 #define XG_(i8, i, g)	((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
    328 #define XG_ARITHi(g)	XG_(XI_ARITHi8, XI_ARITHi, g)
    329 #define XG_TOXOi(xg)	((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000)))
    330 #define XG_TOXOi8(xg)	((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000)))
    331 /* XO_ARITH: one-byte opcode 0x03 + 8*a. XO_ARITHw: the same opcode byte preceded by a 66 prefix. */
    332 #define XO_ARITH(a)	((x86Op)(0x030000fe + ((a)<<27)))
    333 #define XO_ARITHw(a)	((x86Op)(0x036600fd + ((a)<<27)))
    334 
    335 typedef enum {	/* Arithmetic group numbers 0..7 in enum order (the a in XO_ARITH, /digit for opcodes 80/81/83). */
    336   XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP,
    337   XOg_X_IMUL	/* Value 8, outside the 3 bit range. NOTE(review): presumably a pseudo-entry handled specially -- confirm. */
    338 } x86Arith;
    339 
    340 typedef enum {	/* Shift group numbers 0..7 (/digit for opcodes c1/d1/d3). */
    341   XOg_ROL, XOg_ROR, XOg_RCL, XOg_RCR, XOg_SHL, XOg_SHR, XOg_SAL, XOg_SAR
    342 } x86Shift;
    343 
    344 typedef enum {	/* Group 3 numbers 0..7 (/digit for opcodes f6/f7). */
    345   XOg_TEST, XOg_TEST_, XOg_NOT, XOg_NEG, XOg_MUL, XOg_IMUL, XOg_DIV, XOg_IDIV
    346 } x86Group3;
    347 
    348 typedef enum {	/* Group 5 numbers 0..6 (/digit for opcodes fe/ff). */
    349   XOg_INC, XOg_DEC, XOg_CALL, XOg_CALLfar, XOg_JMP, XOg_JMPfar, XOg_PUSH
    350 } x86Group5;
    351 
    352 /* x86 condition codes. The first two rows enumerate codes 0..15 in order; the remaining rows are aliases for them. */
    353 typedef enum {
    354   CC_O, CC_NO, CC_B, CC_NB, CC_E, CC_NE, CC_BE, CC_NBE,
    355   CC_S, CC_NS, CC_P, CC_NP, CC_L, CC_NL, CC_LE, CC_NLE,
    356   CC_C = CC_B, CC_NAE = CC_C, CC_NC = CC_NB, CC_AE = CC_NB,
    357   CC_Z = CC_E, CC_NZ = CC_NE, CC_NA = CC_BE, CC_A = CC_NBE,
    358   CC_PE = CC_P, CC_PO = CC_NP, CC_NGE = CC_L, CC_GE = CC_NL,
    359   CC_NG = CC_LE, CC_G = CC_NLE
    360 } x86CC;
    361 
    362 #endif