qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

decode-new.c.inc (71498B)


      1 /*
      2  * New-style decoder for i386 instructions
      3  *
      4  *  Copyright (c) 2022 Red Hat, Inc.
      5  *
      6  * Author: Paolo Bonzini <pbonzini@redhat.com>
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Lesser General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2.1 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Lesser General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Lesser General Public
     19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
     20  */
     21 
     22 /*
     23  * The decoder is mostly based on tables copied from the Intel SDM.  As
     24  * a result, most operand load and writeback is done entirely in common
     25  * table-driven code using the same operand type (X86_TYPE_*) and
     26  * size (X86_SIZE_*) codes used in the manual.
     27  *
     28  * The main difference is that the V, U and W types are extended to
     29  * cover MMX as well; if an instruction is like
     30  *
     31  *      por   Pq, Qq
     32  *  66  por   Vx, Hx, Wx
     33  *
     34  * only the second row is included and the instruction is marked as a
     35  * valid MMX instruction.  The MMX flag directs the decoder to rewrite
     36  * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
     37  * "x" to "q" if there is no prefix.
     38  *
     39  * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
     40  * if the difference is expressed via prefixes.  Individual instructions
     41  * are separated by prefix in the generator functions.
     42  *
     43  * There are a couple of cases in which instructions (e.g. MOVD) write the
     44  * whole XMM or MM register but are listed incorrectly in the manual
     45  * as "d" or "q".  These have to be fixed for the decoder to work correctly.
     46  */
     47 
/*
 * Initializer for an unused table slot: a zero-initialized entry.  The
 * expansion already carries its own trailing comma.
 */
#define X86_OP_NONE { 0 },
     49 
     50 #define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
     51     .decode = glue(decode_, op),                                  \
     52     .op0 = glue(X86_TYPE_, op0_),                                 \
     53     .s0 = glue(X86_SIZE_, s0_),                                   \
     54     .op1 = glue(X86_TYPE_, op1_),                                 \
     55     .s1 = glue(X86_SIZE_, s1_),                                   \
     56     .op2 = glue(X86_TYPE_, op2_),                                 \
     57     .s2 = glue(X86_SIZE_, s2_),                                   \
     58     .is_decode = true,                                            \
     59     ## __VA_ARGS__                                                \
     60 }
     61 
     62 #define X86_OP_GROUP2(op, op0, s0, op1, s1, ...)                  \
     63     X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
     64 #define X86_OP_GROUP0(op, ...)                                    \
     65     X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
     66 
     67 #define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
     68     .gen = glue(gen_, op),                                        \
     69     .op0 = glue(X86_TYPE_, op0_),                                 \
     70     .s0 = glue(X86_SIZE_, s0_),                                   \
     71     .op1 = glue(X86_TYPE_, op1_),                                 \
     72     .s1 = glue(X86_SIZE_, s1_),                                   \
     73     .op2 = glue(X86_TYPE_, op2_),                                 \
     74     .s2 = glue(X86_SIZE_, s2_),                                   \
     75     ## __VA_ARGS__                                                \
     76 }
     77 
     78 #define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...)   \
     79     X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_,            \
     80         .op3 = X86_TYPE_I, .s3 = X86_SIZE_b,                      \
     81         ## __VA_ARGS__)
     82 
     83 #define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
     84     X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
     85 #define X86_OP_ENTRYw(op, op0, s0, ...)                           \
     86     X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
     87 #define X86_OP_ENTRYr(op, op0, s0, ...)                           \
     88     X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
     89 #define X86_OP_ENTRY0(op, ...)                                    \
     90     X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
     91 
/*
 * Optional per-entry attributes.  Each macro expands to one designated
 * initializer including its trailing comma, so several of them can be
 * written back to back in the variadic tail of the X86_OP_* macros.
 */

/* Sets .cpuid to the X86_FEAT_<feat> constant. */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,
/*
 * The remaining macros all assign the same .special field.  If two of
 * them are given for one entry, C keeps only the last initializer.
 */
#define i64 .special = X86_SPECIAL_i64,
#define o64 .special = X86_SPECIAL_o64,
#define xchg .special = X86_SPECIAL_Locked,
#define mmx .special = X86_SPECIAL_MMX,
#define zext0 .special = X86_SPECIAL_ZExtOp0,
#define zext2 .special = X86_SPECIAL_ZExtOp2,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
    100 
/*
 * vexN stores N in .vex_class.  The _rep3 and _unal variants additionally
 * set .vex_special (X86_VEX_REPScalar and X86_VEX_SSEUnaligned).
 */
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

/*
 * Sets only .vex_special.  It writes the same field as the _rep3/_unal
 * variants above, so when combined with one of them the later
 * initializer is the one that takes effect.
 */
#define avx2_256 .vex_special = X86_VEX_AVX2_256,
    117 
/*
 * Bit masks used in .valid_prefix.  P_00 (no prefix) is bit 0; the other
 * masks are 1 shifted left by the corresponding PREFIX_* constant.
 */
#define P_00          1
#define P_66          (1 << PREFIX_DATA)
#define P_F3          (1 << PREFIX_REPZ)
#define P_F2          (1 << PREFIX_REPNZ)

/*
 * Entry attributes that set .valid_prefix to the OR of the masks named
 * in the macro (p_00_66 = P_00 | P_66, and so on).
 */
#define p_00          .valid_prefix = P_00,
#define p_66          .valid_prefix = P_66,
#define p_f3          .valid_prefix = P_F3,
#define p_f2          .valid_prefix = P_F2,
#define p_00_66       .valid_prefix = P_00 | P_66,
#define p_00_f3       .valid_prefix = P_00 | P_F3,
#define p_66_f2       .valid_prefix = P_66 | P_F2,
#define p_00_66_f3    .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2    .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
    133 
    134 static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
    135 {
    136     if (!s->has_modrm) {
    137         s->modrm = x86_ldub_code(env, s);
    138         s->has_modrm = true;
    139     }
    140     return s->modrm;
    141 }
    142 
    143 static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
    144 {
    145     if (s->prefix & PREFIX_REPNZ) {
    146         return &entries[3];
    147     } else if (s->prefix & PREFIX_REPZ) {
    148         return &entries[2];
    149     } else if (s->prefix & PREFIX_DATA) {
    150         return &entries[1];
    151     } else {
    152         return &entries[0];
    153     }
    154 }
    155 
    156 static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    157 {
    158     /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
    159     static const X86OpEntry group15_reg[8] = {
    160     };
    161 
    162     static const X86OpEntry group15_mem[8] = {
    163         [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5),
    164         [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5),
    165     };
    166 
    167     uint8_t modrm = get_modrm(s, env);
    168     if ((modrm >> 6) == 3) {
    169         *entry = group15_reg[(modrm >> 3) & 7];
    170     } else {
    171         *entry = group15_mem[(modrm >> 3) & 7];
    172     }
    173 }
    174 
    175 static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    176 {
    177     static const X86GenFunc group17_gen[8] = {
    178         NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
    179     };
    180     int op = (get_modrm(s, env) >> 3) & 7;
    181     entry->gen = group17_gen[op];
    182 }
    183 
    184 static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    185 {
    186     static const X86OpEntry opcodes_group12[8] = {
    187         {},
    188         {},
    189         X86_OP_ENTRY3(PSRLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    190         {},
    191         X86_OP_ENTRY3(PSRAW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    192         {},
    193         X86_OP_ENTRY3(PSLLW_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    194         {},
    195     };
    196 
    197     int op = (get_modrm(s, env) >> 3) & 7;
    198     *entry = opcodes_group12[op];
    199 }
    200 
    201 static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    202 {
    203     static const X86OpEntry opcodes_group13[8] = {
    204         {},
    205         {},
    206         X86_OP_ENTRY3(PSRLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    207         {},
    208         X86_OP_ENTRY3(PSRAD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    209         {},
    210         X86_OP_ENTRY3(PSLLD_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    211         {},
    212     };
    213 
    214     int op = (get_modrm(s, env) >> 3) & 7;
    215     *entry = opcodes_group13[op];
    216 }
    217 
    218 static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    219 {
    220     static const X86OpEntry opcodes_group14[8] = {
    221         /* grp14 */
    222         {},
    223         {},
    224         X86_OP_ENTRY3(PSRLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    225         X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
    226         {},
    227         {},
    228         X86_OP_ENTRY3(PSLLQ_i,  H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
    229         X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
    230     };
    231 
    232     int op = (get_modrm(s, env) >> 3) & 7;
    233     *entry = opcodes_group14[op];
    234 }
    235 
    236 static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    237 {
    238     static const X86OpEntry opcodes_0F6F[4] = {
    239         X86_OP_ENTRY3(MOVDQ,       P,q, None,None, Q,q, vex1 mmx),  /* movq */
    240         X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex1),      /* movdqa */
    241         X86_OP_ENTRY3(MOVDQ,       V,x, None,None, W,x, vex4_unal), /* movdqu */
    242         {},
    243     };
    244     *entry = *decode_by_prefix(s, opcodes_0F6F);
    245 }
    246 
    247 static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    248 {
    249     static const X86OpEntry pshufw[4] = {
    250         X86_OP_ENTRY3(PSHUFW,  P,q, Q,q, I,b, vex4 mmx),
    251         X86_OP_ENTRY3(PSHUFD,  V,x, W,x, I,b, vex4 avx2_256),
    252         X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
    253         X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
    254     };
    255 
    256     *entry = *decode_by_prefix(s, pshufw);
    257 }
    258 
    259 static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    260 {
    261     if (!(s->prefix & PREFIX_VEX)) {
    262         entry->gen = gen_EMMS;
    263     } else if (!s->vex_l) {
    264         entry->gen = gen_VZEROUPPER;
    265         entry->vex_class = 8;
    266     } else {
    267         entry->gen = gen_VZEROALL;
    268         entry->vex_class = 8;
    269     }
    270 }
    271 
    272 static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    273 {
    274     static const X86OpEntry opcodes_0F78[4] = {
    275         {},
    276         X86_OP_ENTRY3(EXTRQ_i,       V,x, None,None, I,w,  cpuid(SSE4A)),
    277         {},
    278         X86_OP_ENTRY3(INSERTQ_i,     V,x, U,x, I,w,        cpuid(SSE4A)),
    279     };
    280     *entry = *decode_by_prefix(s, opcodes_0F78);
    281 }
    282 
    283 static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    284 {
    285     if (s->prefix & PREFIX_REPNZ) {
    286         entry->gen = gen_INSERTQ_r;
    287     } else if (s->prefix & PREFIX_DATA) {
    288         entry->gen = gen_EXTRQ_r;
    289     } else {
    290         entry->gen = NULL;
    291     };
    292 }
    293 
    294 static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    295 {
    296     static const X86OpEntry opcodes_0F7E[4] = {
    297         X86_OP_ENTRY3(MOVD_from,  E,y, None,None, P,y, vex5 mmx),
    298         X86_OP_ENTRY3(MOVD_from,  E,y, None,None, V,y, vex5),
    299         X86_OP_ENTRY3(MOVQ,       V,x, None,None, W,q, vex5),  /* wrong dest Vy on SDM! */
    300         {},
    301     };
    302     *entry = *decode_by_prefix(s, opcodes_0F7E);
    303 }
    304 
    305 static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    306 {
    307     static const X86OpEntry opcodes_0F7F[4] = {
    308         X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1 mmx), /* movq */
    309         X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex1), /* movdqa */
    310         X86_OP_ENTRY3(MOVDQ,       W,x, None,None, V,x, vex4_unal), /* movdqu */
    311         {},
    312     };
    313     *entry = *decode_by_prefix(s, opcodes_0F7F);
    314 }
    315 
    316 static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    317 {
    318     static const X86OpEntry movq[4] = {
    319         {},
    320         X86_OP_ENTRY3(MOVQ,    W,x,  None, None, V,q, vex5),
    321         X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
    322         X86_OP_ENTRY3(MOVq_dq, P,q,  None, None, U,q),
    323     };
    324 
    325     *entry = *decode_by_prefix(s, movq);
    326 }
    327 
    328 static const X86OpEntry opcodes_0F38_00toEF[240] = {
    329     [0x00] = X86_OP_ENTRY3(PSHUFB,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    330     [0x01] = X86_OP_ENTRY3(PHADDW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    331     [0x02] = X86_OP_ENTRY3(PHADDD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    332     [0x03] = X86_OP_ENTRY3(PHADDSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    333     [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    334     [0x05] = X86_OP_ENTRY3(PHSUBW,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    335     [0x06] = X86_OP_ENTRY3(PHSUBD,    V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    336     [0x07] = X86_OP_ENTRY3(PHSUBSW,   V,x,  H,x,   W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    337 
    338     [0x10] = X86_OP_ENTRY2(PBLENDVB,  V,x,         W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    339     [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x,         W,ph, vex11 cpuid(F16C) p_66),
    340     [0x14] = X86_OP_ENTRY2(BLENDVPS,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    341     [0x15] = X86_OP_ENTRY2(BLENDVPD,  V,x,         W,x,  vex4 cpuid(SSE41) p_66),
    342     /* Listed incorrectly as type 4 */
    343     [0x16] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 cpuid(AVX2) p_66),
    344     [0x17] = X86_OP_ENTRY3(VPTEST,    None,None, V,x,  W,x,   vex4 cpuid(SSE41) p_66),
    345 
    346     /*
    347      * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
    348      * as 128-bit only in 2-17.
    349      */
    350     [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    351     [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    352     [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    353     [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    354     [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    355     [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    356 
    357     /* Same as PMOVSX.  */
    358     [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    359     [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    360     [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x,  None,None, W,w,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    361     [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    362     [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x,  None,None, W,d,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    363     [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x,  None,None, W,q,   vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
    364     [0x36] = X86_OP_ENTRY3(VPERMD,    V,qq, H,qq,      W,qq,  vex6 cpuid(AVX2) p_66),
    365     [0x37] = X86_OP_ENTRY3(PCMPGTQ,   V,x,  H,x,       W,x,   vex4 cpuid(SSE42) avx2_256 p_66),
    366 
    367     [0x40] = X86_OP_ENTRY3(PMULLD,      V,x,  H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    368     [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
    369     /* Listed incorrectly as type 4 */
    370     [0x45] = X86_OP_ENTRY3(VPSRLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
    371     [0x46] = X86_OP_ENTRY3(VPSRAV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
    372     [0x47] = X86_OP_ENTRY3(VPSLLV,      V,x,  H,x,       W,x,  vex6 cpuid(AVX2) p_66),
    373 
    374     [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
    375     [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
    376     [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x,  H,x,  M,d,  vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
    377     [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x,  H,x,  M,q,  vex12 cpuid(AVX2) p_66), /* vgatherqps/d */
    378 
    379     /* Should be exception type 2 but they do not have legacy SSE equivalents? */
    380     [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    381     [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    382 
    383     [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    384     [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    385 
    386     [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    387     [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    388 
    389     [0x08] = X86_OP_ENTRY3(PSIGNB,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    390     [0x09] = X86_OP_ENTRY3(PSIGNW,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    391     [0x0a] = X86_OP_ENTRY3(PSIGND,    V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    392     [0x0b] = X86_OP_ENTRY3(PMULHRSW,  V,x,        H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    393     [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x,        H,x,  W,x,  vex4 cpuid(AVX) p_00_66),
    394     [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x,        H,x,  W,x,  vex4 cpuid(AVX) p_66),
    395     [0x0e] = X86_OP_ENTRY3(VTESTPS,   None,None,  V,x,  W,x,  vex4 cpuid(AVX) p_66),
    396     [0x0f] = X86_OP_ENTRY3(VTESTPD,   None,None,  V,x,  W,x,  vex4 cpuid(AVX) p_66),
    397 
    398     [0x18] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 cpuid(AVX) p_66), /* vbroadcastss */
    399     [0x19] = X86_OP_ENTRY3(VPBROADCASTQ,   V,qq, None,None, W,q,  vex6 cpuid(AVX) p_66), /* vbroadcastsd */
    400     [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 cpuid(AVX) p_66),
    401     [0x1c] = X86_OP_ENTRY3(PABSB,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    402     [0x1d] = X86_OP_ENTRY3(PABSW,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    403     [0x1e] = X86_OP_ENTRY3(PABSD,          V,x,  None,None, W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    404 
    405     [0x28] = X86_OP_ENTRY3(PMULDQ,        V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    406     [0x29] = X86_OP_ENTRY3(PCMPEQQ,       V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    407     [0x2a] = X86_OP_ENTRY3(MOVDQ,         V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
    408     [0x2b] = X86_OP_ENTRY3(VPACKUSDW,     V,x, H,x,       W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    409     [0x2c] = X86_OP_ENTRY3(VMASKMOVPS,    V,x, H,x,       WM,x, vex6 cpuid(AVX) p_66),
    410     [0x2d] = X86_OP_ENTRY3(VMASKMOVPD,    V,x, H,x,       WM,x, vex6 cpuid(AVX) p_66),
    411     /* Incorrectly listed as Mx,Hx,Vx in the manual */
    412     [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x,       H,x,  vex6 cpuid(AVX) p_66),
    413     [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x,       H,x,  vex6 cpuid(AVX) p_66),
    414 
    415     [0x38] = X86_OP_ENTRY3(PMINSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    416     [0x39] = X86_OP_ENTRY3(PMINSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    417     [0x3a] = X86_OP_ENTRY3(PMINUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    418     [0x3b] = X86_OP_ENTRY3(PMINUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    419     [0x3c] = X86_OP_ENTRY3(PMAXSB,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    420     [0x3d] = X86_OP_ENTRY3(PMAXSD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    421     [0x3e] = X86_OP_ENTRY3(PMAXUW,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    422     [0x3f] = X86_OP_ENTRY3(PMAXUD,        V,x,  H,x, W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    423 
    424     [0x58] = X86_OP_ENTRY3(VPBROADCASTD,   V,x,  None,None, W,d,  vex6 cpuid(AVX2) p_66),
    425     [0x59] = X86_OP_ENTRY3(VPBROADCASTQ,   V,x,  None,None, W,q,  vex6 cpuid(AVX2) p_66),
    426     [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 cpuid(AVX2) p_66),
    427 
    428     [0x78] = X86_OP_ENTRY3(VPBROADCASTB,   V,x,  None,None, W,b,  vex6 cpuid(AVX2) p_66),
    429     [0x79] = X86_OP_ENTRY3(VPBROADCASTW,   V,x,  None,None, W,w,  vex6 cpuid(AVX2) p_66),
    430 
    431     [0x8c] = X86_OP_ENTRY3(VPMASKMOV,    V,x,  H,x, WM,x, vex6 cpuid(AVX2) p_66),
    432     [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x,  V,x, H,x,  vex6 cpuid(AVX2) p_66),
    433 
    434     /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
    435     [0x98] = X86_OP_ENTRY3(VFMADD132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    436     [0x99] = X86_OP_ENTRY3(VFMADD132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    437     [0x9a] = X86_OP_ENTRY3(VFMSUB132Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    438     [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    439     [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    440     [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    441     [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    442     [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    443 
    444     [0xa8] = X86_OP_ENTRY3(VFMADD213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    445     [0xa9] = X86_OP_ENTRY3(VFMADD213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    446     [0xaa] = X86_OP_ENTRY3(VFMSUB213Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    447     [0xab] = X86_OP_ENTRY3(VFMSUB213Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    448     [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    449     [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    450     [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    451     [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    452 
    453     [0xb8] = X86_OP_ENTRY3(VFMADD231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    454     [0xb9] = X86_OP_ENTRY3(VFMADD231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    455     [0xba] = X86_OP_ENTRY3(VFMSUB231Px,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    456     [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx,  V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    457     [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    458     [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    459     [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    460     [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x,  H,x, W,x,  vex6 cpuid(FMA) p_66),
    461 
    462     [0xdb] = X86_OP_ENTRY3(VAESIMC,     V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
    463     [0xdc] = X86_OP_ENTRY3(VAESENC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    464     [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    465     [0xde] = X86_OP_ENTRY3(VAESDEC,     V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    466     [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x,  H,x,       W,x,  vex4 cpuid(AES) p_66),
    467 };
    468 
    469 /* five rows for no prefix, 66, F3, F2, 66+F2  */
    470 static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
    471     [0] = {
    472         X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
    473         X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
    474         {},
    475         X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
    476         X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
    477     },
    478     [1] = {
    479         X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
    480         X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
    481         {},
    482         X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
    483         X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
    484     },
    485     [2] = {
    486         X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
    487         {},
    488         {},
    489         {},
    490         {},
    491     },
    492     [3] = {
    493         X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
    494         {},
    495         {},
    496         {},
    497         {},
    498     },
    499     [5] = {
    500         X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
    501         {},
    502         X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
    503         X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
    504         {},
    505     },
    506     [6] = {
    507         {},
    508         X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
    509         X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
    510         X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
    511         {},
    512     },
    513     [7] = {
    514         X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
    515         X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
    516         X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
    517         X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
    518         {},
    519     },
    520 };
    521 
    522 static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    523 {
    524     *b = x86_ldub_code(env, s);
    525     if (*b < 0xf0) {
    526         *entry = opcodes_0F38_00toEF[*b];
    527     } else {
    528         int row = 0;
    529         if (s->prefix & PREFIX_REPZ) {
    530             /* The REPZ (F3) prefix has priority over 66 */
    531             row = 2;
    532         } else {
    533             row += s->prefix & PREFIX_REPNZ ? 3 : 0;
    534             row += s->prefix & PREFIX_DATA ? 1 : 0;
    535         }
    536         *entry = opcodes_0F38_F0toFF[*b & 15][row];
    537     }
    538 }
    539 
    540 static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    541 {
    542     static const X86OpEntry
    543         vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
    544         vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d,  vex5 cpuid(SSE41) p_66);
    545 
    546     int modrm = get_modrm(s, env);
    547     *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
    548 }
    549 
    550 static const X86OpEntry opcodes_0F3A[256] = {
    551     /*
    552      * These are VEX-only, but incorrectly listed in the manual as exception type 4.
    553      * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
    554      * only.
    555      */
    556     [0x00] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 cpuid(AVX2) p_66),
    557     [0x01] = X86_OP_ENTRY3(VPERMQ,      V,qq, W,qq, I,b,  vex6 cpuid(AVX2) p_66), /* VPERMPD */
    558     [0x02] = X86_OP_ENTRY4(VBLENDPS,    V,x,  H,x,  W,x,  vex6 cpuid(AVX2) p_66), /* VPBLENDD */
    559     [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x,  W,x,  I,b,  vex6 cpuid(AVX) p_66),
    560     [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x,  W,x,  I,b,  vex6 cpuid(AVX) p_66),
    561     [0x06] = X86_OP_ENTRY4(VPERM2x128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX) p_66),
    562 
    563     [0x14] = X86_OP_ENTRY3(PEXTRB,     E,b,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
    564     [0x15] = X86_OP_ENTRY3(PEXTRW,     E,w,  V,dq, I,b,  vex5 cpuid(SSE41) zext0 p_66),
    565     [0x16] = X86_OP_ENTRY3(PEXTR,      E,y,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    566     [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d,  V,dq, I,b,  vex5 cpuid(SSE41) p_66),
    567     [0x1d] = X86_OP_ENTRY3(VCVTPS2PH,  W,ph, V,x,  I,b,  vex11 cpuid(F16C) p_66),
    568 
    569     [0x20] = X86_OP_ENTRY4(PINSRB,     V,dq, H,dq, E,b,  vex5 cpuid(SSE41) zext2 p_66),
    570     [0x21] = X86_OP_GROUP0(VINSERTPS),
    571     [0x22] = X86_OP_ENTRY4(PINSR,      V,dq, H,dq, E,y,  vex5 cpuid(SSE41) p_66),
    572 
    573     [0x40] = X86_OP_ENTRY4(VDDPS,      V,x,  H,x,  W,x,  vex2 cpuid(SSE41) p_66),
    574     [0x41] = X86_OP_ENTRY4(VDDPD,      V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
    575     [0x42] = X86_OP_ENTRY4(VMPSADBW,   V,x,  H,x,  W,x,  vex2 cpuid(SSE41) avx2_256 p_66),
    576     [0x44] = X86_OP_ENTRY4(PCLMULQDQ,  V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
    577     [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 cpuid(AVX2) p_66),
    578 
    579     [0x60] = X86_OP_ENTRY4(PCMPESTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    580     [0x61] = X86_OP_ENTRY4(PCMPESTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    581     [0x62] = X86_OP_ENTRY4(PCMPISTRM,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    582     [0x63] = X86_OP_ENTRY4(PCMPISTRI,  None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
    583 
    584     [0x08] = X86_OP_ENTRY3(VROUNDPS,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    585     [0x09] = X86_OP_ENTRY3(VROUNDPD,   V,x,  W,x,  I,b,  vex2 cpuid(SSE41) p_66),
    586     /*
    587      * Not listed as four operand in the manual.  Also writes and reads 128-bits
    588      * from the first two operands due to the V operand picking higher entries of
    589      * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
    590      * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
    591      * value of vex_special, because the table lists the operand types of VSQRTPx.
    592      */
    593     [0x0a] = X86_OP_ENTRY4(VROUNDSS,   V,x,  H,x, W,ss, vex3 cpuid(SSE41) p_66),
    594     [0x0b] = X86_OP_ENTRY4(VROUNDSD,   V,x,  H,x, W,sd, vex3 cpuid(SSE41) p_66),
    595     [0x0c] = X86_OP_ENTRY4(VBLENDPS,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    596     [0x0d] = X86_OP_ENTRY4(VBLENDPD,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) p_66),
    597     [0x0e] = X86_OP_ENTRY4(VPBLENDW,   V,x,  H,x,  W,x,  vex4 cpuid(SSE41) avx2_256 p_66),
    598     [0x0f] = X86_OP_ENTRY4(PALIGNR,    V,x,  H,x,  W,x,  vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
    599 
    600     [0x18] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX) p_66),
    601     [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 cpuid(AVX) p_66),
    602 
    603     [0x38] = X86_OP_ENTRY4(VINSERTx128,  V,qq, H,qq, W,qq, vex6 cpuid(AVX2) p_66),
    604     [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b,  vex6 cpuid(AVX2) p_66),
    605 
    606     /* Listed incorrectly as type 4 */
    607     [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66),
    608     [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66),
    609     [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x,  H,x,  W,x,   vex6 cpuid(AVX) p_66 avx2_256),
    610 
    611     [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b,  vex4 cpuid(AES) p_66),
    612 
    613     [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
    614 };
    615 
    616 static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    617 {
    618     *b = x86_ldub_code(env, s);
    619     *entry = opcodes_0F3A[*b];
    620 }
    621 
    622 /*
    623  * There are some mistakes in the operands in the manual, and the load/store/register
    624  * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
    625  * efficiency of implementation rather than copying what the manual says.
    626  *
    627  * In particular:
    628  *
    629  * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
    630  * but this is not mentioned in the tables.
    631  *
    632  * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
    633  * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
    634  * quadword of the V operand.
    635  */
    636 static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    637 {
    638     static const X86OpEntry opcodes_0F10_reg[4] = {
    639         X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPS */
    640         X86_OP_ENTRY3(MOVDQ,   V,x,  None,None, W,x, vex4_unal), /* MOVUPD */
    641         X86_OP_ENTRY3(VMOVSS,  V,x,  H,x,       W,x, vex4),
    642         X86_OP_ENTRY3(VMOVLPx, V,x,  H,x,       W,x, vex4), /* MOVSD */
    643     };
    644 
    645     static const X86OpEntry opcodes_0F10_mem[4] = {
    646         X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPS */
    647         X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x,  vex4_unal), /* MOVUPD */
    648         X86_OP_ENTRY3(VMOVSS_ld,  V,x,  H,x,       M,ss, vex4),
    649         X86_OP_ENTRY3(VMOVSD_ld,  V,x,  H,x,       M,sd, vex4),
    650     };
    651 
    652     if ((get_modrm(s, env) >> 6) == 3) {
    653         *entry = *decode_by_prefix(s, opcodes_0F10_reg);
    654     } else {
    655         *entry = *decode_by_prefix(s, opcodes_0F10_mem);
    656     }
    657 }
    658 
    659 static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    660 {
    661     static const X86OpEntry opcodes_0F11_reg[4] = {
    662         X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVPS */
    663         X86_OP_ENTRY3(MOVDQ,   W,x,  None,None, V,x, vex4), /* MOVPD */
    664         X86_OP_ENTRY3(VMOVSS,  W,x,  H,x,       V,x, vex4),
    665         X86_OP_ENTRY3(VMOVLPx, W,x,  H,x,       V,q, vex4), /* MOVSD */
    666     };
    667 
    668     static const X86OpEntry opcodes_0F11_mem[4] = {
    669         X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVPS */
    670         X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex4), /* MOVPD */
    671         X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4),
    672         X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4), /* MOVSD */
    673     };
    674 
    675     if ((get_modrm(s, env) >> 6) == 3) {
    676         *entry = *decode_by_prefix(s, opcodes_0F11_reg);
    677     } else {
    678         *entry = *decode_by_prefix(s, opcodes_0F11_mem);
    679     }
    680 }
    681 
    682 static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    683 {
    684     static const X86OpEntry opcodes_0F12_mem[4] = {
    685         /*
    686          * Use dq for operand for compatibility with gen_MOVSD and
    687          * to allow VEX128 only.
    688          */
    689         X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVLPS */
    690         X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVLPD */
    691         X86_OP_ENTRY3(VMOVSLDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
    692         X86_OP_ENTRY3(VMOVDDUP,   V,x,  None,None, WM,q, vex4 cpuid(SSE3)), /* qq if VEX.256 */
    693     };
    694     static const X86OpEntry opcodes_0F12_reg[4] = {
    695         X86_OP_ENTRY3(VMOVHLPS,  V,dq, H,dq,       U,dq, vex4),
    696         X86_OP_ENTRY3(VMOVLPx,   W,x,  H,x,        U,q,  vex4), /* MOVLPD */
    697         X86_OP_ENTRY3(VMOVSLDUP, V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
    698         X86_OP_ENTRY3(VMOVDDUP,  V,x,  None,None,  U,x,  vex4 cpuid(SSE3)),
    699     };
    700 
    701     if ((get_modrm(s, env) >> 6) == 3) {
    702         *entry = *decode_by_prefix(s, opcodes_0F12_reg);
    703     } else {
    704         *entry = *decode_by_prefix(s, opcodes_0F12_mem);
    705         if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
    706             entry->s2 = X86_SIZE_qq;
    707         }
    708     }
    709 }
    710 
    711 static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    712 {
    713     static const X86OpEntry opcodes_0F16_mem[4] = {
    714         /*
    715          * Operand 1 technically only reads the low 64 bits, but uses dq so that
    716          * it is easier to check for op0 == op1 in an endianness-neutral manner.
    717          */
    718         X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVHPS */
    719         X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq,      M,q, vex4), /* MOVHPD */
    720         X86_OP_ENTRY3(VMOVSHDUP,  V,x,  None,None, W,x, vex4 cpuid(SSE3)),
    721         {},
    722     };
    723     static const X86OpEntry opcodes_0F16_reg[4] = {
    724         /* Same as above, operand 1 could be Hq if it wasn't for big-endian.  */
    725         X86_OP_ENTRY3(VMOVLHPS,  V,dq, H,dq,      U,q, vex4),
    726         X86_OP_ENTRY3(VMOVHPx,   V,x,  H,x,       U,x, vex4), /* MOVHPD */
    727         X86_OP_ENTRY3(VMOVSHDUP, V,x,  None,None, U,x, vex4 cpuid(SSE3)),
    728         {},
    729     };
    730 
    731     if ((get_modrm(s, env) >> 6) == 3) {
    732         *entry = *decode_by_prefix(s, opcodes_0F16_reg);
    733     } else {
    734         *entry = *decode_by_prefix(s, opcodes_0F16_mem);
    735     }
    736 }
    737 
    738 static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    739 {
    740     static const X86OpEntry opcodes_0F2A[4] = {
    741         X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
    742         X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
    743         X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
    744         X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
    745     };
    746     *entry = *decode_by_prefix(s, opcodes_0F2A);
    747 }
    748 
    749 static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    750 {
    751     static const X86OpEntry opcodes_0F2B[4] = {
    752         X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex4), /* MOVNTPS */
    753         X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex4), /* MOVNTPD */
    754         X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
    755         X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
    756     };
    757 
    758     *entry = *decode_by_prefix(s, opcodes_0F2B);
    759 }
    760 
    761 static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    762 {
    763     static const X86OpEntry opcodes_0F2C[4] = {
    764         /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
    765         X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
    766         X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
    767         X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
    768         X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
    769     };
    770     *entry = *decode_by_prefix(s, opcodes_0F2C);
    771 }
    772 
    773 static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    774 {
    775     static const X86OpEntry opcodes_0F2D[4] = {
    776         /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
    777         X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
    778         X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
    779         X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
    780         X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
    781     };
    782     *entry = *decode_by_prefix(s, opcodes_0F2D);
    783 }
    784 
    785 static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    786 {
    787     if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
    788         entry->op1 = X86_TYPE_None;
    789         entry->s1 = X86_SIZE_None;
    790     }
    791     switch (*b) {
    792     case 0x51: entry->gen = gen_VSQRT; break;
    793     case 0x52: entry->gen = gen_VRSQRT; break;
    794     case 0x53: entry->gen = gen_VRCP; break;
    795     case 0x5A: entry->gen = gen_VCVTfp2fp; break;
    796     }
    797 }
    798 
    799 static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    800 {
    801     static const X86OpEntry opcodes_0F5B[4] = {
    802         X86_OP_ENTRY2(VCVTDQ2PS,   V,x, W,x,      vex2),
    803         X86_OP_ENTRY2(VCVTPS2DQ,   V,x, W,x,      vex2),
    804         X86_OP_ENTRY2(VCVTTPS2DQ,  V,x, W,x,      vex2),
    805         {},
    806     };
    807     *entry = *decode_by_prefix(s, opcodes_0F5B);
    808 }
    809 
    810 static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    811 {
    812     static const X86OpEntry opcodes_0FE6[4] = {
    813         {},
    814         X86_OP_ENTRY2(VCVTTPD2DQ,  V,x, W,x,      vex2),
    815         X86_OP_ENTRY2(VCVTDQ2PD,   V,x, W,x,      vex2),
    816         X86_OP_ENTRY2(VCVTPD2DQ,   V,x, W,x,      vex2),
    817     };
    818     *entry = *decode_by_prefix(s, opcodes_0FE6);
    819 }
    820 
        /*
         * Entries for the two-byte (0F xx) opcode map, indexed by the byte that
         * follows 0F; do_decode_0F() copies the selected row verbatim.  Rows are
         * grouped by the low/high half of each opcode row rather than in strict
         * numeric order, and indices without an initializer are left
         * zero-initialized.  X86_OP_GROUP* rows presumably hand off to the
         * decode_<name> function of the same name (e.g. 0F10 -> decode_0F10);
         * the macro is defined outside this chunk.
         */
    821 static const X86OpEntry opcodes_0F[256] = {
    822     [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
    823     /*
    824      * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
    825      * more like an Ib operand.  Dispatch to the right helper in a single gen_*
    826      * function.
    827      */
    828     [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
    829 
    830     [0x10] = X86_OP_GROUP0(0F10),
    831     [0x11] = X86_OP_GROUP0(0F11),
    832     [0x12] = X86_OP_GROUP0(0F12),
    833     [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex4 p_00_66),
    834     [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
    835     [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
    836     [0x16] = X86_OP_GROUP0(0F16),
    837     /* Incorrectly listed as Mq,Vq in the manual */
    838     [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex4 p_00_66),
    839 
    840     [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
    841     [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    842     [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex5 p_00_f3),
    843     [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex5 p_00_f3),
    844     [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
    845     [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
    846     [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
    847     [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */
    848 
    849     [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    850     [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    851     [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    852     [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    853     [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    854     [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    855     [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    856     [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    857 
    858     [0x70] = X86_OP_GROUP0(0F70),
    859     [0x71] = X86_OP_GROUP0(group12),
    860     [0x72] = X86_OP_GROUP0(group13),
    861     [0x73] = X86_OP_GROUP0(group14),
    862     [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    863     [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    864     [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    865     [0x77] = X86_OP_GROUP0(0F77),
    866 
    867     [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
    868     [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
    869     [0x2A] = X86_OP_GROUP0(0F2A),
    870     [0x2B] = X86_OP_GROUP0(0F2B),
    871     [0x2C] = X86_OP_GROUP0(0F2C),
    872     [0x2D] = X86_OP_GROUP0(0F2D),
    873     [0x2E] = X86_OP_ENTRY3(VUCOMI,     None,None, V,x, W,x,  vex4 p_00_66),
    874     [0x2F] = X86_OP_ENTRY3(VCOMI,      None,None, V,x, W,x,  vex4 p_00_66),
    875 
    876     [0x38] = X86_OP_GROUP0(0F38),
    877     [0x3a] = X86_OP_GROUP0(0F3A),
    878 
    879     [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    880     [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    881     [0x5a] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex3 p_00_66_f3_f2),
    882     [0x5b] = X86_OP_GROUP0(0F5B),
    883     [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    884     [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    885     [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    886     [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    887 
    888     [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    889     [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    890     [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    891     [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    892     [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    893     [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    894     [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
    895     [0x6f] = X86_OP_GROUP0(0F6F),
    896 
    897     [0x78] = X86_OP_GROUP0(0F78),
    898     [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
    899     [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    900     [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    901     [0x7e] = X86_OP_GROUP0(0F7E),
    902     [0x7f] = X86_OP_GROUP0(0F7F),
    903 
    904     [0xae] = X86_OP_GROUP0(group15),
    905 
    906     [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
    907     [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
    908     [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
    909     [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
    910 
    911     [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
    912     [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    913     [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    914     [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    915     [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    916     [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    917     [0xd6] = X86_OP_GROUP0(0FD6),
    918     [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
    919 
    920     [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    921     [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
    922     [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
    923     [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    924     [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    925     [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    926     [0xe6] = X86_OP_GROUP0(0FE6),
    927     [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
    928 
    929     [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
    930     [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    931     [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    932     [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    933     [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    934     [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    935     [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    936     [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
    937 
    938     /* Incorrectly missing from 2-17 */
    939     [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    940     [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    941     [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    942     [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    943     [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    944     [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    945     [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    946     [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    947 
    948     [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    949     [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    950     [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    951     [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    952     [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    953     [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    954     [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    955     [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    956 
    957     [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    958     [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    959     [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    960     [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    961     [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    962     [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    963     [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    964     /* 0xff = UD0 */
    965 };
    966 
    967 static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    968 {
    969     *entry = opcodes_0F[*b];
    970 }
    971 
    972 static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
    973 {
    974     *b = x86_ldub_code(env, s);
    975     do_decode_0F(s, env, entry, b);
    976 }
    977 
        /*
         * Table for the first opcode byte, consulted by decode_root().  Only the
         * 0F escape is populated; every other index is zero-initialized.
         */
    978 static const X86OpEntry opcodes_root[256] = {
    979     [0x0F] = X86_OP_GROUP0(0F),
    980 };
    981 
        /*
         * The short names below are shorthands used inside the opcode table
         * initializers above; undefine them so they cannot affect later code.
         */
    982 #undef mmx
    983 #undef vex1
    984 #undef vex2
    985 #undef vex3
    986 #undef vex4
    987 #undef vex4_unal
    988 #undef vex5
    989 #undef vex6
    990 #undef vex7
    991 #undef vex8
    992 #undef vex11
    993 #undef vex12
    994 #undef vex13
    995 
    996 /*
    997  * Decode the fixed part of the opcode and place the last
    998  * in b.
    999  */
   1000 static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
   1001 {
   1002     *entry = opcodes_root[*b];
   1003 }
   1004 
   1005 
   1006 static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
   1007                         X86DecodedOp *op, X86OpType type)
   1008 {
   1009     int modrm = get_modrm(s, env);
   1010     if ((modrm >> 6) == 3) {
   1011         if (s->prefix & PREFIX_LOCK) {
   1012             decode->e.gen = gen_illegal;
   1013             return 0xff;
   1014         }
   1015         op->n = (modrm & 7);
   1016         if (type != X86_TYPE_Q && type != X86_TYPE_N) {
   1017             op->n |= REX_B(s);
   1018         }
   1019     } else {
   1020         op->has_ea = true;
   1021         op->n = -1;
   1022         decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
   1023     }
   1024     return modrm;
   1025 }
   1026 
   1027 static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
   1028 {
   1029     switch (size) {
   1030     case X86_SIZE_b:  /* byte */
   1031         *ot = MO_8;
   1032         return true;
   1033 
   1034     case X86_SIZE_d:  /* 32-bit */
   1035     case X86_SIZE_ss: /* SSE/AVX scalar single precision */
   1036         *ot = MO_32;
   1037         return true;
   1038 
   1039     case X86_SIZE_p:  /* Far pointer, return offset size */
   1040     case X86_SIZE_s:  /* Descriptor, return offset size */
   1041     case X86_SIZE_v:  /* 16/32/64-bit, based on operand size */
   1042         *ot = s->dflag;
   1043         return true;
   1044 
   1045     case X86_SIZE_pi: /* MMX */
   1046     case X86_SIZE_q:  /* 64-bit */
   1047     case X86_SIZE_sd: /* SSE/AVX scalar double precision */
   1048         *ot = MO_64;
   1049         return true;
   1050 
   1051     case X86_SIZE_w:  /* 16-bit */
   1052         *ot = MO_16;
   1053         return true;
   1054 
   1055     case X86_SIZE_y:  /* 32/64-bit, based on operand size */
   1056         *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
   1057         return true;
   1058 
   1059     case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
   1060         *ot = s->dflag == MO_16 ? MO_16 : MO_32;
   1061         return true;
   1062 
   1063     case X86_SIZE_dq: /* SSE/AVX 128-bit */
   1064         if (e->special == X86_SPECIAL_MMX &&
   1065             !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
   1066             *ot = MO_64;
   1067             return true;
   1068         }
   1069         if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
   1070             return false;
   1071         }
   1072         *ot = MO_128;
   1073         return true;
   1074 
   1075     case X86_SIZE_qq: /* AVX 256-bit */
   1076         if (!s->vex_l) {
   1077             return false;
   1078         }
   1079         *ot = MO_256;
   1080         return true;
   1081 
   1082     case X86_SIZE_x:  /* 128/256-bit, based on operand size */
   1083         if (e->special == X86_SPECIAL_MMX &&
   1084             !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
   1085             *ot = MO_64;
   1086             return true;
   1087         }
   1088         /* fall through */
   1089     case X86_SIZE_ps: /* SSE/AVX packed single precision */
   1090     case X86_SIZE_pd: /* SSE/AVX packed double precision */
   1091         *ot = s->vex_l ? MO_256 : MO_128;
   1092         return true;
   1093 
   1094     case X86_SIZE_ph: /* SSE/AVX packed half precision */
   1095         *ot = s->vex_l ? MO_128 : MO_64;
   1096         return true;
   1097 
   1098     case X86_SIZE_d64:  /* Default to 64-bit in 64-bit mode */
   1099         *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
   1100         return true;
   1101 
   1102     case X86_SIZE_f64:  /* Ignore size override prefix in 64-bit mode */
   1103         *ot = CODE64(s) ? MO_64 : s->dflag;
   1104         return true;
   1105 
   1106     default:
   1107         *ot = -1;
   1108         return true;
   1109     }
   1110 }
   1111 
   1112 static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
   1113                       X86DecodedOp *op, X86OpType type, int b)
   1114 {
   1115     int modrm;
   1116 
   1117     switch (type) {
   1118     case X86_TYPE_None:  /* Implicit or absent */
   1119     case X86_TYPE_A:  /* Implicit */
   1120     case X86_TYPE_F:  /* EFLAGS/RFLAGS */
   1121         break;
   1122 
   1123     case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
   1124         op->unit = X86_OP_INT;
   1125         op->n = s->vex_v;
   1126         break;
   1127 
   1128     case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
   1129         op->unit = X86_OP_CR;
   1130         goto get_reg;
   1131 
   1132     case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
   1133         op->unit = X86_OP_DR;
   1134         goto get_reg;
   1135 
   1136     case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
   1137         op->unit = X86_OP_INT;
   1138         goto get_reg;
   1139 
   1140     case X86_TYPE_S:  /* reg selects a segment register */
   1141         op->unit = X86_OP_SEG;
   1142         goto get_reg;
   1143 
   1144     case X86_TYPE_P:
   1145         op->unit = X86_OP_MMX;
   1146         goto get_reg;
   1147 
   1148     case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
   1149         if (decode->e.special == X86_SPECIAL_MMX &&
   1150             !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
   1151             op->unit = X86_OP_MMX;
   1152         } else {
   1153             op->unit = X86_OP_SSE;
   1154         }
   1155     get_reg:
   1156         op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
   1157         break;
   1158 
   1159     case X86_TYPE_E:  /* ALU modrm operand */
   1160         op->unit = X86_OP_INT;
   1161         goto get_modrm;
   1162 
   1163     case X86_TYPE_Q:  /* MMX modrm operand */
   1164         op->unit = X86_OP_MMX;
   1165         goto get_modrm;
   1166 
   1167     case X86_TYPE_W:  /* XMM/YMM modrm operand */
   1168         if (decode->e.special == X86_SPECIAL_MMX &&
   1169             !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
   1170             op->unit = X86_OP_MMX;
   1171         } else {
   1172             op->unit = X86_OP_SSE;
   1173         }
   1174         goto get_modrm;
   1175 
   1176     case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
   1177         op->unit = X86_OP_MMX;
   1178         goto get_modrm_reg;
   1179 
   1180     case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
   1181         if (decode->e.special == X86_SPECIAL_MMX &&
   1182             !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
   1183             op->unit = X86_OP_MMX;
   1184         } else {
   1185             op->unit = X86_OP_SSE;
   1186         }
   1187         goto get_modrm_reg;
   1188 
   1189     case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
   1190         op->unit = X86_OP_INT;
   1191     get_modrm_reg:
   1192         modrm = get_modrm(s, env);
   1193         if ((modrm >> 6) != 3) {
   1194             return false;
   1195         }
   1196         goto get_modrm;
   1197 
   1198     case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
   1199         op->unit = X86_OP_SSE;
   1200         /* fall through */
   1201     case X86_TYPE_M:  /* modrm byte selects a memory operand */
   1202         modrm = get_modrm(s, env);
   1203         if ((modrm >> 6) == 3) {
   1204             return false;
   1205         }
   1206     get_modrm:
   1207         decode_modrm(s, env, decode, op, type);
   1208         break;
   1209 
   1210     case X86_TYPE_O:  /* Absolute address encoded in the instruction */
   1211         op->unit = X86_OP_INT;
   1212         op->has_ea = true;
   1213         op->n = -1;
   1214         decode->mem = (AddressParts) {
   1215             .def_seg = R_DS,
   1216             .base = -1,
   1217             .index = -1,
   1218             .disp = insn_get_addr(env, s, s->aflag)
   1219         };
   1220         break;
   1221 
   1222     case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
   1223         if ((s->prefix & PREFIX_VEX)) {
   1224             op->unit = X86_OP_SSE;
   1225             op->n = s->vex_v;
   1226             break;
   1227         }
   1228         if (op == &decode->op[0]) {
   1229             /* shifts place the destination in VEX.vvvv, use modrm */
   1230             return decode_op(s, env, decode, op, decode->e.op1, b);
   1231         } else {
   1232             return decode_op(s, env, decode, op, decode->e.op0, b);
   1233         }
   1234 
   1235     case X86_TYPE_I:  /* Immediate */
   1236         op->unit = X86_OP_IMM;
   1237         decode->immediate = insn_get_signed(env, s, op->ot);
   1238         break;
   1239 
   1240     case X86_TYPE_J:  /* Relative offset for a jump */
   1241         op->unit = X86_OP_IMM;
   1242         decode->immediate = insn_get_signed(env, s, op->ot);
   1243         decode->immediate += s->pc - s->cs_base;
   1244         if (s->dflag == MO_16) {
   1245             decode->immediate &= 0xffff;
   1246         } else if (!CODE64(s)) {
   1247             decode->immediate &= 0xffffffffu;
   1248         }
   1249         break;
   1250 
   1251     case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
   1252         op->n = insn_get(env, s, op->ot) >> 4;
   1253         break;
   1254 
   1255     case X86_TYPE_X:  /* string source */
   1256         op->n = -1;
   1257         decode->mem = (AddressParts) {
   1258             .def_seg = R_DS,
   1259             .base = R_ESI,
   1260             .index = -1,
   1261         };
   1262         break;
   1263 
   1264     case X86_TYPE_Y:  /* string destination */
   1265         op->n = -1;
   1266         decode->mem = (AddressParts) {
   1267             .def_seg = R_ES,
   1268             .base = R_EDI,
   1269             .index = -1,
   1270         };
   1271         break;
   1272 
   1273     case X86_TYPE_2op:
   1274         *op = decode->op[0];
   1275         break;
   1276 
   1277     case X86_TYPE_LoBits:
   1278         op->n = (b & 7) | REX_B(s);
   1279         op->unit = X86_OP_INT;
   1280         break;
   1281 
   1282     case X86_TYPE_0 ... X86_TYPE_7:
   1283         op->n = type - X86_TYPE_0;
   1284         op->unit = X86_OP_INT;
   1285         break;
   1286 
   1287     case X86_TYPE_ES ... X86_TYPE_GS:
   1288         op->n = type - X86_TYPE_ES;
   1289         op->unit = X86_OP_SEG;
   1290         break;
   1291     }
   1292 
   1293     return true;
   1294 }
   1295 
   1296 static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
   1297 {
   1298     uint16_t sse_prefixes;
   1299 
   1300     if (!e->valid_prefix) {
   1301         return true;
   1302     }
   1303     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
   1304         /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66.  */
   1305         s->prefix &= ~PREFIX_DATA;
   1306     }
   1307 
   1308     /* Now, either zero or one bit is set in sse_prefixes.  */
   1309     sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
   1310     return e->valid_prefix & (1 << sse_prefixes);
   1311 }
   1312 
   1313 static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
   1314                         X86DecodedInsn *decode)
   1315 {
   1316     X86OpEntry *e = &decode->e;
   1317 
   1318     decode_func(s, env, e, &decode->b);
   1319     while (e->is_decode) {
   1320         e->is_decode = false;
   1321         e->decode(s, env, e, &decode->b);
   1322     }
   1323 
   1324     if (!validate_sse_prefix(s, e)) {
   1325         return false;
   1326     }
   1327 
   1328     /* First compute size of operands in order to initialize s->rip_offset.  */
   1329     if (e->op0 != X86_TYPE_None) {
   1330         if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
   1331             return false;
   1332         }
   1333         if (e->op0 == X86_TYPE_I) {
   1334             s->rip_offset += 1 << decode->op[0].ot;
   1335         }
   1336     }
   1337     if (e->op1 != X86_TYPE_None) {
   1338         if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
   1339             return false;
   1340         }
   1341         if (e->op1 == X86_TYPE_I) {
   1342             s->rip_offset += 1 << decode->op[1].ot;
   1343         }
   1344     }
   1345     if (e->op2 != X86_TYPE_None) {
   1346         if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
   1347             return false;
   1348         }
   1349         if (e->op2 == X86_TYPE_I) {
   1350             s->rip_offset += 1 << decode->op[2].ot;
   1351         }
   1352     }
   1353     if (e->op3 != X86_TYPE_None) {
   1354         /*
   1355          * A couple instructions actually use the extra immediate byte for an Lx
   1356          * register operand; those are handled in the gen_* functions as one off.
   1357          */
   1358         assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
   1359         s->rip_offset += 1;
   1360     }
   1361 
   1362     if (e->op0 != X86_TYPE_None &&
   1363         !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
   1364         return false;
   1365     }
   1366 
   1367     if (e->op1 != X86_TYPE_None &&
   1368         !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
   1369         return false;
   1370     }
   1371 
   1372     if (e->op2 != X86_TYPE_None &&
   1373         !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
   1374         return false;
   1375     }
   1376 
   1377     if (e->op3 != X86_TYPE_None) {
   1378         decode->immediate = insn_get_signed(env, s, MO_8);
   1379     }
   1380 
   1381     return true;
   1382 }
   1383 
   1384 static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
   1385 {
   1386     switch (cpuid) {
   1387     case X86_FEAT_None:
   1388         return true;
   1389     case X86_FEAT_F16C:
   1390         return (s->cpuid_ext_features & CPUID_EXT_F16C);
   1391     case X86_FEAT_FMA:
   1392         return (s->cpuid_ext_features & CPUID_EXT_FMA);
   1393     case X86_FEAT_MOVBE:
   1394         return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
   1395     case X86_FEAT_PCLMULQDQ:
   1396         return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
   1397     case X86_FEAT_SSE:
   1398         return (s->cpuid_ext_features & CPUID_SSE);
   1399     case X86_FEAT_SSE2:
   1400         return (s->cpuid_ext_features & CPUID_SSE2);
   1401     case X86_FEAT_SSE3:
   1402         return (s->cpuid_ext_features & CPUID_EXT_SSE3);
   1403     case X86_FEAT_SSSE3:
   1404         return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
   1405     case X86_FEAT_SSE41:
   1406         return (s->cpuid_ext_features & CPUID_EXT_SSE41);
   1407     case X86_FEAT_SSE42:
   1408         return (s->cpuid_ext_features & CPUID_EXT_SSE42);
   1409     case X86_FEAT_AES:
   1410         if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
   1411             return false;
   1412         } else if (!(s->prefix & PREFIX_VEX)) {
   1413             return true;
   1414         } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
   1415             return false;
   1416         } else {
   1417             return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
   1418         }
   1419 
   1420     case X86_FEAT_AVX:
   1421         return (s->cpuid_ext_features & CPUID_EXT_AVX);
   1422 
   1423     case X86_FEAT_3DNOW:
   1424         return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
   1425     case X86_FEAT_SSE4A:
   1426         return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
   1427 
   1428     case X86_FEAT_ADX:
   1429         return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
   1430     case X86_FEAT_BMI1:
   1431         return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
   1432     case X86_FEAT_BMI2:
   1433         return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
   1434     case X86_FEAT_AVX2:
   1435         return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
   1436     }
   1437     g_assert_not_reached();
   1438 }
   1439 
   1440 static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
   1441 {
   1442     X86OpEntry *e = &decode->e;
   1443 
   1444     switch (e->vex_special) {
   1445     case X86_VEX_REPScalar:
   1446         /*
   1447          * Instructions which differ between 00/66 and F2/F3 in the
   1448          * exception classification and the size of the memory operand.
   1449          */
   1450         assert(e->vex_class == 1 || e->vex_class == 2);
   1451         if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
   1452             e->vex_class = 3;
   1453             if (s->vex_l) {
   1454                 goto illegal;
   1455             }
   1456             assert(decode->e.s2 == X86_SIZE_x);
   1457             if (decode->op[2].has_ea) {
   1458                 decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
   1459             }
   1460         }
   1461         break;
   1462 
   1463     case X86_VEX_SSEUnaligned:
   1464         /* handled in sse_needs_alignment.  */
   1465         break;
   1466 
   1467     case X86_VEX_AVX2_256:
   1468         if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
   1469             goto illegal;
   1470         }
   1471     }
   1472 
   1473     /* TODO: instructions that require VEX.W=0 (Table 2-16) */
   1474 
   1475     switch (e->vex_class) {
   1476     case 0:
   1477         if (s->prefix & PREFIX_VEX) {
   1478             goto illegal;
   1479         }
   1480         return true;
   1481     case 1:
   1482     case 2:
   1483     case 3:
   1484     case 4:
   1485     case 5:
   1486     case 7:
   1487         if (s->prefix & PREFIX_VEX) {
   1488             if (!(s->flags & HF_AVX_EN_MASK)) {
   1489                 goto illegal;
   1490             }
   1491         } else if (e->special != X86_SPECIAL_MMX ||
   1492                    (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
   1493             if (!(s->flags & HF_OSFXSR_MASK)) {
   1494                 goto illegal;
   1495             }
   1496         }
   1497         break;
   1498     case 12:
   1499         /* Must have a VSIB byte and no address prefix.  */
   1500         assert(s->has_modrm);
   1501         if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
   1502             goto illegal;
   1503         }
   1504 
   1505         /* Check no overlap between registers.  */
   1506         if (!decode->op[0].has_ea &&
   1507             (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
   1508             goto illegal;
   1509         }
   1510         assert(!decode->op[1].has_ea);
   1511         if (decode->op[1].n == decode->mem.index) {
   1512             goto illegal;
   1513         }
   1514         if (!decode->op[2].has_ea &&
   1515             (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
   1516             goto illegal;
   1517         }
   1518         /* fall through */
   1519     case 6:
   1520     case 11:
   1521         if (!(s->prefix & PREFIX_VEX)) {
   1522             goto illegal;
   1523         }
   1524         if (!(s->flags & HF_AVX_EN_MASK)) {
   1525             goto illegal;
   1526         }
   1527         break;
   1528     case 8:
   1529         /* Non-VEX case handled in decode_0F77.  */
   1530         assert(s->prefix & PREFIX_VEX);
   1531         if (!(s->flags & HF_AVX_EN_MASK)) {
   1532             goto illegal;
   1533         }
   1534         break;
   1535     case 13:
   1536         if (!(s->prefix & PREFIX_VEX)) {
   1537             goto illegal;
   1538         }
   1539         if (s->vex_l) {
   1540             goto illegal;
   1541         }
   1542         /* All integer instructions use VEX.vvvv, so exit.  */
   1543         return true;
   1544     }
   1545 
   1546     if (s->vex_v != 0 &&
   1547         e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
   1548         e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
   1549         e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
   1550         goto illegal;
   1551     }
   1552 
   1553     if (s->flags & HF_TS_MASK) {
   1554         goto nm_exception;
   1555     }
   1556     if (s->flags & HF_EM_MASK) {
   1557         goto illegal;
   1558     }
   1559     return true;
   1560 
   1561 nm_exception:
   1562     gen_NM_exception(s);
   1563     return false;
   1564 illegal:
   1565     gen_illegal_opcode(s);
   1566     return false;
   1567 }
   1568 
   1569 static void decode_temp_free(X86DecodedOp *op)
   1570 {
   1571     if (op->v_ptr) {
   1572         tcg_temp_free_ptr(op->v_ptr);
   1573     }
   1574 }
   1575 
   1576 static void decode_temps_free(X86DecodedInsn *decode)
   1577 {
   1578     decode_temp_free(&decode->op[0]);
   1579     decode_temp_free(&decode->op[1]);
   1580     decode_temp_free(&decode->op[2]);
   1581 }
   1582 
   1583 /*
   1584  * Convert one instruction. s->base.is_jmp is set if the translation must
   1585  * be stopped.
   1586  */
   1587 static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
   1588 {
   1589     CPUX86State *env = cpu->env_ptr;
   1590     bool first = true;
   1591     X86DecodedInsn decode;
   1592     X86DecodeFunc decode_func = decode_root;
   1593 
   1594     s->has_modrm = false;
   1595 
   1596  next_byte:
   1597     if (first) {
   1598         first = false;
   1599     } else {
   1600         b = x86_ldub_code(env, s);
   1601     }
   1602     /* Collect prefixes.  */
   1603     switch (b) {
   1604     case 0xf3:
   1605         s->prefix |= PREFIX_REPZ;
   1606         s->prefix &= ~PREFIX_REPNZ;
   1607         goto next_byte;
   1608     case 0xf2:
   1609         s->prefix |= PREFIX_REPNZ;
   1610         s->prefix &= ~PREFIX_REPZ;
   1611         goto next_byte;
   1612     case 0xf0:
   1613         s->prefix |= PREFIX_LOCK;
   1614         goto next_byte;
   1615     case 0x2e:
   1616         s->override = R_CS;
   1617         goto next_byte;
   1618     case 0x36:
   1619         s->override = R_SS;
   1620         goto next_byte;
   1621     case 0x3e:
   1622         s->override = R_DS;
   1623         goto next_byte;
   1624     case 0x26:
   1625         s->override = R_ES;
   1626         goto next_byte;
   1627     case 0x64:
   1628         s->override = R_FS;
   1629         goto next_byte;
   1630     case 0x65:
   1631         s->override = R_GS;
   1632         goto next_byte;
   1633     case 0x66:
   1634         s->prefix |= PREFIX_DATA;
   1635         goto next_byte;
   1636     case 0x67:
   1637         s->prefix |= PREFIX_ADR;
   1638         goto next_byte;
   1639 #ifdef TARGET_X86_64
   1640     case 0x40 ... 0x4f:
   1641         if (CODE64(s)) {
   1642             /* REX prefix */
   1643             s->prefix |= PREFIX_REX;
   1644             s->vex_w = (b >> 3) & 1;
   1645             s->rex_r = (b & 0x4) << 1;
   1646             s->rex_x = (b & 0x2) << 2;
   1647             s->rex_b = (b & 0x1) << 3;
   1648             goto next_byte;
   1649         }
   1650         break;
   1651 #endif
   1652     case 0xc5: /* 2-byte VEX */
   1653     case 0xc4: /* 3-byte VEX */
   1654         /*
   1655          * VEX prefixes cannot be used except in 32-bit mode.
   1656          * Otherwise the instruction is LES or LDS.
   1657          */
   1658         if (CODE32(s) && !VM86(s)) {
   1659             static const int pp_prefix[4] = {
   1660                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
   1661             };
   1662             int vex3, vex2 = x86_ldub_code(env, s);
   1663 
   1664             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
   1665                 /*
   1666                  * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
   1667                  * otherwise the instruction is LES or LDS.
   1668                  */
   1669                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
   1670                 break;
   1671             }
   1672 
   1673             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
   1674             if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
   1675                              | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
   1676                 goto illegal_op;
   1677             }
   1678 #ifdef TARGET_X86_64
   1679             s->rex_r = (~vex2 >> 4) & 8;
   1680 #endif
   1681             if (b == 0xc5) {
   1682                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
   1683                 vex3 = vex2;
   1684                 decode_func = decode_0F;
   1685             } else {
   1686                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
   1687                 vex3 = x86_ldub_code(env, s);
   1688 #ifdef TARGET_X86_64
   1689                 s->rex_x = (~vex2 >> 3) & 8;
   1690                 s->rex_b = (~vex2 >> 2) & 8;
   1691 #endif
   1692                 s->vex_w = (vex3 >> 7) & 1;
   1693                 switch (vex2 & 0x1f) {
   1694                 case 0x01: /* Implied 0f leading opcode bytes.  */
   1695                     decode_func = decode_0F;
   1696                     break;
   1697                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
   1698                     decode_func = decode_0F38;
   1699                     break;
   1700                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
   1701                     decode_func = decode_0F3A;
   1702                     break;
   1703                 default:   /* Reserved for future use.  */
   1704                     goto unknown_op;
   1705                 }
   1706             }
   1707             s->vex_v = (~vex3 >> 3) & 0xf;
   1708             s->vex_l = (vex3 >> 2) & 1;
   1709             s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
   1710         }
   1711         break;
   1712     default:
   1713         if (b >= 0x100) {
   1714             b -= 0x100;
   1715             decode_func = do_decode_0F;
   1716         }
   1717         break;
   1718     }
   1719 
   1720     /* Post-process prefixes.  */
   1721     if (CODE64(s)) {
   1722         /*
   1723          * In 64-bit mode, the default data size is 32-bit.  Select 64-bit
   1724          * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
   1725          * over 0x66 if both are present.
   1726          */
   1727         s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
   1728         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
   1729         s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
   1730     } else {
   1731         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
   1732         if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
   1733             s->dflag = MO_32;
   1734         } else {
   1735             s->dflag = MO_16;
   1736         }
   1737         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
   1738         if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
   1739             s->aflag = MO_32;
   1740         }  else {
   1741             s->aflag = MO_16;
   1742         }
   1743     }
   1744 
   1745     memset(&decode, 0, sizeof(decode));
   1746     decode.b = b;
   1747     if (!decode_insn(s, env, decode_func, &decode)) {
   1748         goto illegal_op;
   1749     }
   1750     if (!decode.e.gen) {
   1751         goto unknown_op;
   1752     }
   1753 
   1754     if (!has_cpuid_feature(s, decode.e.cpuid)) {
   1755         goto illegal_op;
   1756     }
   1757 
   1758     switch (decode.e.special) {
   1759     case X86_SPECIAL_None:
   1760         break;
   1761 
   1762     case X86_SPECIAL_Locked:
   1763         if (decode.op[0].has_ea) {
   1764             s->prefix |= PREFIX_LOCK;
   1765         }
   1766         break;
   1767 
   1768     case X86_SPECIAL_ProtMode:
   1769         if (!PE(s) || VM86(s)) {
   1770             goto illegal_op;
   1771         }
   1772         break;
   1773 
   1774     case X86_SPECIAL_i64:
   1775         if (CODE64(s)) {
   1776             goto illegal_op;
   1777         }
   1778         break;
   1779     case X86_SPECIAL_o64:
   1780         if (!CODE64(s)) {
   1781             goto illegal_op;
   1782         }
   1783         break;
   1784 
   1785     case X86_SPECIAL_ZExtOp0:
   1786         assert(decode.op[0].unit == X86_OP_INT);
   1787         if (!decode.op[0].has_ea) {
   1788             decode.op[0].ot = MO_32;
   1789         }
   1790         break;
   1791 
   1792     case X86_SPECIAL_ZExtOp2:
   1793         assert(decode.op[2].unit == X86_OP_INT);
   1794         if (!decode.op[2].has_ea) {
   1795             decode.op[2].ot = MO_32;
   1796         }
   1797         break;
   1798 
   1799     case X86_SPECIAL_AVXExtMov:
   1800         if (!decode.op[2].has_ea) {
   1801             decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
   1802         } else if (s->vex_l) {
   1803             decode.op[2].ot++;
   1804         }
   1805         break;
   1806 
   1807     case X86_SPECIAL_MMX:
   1808         if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
   1809             gen_helper_enter_mmx(cpu_env);
   1810         }
   1811         break;
   1812     }
   1813 
   1814     if (!validate_vex(s, &decode)) {
   1815         return;
   1816     }
   1817     if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
   1818         gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
   1819     }
   1820     if (s->prefix & PREFIX_LOCK) {
   1821         if (decode.op[0].unit != X86_OP_INT || !decode.op[0].has_ea) {
   1822             goto illegal_op;
   1823         }
   1824         gen_load(s, &decode, 2, s->T1);
   1825         decode.e.gen(s, env, &decode);
   1826     } else {
   1827         if (decode.op[0].unit == X86_OP_MMX) {
   1828             compute_mmx_offset(&decode.op[0]);
   1829         } else if (decode.op[0].unit == X86_OP_SSE) {
   1830             compute_xmm_offset(&decode.op[0]);
   1831         }
   1832         gen_load(s, &decode, 1, s->T0);
   1833         gen_load(s, &decode, 2, s->T1);
   1834         decode.e.gen(s, env, &decode);
   1835         gen_writeback(s, &decode, 0, s->T0);
   1836     }
   1837     decode_temps_free(&decode);
   1838     return;
   1839  illegal_op:
   1840     gen_illegal_opcode(s);
   1841     return;
   1842  unknown_op:
   1843     gen_unknown_opcode(env, s);
   1844 }