qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

howvec.c (13414B)


      1 /*
      2  * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
      3  *
      4  * How vectorised is this code?
      5  *
      6  * Attempt to measure the amount of vectorisation that has been done
      7  * on some code by counting classes of instruction.
      8  *
      9  * License: GNU GPL, version 2 or later.
     10  *   See the COPYING file in the top-level directory.
     11  */
     12 #include <inttypes.h>
     13 #include <assert.h>
     14 #include <stdlib.h>
     15 #include <inttypes.h>
     16 #include <string.h>
     17 #include <unistd.h>
     18 #include <stdio.h>
     19 #include <glib.h>
     20 
     21 #include <qemu-plugin.h>
     22 
/*
 * Exported symbol recording the plugin API version (QEMU_PLUGIN_VERSION,
 * presumably provided by <qemu-plugin.h>) this file was compiled against.
 */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
     24 
     25 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
     26 
     27 typedef enum {
     28     COUNT_CLASS,
     29     COUNT_INDIVIDUAL,
     30     COUNT_NONE
     31 } CountType;
     32 
     33 static int limit = 50;
     34 static bool do_inline;
     35 static bool verbose;
     36 
/* Held around lookups/insertions in the insns table (see find_counter). */
static GMutex lock;
/* Maps an opcode value to its InsnExecCount record. */
static GHashTable *insns;
     39 
     40 typedef struct {
     41     const char *class;
     42     const char *opt;
     43     uint32_t mask;
     44     uint32_t pattern;
     45     CountType what;
     46     uint64_t count;
     47 } InsnClassExecCount;
     48 
     49 typedef struct {
     50     char *insn;
     51     uint32_t opcode;
     52     uint64_t count;
     53     InsnClassExecCount *class;
     54 } InsnExecCount;
     55 
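/*
 * Matchers for classes of instructions. Order is important: a table is
 * searched from the top and the first entry whose pattern equals the
 * masked opcode wins.
 *
 * Your most precise match must therefore come before looser matches. If
 * no earlier entry matches, the catch-all entry at the end of the table
 * applies, which can be used to create an individual entry.
 *
 * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
 */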
     64 static InsnClassExecCount aarch64_insn_classes[] = {
     65     /* "Reserved"" */
     66     { "  UDEF",              "udef",   0xffff0000, 0x00000000, COUNT_NONE},
     67     { "  SVE",               "sve",    0x1e000000, 0x04000000, COUNT_CLASS},
     68     { "Reserved",            "res",    0x1e000000, 0x00000000, COUNT_CLASS},
     69     /* Data Processing Immediate */
     70     { "  PCrel addr",        "pcrel",  0x1f000000, 0x10000000, COUNT_CLASS},
     71     { "  Add/Sub (imm,tags)", "asit",   0x1f800000, 0x11800000, COUNT_CLASS},
     72     { "  Add/Sub (imm)",     "asi",    0x1f000000, 0x11000000, COUNT_CLASS},
     73     { "  Logical (imm)",     "logi",   0x1f800000, 0x12000000, COUNT_CLASS},
     74     { "  Move Wide (imm)",   "movwi",  0x1f800000, 0x12800000, COUNT_CLASS},
     75     { "  Bitfield",          "bitf",   0x1f800000, 0x13000000, COUNT_CLASS},
     76     { "  Extract",           "extr",   0x1f800000, 0x13800000, COUNT_CLASS},
     77     { "Data Proc Imm",       "dpri",   0x1c000000, 0x10000000, COUNT_CLASS},
     78     /* Branches */
     79     { "  Cond Branch (imm)", "cndb",   0xfe000000, 0x54000000, COUNT_CLASS},
     80     { "  Exception Gen",     "excp",   0xff000000, 0xd4000000, COUNT_CLASS},
     81     { "    NOP",             "nop",    0xffffffff, 0xd503201f, COUNT_NONE},
     82     { "  Hints",             "hint",   0xfffff000, 0xd5032000, COUNT_CLASS},
     83     { "  Barriers",          "barr",   0xfffff000, 0xd5033000, COUNT_CLASS},
     84     { "  PSTATE",            "psta",   0xfff8f000, 0xd5004000, COUNT_CLASS},
     85     { "  System Insn",       "sins",   0xffd80000, 0xd5080000, COUNT_CLASS},
     86     { "  System Reg",        "sreg",   0xffd00000, 0xd5100000, COUNT_CLASS},
     87     { "  Branch (reg)",      "breg",   0xfe000000, 0xd6000000, COUNT_CLASS},
     88     { "  Branch (imm)",      "bimm",   0x7c000000, 0x14000000, COUNT_CLASS},
     89     { "  Cmp & Branch",      "cmpb",   0x7e000000, 0x34000000, COUNT_CLASS},
     90     { "  Tst & Branch",      "tstb",   0x7e000000, 0x36000000, COUNT_CLASS},
     91     { "Branches",            "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
     92     /* Loads and Stores */
     93     { "  AdvSimd ldstmult",  "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
     94     { "  AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS},
     95     { "  AdvSimd ldst",      "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
     96     { "  AdvSimd ldst++",    "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS},
     97     { "  ldst excl",         "ldstx",  0x3f000000, 0x08000000, COUNT_CLASS},
     98     { "    Prefetch",        "prfm",   0xff000000, 0xd8000000, COUNT_CLASS},
     99     { "  Load Reg (lit)",    "ldlit",  0x1b000000, 0x18000000, COUNT_CLASS},
    100     { "  ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS},
    101     { "  ldst pair",         "ldstp",  0x38000000, 0x28000000, COUNT_CLASS},
    102     { "  ldst reg",          "ldstr",  0x3b200000, 0x38000000, COUNT_CLASS},
    103     { "  Atomic ldst",       "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
    104     { "  ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
    105     { "  ldst reg (pac)",    "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
    106     { "  ldst reg (imm)",    "ldsti",  0x3b000000, 0x39000000, COUNT_CLASS},
    107     { "Loads & Stores",      "ldst",   0x0a000000, 0x08000000, COUNT_CLASS},
    108     /* Data Processing Register */
    109     { "Data Proc Reg",       "dprr",   0x0e000000, 0x0a000000, COUNT_CLASS},
    110     /* Scalar FP */
    111     { "Scalar FP ",          "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
    112     /* Unclassified */
    113     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
    114 };
    115 
    116 static InsnClassExecCount sparc32_insn_classes[] = {
    117     { "Call",                "call",   0xc0000000, 0x40000000, COUNT_CLASS},
    118     { "Branch ICond",        "bcc",    0xc1c00000, 0x00800000, COUNT_CLASS},
    119     { "Branch Fcond",        "fbcc",   0xc1c00000, 0x01800000, COUNT_CLASS},
    120     { "SetHi",               "sethi",  0xc1c00000, 0x01000000, COUNT_CLASS},
    121     { "FPU ALU",             "fpu",    0xc1f00000, 0x81a00000, COUNT_CLASS},
    122     { "ALU",                 "alu",    0xc0000000, 0x80000000, COUNT_CLASS},
    123     { "Load/Store",          "ldst",   0xc0000000, 0xc0000000, COUNT_CLASS},
    124     /* Unclassified */
    125     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
    126 };
    127 
    128 static InsnClassExecCount sparc64_insn_classes[] = {
    129     { "SetHi & Branches",     "op0",   0xc0000000, 0x00000000, COUNT_CLASS},
    130     { "Call",                 "op1",   0xc0000000, 0x40000000, COUNT_CLASS},
    131     { "Arith/Logical/Move",   "op2",   0xc0000000, 0x80000000, COUNT_CLASS},
    132     { "Arith/Logical/Move",   "op3",   0xc0000000, 0xc0000000, COUNT_CLASS},
    133     /* Unclassified */
    134     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
    135 };
    136 
    137 /* Default matcher for currently unclassified architectures */
    138 static InsnClassExecCount default_insn_classes[] = {
    139     { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
    140 };
    141 
    142 typedef struct {
    143     const char *qemu_target;
    144     InsnClassExecCount *table;
    145     int table_sz;
    146 } ClassSelector;
    147 
    148 static ClassSelector class_tables[] = {
    149     { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
    150     { "sparc",   sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
    151     { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
    152     { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
    153 };
    154 
/* Class table selected for the current guest in qemu_plugin_install(). */
static InsnClassExecCount *class_table;
/* Number of entries in class_table. */
static int class_table_sz;
    157 
    158 static gint cmp_exec_count(gconstpointer a, gconstpointer b)
    159 {
    160     InsnExecCount *ea = (InsnExecCount *) a;
    161     InsnExecCount *eb = (InsnExecCount *) b;
    162     return ea->count > eb->count ? -1 : 1;
    163 }
    164 
    165 static void free_record(gpointer data)
    166 {
    167     InsnExecCount *rec = (InsnExecCount *) data;
    168     g_free(rec->insn);
    169     g_free(rec);
    170 }
    171 
    172 static void plugin_exit(qemu_plugin_id_t id, void *p)
    173 {
    174     g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
    175     int i;
    176     GList *counts;
    177     InsnClassExecCount *class = NULL;
    178 
    179     for (i = 0; i < class_table_sz; i++) {
    180         class = &class_table[i];
    181         switch (class->what) {
    182         case COUNT_CLASS:
    183             if (class->count || verbose) {
    184                 g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
    185                                        class->class,
    186                                        class->count);
    187             }
    188             break;
    189         case COUNT_INDIVIDUAL:
    190             g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
    191                                    class->class);
    192             break;
    193         case COUNT_NONE:
    194             g_string_append_printf(report, "Class: %-24s\tnot counted\n",
    195                                    class->class);
    196             break;
    197         default:
    198             break;
    199         }
    200     }
    201 
    202     counts = g_hash_table_get_values(insns);
    203     if (counts && g_list_next(counts)) {
    204         g_string_append_printf(report, "Individual Instructions:\n");
    205         counts = g_list_sort(counts, cmp_exec_count);
    206 
    207         for (i = 0; i < limit && g_list_next(counts);
    208              i++, counts = g_list_next(counts)) {
    209             InsnExecCount *rec = (InsnExecCount *) counts->data;
    210             g_string_append_printf(report,
    211                                    "Instr: %-24s\t(%ld hits)\t(op=0x%08x/%s)\n",
    212                                    rec->insn,
    213                                    rec->count,
    214                                    rec->opcode,
    215                                    rec->class ?
    216                                    rec->class->class : "un-categorised");
    217         }
    218         g_list_free(counts);
    219     }
    220 
    221     g_hash_table_destroy(insns);
    222 
    223     qemu_plugin_outs(report->str);
    224 }
    225 
    226 static void plugin_init(void)
    227 {
    228     insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
    229 }
    230 
    231 static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
    232 {
    233     uint64_t *count = (uint64_t *) udata;
    234     (*count)++;
    235 }
    236 
    237 static uint64_t *find_counter(struct qemu_plugin_insn *insn)
    238 {
    239     int i;
    240     uint64_t *cnt = NULL;
    241     uint32_t opcode;
    242     InsnClassExecCount *class = NULL;
    243 
    244     /*
    245      * We only match the first 32 bits of the instruction which is
    246      * fine for most RISCs but a bit limiting for CISC architectures.
    247      * They would probably benefit from a more tailored plugin.
    248      * However we can fall back to individual instruction counting.
    249      */
    250     opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
    251 
    252     for (i = 0; !cnt && i < class_table_sz; i++) {
    253         class = &class_table[i];
    254         uint32_t masked_bits = opcode & class->mask;
    255         if (masked_bits == class->pattern) {
    256             break;
    257         }
    258     }
    259 
    260     g_assert(class);
    261 
    262     switch (class->what) {
    263     case COUNT_NONE:
    264         return NULL;
    265     case COUNT_CLASS:
    266         return &class->count;
    267     case COUNT_INDIVIDUAL:
    268     {
    269         InsnExecCount *icount;
    270 
    271         g_mutex_lock(&lock);
    272         icount = (InsnExecCount *) g_hash_table_lookup(insns,
    273                                                        GUINT_TO_POINTER(opcode));
    274 
    275         if (!icount) {
    276             icount = g_new0(InsnExecCount, 1);
    277             icount->opcode = opcode;
    278             icount->insn = qemu_plugin_insn_disas(insn);
    279             icount->class = class;
    280 
    281             g_hash_table_insert(insns, GUINT_TO_POINTER(opcode),
    282                                 (gpointer) icount);
    283         }
    284         g_mutex_unlock(&lock);
    285 
    286         return &icount->count;
    287     }
    288     default:
    289         g_assert_not_reached();
    290     }
    291 
    292     return NULL;
    293 }
    294 
    295 static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
    296 {
    297     size_t n = qemu_plugin_tb_n_insns(tb);
    298     size_t i;
    299 
    300     for (i = 0; i < n; i++) {
    301         uint64_t *cnt;
    302         struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
    303         cnt = find_counter(insn);
    304 
    305         if (cnt) {
    306             if (do_inline) {
    307                 qemu_plugin_register_vcpu_insn_exec_inline(
    308                     insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1);
    309             } else {
    310                 qemu_plugin_register_vcpu_insn_exec_cb(
    311                     insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
    312             }
    313         }
    314     }
    315 }
    316 
    317 QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
    318                                            const qemu_info_t *info,
    319                                            int argc, char **argv)
    320 {
    321     int i;
    322 
    323     /* Select a class table appropriate to the guest architecture */
    324     for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
    325         ClassSelector *entry = &class_tables[i];
    326         if (!entry->qemu_target ||
    327             strcmp(entry->qemu_target, info->target_name) == 0) {
    328             class_table = entry->table;
    329             class_table_sz = entry->table_sz;
    330             break;
    331         }
    332     }
    333 
    334     for (i = 0; i < argc; i++) {
    335         char *p = argv[i];
    336         g_autofree char **tokens = g_strsplit(p, "=", -1);
    337         if (g_strcmp0(tokens[0], "inline") == 0) {
    338             if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
    339                 fprintf(stderr, "boolean argument parsing failed: %s\n", p);
    340                 return -1;
    341             }
    342         } else if (g_strcmp0(tokens[0], "verbose") == 0) {
    343             if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
    344                 fprintf(stderr, "boolean argument parsing failed: %s\n", p);
    345                 return -1;
    346             }
    347         } else if (g_strcmp0(tokens[0], "count") == 0) {
    348             char *value = tokens[1];
    349             int j;
    350             CountType type = COUNT_INDIVIDUAL;
    351             if (*value == '!') {
    352                 type = COUNT_NONE;
    353                 value++;
    354             }
    355             for (j = 0; j < class_table_sz; j++) {
    356                 if (strcmp(value, class_table[j].opt) == 0) {
    357                     class_table[j].what = type;
    358                     break;
    359                 }
    360             }
    361         } else {
    362             fprintf(stderr, "option parsing failed: %s\n", p);
    363             return -1;
    364         }
    365     }
    366 
    367     plugin_init();
    368 
    369     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    370     qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
    371     return 0;
    372 }