qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

kvmvapic.c (24719B)


      1 /*
      2  * TPR optimization for 32-bit Windows guests (XP and Server 2003)
      3  *
      4  * Copyright (C) 2007-2008 Qumranet Technologies
      5  * Copyright (C) 2012      Jan Kiszka, Siemens AG
      6  *
      7  * This work is licensed under the terms of the GNU GPL version 2, or
      8  * (at your option) any later version. See the COPYING file in the
      9  * top-level directory.
     10  */
     11 
     12 #include "qemu/osdep.h"
     13 #include "qemu/module.h"
     14 #include "sysemu/sysemu.h"
     15 #include "sysemu/cpus.h"
     16 #include "sysemu/hw_accel.h"
     17 #include "sysemu/kvm.h"
     18 #include "sysemu/runstate.h"
     19 #include "hw/i386/apic_internal.h"
     20 #include "hw/sysbus.h"
     21 #include "hw/boards.h"
     22 #include "migration/vmstate.h"
     23 #include "qom/object.h"
     24 
/* I/O port the device registers; the guest issues its hypercalls as writes here */
     25 #define VAPIC_IO_PORT           0x7e
     26 
/* per-CPU slots in the guest's VAPIC area are (1 << VAPIC_CPU_SHIFT) bytes apart */
     27 #define VAPIC_CPU_SHIFT         7
     28 
/* unit of the ROM size byte; the mask rounds an address down to a block boundary */
     29 #define ROM_BLOCK_SIZE          512
     30 #define ROM_BLOCK_MASK          (~(ROM_BLOCK_SIZE - 1))
     31 
/* Activation state of the device, stored in VAPICROMState.state. */
     32 typedef enum VAPICMode {
     33     VAPIC_INACTIVE = 0, /* state after reset; ROM init not reported yet */
     34     VAPIC_ACTIVE   = 1, /* set by vapic_enable() */
     35     VAPIC_STANDBY  = 2, /* ROM init reported through the 16-bit port write */
     36 } VAPICMode;
     37 
/*
 * Addresses of the handler routines provided by the option ROM.  They are
 * used as call targets when a TPR-accessing instruction is patched.
 */
     38 typedef struct VAPICHandlers {
     39     uint32_t set_tpr;       /* used for "mov r32 to r/m32" and "mov imm32, r/m32" */
     40     uint32_t set_tpr_eax;   /* used for "mov eax to abs" */
     41     uint32_t get_tpr[8];    /* indexed by ModRM reg field; [0] also serves "mov abs to eax" */
     42     uint32_t get_tpr_stack; /* used for "push r/m32" */
     43 } QEMU_PACKED VAPICHandlers;
     44 
/*
 * State block shared with the option ROM.  It is copied as raw bytes from and
 * to guest physical memory at rom_state_paddr; the 32-bit fields are accessed
 * through le32_to_cpu()/cpu_to_le32() elsewhere in this file.
 */
     45 typedef struct GuestROMState {
     46     char signature[8];      /* must read "kvm aPiC" for the ROM to be accepted */
     47     uint32_t vaddr;         /* compared against the ROM's detected virtual address */
     48     uint32_t fixup_start;   /* first fixup entry (entries are 4 bytes apart) */
     49     uint32_t fixup_end;     /* end of the fixup entries (exclusive) */
     50     uint32_t vapic_vaddr;   /* used with vaddr to derive vapic_paddr */
     51     uint32_t vapic_size;    /* number of bytes cleared on resume with multiple CPUs */
     52     uint32_t vcpu_shift;    /* filled with VAPIC_CPU_SHIFT by the host */
     53     uint32_t real_tpr_addr; /* guest-virtual TPR address, filled by the host */
     54     VAPICHandlers up;       /* handlers used when smp.cpus == 1 */
     55     VAPICHandlers mp;       /* handlers used otherwise */
     56 } QEMU_PACKED GuestROMState;
     57 
/* Per-device state of the "kvmvapic" sysbus device. */
     58 struct VAPICROMState {
     59     SysBusDevice busdev;
     60     MemoryRegion io;              /* I/O region registered at VAPIC_IO_PORT */
     61     MemoryRegion rom;             /* alias that overlays the ROM area (see vapic_map_rom_writable) */
     62     uint32_t state;               /* one of VAPICMode */
     63     uint32_t rom_state_paddr;     /* guest-physical address of GuestROMState; 0 after reset */
     64     uint32_t rom_state_vaddr;     /* guest-virtual address of GuestROMState */
     65     uint32_t vapic_paddr;         /* guest-physical base of the VAPIC area */
     66     uint32_t real_tpr_addr;       /* guest-virtual address of the TPR */
     67     GuestROMState rom_state;      /* cached copy of the guest's state block */
     68     size_t rom_size;              /* ROM size in bytes, taken from the ROM's size byte */
     69     bool rom_mapped_writable;     /* true once the rom alias has been added */
     70     VMChangeStateEntry *vmsentry; /* change-state handler registered in post_load */
     71 };
     72 
/* QOM type name and cast helper (VAPIC()) for VAPICROMState */
     73 #define TYPE_VAPIC "kvmvapic"
     74 OBJECT_DECLARE_SIMPLE_TYPE(VAPICROMState, VAPIC)
     75 
/* Bits for TPRInstruction.flags, evaluated by opcode_matches() */
     76 #define TPR_INSTR_ABS_MODRM             0x1 /* second byte must satisfy is_abs_modrm() */
     77 #define TPR_INSTR_MATCH_MODRM_REG       0x2 /* ModRM reg field must equal .modrm_reg */
     78 
/* Description of one instruction form that is recognised as a TPR access. */
     79 typedef struct TPRInstruction {
     80     uint8_t opcode;      /* first instruction byte */
     81     uint8_t modrm_reg;   /* required ModRM reg field (with TPR_INSTR_MATCH_MODRM_REG) */
     82     unsigned int flags;  /* TPR_INSTR_* bits */
     83     TPRAccess access;    /* read or write access */
     84     size_t length;       /* total instruction length in bytes */
     85     off_t addr_offset;   /* offset of the 32-bit address within the instruction */
     86 } TPRInstruction;
     87 
     88 /* must be sorted by length, shortest first */
     89 static const TPRInstruction tpr_instr[] = {
     90     { /* mov abs to eax */
     91         .opcode = 0xa1,
     92         .access = TPR_ACCESS_READ,
     93         .length = 5,
     94         .addr_offset = 1,
     95     },
     96     { /* mov eax to abs */
     97         .opcode = 0xa3,
     98         .access = TPR_ACCESS_WRITE,
     99         .length = 5,
    100         .addr_offset = 1,
    101     },
    102     { /* mov r32 to r/m32 */
    103         .opcode = 0x89,
    104         .flags = TPR_INSTR_ABS_MODRM,
    105         .access = TPR_ACCESS_WRITE,
    106         .length = 6,
    107         .addr_offset = 2,
    108     },
    109     { /* mov r/m32 to r32 */
    110         .opcode = 0x8b,
    111         .flags = TPR_INSTR_ABS_MODRM,
    112         .access = TPR_ACCESS_READ,
    113         .length = 6,
    114         .addr_offset = 2,
    115     },
    116     { /* push r/m32 */
    117         .opcode = 0xff,
    118         .modrm_reg = 6,
    119         .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
    120         .access = TPR_ACCESS_READ,
    121         .length = 6,
    122         .addr_offset = 2,
    123     },
    124     { /* mov imm32, r/m32 (c7/0) */
    125         .opcode = 0xc7,
    126         .modrm_reg = 0,
    127         .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
    128         .access = TPR_ACCESS_WRITE,
    129         .length = 10,
    130         .addr_offset = 2,
    131     },
    132 };
    133 
    134 static void read_guest_rom_state(VAPICROMState *s)
    135 {
    136     cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state,
    137                              sizeof(GuestROMState));
    138 }
    139 
    140 static void write_guest_rom_state(VAPICROMState *s)
    141 {
    142     cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state,
    143                               sizeof(GuestROMState));
    144 }
    145 
    146 static void update_guest_rom_state(VAPICROMState *s)
    147 {
    148     read_guest_rom_state(s);
    149 
    150     s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr);
    151     s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT);
    152 
    153     write_guest_rom_state(s);
    154 }
    155 
    156 static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env)
    157 {
    158     CPUState *cs = env_cpu(env);
    159     hwaddr paddr;
    160     target_ulong addr;
    161 
    162     if (s->state == VAPIC_ACTIVE) {
    163         return 0;
    164     }
    165     /*
    166      * If there is no prior TPR access instruction we could analyze (which is
    167      * the case after resume from hibernation), we need to scan the possible
    168      * virtual address space for the APIC mapping.
    169      */
    170     for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) {
    171         paddr = cpu_get_phys_page_debug(cs, addr);
    172         if (paddr != APIC_DEFAULT_ADDRESS) {
    173             continue;
    174         }
    175         s->real_tpr_addr = addr + 0x80;
    176         update_guest_rom_state(s);
    177         return 0;
    178     }
    179     return -1;
    180 }
    181 
    182 static uint8_t modrm_reg(uint8_t modrm)
    183 {
    184     return (modrm >> 3) & 7;
    185 }
    186 
    187 static bool is_abs_modrm(uint8_t modrm)
    188 {
    189     return (modrm & 0xc7) == 0x05;
    190 }
    191 
    192 static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr)
    193 {
    194     return opcode[0] == instr->opcode &&
    195         (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) &&
    196         (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) ||
    197          modrm_reg(opcode[1]) == instr->modrm_reg);
    198 }
    199 
    200 static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu,
    201                                     target_ulong *pip, TPRAccess access)
    202 {
    203     CPUState *cs = CPU(cpu);
    204     const TPRInstruction *instr;
    205     target_ulong ip = *pip;
    206     uint8_t opcode[2];
    207     uint32_t real_tpr_addr;
    208     int i;
    209 
    210     if ((ip & 0xf0000000ULL) != 0x80000000ULL &&
    211         (ip & 0xf0000000ULL) != 0xe0000000ULL) {
    212         return -1;
    213     }
    214 
    215     /*
    216      * Early Windows 2003 SMP initialization contains a
    217      *
    218      *   mov imm32, r/m32
    219      *
    220      * instruction that is patched by TPR optimization. The problem is that
    221      * RSP, used by the patched instruction, is zero, so the guest gets a
    222      * double fault and dies.
    223      */
    224     if (cpu->env.regs[R_ESP] == 0) {
    225         return -1;
    226     }
    227 
    228     if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
    229         /*
    230          * KVM without kernel-based TPR access reporting will pass an IP that
    231          * points after the accessing instruction. So we need to look backward
    232          * to find the reason.
    233          */
    234         for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
    235             instr = &tpr_instr[i];
    236             if (instr->access != access) {
    237                 continue;
    238             }
    239             if (cpu_memory_rw_debug(cs, ip - instr->length, opcode,
    240                                     sizeof(opcode), 0) < 0) {
    241                 return -1;
    242             }
    243             if (opcode_matches(opcode, instr)) {
    244                 ip -= instr->length;
    245                 goto instruction_ok;
    246             }
    247         }
    248         return -1;
    249     } else {
    250         if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) {
    251             return -1;
    252         }
    253         for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
    254             instr = &tpr_instr[i];
    255             if (opcode_matches(opcode, instr)) {
    256                 goto instruction_ok;
    257             }
    258         }
    259         return -1;
    260     }
    261 
    262 instruction_ok:
    263     /*
    264      * Grab the virtual TPR address from the instruction
    265      * and update the cached values.
    266      */
    267     if (cpu_memory_rw_debug(cs, ip + instr->addr_offset,
    268                             (void *)&real_tpr_addr,
    269                             sizeof(real_tpr_addr), 0) < 0) {
    270         return -1;
    271     }
    272     real_tpr_addr = le32_to_cpu(real_tpr_addr);
    273     if ((real_tpr_addr & 0xfff) != 0x80) {
    274         return -1;
    275     }
    276     s->real_tpr_addr = real_tpr_addr;
    277     update_guest_rom_state(s);
    278 
    279     *pip = ip;
    280     return 0;
    281 }
    282 
    283 static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip)
    284 {
    285     CPUState *cs = env_cpu(env);
    286     hwaddr paddr;
    287     uint32_t rom_state_vaddr;
    288     uint32_t pos, patch, offset;
    289 
    290     /* nothing to do if already activated */
    291     if (s->state == VAPIC_ACTIVE) {
    292         return 0;
    293     }
    294 
    295     /* bail out if ROM init code was not executed (missing ROM?) */
    296     if (s->state == VAPIC_INACTIVE) {
    297         return -1;
    298     }
    299 
    300     /* find out virtual address of the ROM */
    301     rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000);
    302     paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr);
    303     if (paddr == -1) {
    304         return -1;
    305     }
    306     paddr += rom_state_vaddr & ~TARGET_PAGE_MASK;
    307     if (paddr != s->rom_state_paddr) {
    308         return -1;
    309     }
    310     read_guest_rom_state(s);
    311     if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) {
    312         return -1;
    313     }
    314     s->rom_state_vaddr = rom_state_vaddr;
    315 
    316     /* fixup addresses in ROM if needed */
    317     if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) {
    318         return 0;
    319     }
    320     for (pos = le32_to_cpu(s->rom_state.fixup_start);
    321          pos < le32_to_cpu(s->rom_state.fixup_end);
    322          pos += 4) {
    323         cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr,
    324                                  &offset, sizeof(offset));
    325         offset = le32_to_cpu(offset);
    326         cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch));
    327         patch = le32_to_cpu(patch);
    328         patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr);
    329         patch = cpu_to_le32(patch);
    330         cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch));
    331     }
    332     read_guest_rom_state(s);
    333     s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) -
    334         le32_to_cpu(s->rom_state.vaddr);
    335 
    336     return 0;
    337 }
    338 
    339 /*
    340  * Tries to read the unique processor number from the Kernel Processor Control
    341  * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR
    342  * cannot be accessed or is considered invalid. This also ensures that we are
    343  * not patching the wrong guest.
    344  */
    345 static int get_kpcr_number(X86CPU *cpu)
    346 {
    347     CPUX86State *env = &cpu->env;
    348     struct kpcr {
    349         uint8_t  fill1[0x1c];
    350         uint32_t self;
    351         uint8_t  fill2[0x31];
    352         uint8_t  number;
    353     } QEMU_PACKED kpcr;
    354 
    355     if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base,
    356                             (void *)&kpcr, sizeof(kpcr), 0) < 0 ||
    357         kpcr.self != env->segs[R_FS].base) {
    358         return -1;
    359     }
    360     return kpcr.number;
    361 }
    362 
    363 static int vapic_enable(VAPICROMState *s, X86CPU *cpu)
    364 {
    365     int cpu_number = get_kpcr_number(cpu);
    366     hwaddr vapic_paddr;
    367     static const uint8_t enabled = 1;
    368 
    369     if (cpu_number < 0) {
    370         return -1;
    371     }
    372     vapic_paddr = s->vapic_paddr +
    373         (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT);
    374     cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled),
    375                               &enabled, sizeof(enabled));
    376     apic_enable_vapic(cpu->apic_state, vapic_paddr);
    377 
    378     s->state = VAPIC_ACTIVE;
    379 
    380     return 0;
    381 }
    382 
    383 static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte)
    384 {
    385     cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1);
    386 }
    387 
    388 static void patch_call(X86CPU *cpu, target_ulong ip, uint32_t target)
    389 {
    390     uint32_t offset;
    391 
    392     offset = cpu_to_le32(target - ip - 5);
    393     patch_byte(cpu, ip, 0xe8); /* call near */
    394     cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1);
    395 }
    396 
/*
 * Work item passed from patch_instruction() to do_patch_instruction().
 * It is allocated by the former and freed by the latter.
 */
    397 typedef struct PatchInfo {
    398     VAPICHandlers *handler; /* handler set (up or mp) to call into */
    399     target_ulong ip;        /* guest address of the instruction to patch */
    400 } PatchInfo;
    401 
    402 static void do_patch_instruction(CPUState *cs, run_on_cpu_data data)
    403 {
    404     X86CPU *x86_cpu = X86_CPU(cs);
    405     PatchInfo *info = (PatchInfo *) data.host_ptr;
    406     VAPICHandlers *handlers = info->handler;
    407     target_ulong ip = info->ip;
    408     uint8_t opcode[2];
    409     uint32_t imm32 = 0;
    410 
    411     cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0);
    412 
    413     switch (opcode[0]) {
    414     case 0x89: /* mov r32 to r/m32 */
    415         patch_byte(x86_cpu, ip, 0x50 + modrm_reg(opcode[1]));  /* push reg */
    416         patch_call(x86_cpu, ip + 1, handlers->set_tpr);
    417         break;
    418     case 0x8b: /* mov r/m32 to r32 */
    419         patch_byte(x86_cpu, ip, 0x90);
    420         patch_call(x86_cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]);
    421         break;
    422     case 0xa1: /* mov abs to eax */
    423         patch_call(x86_cpu, ip, handlers->get_tpr[0]);
    424         break;
    425     case 0xa3: /* mov eax to abs */
    426         patch_call(x86_cpu, ip, handlers->set_tpr_eax);
    427         break;
    428     case 0xc7: /* mov imm32, r/m32 (c7/0) */
    429         patch_byte(x86_cpu, ip, 0x68);  /* push imm32 */
    430         cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0);
    431         cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1);
    432         patch_call(x86_cpu, ip + 5, handlers->set_tpr);
    433         break;
    434     case 0xff: /* push r/m32 */
    435         patch_byte(x86_cpu, ip, 0x50); /* push eax */
    436         patch_call(x86_cpu, ip + 1, handlers->get_tpr_stack);
    437         break;
    438     default:
    439         abort();
    440     }
    441 
    442     g_free(info);
    443 }
    444 
    445 static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
    446 {
    447     MachineState *ms = MACHINE(qdev_get_machine());
    448     CPUState *cs = CPU(cpu);
    449     VAPICHandlers *handlers;
    450     PatchInfo *info;
    451 
    452     if (ms->smp.cpus == 1) {
    453         handlers = &s->rom_state.up;
    454     } else {
    455         handlers = &s->rom_state.mp;
    456     }
    457 
    458     info  = g_new(PatchInfo, 1);
    459     info->handler = handlers;
    460     info->ip = ip;
    461 
    462     async_safe_run_on_cpu(cs, do_patch_instruction, RUN_ON_CPU_HOST_PTR(info));
    463 }
    464 
    465 void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip,
    466                              TPRAccess access)
    467 {
    468     VAPICROMState *s = VAPIC(dev);
    469     X86CPU *cpu = X86_CPU(cs);
    470     CPUX86State *env = &cpu->env;
    471 
    472     cpu_synchronize_state(cs);
    473 
    474     if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) {
    475         if (s->state == VAPIC_ACTIVE) {
    476             vapic_enable(s, cpu);
    477         }
    478         return;
    479     }
    480     if (update_rom_mapping(s, env, ip) < 0) {
    481         return;
    482     }
    483     if (vapic_enable(s, cpu) < 0) {
    484         return;
    485     }
    486     patch_instruction(s, cpu, ip);
    487 }
    488 
/* Argument block for vapic_do_enable_tpr_reporting(). */
    489 typedef struct VAPICEnableTPRReporting {
    490     DeviceState *apic; /* APIC of the CPU the request is run on */
    491     bool enable;       /* requested TPR access reporting state */
    492 } VAPICEnableTPRReporting;
    493 
    494 static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data)
    495 {
    496     VAPICEnableTPRReporting *info = data.host_ptr;
    497     apic_enable_tpr_access_reporting(info->apic, info->enable);
    498 }
    499 
    500 static void vapic_enable_tpr_reporting(bool enable)
    501 {
    502     VAPICEnableTPRReporting info = {
    503         .enable = enable,
    504     };
    505     CPUState *cs;
    506     X86CPU *cpu;
    507 
    508     CPU_FOREACH(cs) {
    509         cpu = X86_CPU(cs);
    510         info.apic = cpu->apic_state;
    511         run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info));
    512     }
    513 }
    514 
    515 static void vapic_reset(DeviceState *dev)
    516 {
    517     VAPICROMState *s = VAPIC(dev);
    518 
    519     s->state = VAPIC_INACTIVE;
    520     s->rom_state_paddr = 0;
    521     vapic_enable_tpr_reporting(false);
    522 }
    523 
    524 /*
    525  * Set the IRQ polling hypercalls to the supported variant:
    526  *  - vmcall if using KVM in-kernel irqchip
    527  *  - 32-bit VAPIC port write otherwise
    528  */
    529 static int patch_hypercalls(VAPICROMState *s)
    530 {
    531     hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
    532     static const uint8_t vmcall_pattern[] = { /* vmcall */
    533         0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1
    534     };
    535     static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */
    536         0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e
    537     };
    538     uint8_t alternates[2];
    539     const uint8_t *pattern;
    540     const uint8_t *patch;
    541     off_t pos;
    542     uint8_t *rom;
    543 
    544     rom = g_malloc(s->rom_size);
    545     cpu_physical_memory_read(rom_paddr, rom, s->rom_size);
    546 
    547     for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) {
    548         if (kvm_irqchip_in_kernel()) {
    549             pattern = outl_pattern;
    550             alternates[0] = outl_pattern[7];
    551             alternates[1] = outl_pattern[7];
    552             patch = &vmcall_pattern[5];
    553         } else {
    554             pattern = vmcall_pattern;
    555             alternates[0] = vmcall_pattern[7];
    556             alternates[1] = 0xd9; /* AMD's VMMCALL */
    557             patch = &outl_pattern[5];
    558         }
    559         if (memcmp(rom + pos, pattern, 7) == 0 &&
    560             (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) {
    561             cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3);
    562             /*
    563              * Don't flush the tb here. Under ordinary conditions, the patched
    564              * calls are miles away from the current IP. Under malicious
    565              * conditions, the guest could trick us to crash.
    566              */
    567         }
    568     }
    569 
    570     g_free(rom);
    571     return 0;
    572 }
    573 
    574 /*
    575  * For TCG mode or the time KVM honors read-only memory regions, we need to
    576  * enable write access to the option ROM so that variables can be updated by
    577  * the guest.
    578  */
    579 static int vapic_map_rom_writable(VAPICROMState *s)
    580 {
    581     hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
    582     MemoryRegionSection section;
    583     MemoryRegion *as;
    584     size_t rom_size;
    585     uint8_t *ram;
    586 
    587     as = sysbus_address_space(&s->busdev);
    588 
    589     if (s->rom_mapped_writable) {
    590         memory_region_del_subregion(as, &s->rom);
    591         object_unparent(OBJECT(&s->rom));
    592     }
    593 
    594     /* grab RAM memory region (region @rom_paddr may still be pc.rom) */
    595     section = memory_region_find(as, 0, 1);
    596 
    597     /* read ROM size from RAM region */
    598     if (rom_paddr + 2 >= memory_region_size(section.mr)) {
    599         return -1;
    600     }
    601     ram = memory_region_get_ram_ptr(section.mr);
    602     rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE;
    603     if (rom_size == 0) {
    604         return -1;
    605     }
    606     s->rom_size = rom_size;
    607 
    608     /* We need to round to avoid creating subpages
    609      * from which we cannot run code. */
    610     rom_size += rom_paddr & ~TARGET_PAGE_MASK;
    611     rom_paddr &= TARGET_PAGE_MASK;
    612     rom_size = TARGET_PAGE_ALIGN(rom_size);
    613 
    614     memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr,
    615                              rom_paddr, rom_size);
    616     memory_region_add_subregion_overlap(as, rom_paddr, &s->rom, 1000);
    617     s->rom_mapped_writable = true;
    618     memory_region_unref(section.mr);
    619 
    620     return 0;
    621 }
    622 
    623 static int vapic_prepare(VAPICROMState *s)
    624 {
    625     if (vapic_map_rom_writable(s) < 0) {
    626         return -1;
    627     }
    628 
    629     if (patch_hypercalls(s) < 0) {
    630         return -1;
    631     }
    632 
    633     vapic_enable_tpr_reporting(true);
    634 
    635     return 0;
    636 }
    637 
    638 static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
    639                         unsigned int size)
    640 {
    641     VAPICROMState *s = opaque;
    642     X86CPU *cpu;
    643     CPUX86State *env;
    644     hwaddr rom_paddr;
    645 
    646     if (!current_cpu) {
    647         return;
    648     }
    649 
    650     cpu_synchronize_state(current_cpu);
    651     cpu = X86_CPU(current_cpu);
    652     env = &cpu->env;
    653 
    654     /*
    655      * The VAPIC supports two PIO-based hypercalls, both via port 0x7E.
    656      *  o 16-bit write access:
    657      *    Reports the option ROM initialization to the hypervisor. Written
    658      *    value is the offset of the state structure in the ROM.
    659      *  o 8-bit write access:
    660      *    Reactivates the VAPIC after a guest hibernation, i.e. after the
    661      *    option ROM content has been re-initialized by a guest power cycle.
    662      *  o 32-bit write access:
    663      *    Poll for pending IRQs, considering the current VAPIC state.
    664      */
    665     switch (size) {
    666     case 2:
    667         if (s->state == VAPIC_INACTIVE) {
    668             rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK;
    669             s->rom_state_paddr = rom_paddr + data;
    670 
    671             s->state = VAPIC_STANDBY;
    672         }
    673         if (vapic_prepare(s) < 0) {
    674             s->state = VAPIC_INACTIVE;
    675             s->rom_state_paddr = 0;
    676             break;
    677         }
    678         break;
    679     case 1:
    680         if (kvm_enabled()) {
    681             /*
    682              * Disable triggering instruction in ROM by writing a NOP.
    683              *
    684              * We cannot do this in TCG mode as the reported IP is not
    685              * accurate.
    686              */
    687             pause_all_vcpus();
    688             patch_byte(cpu, env->eip - 2, 0x66);
    689             patch_byte(cpu, env->eip - 1, 0x90);
    690             resume_all_vcpus();
    691         }
    692 
    693         if (s->state == VAPIC_ACTIVE) {
    694             break;
    695         }
    696         if (update_rom_mapping(s, env, env->eip) < 0) {
    697             break;
    698         }
    699         if (find_real_tpr_addr(s, env) < 0) {
    700             break;
    701         }
    702         vapic_enable(s, cpu);
    703         break;
    704     default:
    705     case 4:
    706         if (!kvm_irqchip_in_kernel()) {
    707             apic_poll_irq(cpu->apic_state);
    708         }
    709         break;
    710     }
    711 }
    712 
    713 static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
    714 {
    715     return 0xffffffff;
    716 }
    717 
    718 static const MemoryRegionOps vapic_ops = {
    719     .write = vapic_write,
    720     .read = vapic_read,
    721     .endianness = DEVICE_NATIVE_ENDIAN,
    722 };
    723 
    724 static void vapic_realize(DeviceState *dev, Error **errp)
    725 {
    726     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
    727     VAPICROMState *s = VAPIC(dev);
    728 
    729     memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2);
    730     sysbus_add_io(sbd, VAPIC_IO_PORT, &s->io);
    731     sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2);
    732 
    733     option_rom[nb_option_roms].name = "kvmvapic.bin";
    734     option_rom[nb_option_roms].bootindex = -1;
    735     nb_option_roms++;
    736 }
    737 
    738 static void do_vapic_enable(CPUState *cs, run_on_cpu_data data)
    739 {
    740     VAPICROMState *s = data.host_ptr;
    741     X86CPU *cpu = X86_CPU(cs);
    742 
    743     static const uint8_t enabled = 1;
    744     cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
    745                               &enabled, sizeof(enabled));
    746     apic_enable_vapic(cpu->apic_state, s->vapic_paddr);
    747     s->state = VAPIC_ACTIVE;
    748 }
    749 
    750 static void kvmvapic_vm_state_change(void *opaque, bool running,
    751                                      RunState state)
    752 {
    753     MachineState *ms = MACHINE(qdev_get_machine());
    754     VAPICROMState *s = opaque;
    755     uint8_t *zero;
    756 
    757     if (!running) {
    758         return;
    759     }
    760 
    761     if (s->state == VAPIC_ACTIVE) {
    762         if (ms->smp.cpus == 1) {
    763             run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s));
    764         } else {
    765             zero = g_malloc0(s->rom_state.vapic_size);
    766             cpu_physical_memory_write(s->vapic_paddr, zero,
    767                                       s->rom_state.vapic_size);
    768             g_free(zero);
    769         }
    770     }
    771 
    772     qemu_del_vm_change_state_handler(s->vmsentry);
    773     s->vmsentry = NULL;
    774 }
    775 
    776 static int vapic_post_load(void *opaque, int version_id)
    777 {
    778     VAPICROMState *s = opaque;
    779 
    780     /*
    781      * The old implementation of qemu-kvm did not provide the state
    782      * VAPIC_STANDBY. Reconstruct it.
    783      */
    784     if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) {
    785         s->state = VAPIC_STANDBY;
    786     }
    787 
    788     if (s->state != VAPIC_INACTIVE) {
    789         if (vapic_prepare(s) < 0) {
    790             return -1;
    791         }
    792     }
    793 
    794     if (!s->vmsentry) {
    795         s->vmsentry =
    796             qemu_add_vm_change_state_handler(kvmvapic_vm_state_change, s);
    797     }
    798     return 0;
    799 }
    800 
/* Migration description of one VAPICHandlers set; lists every field of the struct. */
    801 static const VMStateDescription vmstate_handlers = {
    802     .name = "kvmvapic-handlers",
    803     .version_id = 1,
    804     .minimum_version_id = 1,
    805     .fields = (VMStateField[]) {
    806         VMSTATE_UINT32(set_tpr, VAPICHandlers),
    807         VMSTATE_UINT32(set_tpr_eax, VAPICHandlers),
    808         VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8),
    809         VMSTATE_UINT32(get_tpr_stack, VAPICHandlers),
    810         VMSTATE_END_OF_LIST()
    811     }
    812 };
    813 
/*
 * Migration description of the cached GuestROMState.  The 8-byte signature
 * is declared as unused data; the up and mp handler sets are nested through
 * vmstate_handlers.
 */
    814 static const VMStateDescription vmstate_guest_rom = {
    815     .name = "kvmvapic-guest-rom",
    816     .version_id = 1,
    817     .minimum_version_id = 1,
    818     .fields = (VMStateField[]) {
    819         VMSTATE_UNUSED(8),     /* signature */
    820         VMSTATE_UINT32(vaddr, GuestROMState),
    821         VMSTATE_UINT32(fixup_start, GuestROMState),
    822         VMSTATE_UINT32(fixup_end, GuestROMState),
    823         VMSTATE_UINT32(vapic_vaddr, GuestROMState),
    824         VMSTATE_UINT32(vapic_size, GuestROMState),
    825         VMSTATE_UINT32(vcpu_shift, GuestROMState),
    826         VMSTATE_UINT32(real_tpr_addr, GuestROMState),
    827         VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
    828         VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
    829         VMSTATE_END_OF_LIST()
    830     }
    831 };
    832 
/*
 * Top-level migration description of the device.  vapic_post_load() runs
 * after the fields below have been loaded.
 */
    833 static const VMStateDescription vmstate_vapic = {
    834     .name = "kvm-tpr-opt",      /* compatible with qemu-kvm VAPIC */
    835     .version_id = 1,
    836     .minimum_version_id = 1,
    837     .post_load = vapic_post_load,
    838     .fields = (VMStateField[]) {
    839         VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom,
    840                        GuestROMState),
    841         VMSTATE_UINT32(state, VAPICROMState),
    842         VMSTATE_UINT32(real_tpr_addr, VAPICROMState),
    843         VMSTATE_UINT32(rom_state_vaddr, VAPICROMState),
    844         VMSTATE_UINT32(vapic_paddr, VAPICROMState),
    845         VMSTATE_UINT32(rom_state_paddr, VAPICROMState),
    846         VMSTATE_END_OF_LIST()
    847     }
    848 };
    849 
    850 static void vapic_class_init(ObjectClass *klass, void *data)
    851 {
    852     DeviceClass *dc = DEVICE_CLASS(klass);
    853 
    854     dc->reset   = vapic_reset;
    855     dc->vmsd    = &vmstate_vapic;
    856     dc->realize = vapic_realize;
    857 }
    858 
    859 static const TypeInfo vapic_type = {
    860     .name          = TYPE_VAPIC,
    861     .parent        = TYPE_SYS_BUS_DEVICE,
    862     .instance_size = sizeof(VAPICROMState),
    863     .class_init    = vapic_class_init,
    864 };
    865 
/* Register the kvmvapic type described by vapic_type. */
    866 static void vapic_register(void)
    867 {
    868     type_register_static(&vapic_type);
    869 }
    870 
/* hand vapic_register() to QEMU's type_init() hook */
    871 type_init(vapic_register);