qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

hax-all.c (30619B)


      1 /*
      2  * QEMU HAX support
      3  *
      4  * Copyright IBM, Corp. 2008
      5  *           Red Hat, Inc. 2008
      6  *
      7  * Authors:
      8  *  Anthony Liguori   <aliguori@us.ibm.com>
      9  *  Glauber Costa     <gcosta@redhat.com>
     10  *
     11  * Copyright (c) 2011 Intel Corporation
     12  *  Written by:
     13  *  Jiang Yunhong<yunhong.jiang@intel.com>
     14  *  Xin Xiaohui<xiaohui.xin@intel.com>
     15  *  Zhang Xiantao<xiantao.zhang@intel.com>
     16  *
     17  * This work is licensed under the terms of the GNU GPL, version 2 or later.
     18  * See the COPYING file in the top-level directory.
     19  *
     20  */
     21 
     22 /*
     23  * HAX common code for both windows and darwin
     24  */
     25 
     26 #include "qemu/osdep.h"
     27 #include "cpu.h"
     28 #include "exec/address-spaces.h"
     29 
     30 #include "qemu/accel.h"
     31 #include "sysemu/reset.h"
     32 #include "sysemu/runstate.h"
     33 #include "hw/boards.h"
     34 
     35 #include "hax-accel-ops.h"
     36 
/* Compile-time switch for the debug trace macro below; 0 disables output. */
#define DEBUG_HAX 0

/*
 * printf-style debug trace written to stdout.
 *
 * The call is always compiled (so the format string and its arguments stay
 * type-checked), but it only produces output when DEBUG_HAX is non-zero.
 * The do { } while (0) wrapper lets the macro be used as a single statement.
 */
#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)
     45 
/*
 * HAX API version implemented by this file. hax_version_support() rejects a
 * module whose compat_version is newer than this value.
 */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/*
 * Minimum HAX kernel module version accepted. hax_version_support() rejects
 * a module whose cur_version is older than this value.
 */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

/*
 * Not written anywhere in this part of the file.
 * NOTE(review): presumably set by the accelerator framework when HAX is
 * selected -- confirm against the users of this flag.
 */
bool hax_allowed;

/* Process-wide HAX state; reset and filled in by hax_init(). */
struct hax_state hax_global;

/* Forward declarations for helpers defined later in this file. */
static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);
     57 
     58 int valid_hax_tunnel_size(uint16_t size)
     59 {
     60     return size >= sizeof(struct hax_tunnel);
     61 }
     62 
     63 hax_fd hax_vcpu_get_fd(CPUArchState *env)
     64 {
     65     struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
     66     if (!vcpu) {
     67         return HAX_INVALID_FD;
     68     }
     69     return vcpu->fd;
     70 }
     71 
     72 static int hax_get_capability(struct hax_state *hax)
     73 {
     74     int ret;
     75     struct hax_capabilityinfo capinfo, *cap = &capinfo;
     76 
     77     ret = hax_capability(hax, cap);
     78     if (ret) {
     79         return ret;
     80     }
     81 
     82     if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
     83         if (cap->winfo & HAX_CAP_FAILREASON_VT) {
     84             DPRINTF
     85                 ("VTX feature is not enabled, HAX driver will not work.\n");
     86         } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
     87             DPRINTF
     88                 ("NX feature is not enabled, HAX driver will not work.\n");
     89         }
     90         return -ENXIO;
     91 
     92     }
     93 
     94     if (!(cap->winfo & HAX_CAP_UG)) {
     95         fprintf(stderr, "UG mode is not supported by the hardware.\n");
     96         return -ENOTSUP;
     97     }
     98 
     99     hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
    100 
    101     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
    102         if (cap->mem_quota < hax->mem_quota) {
    103             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
    104             return -ENOSPC;
    105         }
    106     }
    107     return 0;
    108 }
    109 
    110 static int hax_version_support(struct hax_state *hax)
    111 {
    112     int ret;
    113     struct hax_module_version version;
    114 
    115     ret = hax_mod_version(hax, &version);
    116     if (ret < 0) {
    117         return 0;
    118     }
    119 
    120     if (hax_min_version > version.cur_version) {
    121         fprintf(stderr, "Incompatible HAX module version %d,",
    122                 version.cur_version);
    123         fprintf(stderr, "requires minimum version %d\n", hax_min_version);
    124         return 0;
    125     }
    126     if (hax_cur_version < version.compat_version) {
    127         fprintf(stderr, "Incompatible QEMU HAX API version %x,",
    128                 hax_cur_version);
    129         fprintf(stderr, "requires minimum HAX API version %x\n",
    130                 version.compat_version);
    131         return 0;
    132     }
    133 
    134     return 1;
    135 }
    136 
    137 int hax_vcpu_create(int id)
    138 {
    139     struct hax_vcpu_state *vcpu = NULL;
    140     int ret;
    141 
    142     if (!hax_global.vm) {
    143         fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
    144         return -1;
    145     }
    146 
    147     if (hax_global.vm->vcpus[id]) {
    148         fprintf(stderr, "vcpu %x allocated already\n", id);
    149         return 0;
    150     }
    151 
    152     vcpu = g_new0(struct hax_vcpu_state, 1);
    153 
    154     ret = hax_host_create_vcpu(hax_global.vm->fd, id);
    155     if (ret) {
    156         fprintf(stderr, "Failed to create vcpu %x\n", id);
    157         goto error;
    158     }
    159 
    160     vcpu->vcpu_id = id;
    161     vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
    162     if (hax_invalid_fd(vcpu->fd)) {
    163         fprintf(stderr, "Failed to open the vcpu\n");
    164         ret = -ENODEV;
    165         goto error;
    166     }
    167 
    168     hax_global.vm->vcpus[id] = vcpu;
    169 
    170     ret = hax_host_setup_vcpu_channel(vcpu);
    171     if (ret) {
    172         fprintf(stderr, "Invalid hax tunnel size\n");
    173         ret = -EINVAL;
    174         goto error;
    175     }
    176     return 0;
    177 
    178   error:
    179     /* vcpu and tunnel will be closed automatically */
    180     if (vcpu && !hax_invalid_fd(vcpu->fd)) {
    181         hax_close_fd(vcpu->fd);
    182     }
    183 
    184     hax_global.vm->vcpus[id] = NULL;
    185     g_free(vcpu);
    186     return -1;
    187 }
    188 
    189 int hax_vcpu_destroy(CPUState *cpu)
    190 {
    191     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    192 
    193     if (!hax_global.vm) {
    194         fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
    195         return -1;
    196     }
    197 
    198     if (!vcpu) {
    199         return 0;
    200     }
    201 
    202     /*
    203      * 1. The hax_tunnel is also destroyed when vcpu is destroyed
    204      * 2. close fd will cause hax module vcpu be cleaned
    205      */
    206     hax_close_fd(vcpu->fd);
    207     hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
    208     g_free(vcpu);
    209     return 0;
    210 }
    211 
    212 int hax_init_vcpu(CPUState *cpu)
    213 {
    214     int ret;
    215 
    216     ret = hax_vcpu_create(cpu->cpu_index);
    217     if (ret < 0) {
    218         fprintf(stderr, "Failed to create HAX vcpu\n");
    219         exit(-1);
    220     }
    221 
    222     cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
    223     cpu->vcpu_dirty = true;
    224     qemu_register_reset(hax_reset_vcpu_state, cpu->env_ptr);
    225 
    226     return ret;
    227 }
    228 
    229 struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
    230 {
    231     struct hax_vm *vm;
    232     int vm_id = 0, ret, i;
    233 
    234     if (hax_invalid_fd(hax->fd)) {
    235         return NULL;
    236     }
    237 
    238     if (hax->vm) {
    239         return hax->vm;
    240     }
    241 
    242     if (max_cpus > HAX_MAX_VCPU) {
    243         fprintf(stderr, "Maximum VCPU number QEMU supported is %d\n", HAX_MAX_VCPU);
    244         return NULL;
    245     }
    246 
    247     vm = g_new0(struct hax_vm, 1);
    248 
    249     ret = hax_host_create_vm(hax, &vm_id);
    250     if (ret) {
    251         fprintf(stderr, "Failed to create vm %x\n", ret);
    252         goto error;
    253     }
    254     vm->id = vm_id;
    255     vm->fd = hax_host_open_vm(hax, vm_id);
    256     if (hax_invalid_fd(vm->fd)) {
    257         fprintf(stderr, "Failed to open vm %d\n", vm_id);
    258         goto error;
    259     }
    260 
    261     vm->numvcpus = max_cpus;
    262     vm->vcpus = g_new0(struct hax_vcpu_state *, vm->numvcpus);
    263     for (i = 0; i < vm->numvcpus; i++) {
    264         vm->vcpus[i] = NULL;
    265     }
    266 
    267     hax->vm = vm;
    268     return vm;
    269 
    270   error:
    271     g_free(vm);
    272     hax->vm = NULL;
    273     return NULL;
    274 }
    275 
    276 int hax_vm_destroy(struct hax_vm *vm)
    277 {
    278     int i;
    279 
    280     for (i = 0; i < vm->numvcpus; i++)
    281         if (vm->vcpus[i]) {
    282             fprintf(stderr, "VCPU should be cleaned before vm clean\n");
    283             return -1;
    284         }
    285     hax_close_fd(vm->fd);
    286     vm->numvcpus = 0;
    287     g_free(vm->vcpus);
    288     g_free(vm);
    289     hax_global.vm = NULL;
    290     return 0;
    291 }
    292 
    293 static int hax_init(ram_addr_t ram_size, int max_cpus)
    294 {
    295     struct hax_state *hax = NULL;
    296     struct hax_qemu_version qversion;
    297     int ret;
    298 
    299     hax = &hax_global;
    300 
    301     memset(hax, 0, sizeof(struct hax_state));
    302     hax->mem_quota = ram_size;
    303 
    304     hax->fd = hax_mod_open();
    305     if (hax_invalid_fd(hax->fd)) {
    306         hax->fd = 0;
    307         ret = -ENODEV;
    308         goto error;
    309     }
    310 
    311     ret = hax_get_capability(hax);
    312 
    313     if (ret) {
    314         if (ret != -ENOSPC) {
    315             ret = -EINVAL;
    316         }
    317         goto error;
    318     }
    319 
    320     if (!hax_version_support(hax)) {
    321         ret = -EINVAL;
    322         goto error;
    323     }
    324 
    325     hax->vm = hax_vm_create(hax, max_cpus);
    326     if (!hax->vm) {
    327         fprintf(stderr, "Failed to create HAX VM\n");
    328         ret = -EINVAL;
    329         goto error;
    330     }
    331 
    332     hax_memory_init();
    333 
    334     qversion.cur_version = hax_cur_version;
    335     qversion.min_version = hax_min_version;
    336     hax_notify_qemu_version(hax->vm->fd, &qversion);
    337 
    338     return ret;
    339   error:
    340     if (hax->vm) {
    341         hax_vm_destroy(hax->vm);
    342     }
    343     if (hax->fd) {
    344         hax_mod_close(hax);
    345     }
    346 
    347     return ret;
    348 }
    349 
    350 static int hax_accel_init(MachineState *ms)
    351 {
    352     int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);
    353 
    354     if (ret && (ret != -ENOSPC)) {
    355         fprintf(stderr, "No accelerator found.\n");
    356     } else {
    357         fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
    358                 !ret ? "working" : "not working",
    359                 !ret ? "fast virt" : "emulation");
    360     }
    361     return ret;
    362 }
    363 
    364 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
    365 {
    366     if (hft->direction < 2) {
    367         cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
    368                                hft->direction);
    369     } else {
    370         /*
    371          * HAX API v4 supports transferring data between two MMIO addresses,
    372          * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
    373          *  hft->direction == 2: gpa ==> gpa2
    374          */
    375         uint64_t value;
    376         cpu_physical_memory_read(hft->gpa, &value, hft->size);
    377         cpu_physical_memory_write(hft->gpa2, &value, hft->size);
    378     }
    379 
    380     return 0;
    381 }
    382 
    383 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
    384                          int direction, int size, int count, void *buffer)
    385 {
    386     uint8_t *ptr;
    387     int i;
    388     MemTxAttrs attrs = { 0 };
    389 
    390     if (!df) {
    391         ptr = (uint8_t *) buffer;
    392     } else {
    393         ptr = buffer + size * count - size;
    394     }
    395     for (i = 0; i < count; i++) {
    396         address_space_rw(&address_space_io, port, attrs,
    397                          ptr, size, direction == HAX_EXIT_IO_OUT);
    398         if (!df) {
    399             ptr += size;
    400         } else {
    401             ptr -= size;
    402         }
    403     }
    404 
    405     return 0;
    406 }
    407 
    408 static int hax_vcpu_interrupt(CPUArchState *env)
    409 {
    410     CPUState *cpu = env_cpu(env);
    411     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    412     struct hax_tunnel *ht = vcpu->tunnel;
    413 
    414     /*
    415      * Try to inject an interrupt if the guest can accept it
    416      * Unlike KVM, HAX kernel check for the eflags, instead of qemu
    417      */
    418     if (ht->ready_for_interrupt_injection &&
    419         (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
    420         int irq;
    421 
    422         irq = cpu_get_pic_interrupt(env);
    423         if (irq >= 0) {
    424             hax_inject_interrupt(env, irq);
    425             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
    426         }
    427     }
    428 
    429     /* If we have an interrupt but the guest is not ready to receive an
    430      * interrupt, request an interrupt window exit.  This will
    431      * cause a return to userspace as soon as the guest is ready to
    432      * receive interrupts. */
    433     if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
    434         ht->request_interrupt_window = 1;
    435     } else {
    436         ht->request_interrupt_window = 0;
    437     }
    438     return 0;
    439 }
    440 
    441 void hax_raise_event(CPUState *cpu)
    442 {
    443     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    444 
    445     if (!vcpu) {
    446         return;
    447     }
    448     vcpu->tunnel->user_event_pending = 1;
    449 }
    450 
/*
 * Ask hax kernel module to run the CPU for us till:
 * 1. Guest crash or shutdown
 * 2. Need QEMU's emulation like guest execute MMIO instruction
 * 3. Guest execute HLT
 * 4. QEMU have Signal/event pending
 * 5. An unknown VMX exit happens
 *
 * Before entering the run loop, pending POLL/INIT/SIPI requests are handled
 * and the halted state is re-evaluated; a halted vcpu is not run at all.
 *
 * Inside the loop, ret == 0 means "run the vcpu again", ret > 0 means "leave
 * the loop normally" and ret < 0 means "leave the loop with a fatal error".
 *
 * Returns 1 when the loop ended with a fatal error (ret < 0) and 0
 * otherwise.
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    do {
        int hax_ret;

        /* An exit request ends the loop without running the guest again. */
        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        /* The iothread lock is dropped for the duration of the guest run. */
        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue the vcpu_run if system call interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            /* ret is still 0 here, so the loop condition re-enters. */
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
            abort();
        }
        /* Dispatch on the exit status the run left in the tunnel. */
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                            ht->pio._direction,
                            ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            /* Negative ret makes this function report a fatal error. */
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /*
                 * No hard interrupt or NMI is pending: mark the vcpu halted
                 * and leave the loop. IF_MASK is set unconditionally so that
                 * the unhalt check at the top of this function does not
                 * depend on a possibly stale env->eflags (see note above).
                 */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these situations will continue to hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    /* Consume the exit request and report it to the caller as an interrupt. */
    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}
    616 
    617 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
    618 {
    619     CPUArchState *env = cpu->env_ptr;
    620 
    621     hax_arch_get_registers(env);
    622     cpu->vcpu_dirty = true;
    623 }
    624 
    625 void hax_cpu_synchronize_state(CPUState *cpu)
    626 {
    627     if (!cpu->vcpu_dirty) {
    628         run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
    629     }
    630 }
    631 
    632 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
    633                                               run_on_cpu_data arg)
    634 {
    635     CPUArchState *env = cpu->env_ptr;
    636 
    637     hax_vcpu_sync_state(env, 1);
    638     cpu->vcpu_dirty = false;
    639 }
    640 
/*
 * Dispatch do_hax_cpu_synchronize_post_reset for @cpu through run_on_cpu(),
 * with no argument.
 */
void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
    645 
    646 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
    647 {
    648     CPUArchState *env = cpu->env_ptr;
    649 
    650     hax_vcpu_sync_state(env, 1);
    651     cpu->vcpu_dirty = false;
    652 }
    653 
/*
 * Dispatch do_hax_cpu_synchronize_post_init for @cpu through run_on_cpu(),
 * with no argument.
 */
void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
    658 
/*
 * run_on_cpu() callback used before loading a VM state: only marks the CPU
 * dirty. @arg is unused.
 * NOTE(review): presumably this makes the loaded state take precedence over
 * what the HAX module holds -- confirm against the code that consumes
 * vcpu_dirty.
 */
static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
    663 
/*
 * Dispatch do_hax_cpu_synchronize_pre_loadvm for @cpu through run_on_cpu(),
 * with no argument.
 */
void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
    668 
    669 int hax_smp_cpu_exec(CPUState *cpu)
    670 {
    671     CPUArchState *env = cpu->env_ptr;
    672     int fatal;
    673     int ret;
    674 
    675     while (1) {
    676         if (cpu->exception_index >= EXCP_INTERRUPT) {
    677             ret = cpu->exception_index;
    678             cpu->exception_index = -1;
    679             break;
    680         }
    681 
    682         fatal = hax_vcpu_hax_exec(env);
    683 
    684         if (fatal) {
    685             fprintf(stderr, "Unsupported HAX vcpu return\n");
    686             abort();
    687         }
    688     }
    689 
    690     return ret;
    691 }
    692 
    693 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
    694 {
    695     memset(lhs, 0, sizeof(struct segment_desc_t));
    696     lhs->selector = rhs->selector;
    697     lhs->base = rhs->base;
    698     lhs->limit = rhs->limit;
    699     lhs->type = 3;
    700     lhs->present = 1;
    701     lhs->dpl = 3;
    702     lhs->operand_size = 0;
    703     lhs->desc = 1;
    704     lhs->long_mode = 0;
    705     lhs->granularity = 0;
    706     lhs->available = 0;
    707 }
    708 
    709 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
    710 {
    711     lhs->selector = rhs->selector;
    712     lhs->base = rhs->base;
    713     lhs->limit = rhs->limit;
    714     lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
    715         | (rhs->present * DESC_P_MASK)
    716         | (rhs->dpl << DESC_DPL_SHIFT)
    717         | (rhs->operand_size << DESC_B_SHIFT)
    718         | (rhs->desc * DESC_S_MASK)
    719         | (rhs->long_mode << DESC_L_SHIFT)
    720         | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
    721 }
    722 
    723 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
    724 {
    725     unsigned flags = rhs->flags;
    726 
    727     memset(lhs, 0, sizeof(struct segment_desc_t));
    728     lhs->selector = rhs->selector;
    729     lhs->base = rhs->base;
    730     lhs->limit = rhs->limit;
    731     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    732     lhs->present = (flags & DESC_P_MASK) != 0;
    733     lhs->dpl = rhs->selector & 3;
    734     lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
    735     lhs->desc = (flags & DESC_S_MASK) != 0;
    736     lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
    737     lhs->granularity = (flags & DESC_G_MASK) != 0;
    738     lhs->available = (flags & DESC_AVL_MASK) != 0;
    739 }
    740 
    741 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
    742 {
    743     target_ulong reg = *hax_reg;
    744 
    745     if (set) {
    746         *hax_reg = *qemu_reg;
    747     } else {
    748         *qemu_reg = reg;
    749     }
    750 }
    751 
    752 /* The sregs has been synced with HAX kernel already before this call */
    753 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
    754 {
    755     get_seg(&env->segs[R_CS], &sregs->_cs);
    756     get_seg(&env->segs[R_DS], &sregs->_ds);
    757     get_seg(&env->segs[R_ES], &sregs->_es);
    758     get_seg(&env->segs[R_FS], &sregs->_fs);
    759     get_seg(&env->segs[R_GS], &sregs->_gs);
    760     get_seg(&env->segs[R_SS], &sregs->_ss);
    761 
    762     get_seg(&env->tr, &sregs->_tr);
    763     get_seg(&env->ldt, &sregs->_ldt);
    764     env->idt.limit = sregs->_idt.limit;
    765     env->idt.base = sregs->_idt.base;
    766     env->gdt.limit = sregs->_gdt.limit;
    767     env->gdt.base = sregs->_gdt.base;
    768     return 0;
    769 }
    770 
    771 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
    772 {
    773     if ((env->eflags & VM_MASK)) {
    774         set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
    775         set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
    776         set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
    777         set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
    778         set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
    779         set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
    780     } else {
    781         set_seg(&sregs->_cs, &env->segs[R_CS]);
    782         set_seg(&sregs->_ds, &env->segs[R_DS]);
    783         set_seg(&sregs->_es, &env->segs[R_ES]);
    784         set_seg(&sregs->_fs, &env->segs[R_FS]);
    785         set_seg(&sregs->_gs, &env->segs[R_GS]);
    786         set_seg(&sregs->_ss, &env->segs[R_SS]);
    787 
    788         if (env->cr[0] & CR0_PE_MASK) {
    789             /* force ss cpl to cs cpl */
    790             sregs->_ss.selector = (sregs->_ss.selector & ~3) |
    791                                   (sregs->_cs.selector & 3);
    792             sregs->_ss.dpl = sregs->_ss.selector & 3;
    793         }
    794     }
    795 
    796     set_seg(&sregs->_tr, &env->tr);
    797     set_seg(&sregs->_ldt, &env->ldt);
    798     sregs->_idt.limit = env->idt.limit;
    799     sregs->_idt.base = env->idt.base;
    800     sregs->_gdt.limit = env->gdt.limit;
    801     sregs->_gdt.base = env->gdt.base;
    802     return 0;
    803 }
    804 
    805 static int hax_sync_vcpu_register(CPUArchState *env, int set)
    806 {
    807     struct vcpu_state_t regs;
    808     int ret;
    809     memset(&regs, 0, sizeof(struct vcpu_state_t));
    810 
    811     if (!set) {
    812         ret = hax_sync_vcpu_state(env, &regs, 0);
    813         if (ret < 0) {
    814             return -1;
    815         }
    816     }
    817 
    818     /* generic register */
    819     hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
    820     hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
    821     hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
    822     hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
    823     hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
    824     hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
    825     hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
    826     hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
    827 #ifdef TARGET_X86_64
    828     hax_getput_reg(&regs._r8, &env->regs[8], set);
    829     hax_getput_reg(&regs._r9, &env->regs[9], set);
    830     hax_getput_reg(&regs._r10, &env->regs[10], set);
    831     hax_getput_reg(&regs._r11, &env->regs[11], set);
    832     hax_getput_reg(&regs._r12, &env->regs[12], set);
    833     hax_getput_reg(&regs._r13, &env->regs[13], set);
    834     hax_getput_reg(&regs._r14, &env->regs[14], set);
    835     hax_getput_reg(&regs._r15, &env->regs[15], set);
    836 #endif
    837     hax_getput_reg(&regs._rflags, &env->eflags, set);
    838     hax_getput_reg(&regs._rip, &env->eip, set);
    839 
    840     if (set) {
    841         regs._cr0 = env->cr[0];
    842         regs._cr2 = env->cr[2];
    843         regs._cr3 = env->cr[3];
    844         regs._cr4 = env->cr[4];
    845         hax_set_segments(env, &regs);
    846     } else {
    847         env->cr[0] = regs._cr0;
    848         env->cr[2] = regs._cr2;
    849         env->cr[3] = regs._cr3;
    850         env->cr[4] = regs._cr4;
    851         hax_get_segments(env, &regs);
    852     }
    853 
    854     if (set) {
    855         ret = hax_sync_vcpu_state(env, &regs, 1);
    856         if (ret < 0) {
    857             return -1;
    858         }
    859     }
    860     return 0;
    861 }
    862 
    863 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
    864                               uint64_t value)
    865 {
    866     item->entry = index;
    867     item->value = value;
    868 }
    869 
    870 static int hax_get_msrs(CPUArchState *env)
    871 {
    872     struct hax_msr_data md;
    873     struct vmx_msr *msrs = md.entries;
    874     int ret, i, n;
    875 
    876     n = 0;
    877     msrs[n++].entry = MSR_IA32_SYSENTER_CS;
    878     msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
    879     msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
    880     msrs[n++].entry = MSR_IA32_TSC;
    881 #ifdef TARGET_X86_64
    882     msrs[n++].entry = MSR_EFER;
    883     msrs[n++].entry = MSR_STAR;
    884     msrs[n++].entry = MSR_LSTAR;
    885     msrs[n++].entry = MSR_CSTAR;
    886     msrs[n++].entry = MSR_FMASK;
    887     msrs[n++].entry = MSR_KERNELGSBASE;
    888 #endif
    889     md.nr_msr = n;
    890     ret = hax_sync_msr(env, &md, 0);
    891     if (ret < 0) {
    892         return ret;
    893     }
    894 
    895     for (i = 0; i < md.done; i++) {
    896         switch (msrs[i].entry) {
    897         case MSR_IA32_SYSENTER_CS:
    898             env->sysenter_cs = msrs[i].value;
    899             break;
    900         case MSR_IA32_SYSENTER_ESP:
    901             env->sysenter_esp = msrs[i].value;
    902             break;
    903         case MSR_IA32_SYSENTER_EIP:
    904             env->sysenter_eip = msrs[i].value;
    905             break;
    906         case MSR_IA32_TSC:
    907             env->tsc = msrs[i].value;
    908             break;
    909 #ifdef TARGET_X86_64
    910         case MSR_EFER:
    911             env->efer = msrs[i].value;
    912             break;
    913         case MSR_STAR:
    914             env->star = msrs[i].value;
    915             break;
    916         case MSR_LSTAR:
    917             env->lstar = msrs[i].value;
    918             break;
    919         case MSR_CSTAR:
    920             env->cstar = msrs[i].value;
    921             break;
    922         case MSR_FMASK:
    923             env->fmask = msrs[i].value;
    924             break;
    925         case MSR_KERNELGSBASE:
    926             env->kernelgsbase = msrs[i].value;
    927             break;
    928 #endif
    929         }
    930     }
    931 
    932     return 0;
    933 }
    934 
    935 static int hax_set_msrs(CPUArchState *env)
    936 {
    937     struct hax_msr_data md;
    938     struct vmx_msr *msrs;
    939     msrs = md.entries;
    940     int n = 0;
    941 
    942     memset(&md, 0, sizeof(struct hax_msr_data));
    943     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    944     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    945     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    946     hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
    947 #ifdef TARGET_X86_64
    948     hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
    949     hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    950     hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    951     hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    952     hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    953     hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    954 #endif
    955     md.nr_msr = n;
    956     md.done = 0;
    957 
    958     return hax_sync_msr(env, &md, 1);
    959 }
    960 
    961 static int hax_get_fpu(CPUArchState *env)
    962 {
    963     struct fx_layout fpu;
    964     int i, ret;
    965 
    966     ret = hax_sync_fpu(env, &fpu, 0);
    967     if (ret < 0) {
    968         return ret;
    969     }
    970 
    971     env->fpstt = (fpu.fsw >> 11) & 7;
    972     env->fpus = fpu.fsw;
    973     env->fpuc = fpu.fcw;
    974     for (i = 0; i < 8; ++i) {
    975         env->fptags[i] = !((fpu.ftw >> i) & 1);
    976     }
    977     memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
    978 
    979     for (i = 0; i < 8; i++) {
    980         env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
    981         env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
    982         if (CPU_NB_REGS > 8) {
    983             env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
    984             env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
    985         }
    986     }
    987     env->mxcsr = fpu.mxcsr;
    988 
    989     return 0;
    990 }
    991 
    992 static int hax_set_fpu(CPUArchState *env)
    993 {
    994     struct fx_layout fpu;
    995     int i;
    996 
    997     memset(&fpu, 0, sizeof(fpu));
    998     fpu.fsw = env->fpus & ~(7 << 11);
    999     fpu.fsw |= (env->fpstt & 7) << 11;
   1000     fpu.fcw = env->fpuc;
   1001 
   1002     for (i = 0; i < 8; ++i) {
   1003         fpu.ftw |= (!env->fptags[i]) << i;
   1004     }
   1005 
   1006     memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
   1007     for (i = 0; i < 8; i++) {
   1008         stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
   1009         stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
   1010         if (CPU_NB_REGS > 8) {
   1011             stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
   1012             stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
   1013         }
   1014     }
   1015 
   1016     fpu.mxcsr = env->mxcsr;
   1017 
   1018     return hax_sync_fpu(env, &fpu, 1);
   1019 }
   1020 
   1021 static int hax_arch_get_registers(CPUArchState *env)
   1022 {
   1023     int ret;
   1024 
   1025     ret = hax_sync_vcpu_register(env, 0);
   1026     if (ret < 0) {
   1027         return ret;
   1028     }
   1029 
   1030     ret = hax_get_fpu(env);
   1031     if (ret < 0) {
   1032         return ret;
   1033     }
   1034 
   1035     ret = hax_get_msrs(env);
   1036     if (ret < 0) {
   1037         return ret;
   1038     }
   1039 
   1040     x86_update_hflags(env);
   1041     return 0;
   1042 }
   1043 
   1044 static int hax_arch_set_registers(CPUArchState *env)
   1045 {
   1046     int ret;
   1047     ret = hax_sync_vcpu_register(env, 1);
   1048 
   1049     if (ret < 0) {
   1050         fprintf(stderr, "Failed to sync vcpu reg\n");
   1051         return ret;
   1052     }
   1053     ret = hax_set_fpu(env);
   1054     if (ret < 0) {
   1055         fprintf(stderr, "FPU failed\n");
   1056         return ret;
   1057     }
   1058     ret = hax_set_msrs(env);
   1059     if (ret < 0) {
   1060         fprintf(stderr, "MSR failed\n");
   1061         return ret;
   1062     }
   1063 
   1064     return 0;
   1065 }
   1066 
   1067 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
   1068 {
   1069     if (hax_enabled()) {
   1070         if (modified) {
   1071             hax_arch_set_registers(env);
   1072         } else {
   1073             hax_arch_get_registers(env);
   1074         }
   1075     }
   1076 }
   1077 
/*
 * This is much simpler than KVM, at least in the first stage, because
 * we don't need to consider device pass-through or the framebuffer,
 * and we may even remove the BIOS altogether.
 */
   1083 int hax_sync_vcpus(void)
   1084 {
   1085     if (hax_enabled()) {
   1086         CPUState *cpu;
   1087 
   1088         cpu = first_cpu;
   1089         if (!cpu) {
   1090             return 0;
   1091         }
   1092 
   1093         for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
   1094             int ret;
   1095 
   1096             ret = hax_arch_set_registers(cpu->env_ptr);
   1097             if (ret < 0) {
   1098                 return ret;
   1099             }
   1100         }
   1101     }
   1102 
   1103     return 0;
   1104 }
   1105 
   1106 void hax_reset_vcpu_state(void *opaque)
   1107 {
   1108     CPUState *cpu;
   1109     for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
   1110         cpu->hax_vcpu->tunnel->user_event_pending = 0;
   1111         cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
   1112     }
   1113 }
   1114 
   1115 static void hax_accel_class_init(ObjectClass *oc, void *data)
   1116 {
   1117     AccelClass *ac = ACCEL_CLASS(oc);
   1118     ac->name = "HAX";
   1119     ac->init_machine = hax_accel_init;
   1120     ac->allowed = &hax_allowed;
   1121 }
   1122 
   1123 static const TypeInfo hax_accel_type = {
   1124     .name = ACCEL_CLASS_NAME("hax"),
   1125     .parent = TYPE_ACCEL,
   1126     .class_init = hax_accel_class_init,
   1127 };
   1128 
   1129 static void hax_type_init(void)
   1130 {
   1131     type_register_static(&hax_accel_type);
   1132 }
   1133 
   1134 type_init(hax_type_init);