qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

xics_kvm.c (14395B)


      1 /*
      2  * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
      3  *
      4  * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics, in-kernel emulation
      5  *
      6  * Copyright (c) 2013 David Gibson, IBM Corporation.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a copy
      9  * of this software and associated documentation files (the "Software"), to deal
     10  * in the Software without restriction, including without limitation the rights
     11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     12  * copies of the Software, and to permit persons to whom the Software is
     13  * furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included in
     16  * all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     21  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     24  * THE SOFTWARE.
     25  *
     26  */
     27 
     28 #include "qemu/osdep.h"
     29 #include "qapi/error.h"
     30 #include "trace.h"
     31 #include "sysemu/kvm.h"
     32 #include "hw/ppc/spapr.h"
     33 #include "hw/ppc/spapr_cpu_core.h"
     34 #include "hw/ppc/xics.h"
     35 #include "hw/ppc/xics_spapr.h"
     36 #include "kvm_ppc.h"
     37 #include "qemu/config-file.h"
     38 #include "qemu/error-report.h"
     39 
     40 #include <sys/ioctl.h>
     41 
     42 static int kernel_xics_fd = -1;
     43 
     44 typedef struct KVMEnabledICP {
     45     unsigned long vcpu_id;
     46     QLIST_ENTRY(KVMEnabledICP) node;
     47 } KVMEnabledICP;
     48 
     49 static QLIST_HEAD(, KVMEnabledICP)
     50     kvm_enabled_icps = QLIST_HEAD_INITIALIZER(&kvm_enabled_icps);
     51 
     52 static void kvm_disable_icps(void)
     53 {
     54     KVMEnabledICP *enabled_icp, *next;
     55 
     56     QLIST_FOREACH_SAFE(enabled_icp, &kvm_enabled_icps, node, next) {
     57         QLIST_REMOVE(enabled_icp, node);
     58         g_free(enabled_icp);
     59     }
     60 }
     61 
     62 /*
     63  * ICP-KVM
     64  */
     65 void icp_get_kvm_state(ICPState *icp)
     66 {
     67     uint64_t state;
     68     int ret;
     69 
     70     /* The KVM XICS device is not in use */
     71     if (kernel_xics_fd == -1) {
     72         return;
     73     }
     74 
     75     /* ICP for this CPU thread is not in use, exiting */
     76     if (!icp->cs) {
     77         return;
     78     }
     79 
     80     ret = kvm_get_one_reg(icp->cs, KVM_REG_PPC_ICP_STATE, &state);
     81     if (ret != 0) {
     82         error_report("Unable to retrieve KVM interrupt controller state"
     83                 " for CPU %ld: %s", kvm_arch_vcpu_id(icp->cs), strerror(errno));
     84         exit(1);
     85     }
     86 
     87     icp->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
     88     icp->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
     89         & KVM_REG_PPC_ICP_MFRR_MASK;
     90     icp->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
     91         & KVM_REG_PPC_ICP_PPRI_MASK;
     92 }
     93 
     94 static void do_icp_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
     95 {
     96     icp_get_kvm_state(arg.host_ptr);
     97 }
     98 
     99 void icp_synchronize_state(ICPState *icp)
    100 {
    101     if (icp->cs) {
    102         run_on_cpu(icp->cs, do_icp_synchronize_state, RUN_ON_CPU_HOST_PTR(icp));
    103     }
    104 }
    105 
    106 int icp_set_kvm_state(ICPState *icp, Error **errp)
    107 {
    108     uint64_t state;
    109     int ret;
    110 
    111     /* The KVM XICS device is not in use */
    112     if (kernel_xics_fd == -1) {
    113         return 0;
    114     }
    115 
    116     /* ICP for this CPU thread is not in use, exiting */
    117     if (!icp->cs) {
    118         return 0;
    119     }
    120 
    121     state = ((uint64_t)icp->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
    122         | ((uint64_t)icp->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
    123         | ((uint64_t)icp->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
    124 
    125     ret = kvm_set_one_reg(icp->cs, KVM_REG_PPC_ICP_STATE, &state);
    126     if (ret < 0) {
    127         error_setg_errno(errp, -ret,
    128                          "Unable to restore KVM interrupt controller state (0x%"
    129                          PRIx64 ") for CPU %ld", state,
    130                          kvm_arch_vcpu_id(icp->cs));
    131         return ret;
    132     }
    133 
    134     return 0;
    135 }
    136 
    137 void icp_kvm_realize(DeviceState *dev, Error **errp)
    138 {
    139     ICPState *icp = ICP(dev);
    140     CPUState *cs;
    141     KVMEnabledICP *enabled_icp;
    142     unsigned long vcpu_id;
    143     int ret;
    144 
    145     /* The KVM XICS device is not in use */
    146     if (kernel_xics_fd == -1) {
    147         return;
    148     }
    149 
    150     cs = icp->cs;
    151     vcpu_id = kvm_arch_vcpu_id(cs);
    152 
    153     /*
    154      * If we are reusing a parked vCPU fd corresponding to the CPU
    155      * which was hot-removed earlier we don't have to renable
    156      * KVM_CAP_IRQ_XICS capability again.
    157      */
    158     QLIST_FOREACH(enabled_icp, &kvm_enabled_icps, node) {
    159         if (enabled_icp->vcpu_id == vcpu_id) {
    160             return;
    161         }
    162     }
    163 
    164     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, vcpu_id);
    165     if (ret < 0) {
    166         Error *local_err = NULL;
    167 
    168         error_setg(&local_err, "Unable to connect CPU%ld to kernel XICS: %s",
    169                    vcpu_id, strerror(errno));
    170         if (errno == ENOSPC) {
    171             error_append_hint(&local_err, "Try -smp maxcpus=N with N < %u\n",
    172                               MACHINE(qdev_get_machine())->smp.max_cpus);
    173         }
    174         error_propagate(errp, local_err);
    175         return;
    176     }
    177     enabled_icp = g_malloc(sizeof(*enabled_icp));
    178     enabled_icp->vcpu_id = vcpu_id;
    179     QLIST_INSERT_HEAD(&kvm_enabled_icps, enabled_icp, node);
    180 }
    181 
    182 /*
    183  * ICS-KVM
    184  */
    185 void ics_get_kvm_state(ICSState *ics)
    186 {
    187     uint64_t state;
    188     int i;
    189 
    190     /* The KVM XICS device is not in use */
    191     if (kernel_xics_fd == -1) {
    192         return;
    193     }
    194 
    195     for (i = 0; i < ics->nr_irqs; i++) {
    196         ICSIRQState *irq = &ics->irqs[i];
    197 
    198         if (ics_irq_free(ics, i)) {
    199             continue;
    200         }
    201 
    202         kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES,
    203                           i + ics->offset, &state, false, &error_fatal);
    204 
    205         irq->server = state & KVM_XICS_DESTINATION_MASK;
    206         irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT)
    207             & KVM_XICS_PRIORITY_MASK;
    208         /*
    209          * To be consistent with the software emulation in xics.c, we
    210          * split out the masked state + priority that we get from the
    211          * kernel into 'current priority' (0xff if masked) and
    212          * 'saved priority' (if masked, this is the priority the
    213          * interrupt had before it was masked).  Masking and unmasking
    214          * are done with the ibm,int-off and ibm,int-on RTAS calls.
    215          */
    216         if (state & KVM_XICS_MASKED) {
    217             irq->priority = 0xff;
    218         } else {
    219             irq->priority = irq->saved_priority;
    220         }
    221 
    222         irq->status = 0;
    223         if (state & KVM_XICS_PENDING) {
    224             if (state & KVM_XICS_LEVEL_SENSITIVE) {
    225                 irq->status |= XICS_STATUS_ASSERTED;
    226             } else {
    227                 /*
    228                  * A pending edge-triggered interrupt (or MSI)
    229                  * must have been rejected previously when we
    230                  * first detected it and tried to deliver it,
    231                  * so mark it as pending and previously rejected
    232                  * for consistency with how xics.c works.
    233                  */
    234                 irq->status |= XICS_STATUS_MASKED_PENDING
    235                     | XICS_STATUS_REJECTED;
    236             }
    237         }
    238         if (state & KVM_XICS_PRESENTED) {
    239                 irq->status |= XICS_STATUS_PRESENTED;
    240         }
    241         if (state & KVM_XICS_QUEUED) {
    242                 irq->status |= XICS_STATUS_QUEUED;
    243         }
    244     }
    245 }
    246 
    247 void ics_synchronize_state(ICSState *ics)
    248 {
    249     ics_get_kvm_state(ics);
    250 }
    251 
    252 int ics_set_kvm_state_one(ICSState *ics, int srcno, Error **errp)
    253 {
    254     uint64_t state;
    255     ICSIRQState *irq = &ics->irqs[srcno];
    256     int ret;
    257 
    258     /* The KVM XICS device is not in use */
    259     if (kernel_xics_fd == -1) {
    260         return 0;
    261     }
    262 
    263     state = irq->server;
    264     state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
    265         << KVM_XICS_PRIORITY_SHIFT;
    266     if (irq->priority != irq->saved_priority) {
    267         assert(irq->priority == 0xff);
    268     }
    269 
    270     if (irq->priority == 0xff) {
    271         state |= KVM_XICS_MASKED;
    272     }
    273 
    274     if (irq->flags & XICS_FLAGS_IRQ_LSI) {
    275         state |= KVM_XICS_LEVEL_SENSITIVE;
    276         if (irq->status & XICS_STATUS_ASSERTED) {
    277             state |= KVM_XICS_PENDING;
    278         }
    279     } else {
    280         if (irq->status & XICS_STATUS_MASKED_PENDING) {
    281             state |= KVM_XICS_PENDING;
    282         }
    283     }
    284     if (irq->status & XICS_STATUS_PRESENTED) {
    285         state |= KVM_XICS_PRESENTED;
    286     }
    287     if (irq->status & XICS_STATUS_QUEUED) {
    288         state |= KVM_XICS_QUEUED;
    289     }
    290 
    291     ret = kvm_device_access(kernel_xics_fd, KVM_DEV_XICS_GRP_SOURCES,
    292                             srcno + ics->offset, &state, true, errp);
    293     if (ret < 0) {
    294         return ret;
    295     }
    296 
    297     return 0;
    298 }
    299 
    300 int ics_set_kvm_state(ICSState *ics, Error **errp)
    301 {
    302     int i;
    303 
    304     /* The KVM XICS device is not in use */
    305     if (kernel_xics_fd == -1) {
    306         return 0;
    307     }
    308 
    309     for (i = 0; i < ics->nr_irqs; i++) {
    310         int ret;
    311 
    312         if (ics_irq_free(ics, i)) {
    313             continue;
    314         }
    315 
    316         ret = ics_set_kvm_state_one(ics, i, errp);
    317         if (ret < 0) {
    318             return ret;
    319         }
    320     }
    321 
    322     return 0;
    323 }
    324 
    325 void ics_kvm_set_irq(ICSState *ics, int srcno, int val)
    326 {
    327     struct kvm_irq_level args;
    328     int rc;
    329 
    330     /* The KVM XICS device should be in use */
    331     assert(kernel_xics_fd != -1);
    332 
    333     args.irq = srcno + ics->offset;
    334     if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_MSI) {
    335         if (!val) {
    336             return;
    337         }
    338         args.level = KVM_INTERRUPT_SET;
    339     } else {
    340         args.level = val ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
    341     }
    342     rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
    343     if (rc < 0) {
    344         perror("kvm_irq_line");
    345     }
    346 }
    347 
    348 int xics_kvm_connect(SpaprInterruptController *intc, uint32_t nr_servers,
    349                      Error **errp)
    350 {
    351     ICSState *ics = ICS_SPAPR(intc);
    352     int rc;
    353     CPUState *cs;
    354     Error *local_err = NULL;
    355 
    356     /*
    357      * The KVM XICS device already in use. This is the case when
    358      * rebooting under the XICS-only interrupt mode.
    359      */
    360     if (kernel_xics_fd != -1) {
    361         return 0;
    362     }
    363 
    364     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
    365         error_setg(errp,
    366                    "KVM and IRQ_XICS capability must be present for in-kernel XICS");
    367         return -1;
    368     }
    369 
    370     rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_SET_XIVE, "ibm,set-xive");
    371     if (rc < 0) {
    372         error_setg_errno(&local_err, -rc,
    373                          "kvmppc_define_rtas_kernel_token: ibm,set-xive");
    374         goto fail;
    375     }
    376 
    377     rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_GET_XIVE, "ibm,get-xive");
    378     if (rc < 0) {
    379         error_setg_errno(&local_err, -rc,
    380                          "kvmppc_define_rtas_kernel_token: ibm,get-xive");
    381         goto fail;
    382     }
    383 
    384     rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_ON, "ibm,int-on");
    385     if (rc < 0) {
    386         error_setg_errno(&local_err, -rc,
    387                          "kvmppc_define_rtas_kernel_token: ibm,int-on");
    388         goto fail;
    389     }
    390 
    391     rc = kvmppc_define_rtas_kernel_token(RTAS_IBM_INT_OFF, "ibm,int-off");
    392     if (rc < 0) {
    393         error_setg_errno(&local_err, -rc,
    394                          "kvmppc_define_rtas_kernel_token: ibm,int-off");
    395         goto fail;
    396     }
    397 
    398     /* Create the KVM XICS device */
    399     rc = kvm_create_device(kvm_state, KVM_DEV_TYPE_XICS, false);
    400     if (rc < 0) {
    401         error_setg_errno(&local_err, -rc, "Error on KVM_CREATE_DEVICE for XICS");
    402         goto fail;
    403     }
    404 
    405     /* Tell KVM about the # of VCPUs we may have (POWER9 and newer only) */
    406     if (kvm_device_check_attr(rc, KVM_DEV_XICS_GRP_CTRL,
    407                               KVM_DEV_XICS_NR_SERVERS)) {
    408         if (kvm_device_access(rc, KVM_DEV_XICS_GRP_CTRL,
    409                               KVM_DEV_XICS_NR_SERVERS, &nr_servers, true,
    410                               &local_err)) {
    411             goto fail;
    412         }
    413     }
    414 
    415     kernel_xics_fd = rc;
    416     kvm_kernel_irqchip = true;
    417     kvm_msi_via_irqfd_allowed = true;
    418     kvm_gsi_direct_mapping = true;
    419 
    420     /* Create the presenters */
    421     CPU_FOREACH(cs) {
    422         PowerPCCPU *cpu = POWERPC_CPU(cs);
    423 
    424         icp_kvm_realize(DEVICE(spapr_cpu_state(cpu)->icp), &local_err);
    425         if (local_err) {
    426             goto fail;
    427         }
    428     }
    429 
    430     /* Update the KVM sources */
    431     ics_set_kvm_state(ics, &local_err);
    432     if (local_err) {
    433         goto fail;
    434     }
    435 
    436     /* Connect the presenters to the initial VCPUs of the machine */
    437     CPU_FOREACH(cs) {
    438         PowerPCCPU *cpu = POWERPC_CPU(cs);
    439         icp_set_kvm_state(spapr_cpu_state(cpu)->icp, &local_err);
    440         if (local_err) {
    441             goto fail;
    442         }
    443     }
    444 
    445     return 0;
    446 
    447 fail:
    448     error_propagate(errp, local_err);
    449     xics_kvm_disconnect(intc);
    450     return -1;
    451 }
    452 
    453 void xics_kvm_disconnect(SpaprInterruptController *intc)
    454 {
    455     /*
    456      * Only on P9 using the XICS-on XIVE KVM device:
    457      *
    458      * When the KVM device fd is closed, the device is destroyed and
    459      * removed from the list of devices of the VM. The VCPU presenters
    460      * are also detached from the device.
    461      */
    462     if (kernel_xics_fd != -1) {
    463         close(kernel_xics_fd);
    464         kernel_xics_fd = -1;
    465     }
    466 
    467     kvmppc_define_rtas_kernel_token(0, "ibm,set-xive");
    468     kvmppc_define_rtas_kernel_token(0, "ibm,get-xive");
    469     kvmppc_define_rtas_kernel_token(0, "ibm,int-on");
    470     kvmppc_define_rtas_kernel_token(0, "ibm,int-off");
    471 
    472     kvm_kernel_irqchip = false;
    473     kvm_msi_via_irqfd_allowed = false;
    474     kvm_gsi_direct_mapping = false;
    475 
    476     /* Clear the presenter from the VCPUs */
    477     kvm_disable_icps();
    478 }
    479 
    480 /*
    481  * This is a heuristic to detect older KVMs on POWER9 hosts that don't
    482  * support destruction of a KVM XICS device while the VM is running.
    483  * Required to start a spapr machine with ic-mode=dual,kernel-irqchip=on.
    484  */
    485 bool xics_kvm_has_broken_disconnect(void)
    486 {
    487     int rc;
    488 
    489     rc = kvm_create_device(kvm_state, KVM_DEV_TYPE_XICS, false);
    490     if (rc < 0) {
    491         /*
    492          * The error is ignored on purpose. The KVM XICS setup code
    493          * will catch it again anyway. The goal here is to see if
    494          * close() actually destroys the device or not.
    495          */
    496         return false;
    497     }
    498 
    499     close(rc);
    500 
    501     rc = kvm_create_device(kvm_state, KVM_DEV_TYPE_XICS, false);
    502     if (rc >= 0) {
    503         close(rc);
    504         return false;
    505     }
    506 
    507     return errno == EEXIST;
    508 }