qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

spapr_irq.c (16853B)


      1 /*
      2  * QEMU PowerPC sPAPR IRQ interface
      3  *
      4  * Copyright (c) 2018, IBM Corporation.
      5  *
      6  * This code is licensed under the GPL version 2 or later. See the
      7  * COPYING file in the top-level directory.
      8  */
      9 
     10 #include "qemu/osdep.h"
     11 #include "qemu/log.h"
     12 #include "qemu/error-report.h"
     13 #include "qapi/error.h"
     14 #include "hw/irq.h"
     15 #include "hw/ppc/spapr.h"
     16 #include "hw/ppc/spapr_cpu_core.h"
     17 #include "hw/ppc/spapr_xive.h"
     18 #include "hw/ppc/xics.h"
     19 #include "hw/ppc/xics_spapr.h"
     20 #include "hw/qdev-properties.h"
     21 #include "cpu-models.h"
     22 #include "sysemu/kvm.h"
     23 
     24 #include "trace.h"
     25 
     26 static const TypeInfo spapr_intc_info = {
     27     .name = TYPE_SPAPR_INTC,
     28     .parent = TYPE_INTERFACE,
     29     .class_size = sizeof(SpaprInterruptControllerClass),
     30 };
     31 
     32 static void spapr_irq_msi_init(SpaprMachineState *spapr)
     33 {
     34     if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
     35         /* Legacy mode doesn't use this allocator */
     36         return;
     37     }
     38 
     39     spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
     40     spapr->irq_map = bitmap_new(spapr->irq_map_nr);
     41 }
     42 
     43 int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
     44                         Error **errp)
     45 {
     46     int irq;
     47 
     48     /*
     49      * The 'align_mask' parameter of bitmap_find_next_zero_area()
     50      * should be one less than a power of 2; 0 means no
     51      * alignment. Adapt the 'align' value of the former allocator
     52      * to fit the requirements of bitmap_find_next_zero_area()
     53      */
     54     align -= 1;
     55 
     56     irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num,
     57                                      align);
     58     if (irq == spapr->irq_map_nr) {
     59         error_setg(errp, "can't find a free %d-IRQ block", num);
     60         return -1;
     61     }
     62 
     63     bitmap_set(spapr->irq_map, irq, num);
     64 
     65     return irq + SPAPR_IRQ_MSI;
     66 }
     67 
     68 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
     69 {
     70     bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num);
     71 }
     72 
     73 int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
     74                        SpaprInterruptController *intc,
     75                        uint32_t nr_servers,
     76                        Error **errp)
     77 {
     78     Error *local_err = NULL;
     79 
     80     if (kvm_enabled() && kvm_kernel_irqchip_allowed()) {
     81         if (fn(intc, nr_servers, &local_err) < 0) {
     82             if (kvm_kernel_irqchip_required()) {
     83                 error_prepend(&local_err,
     84                               "kernel_irqchip requested but unavailable: ");
     85                 error_propagate(errp, local_err);
     86                 return -1;
     87             }
     88 
     89             /*
     90              * We failed to initialize the KVM device, fallback to
     91              * emulated mode
     92              */
     93             error_prepend(&local_err,
     94                           "kernel_irqchip allowed but unavailable: ");
     95             error_append_hint(&local_err,
     96                               "Falling back to kernel-irqchip=off\n");
     97             warn_report_err(local_err);
     98         }
     99     }
    100 
    101     return 0;
    102 }
    103 
    104 /*
    105  * XICS IRQ backend.
    106  */
    107 
    108 SpaprIrq spapr_irq_xics = {
    109     .xics        = true,
    110     .xive        = false,
    111 };
    112 
    113 /*
    114  * XIVE IRQ backend.
    115  */
    116 
    117 SpaprIrq spapr_irq_xive = {
    118     .xics        = false,
    119     .xive        = true,
    120 };
    121 
    122 /*
    123  * Dual XIVE and XICS IRQ backend.
    124  *
    125  * Both interrupt mode, XIVE and XICS, objects are created but the
    126  * machine starts in legacy interrupt mode (XICS). It can be changed
    127  * by the CAS negotiation process and, in that case, the new mode is
    128  * activated after an extra machine reset.
    129  */
    130 
    131 /*
    132  * Define values in sync with the XIVE and XICS backend
    133  */
    134 SpaprIrq spapr_irq_dual = {
    135     .xics        = true,
    136     .xive        = true,
    137 };
    138 
    139 
    140 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
    141 {
    142     ERRP_GUARD();
    143     MachineState *machine = MACHINE(spapr);
    144 
    145     /*
    146      * Sanity checks on non-P9 machines. On these, XIVE is not
    147      * advertised, see spapr_dt_ov5_platform_support()
    148      */
    149     if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
    150                                0, spapr->max_compat_pvr)) {
    151         /*
    152          * If the 'dual' interrupt mode is selected, force XICS as CAS
    153          * negotiation is useless.
    154          */
    155         if (spapr->irq == &spapr_irq_dual) {
    156             spapr->irq = &spapr_irq_xics;
    157             return 0;
    158         }
    159 
    160         /*
    161          * Non-P9 machines using only XIVE is a bogus setup. We have two
    162          * scenarios to take into account because of the compat mode:
    163          *
    164          * 1. POWER7/8 machines should fail to init later on when creating
    165          *    the XIVE interrupt presenters because a POWER9 exception
    166          *    model is required.
    167 
    168          * 2. POWER9 machines using the POWER8 compat mode won't fail and
    169          *    will let the OS boot with a partial XIVE setup : DT
    170          *    properties but no hcalls.
    171          *
    172          * To cover both and not confuse the OS, add an early failure in
    173          * QEMU.
    174          */
    175         if (!spapr->irq->xics) {
    176             error_setg(errp, "XIVE-only machines require a POWER9 CPU");
    177             return -1;
    178         }
    179     }
    180 
    181     /*
    182      * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
    183      * re-created. Same happens with KVM nested guests. Detect that early to
    184      * avoid QEMU to exit later when the guest reboots.
    185      */
    186     if (kvm_enabled() &&
    187         spapr->irq == &spapr_irq_dual &&
    188         kvm_kernel_irqchip_required() &&
    189         xics_kvm_has_broken_disconnect()) {
    190         error_setg(errp,
    191             "KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
    192         error_append_hint(errp,
    193             "This can happen with an old KVM or in a KVM nested guest.\n");
    194         error_append_hint(errp,
    195             "Try without kernel-irqchip or with kernel-irqchip=off.\n");
    196         return -1;
    197     }
    198 
    199     return 0;
    200 }
    201 
    202 /*
    203  * sPAPR IRQ frontend routines for devices
    204  */
    205 #define ALL_INTCS(spapr_) \
    206     { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), }
    207 
    208 int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
    209                               PowerPCCPU *cpu, Error **errp)
    210 {
    211     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
    212     int i;
    213     int rc;
    214 
    215     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
    216         SpaprInterruptController *intc = intcs[i];
    217         if (intc) {
    218             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
    219             rc = sicc->cpu_intc_create(intc, cpu, errp);
    220             if (rc < 0) {
    221                 return rc;
    222             }
    223         }
    224     }
    225 
    226     return 0;
    227 }
    228 
    229 void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
    230 {
    231     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
    232     int i;
    233 
    234     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
    235         SpaprInterruptController *intc = intcs[i];
    236         if (intc) {
    237             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
    238             sicc->cpu_intc_reset(intc, cpu);
    239         }
    240     }
    241 }
    242 
    243 void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
    244 {
    245     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
    246     int i;
    247 
    248     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
    249         SpaprInterruptController *intc = intcs[i];
    250         if (intc) {
    251             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
    252             sicc->cpu_intc_destroy(intc, cpu);
    253         }
    254     }
    255 }
    256 
    257 static void spapr_set_irq(void *opaque, int irq, int level)
    258 {
    259     SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
    260     SpaprInterruptControllerClass *sicc
    261         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
    262 
    263     sicc->set_irq(spapr->active_intc, irq, level);
    264 }
    265 
    266 void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon)
    267 {
    268     SpaprInterruptControllerClass *sicc
    269         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
    270 
    271     sicc->print_info(spapr->active_intc, mon);
    272 }
    273 
    274 void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
    275                   void *fdt, uint32_t phandle)
    276 {
    277     SpaprInterruptControllerClass *sicc
    278         = SPAPR_INTC_GET_CLASS(spapr->active_intc);
    279 
    280     sicc->dt(spapr->active_intc, nr_servers, fdt, phandle);
    281 }
    282 
    283 uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
    284 {
    285     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    286 
    287     if (smc->legacy_irq_allocation) {
    288         return smc->nr_xirqs;
    289     } else {
    290         return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
    291     }
    292 }
    293 
    294 void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
    295 {
    296     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    297 
    298     if (kvm_enabled() && kvm_kernel_irqchip_split()) {
    299         error_setg(errp, "kernel_irqchip split mode not supported on pseries");
    300         return;
    301     }
    302 
    303     if (spapr_irq_check(spapr, errp) < 0) {
    304         return;
    305     }
    306 
    307     /* Initialize the MSI IRQ allocator. */
    308     spapr_irq_msi_init(spapr);
    309 
    310     if (spapr->irq->xics) {
    311         Object *obj;
    312 
    313         obj = object_new(TYPE_ICS_SPAPR);
    314 
    315         object_property_add_child(OBJECT(spapr), "ics", obj);
    316         object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
    317                                  &error_abort);
    318         object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
    319         if (!qdev_realize(DEVICE(obj), NULL, errp)) {
    320             return;
    321         }
    322 
    323         spapr->ics = ICS_SPAPR(obj);
    324     }
    325 
    326     if (spapr->irq->xive) {
    327         uint32_t nr_servers = spapr_max_server_number(spapr);
    328         DeviceState *dev;
    329         int i;
    330 
    331         dev = qdev_new(TYPE_SPAPR_XIVE);
    332         qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
    333         /*
    334          * 8 XIVE END structures per CPU. One for each available
    335          * priority
    336          */
    337         qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
    338         object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
    339                                  &error_abort);
    340         sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
    341 
    342         spapr->xive = SPAPR_XIVE(dev);
    343 
    344         /* Enable the CPU IPIs */
    345         for (i = 0; i < nr_servers; ++i) {
    346             SpaprInterruptControllerClass *sicc
    347                 = SPAPR_INTC_GET_CLASS(spapr->xive);
    348 
    349             if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
    350                                 false, errp) < 0) {
    351                 return;
    352             }
    353         }
    354 
    355         spapr_xive_hcall_init(spapr);
    356     }
    357 
    358     spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
    359                                       smc->nr_xirqs + SPAPR_XIRQ_BASE);
    360 
    361     /*
    362      * Mostly we don't actually need this until reset, except that not
    363      * having this set up can cause VFIO devices to issue a
    364      * false-positive warning during realize(), because they don't yet
    365      * have an in-kernel irq chip.
    366      */
    367     spapr_irq_update_active_intc(spapr);
    368 }
    369 
    370 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
    371 {
    372     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
    373     int i;
    374     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    375     int rc;
    376 
    377     assert(irq >= SPAPR_XIRQ_BASE);
    378     assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
    379 
    380     for (i = 0; i < ARRAY_SIZE(intcs); i++) {
    381         SpaprInterruptController *intc = intcs[i];
    382         if (intc) {
    383             SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
    384             rc = sicc->claim_irq(intc, irq, lsi, errp);
    385             if (rc < 0) {
    386                 return rc;
    387             }
    388         }
    389     }
    390 
    391     return 0;
    392 }
    393 
    394 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
    395 {
    396     SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
    397     int i, j;
    398     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    399 
    400     assert(irq >= SPAPR_XIRQ_BASE);
    401     assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
    402 
    403     for (i = irq; i < (irq + num); i++) {
    404         for (j = 0; j < ARRAY_SIZE(intcs); j++) {
    405             SpaprInterruptController *intc = intcs[j];
    406 
    407             if (intc) {
    408                 SpaprInterruptControllerClass *sicc
    409                     = SPAPR_INTC_GET_CLASS(intc);
    410                 sicc->free_irq(intc, i);
    411             }
    412         }
    413     }
    414 }
    415 
    416 qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
    417 {
    418     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
    419 
    420     /*
    421      * This interface is basically for VIO and PHB devices to find the
    422      * right qemu_irq to manipulate, so we only allow access to the
    423      * external irqs for now.  Currently anything which needs to
    424      * access the IPIs most naturally gets there via the guest side
    425      * interfaces, we can change this if we need to in future.
    426      */
    427     assert(irq >= SPAPR_XIRQ_BASE);
    428     assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
    429 
    430     if (spapr->ics) {
    431         assert(ics_valid_irq(spapr->ics, irq));
    432     }
    433     if (spapr->xive) {
    434         assert(irq < spapr->xive->nr_irqs);
    435         assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
    436     }
    437 
    438     return spapr->qirqs[irq];
    439 }
    440 
    441 int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
    442 {
    443     SpaprInterruptControllerClass *sicc;
    444 
    445     spapr_irq_update_active_intc(spapr);
    446     sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
    447     return sicc->post_load(spapr->active_intc, version_id);
    448 }
    449 
    450 void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
    451 {
    452     assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
    453 
    454     spapr_irq_update_active_intc(spapr);
    455 }
    456 
    457 int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
    458 {
    459     const char *nodename = "interrupt-controller";
    460     int offset, phandle;
    461 
    462     offset = fdt_subnode_offset(fdt, 0, nodename);
    463     if (offset < 0) {
    464         error_setg(errp, "Can't find node \"%s\": %s",
    465                    nodename, fdt_strerror(offset));
    466         return -1;
    467     }
    468 
    469     phandle = fdt_get_phandle(fdt, offset);
    470     if (!phandle) {
    471         error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
    472         return -1;
    473     }
    474 
    475     return phandle;
    476 }
    477 
    478 static void set_active_intc(SpaprMachineState *spapr,
    479                             SpaprInterruptController *new_intc)
    480 {
    481     SpaprInterruptControllerClass *sicc;
    482     uint32_t nr_servers = spapr_max_server_number(spapr);
    483 
    484     assert(new_intc);
    485 
    486     if (new_intc == spapr->active_intc) {
    487         /* Nothing to do */
    488         return;
    489     }
    490 
    491     if (spapr->active_intc) {
    492         sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc);
    493         if (sicc->deactivate) {
    494             sicc->deactivate(spapr->active_intc);
    495         }
    496     }
    497 
    498     sicc = SPAPR_INTC_GET_CLASS(new_intc);
    499     if (sicc->activate) {
    500         sicc->activate(new_intc, nr_servers, &error_fatal);
    501     }
    502 
    503     spapr->active_intc = new_intc;
    504 
    505     /*
    506      * We've changed the kernel irqchip, let VFIO devices know they
    507      * need to readjust.
    508      */
    509     kvm_irqchip_change_notify();
    510 }
    511 
    512 void spapr_irq_update_active_intc(SpaprMachineState *spapr)
    513 {
    514     SpaprInterruptController *new_intc;
    515 
    516     if (!spapr->ics) {
    517         /*
    518          * XXX before we run CAS, ov5_cas is initialized empty, which
    519          * indicates XICS, even if we have ic-mode=xive.  TODO: clean
    520          * up the CAS path so that we have a clearer way of handling
    521          * this.
    522          */
    523         new_intc = SPAPR_INTC(spapr->xive);
    524     } else if (spapr->ov5_cas
    525                && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
    526         new_intc = SPAPR_INTC(spapr->xive);
    527     } else {
    528         new_intc = SPAPR_INTC(spapr->ics);
    529     }
    530 
    531     set_active_intc(spapr, new_intc);
    532 }
    533 
    534 /*
    535  * XICS legacy routines - to deprecate one day
    536  */
    537 
    538 static int ics_find_free_block(ICSState *ics, int num, int alignnum)
    539 {
    540     int first, i;
    541 
    542     for (first = 0; first < ics->nr_irqs; first += alignnum) {
    543         if (num > (ics->nr_irqs - first)) {
    544             return -1;
    545         }
    546         for (i = first; i < first + num; ++i) {
    547             if (!ics_irq_free(ics, i)) {
    548                 break;
    549             }
    550         }
    551         if (i == (first + num)) {
    552             return first;
    553         }
    554     }
    555 
    556     return -1;
    557 }
    558 
    559 int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
    560 {
    561     ICSState *ics = spapr->ics;
    562     int first = -1;
    563 
    564     assert(ics);
    565 
    566     /*
    567      * MSIMesage::data is used for storing VIRQ so
    568      * it has to be aligned to num to support multiple
    569      * MSI vectors. MSI-X is not affected by this.
    570      * The hint is used for the first IRQ, the rest should
    571      * be allocated continuously.
    572      */
    573     if (align) {
    574         assert((num == 1) || (num == 2) || (num == 4) ||
    575                (num == 8) || (num == 16) || (num == 32));
    576         first = ics_find_free_block(ics, num, num);
    577     } else {
    578         first = ics_find_free_block(ics, num, 1);
    579     }
    580 
    581     if (first < 0) {
    582         error_setg(errp, "can't find a free %d-IRQ block", num);
    583         return -1;
    584     }
    585 
    586     return first + ics->offset;
    587 }
    588 
    589 SpaprIrq spapr_irq_xics_legacy = {
    590     .xics        = true,
    591     .xive        = false,
    592 };
    593 
    594 static void spapr_irq_register_types(void)
    595 {
    596     type_register_static(&spapr_intc_info);
    597 }
    598 
    599 type_init(spapr_irq_register_types)