qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

vfio-user-obj.c (29018B)


/**
 * QEMU vfio-user-server server object
 *
 * Copyright © 2022 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 *
 * See the COPYING file in the top-level directory.
 *
 */

/**
 * Usage: add options:
 *     -machine x-remote,vfio-user=on,auto-shutdown=on
 *     -device <PCI-device>,id=<pci-dev-id>
 *     -object x-vfio-user-server,id=<id>,type=unix,path=<socket-path>,
 *             device=<pci-dev-id>
 *
 * Note that the x-vfio-user-server object must be used with the x-remote
 * machine only. The server currently supports PCI devices only.
 *
 * type - SocketAddress type - only "unix" is presently supported. It is
 *        a required option.
 *
 * path - path of the named unix socket, which will be created by the
 *        server. It is a required option.
 *
 * device - id of a device on the server; a required option. Only PCI
 *          devices are presently supported.
 *
 * notes - x-vfio-user-server may block IO and the monitor during the
 *         initialization phase.
 */
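
/*
 * Illustrative example (not taken from the QEMU documentation): assuming a
 * hypothetical server process that instantiates an lsi53c895a device with
 * id "lsi1" and exports it over the socket "/tmp/vfio-user.sock", the
 * options above might be combined as:
 *
 *     -machine x-remote,vfio-user=on,auto-shutdown=on \
 *     -device lsi53c895a,id=lsi1 \
 *     -object x-vfio-user-server,id=vfs0,type=unix,path=/tmp/vfio-user.sock,device=lsi1
 *
 * The value of the "device" option must match the id of a -device
 * instantiated in the same (remote) QEMU instance.
 */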

#include "qemu/osdep.h"

#include "qom/object.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
#include "trace.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"
#include "hw/remote/machine.h"
#include "qapi/error.h"
#include "qapi/qapi-visit-sockets.h"
#include "qapi/qapi-events-misc.h"
#include "qemu/notify.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "sysemu/sysemu.h"
#include "libvfio-user.h"
#include "hw/qdev-core.h"
#include "hw/pci/pci.h"
#include "qemu/timer.h"
#include "exec/memory.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/remote/vfio-user-obj.h"

#define TYPE_VFU_OBJECT "x-vfio-user-server"
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)

/**
 * VFU_OBJECT_ERROR - reports an error message. If auto_shutdown
 * is set, it aborts the machine on error. Otherwise, it logs an
 * error message without aborting.
 */
#define VFU_OBJECT_ERROR(o, fmt, ...)                                     \
    {                                                                     \
        if (vfu_object_auto_shutdown()) {                                 \
            error_setg(&error_abort, (fmt), ## __VA_ARGS__);              \
        } else {                                                          \
            error_report((fmt), ## __VA_ARGS__);                          \
        }                                                                 \
    }                                                                     \

struct VfuObjectClass {
    ObjectClass parent_class;

    unsigned int nr_devs;
};

struct VfuObject {
    /* private */
    Object parent;

    SocketAddress *socket;

    char *device;

    Error *err;

    Notifier machine_done;

    vfu_ctx_t *vfu_ctx;

    PCIDevice *pci_dev;

    Error *unplug_blocker;

    int vfu_poll_fd;

    MSITriggerFunc *default_msi_trigger;
    MSIPrepareMessageFunc *default_msi_prepare_message;
    MSIxPrepareMessageFunc *default_msix_prepare_message;
};

static void vfu_object_init_ctx(VfuObject *o, Error **errp);

static bool vfu_object_auto_shutdown(void)
{
    bool auto_shutdown = true;
    Error *local_err = NULL;

    if (!current_machine) {
        return auto_shutdown;
    }

    auto_shutdown = object_property_get_bool(OBJECT(current_machine),
                                             "auto-shutdown",
                                             &local_err);

    /*
     * local_err would be set if no such property exists - safe to ignore.
     * Unlikely scenario as auto-shutdown is always defined for
     * TYPE_REMOTE_MACHINE, and TYPE_VFU_OBJECT only works with
     * TYPE_REMOTE_MACHINE
     */
    if (local_err) {
        auto_shutdown = true;
        error_free(local_err);
    }

    return auto_shutdown;
}

static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
                                  void *opaque, Error **errp)
{
    VfuObject *o = VFU_OBJECT(obj);

    if (o->vfu_ctx) {
        error_setg(errp, "vfu: Unable to set socket property - server busy");
        return;
    }

    qapi_free_SocketAddress(o->socket);

    o->socket = NULL;

    if (!visit_type_SocketAddress(v, name, &o->socket, errp)) {
        return;
    }

    if (o->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
        error_setg(errp, "vfu: Unsupported socket type - %s",
                   SocketAddressType_str(o->socket->type));
        qapi_free_SocketAddress(o->socket);
        o->socket = NULL;
        return;
    }

    trace_vfu_prop("socket", o->socket->u.q_unix.path);

    vfu_object_init_ctx(o, errp);
}

static void vfu_object_set_device(Object *obj, const char *str, Error **errp)
{
    VfuObject *o = VFU_OBJECT(obj);

    if (o->vfu_ctx) {
        error_setg(errp, "vfu: Unable to set device property - server busy");
        return;
    }

    g_free(o->device);

    o->device = g_strdup(str);

    trace_vfu_prop("device", str);

    vfu_object_init_ctx(o, errp);
}

static void vfu_object_ctx_run(void *opaque)
{
    VfuObject *o = opaque;
    const char *vfu_id;
    char *vfu_path, *pci_dev_path;
    int ret = -1;

    while (ret != 0) {
        ret = vfu_run_ctx(o->vfu_ctx);
        if (ret < 0) {
            if (errno == EINTR) {
                continue;
            } else if (errno == ENOTCONN) {
                vfu_id = object_get_canonical_path_component(OBJECT(o));
                vfu_path = object_get_canonical_path(OBJECT(o));
                g_assert(o->pci_dev);
                pci_dev_path = object_get_canonical_path(OBJECT(o->pci_dev));
                /* o->device is a required property and is non-NULL here */
                g_assert(o->device);
                qapi_event_send_vfu_client_hangup(vfu_id, vfu_path,
                                                  o->device, pci_dev_path);
                qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
                o->vfu_poll_fd = -1;
                object_unparent(OBJECT(o));
                g_free(vfu_path);
                g_free(pci_dev_path);
                break;
            } else {
                VFU_OBJECT_ERROR(o, "vfu: Failed to run device %s - %s",
                                 o->device, strerror(errno));
                break;
            }
        }
    }
}

static void vfu_object_attach_ctx(void *opaque)
{
    VfuObject *o = opaque;
    GPollFD pfds[1];
    int ret;

    qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);

    pfds[0].fd = o->vfu_poll_fd;
    pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;

retry_attach:
    ret = vfu_attach_ctx(o->vfu_ctx);
    if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
        /**
         * vfu_object_attach_ctx can block QEMU's main loop
         * during attach - the monitor and other IO
         * could be unresponsive during this time.
         */
        (void)qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS);
        goto retry_attach;
    } else if (ret < 0) {
        VFU_OBJECT_ERROR(o, "vfu: Failed to attach device %s to context - %s",
                         o->device, strerror(errno));
        return;
    }

    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
    if (o->vfu_poll_fd < 0) {
        VFU_OBJECT_ERROR(o, "vfu: Failed to get poll fd %s", o->device);
        return;
    }

    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
}

static ssize_t vfu_object_cfg_access(vfu_ctx_t *vfu_ctx, char * const buf,
                                     size_t count, loff_t offset,
                                     const bool is_write)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t pci_access_width = sizeof(uint32_t);
    size_t bytes = count;
    uint32_t val = 0;
    char *ptr = buf;
    int len;

    /*
     * Writes to the BAR registers would trigger an update to the
     * global Memory and IO AddressSpaces. But the remote device
     * never uses the global AddressSpaces, therefore overlapping
     * memory regions are not a problem
     */
    while (bytes > 0) {
        len = (bytes > pci_access_width) ? pci_access_width : bytes;
        if (is_write) {
            memcpy(&val, ptr, len);
            pci_host_config_write_common(o->pci_dev, offset,
                                         pci_config_size(o->pci_dev),
                                         val, len);
            trace_vfu_cfg_write(offset, val);
        } else {
            val = pci_host_config_read_common(o->pci_dev, offset,
                                              pci_config_size(o->pci_dev), len);
            memcpy(ptr, &val, len);
            trace_vfu_cfg_read(offset, val);
        }
        offset += len;
        ptr += len;
        bytes -= len;
    }

    return count;
}

static void dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    AddressSpace *dma_as = NULL;
    MemoryRegion *subregion = NULL;
    g_autofree char *name = NULL;
    struct iovec *iov = &info->iova;

    if (!info->vaddr) {
        return;
    }

    name = g_strdup_printf("mem-%s-%"PRIx64"", o->device,
                           (uint64_t)info->vaddr);

    subregion = g_new0(MemoryRegion, 1);

    memory_region_init_ram_ptr(subregion, NULL, name,
                               iov->iov_len, info->vaddr);

    dma_as = pci_device_iommu_address_space(o->pci_dev);

    memory_region_add_subregion(dma_as->root, (hwaddr)iov->iov_base, subregion);

    trace_vfu_dma_register((uint64_t)iov->iov_base, iov->iov_len);
}

static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    AddressSpace *dma_as = NULL;
    MemoryRegion *mr = NULL;
    ram_addr_t offset;

    mr = memory_region_from_host(info->vaddr, &offset);
    if (!mr) {
        return;
    }

    dma_as = pci_device_iommu_address_space(o->pci_dev);

    memory_region_del_subregion(dma_as->root, mr);

    object_unparent((OBJECT(mr)));

    trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
}

static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset,
                            hwaddr size, const bool is_write)
{
    uint8_t *ptr = buf;
    bool release_lock = false;
    uint8_t *ram_ptr = NULL;
    MemTxResult result;
    int access_size;
    uint64_t val;

    if (memory_access_is_direct(mr, is_write)) {
        /**
         * Some devices expose a PCI expansion ROM, which could be buffer
         * based as compared to other regions which are primarily based on
         * MemoryRegionOps. memory_region_find() would already check
         * for buffer overflow, we don't need to repeat it here.
         */
        ram_ptr = memory_region_get_ram_ptr(mr);

        if (is_write) {
            memcpy((ram_ptr + offset), buf, size);
        } else {
            memcpy(buf, (ram_ptr + offset), size);
        }

        return 0;
    }

    while (size) {
        /**
         * The read/write logic used below is similar to the ones in
         * flatview_read/write_continue()
         */
        release_lock = prepare_mmio_access(mr);

        access_size = memory_access_size(mr, size, offset);

        if (is_write) {
            val = ldn_he_p(ptr, access_size);

            result = memory_region_dispatch_write(mr, offset, val,
                                                  size_memop(access_size),
                                                  MEMTXATTRS_UNSPECIFIED);
        } else {
            result = memory_region_dispatch_read(mr, offset, &val,
                                                 size_memop(access_size),
                                                 MEMTXATTRS_UNSPECIFIED);

            stn_he_p(ptr, access_size, val);
        }

        if (release_lock) {
            qemu_mutex_unlock_iothread();
            release_lock = false;
        }

        if (result != MEMTX_OK) {
            return -1;
        }

        size -= access_size;
        ptr += access_size;
        offset += access_size;
    }

    return 0;
}

static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
                                hwaddr bar_offset, char * const buf,
                                hwaddr len, const bool is_write)
{
    MemoryRegionSection section = { 0 };
    uint8_t *ptr = (uint8_t *)buf;
    MemoryRegion *section_mr = NULL;
    uint64_t section_size;
    hwaddr section_offset;
    hwaddr size = 0;

    while (len) {
        section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
                                     bar_offset, len);

        if (!section.mr) {
            warn_report("vfu: invalid address 0x%"PRIx64"", bar_offset);
            return size;
        }

        section_mr = section.mr;
        section_offset = section.offset_within_region;
        section_size = int128_get64(section.size);

        if (is_write && section_mr->readonly) {
            warn_report("vfu: attempting to write to readonly region in "
                        "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
                        pci_bar, bar_offset,
                        (bar_offset + section_size));
            memory_region_unref(section_mr);
            return size;
        }

        if (vfu_object_mr_rw(section_mr, ptr, section_offset,
                             section_size, is_write)) {
            warn_report("vfu: failed to %s "
                        "[0x%"PRIx64" - 0x%"PRIx64"] in bar %d",
                        is_write ? "write to" : "read from", bar_offset,
                        (bar_offset + section_size), pci_bar);
            memory_region_unref(section_mr);
            return size;
        }

        size += section_size;
        bar_offset += section_size;
        ptr += section_size;
        len -= section_size;

        memory_region_unref(section_mr);
    }

    return size;
}

/**
 * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
 *
 * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
 * define vfu_object_bar2_handler
 */
#define VFU_OBJECT_BAR_HANDLER(BAR_NO)                                         \
    static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx,        \
                                        char * const buf, size_t count,        \
                                        loff_t offset, const bool is_write)    \
    {                                                                          \
        VfuObject *o = vfu_get_private(vfu_ctx);                               \
        PCIDevice *pci_dev = o->pci_dev;                                       \
                                                                               \
        return vfu_object_bar_rw(pci_dev, BAR_NO, offset,                      \
                                 buf, count, is_write);                        \
    }                                                                          \

VFU_OBJECT_BAR_HANDLER(0)
VFU_OBJECT_BAR_HANDLER(1)
VFU_OBJECT_BAR_HANDLER(2)
VFU_OBJECT_BAR_HANDLER(3)
VFU_OBJECT_BAR_HANDLER(4)
VFU_OBJECT_BAR_HANDLER(5)
VFU_OBJECT_BAR_HANDLER(6)

static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
    &vfu_object_bar0_handler,
    &vfu_object_bar1_handler,
    &vfu_object_bar2_handler,
    &vfu_object_bar3_handler,
    &vfu_object_bar4_handler,
    &vfu_object_bar5_handler,
    &vfu_object_bar6_handler,
};

/**
 * vfu_object_register_bars - Identify active BAR regions of pdev and setup
 *                            callbacks to handle read/write accesses
 */
static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
{
    int flags = VFU_REGION_FLAG_RW;
    int i;

    for (i = 0; i < PCI_NUM_REGIONS; i++) {
        if (!pdev->io_regions[i].size) {
            continue;
        }

        if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
            pdev->io_regions[i].memory->readonly) {
            flags &= ~VFU_REGION_FLAG_WRITE;
        }

        vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
                         (size_t)pdev->io_regions[i].size,
                         vfu_object_bar_handlers[i],
                         flags, NULL, 0, -1, 0);

        trace_vfu_bar_register(i, pdev->io_regions[i].addr,
                               pdev->io_regions[i].size);
    }
}

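/*
 * The INTx pin is mapped to a "virtual IRQ" number that encodes the
 * device's BDF (bus number and devfn), so that vfu_object_set_irq()
 * below can decode it and locate the device which raised the interrupt.
 */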
static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
{
    int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)),
                                pci_dev->devfn);

    return pci_bdf;
}

static void vfu_object_set_irq(void *opaque, int pirq, int level)
{
    PCIBus *pci_bus = opaque;
    PCIDevice *pci_dev = NULL;
    vfu_ctx_t *vfu_ctx = NULL;
    int pci_bus_num, devfn;

    if (level) {
        pci_bus_num = PCI_BUS_NUM(pirq);
        devfn = PCI_BDF_TO_DEVFN(pirq);

        /*
         * pci_find_device() performs at O(1) if the device is attached
         * to the root PCI bus. Whereas, if the device is attached to a
         * secondary PCI bus (such as when a root port is involved),
         * finding the parent PCI bus could take O(n)
         */
        pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn);

        vfu_ctx = pci_dev->irq_opaque;

        g_assert(vfu_ctx);

        vfu_irq_trigger(vfu_ctx, 0);
    }
}

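/*
 * The remote device does not use real MSI/MSI-X message addresses; the
 * prepared message only carries the vector number in msg.data, which
 * vfu_object_msi_trigger() forwards to the client via vfu_irq_trigger().
 */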
static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
                                             unsigned int vector)
{
    MSIMessage msg;

    msg.address = 0;
    msg.data = vector;

    return msg;
}

static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
{
    vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque;

    vfu_irq_trigger(vfu_ctx, msg.data);
}

static void vfu_object_setup_msi_cbs(VfuObject *o)
{
    o->default_msi_trigger = o->pci_dev->msi_trigger;
    o->default_msi_prepare_message = o->pci_dev->msi_prepare_message;
    o->default_msix_prepare_message = o->pci_dev->msix_prepare_message;

    o->pci_dev->msi_trigger = vfu_object_msi_trigger;
    o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg;
    o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg;
}

static void vfu_object_restore_msi_cbs(VfuObject *o)
{
    o->pci_dev->msi_trigger = o->default_msi_trigger;
    o->pci_dev->msi_prepare_message = o->default_msi_prepare_message;
    o->pci_dev->msix_prepare_message = o->default_msix_prepare_message;
}

static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                               uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t vector;

    for (vector = start; vector < count; vector++) {
        msix_set_mask(o->pci_dev, vector, mask);
    }
}

static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                              uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    Error *err = NULL;
    uint32_t vector;

    for (vector = start; vector < count; vector++) {
        msi_set_mask(o->pci_dev, vector, mask, &err);
        if (err) {
            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
                             error_get_pretty(err));
            error_free(err);
            err = NULL;
        }
    }
}

static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
{
    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
    int ret;

    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
    if (ret < 0) {
        return ret;
    }

    if (msix_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                       msix_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                     &vfu_msix_irq_state);
    } else if (msi_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
                                       msi_nr_vectors_allocated(pci_dev));
        vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
                                     &vfu_msi_irq_state);
    }

    if (ret < 0) {
        return ret;
    }

    vfu_object_setup_msi_cbs(o);

    pci_dev->irq_opaque = vfu_ctx;

    return 0;
}

void vfu_object_set_bus_irq(PCIBus *pci_bus)
{
    int bus_num = pci_bus_num(pci_bus);
    int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);

    pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus,
                 max_bdf);
}

static int vfu_object_device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
{
    VfuObject *o = vfu_get_private(vfu_ctx);

    /* vfu_object_ctx_run() handles lost connection */
    if (type == VFU_RESET_LOST_CONN) {
        return 0;
    }

    qdev_reset_all(DEVICE(o->pci_dev));

    return 0;
}

/*
 * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
 * properties. It also depends on devices instantiated in QEMU. These
 * dependencies are not available during the instance_init phase of this
 * object's life-cycle. As such, the server is initialized after the
 * machine is setup. machine_init_done_notifier notifies TYPE_VFU_OBJECT
 * when the machine is setup, and the dependencies are available.
 */
static void vfu_object_machine_done(Notifier *notifier, void *data)
{
    VfuObject *o = container_of(notifier, VfuObject, machine_done);
    Error *err = NULL;

    vfu_object_init_ctx(o, &err);

    if (err) {
        error_propagate(&error_abort, err);
    }
}

/**
 * vfu_object_init_ctx: Create and initialize libvfio-user context. Add
 *     an unplug blocker for the associated PCI device. Setup a FD handler
 *     to process incoming messages in the context's socket.
 *
 *     The socket and device properties are mandatory, and this function
 *     will not create the context without them - the setters for these
 *     properties should call this function when the property is set. The
 *     machine should also be ready when this function is invoked - it is
 *     because QEMU objects are initialized before devices, and the
 *     associated PCI device wouldn't be available at the object
 *     initialization time. Until these conditions are satisfied, this
 *     function would return early without performing any task.
 */
static void vfu_object_init_ctx(VfuObject *o, Error **errp)
{
    ERRP_GUARD();
    DeviceState *dev = NULL;
    vfu_pci_type_t pci_type = VFU_PCI_TYPE_CONVENTIONAL;
    int ret;

    if (o->vfu_ctx || !o->socket || !o->device ||
            !phase_check(PHASE_MACHINE_READY)) {
        return;
    }

    if (o->err) {
        error_propagate(errp, o->err);
        o->err = NULL;
        return;
    }

    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path,
                                LIBVFIO_USER_FLAG_ATTACH_NB,
                                o, VFU_DEV_TYPE_PCI);
    if (o->vfu_ctx == NULL) {
        error_setg(errp, "vfu: Failed to create context - %s", strerror(errno));
        return;
    }

    dev = qdev_find_recursive(sysbus_get_default(), o->device);
    if (dev == NULL) {
        error_setg(errp, "vfu: Device %s not found", o->device);
        goto fail;
    }

    if (!object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        error_setg(errp, "vfu: %s not a PCI device", o->device);
        goto fail;
    }

    o->pci_dev = PCI_DEVICE(dev);

    object_ref(OBJECT(o->pci_dev));

    if (pci_is_express(o->pci_dev)) {
        pci_type = VFU_PCI_TYPE_EXPRESS;
    }

    ret = vfu_pci_init(o->vfu_ctx, pci_type, PCI_HEADER_TYPE_NORMAL, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to attach PCI device %s to context - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    error_setg(&o->unplug_blocker,
               "vfu: %s for %s must be deleted before unplugging",
               TYPE_VFU_OBJECT, o->device);
    qdev_add_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);

    ret = vfu_setup_region(o->vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX,
                           pci_config_size(o->pci_dev), &vfu_object_cfg_access,
                           VFU_REGION_FLAG_RW | VFU_REGION_FLAG_ALWAYS_CB,
                           NULL, 0, -1, 0);
    if (ret < 0) {
        error_setg(errp,
                   "vfu: Failed to setup config space handlers for %s - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    ret = vfu_setup_device_dma(o->vfu_ctx, &dma_register, &dma_unregister);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup DMA handlers for %s",
                   o->device);
        goto fail;
    }

    vfu_object_register_bars(o->vfu_ctx, o->pci_dev);

    ret = vfu_object_setup_irqs(o, o->pci_dev);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup interrupts for %s",
                   o->device);
        goto fail;
    }

    ret = vfu_setup_device_reset_cb(o->vfu_ctx, &vfu_object_device_reset);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to setup reset callback");
        goto fail;
    }

    ret = vfu_realize_ctx(o->vfu_ctx);
    if (ret < 0) {
        error_setg(errp, "vfu: Failed to realize device %s - %s",
                   o->device, strerror(errno));
        goto fail;
    }

    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
    if (o->vfu_poll_fd < 0) {
        error_setg(errp, "vfu: Failed to get poll fd %s", o->device);
        goto fail;
    }

    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);

    return;

fail:
    vfu_destroy_ctx(o->vfu_ctx);
    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }
    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }
    o->vfu_ctx = NULL;
}

static void vfu_object_init(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs++;

    if (!object_dynamic_cast(OBJECT(current_machine), TYPE_REMOTE_MACHINE)) {
        error_setg(&o->err, "vfu: %s only compatible with %s machine",
                   TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE);
        return;
    }

    if (!phase_check(PHASE_MACHINE_READY)) {
        o->machine_done.notify = vfu_object_machine_done;
        qemu_add_machine_init_done_notifier(&o->machine_done);
    }

    o->vfu_poll_fd = -1;
}

static void vfu_object_finalize(Object *obj)
{
    VfuObjectClass *k = VFU_OBJECT_GET_CLASS(obj);
    VfuObject *o = VFU_OBJECT(obj);

    k->nr_devs--;

    qapi_free_SocketAddress(o->socket);

    o->socket = NULL;

    if (o->vfu_poll_fd != -1) {
        qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
        o->vfu_poll_fd = -1;
    }

    if (o->vfu_ctx) {
        vfu_destroy_ctx(o->vfu_ctx);
        o->vfu_ctx = NULL;
    }

    g_free(o->device);

    o->device = NULL;

    if (o->unplug_blocker && o->pci_dev) {
        qdev_del_unplug_blocker(DEVICE(o->pci_dev), o->unplug_blocker);
        error_free(o->unplug_blocker);
        o->unplug_blocker = NULL;
    }

    if (o->pci_dev) {
        vfu_object_restore_msi_cbs(o);
        o->pci_dev->irq_opaque = NULL;
        object_unref(OBJECT(o->pci_dev));
        o->pci_dev = NULL;
    }

    if (!k->nr_devs && vfu_object_auto_shutdown()) {
        qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
    }

    if (o->machine_done.notify) {
        qemu_remove_machine_init_done_notifier(&o->machine_done);
        o->machine_done.notify = NULL;
    }
}

static void vfu_object_class_init(ObjectClass *klass, void *data)
{
    VfuObjectClass *k = VFU_OBJECT_CLASS(klass);

    k->nr_devs = 0;

    object_class_property_add(klass, "socket", "SocketAddress", NULL,
                              vfu_object_set_socket, NULL, NULL);
    object_class_property_set_description(klass, "socket",
                                          "SocketAddress "
                                          "(ex: type=unix,path=/tmp/sock). "
                                          "Only UNIX is presently supported");
    object_class_property_add_str(klass, "device", NULL,
                                  vfu_object_set_device);
    object_class_property_set_description(klass, "device",
                                          "device ID - only PCI devices "
                                          "are presently supported");
}

static const TypeInfo vfu_object_info = {
    .name = TYPE_VFU_OBJECT,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(VfuObject),
    .instance_init = vfu_object_init,
    .instance_finalize = vfu_object_finalize,
    .class_size = sizeof(VfuObjectClass),
    .class_init = vfu_object_class_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};

static void vfu_register_types(void)
{
    type_register_static(&vfu_object_info);
}

type_init(vfu_register_types);