qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

vhost.c (60468B)


      1 /*
      2  * vhost support
      3  *
      4  * Copyright Red Hat, Inc. 2010
      5  *
      6  * Authors:
      7  *  Michael S. Tsirkin <mst@redhat.com>
      8  *
      9  * This work is licensed under the terms of the GNU GPL, version 2.  See
     10  * the COPYING file in the top-level directory.
     11  *
     12  * Contributions after 2012-01-13 are licensed under the terms of the
     13  * GNU GPL, version 2 or (at your option) any later version.
     14  */
     15 
     16 #include "qemu/osdep.h"
     17 #include "qapi/error.h"
     18 #include "hw/virtio/vhost.h"
     19 #include "qemu/atomic.h"
     20 #include "qemu/range.h"
     21 #include "qemu/error-report.h"
     22 #include "qemu/memfd.h"
     23 #include "standard-headers/linux/vhost_types.h"
     24 #include "hw/virtio/virtio-bus.h"
     25 #include "hw/virtio/virtio-access.h"
     26 #include "migration/blocker.h"
     27 #include "migration/qemu-file-types.h"
     28 #include "sysemu/dma.h"
     29 #include "trace.h"
     30 
     31 /* enabled until disconnected backend stabilizes */
     32 #define _VHOST_DEBUG 1
     33 
     34 #ifdef _VHOST_DEBUG
     35 #define VHOST_OPS_DEBUG(retval, fmt, ...) \
     36     do { \
     37         error_report(fmt ": %s (%d)", ## __VA_ARGS__, \
     38                      strerror(-retval), -retval); \
     39     } while (0)
     40 #else
     41 #define VHOST_OPS_DEBUG(retval, fmt, ...) \
     42     do { } while (0)
     43 #endif
     44 
     45 static struct vhost_log *vhost_log;
     46 static struct vhost_log *vhost_log_shm;
     47 
     48 static unsigned int used_memslots;
     49 static QLIST_HEAD(, vhost_dev) vhost_devices =
     50     QLIST_HEAD_INITIALIZER(vhost_devices);
     51 
     52 bool vhost_has_free_slot(void)
     53 {
     54     unsigned int slots_limit = ~0U;
     55     struct vhost_dev *hdev;
     56 
     57     QLIST_FOREACH(hdev, &vhost_devices, entry) {
     58         unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
     59         slots_limit = MIN(slots_limit, r);
     60     }
     61     return slots_limit > used_memslots;
     62 }
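
/*
 * In other words: "is there room for one more memslot?".  used_memslots
 * is refreshed from dev->mem->nregions in vhost_commit() below, and the
 * limit used here is the most restrictive one reported by any of the
 * registered backends.
 */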
     63 
     64 static void vhost_dev_sync_region(struct vhost_dev *dev,
     65                                   MemoryRegionSection *section,
     66                                   uint64_t mfirst, uint64_t mlast,
     67                                   uint64_t rfirst, uint64_t rlast)
     68 {
     69     vhost_log_chunk_t *log = dev->log->log;
     70 
     71     uint64_t start = MAX(mfirst, rfirst);
     72     uint64_t end = MIN(mlast, rlast);
     73     vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK;
     74     vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1;
     75     uint64_t addr = QEMU_ALIGN_DOWN(start, VHOST_LOG_CHUNK);
     76 
     77     if (end < start) {
     78         return;
     79     }
     80     assert(end / VHOST_LOG_CHUNK < dev->log_size);
     81     assert(start / VHOST_LOG_CHUNK < dev->log_size);
     82 
     83     for (;from < to; ++from) {
     84         vhost_log_chunk_t log;
     85         /* We first do a non-atomic check: it is much cheaper,
     86          * and we expect non-dirty to be the common case. */
     87         if (!*from) {
     88             addr += VHOST_LOG_CHUNK;
     89             continue;
     90         }
     91         /* Data must be read atomically. We don't really need barrier semantics
     92          * but it's easier to use atomic_* than roll our own. */
     93         log = qatomic_xchg(from, 0);
     94         while (log) {
     95             int bit = ctzl(log);
     96             hwaddr page_addr;
     97             hwaddr section_offset;
     98             hwaddr mr_offset;
     99             page_addr = addr + bit * VHOST_LOG_PAGE;
    100             section_offset = page_addr - section->offset_within_address_space;
    101             mr_offset = section_offset + section->offset_within_region;
    102             memory_region_set_dirty(section->mr, mr_offset, VHOST_LOG_PAGE);
    103             log &= ~(0x1ull << bit);
    104         }
    105         addr += VHOST_LOG_CHUNK;
    106     }
    107 }
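
/*
 * A worked example of the bitmap walked above, assuming the usual
 * definitions from "hw/virtio/vhost.h" on a 64-bit host
 * (VHOST_LOG_PAGE = 0x1000, vhost_log_chunk_t = unsigned long, hence
 * VHOST_LOG_CHUNK = 0x1000 * 64 = 0x40000): each chunk is one 64-bit
 * word covering 256KiB of guest memory, one bit per 4KiB page.  A
 * logged write to GPA 0x45000 sets bit (0x45000 - 0x40000) / 0x1000 = 5
 * in chunk 0x45000 / 0x40000 = 1, and the loop above turns that bit
 * back into page_addr = 0x40000 + 5 * 0x1000 = 0x45000 before marking
 * the page dirty in the MemoryRegion.
 */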
    108 
    109 static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
    110                                    MemoryRegionSection *section,
    111                                    hwaddr first,
    112                                    hwaddr last)
    113 {
    114     int i;
    115     hwaddr start_addr;
    116     hwaddr end_addr;
    117 
    118     if (!dev->log_enabled || !dev->started) {
    119         return 0;
    120     }
    121     start_addr = section->offset_within_address_space;
    122     end_addr = range_get_last(start_addr, int128_get64(section->size));
    123     start_addr = MAX(first, start_addr);
    124     end_addr = MIN(last, end_addr);
    125 
    126     for (i = 0; i < dev->mem->nregions; ++i) {
    127         struct vhost_memory_region *reg = dev->mem->regions + i;
    128         vhost_dev_sync_region(dev, section, start_addr, end_addr,
    129                               reg->guest_phys_addr,
    130                               range_get_last(reg->guest_phys_addr,
    131                                              reg->memory_size));
    132     }
    133     for (i = 0; i < dev->nvqs; ++i) {
    134         struct vhost_virtqueue *vq = dev->vqs + i;
    135 
    136         if (!vq->used_phys && !vq->used_size) {
    137             continue;
    138         }
    139 
    140         vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
    141                               range_get_last(vq->used_phys, vq->used_size));
    142     }
    143     return 0;
    144 }
    145 
    146 static void vhost_log_sync(MemoryListener *listener,
    147                           MemoryRegionSection *section)
    148 {
    149     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
    150                                          memory_listener);
    151     vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL);
    152 }
    153 
    154 static void vhost_log_sync_range(struct vhost_dev *dev,
    155                                  hwaddr first, hwaddr last)
    156 {
    157     int i;
    158     /* FIXME: this is N^2 in number of sections */
    159     for (i = 0; i < dev->n_mem_sections; ++i) {
    160         MemoryRegionSection *section = &dev->mem_sections[i];
    161         vhost_sync_dirty_bitmap(dev, section, first, last);
    162     }
    163 }
    164 
    165 static uint64_t vhost_get_log_size(struct vhost_dev *dev)
    166 {
    167     uint64_t log_size = 0;
    168     int i;
    169     for (i = 0; i < dev->mem->nregions; ++i) {
    170         struct vhost_memory_region *reg = dev->mem->regions + i;
    171         uint64_t last = range_get_last(reg->guest_phys_addr,
    172                                        reg->memory_size);
    173         log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
    174     }
    175     return log_size;
    176 }
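
/*
 * A rough sizing sketch under the same assumptions (VHOST_LOG_CHUNK =
 * 0x40000): a memory map whose highest region ends just below 4GiB
 * gives log_size = 0xffffffff / 0x40000 + 1 = 0x4000 chunks, i.e.
 * 16384 * 8 bytes = 128KiB of log to cover every guest page.
 */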
    177 
    178 static int vhost_set_backend_type(struct vhost_dev *dev,
    179                                   VhostBackendType backend_type)
    180 {
    181     int r = 0;
    182 
    183     switch (backend_type) {
    184 #ifdef CONFIG_VHOST_KERNEL
    185     case VHOST_BACKEND_TYPE_KERNEL:
    186         dev->vhost_ops = &kernel_ops;
    187         break;
    188 #endif
    189 #ifdef CONFIG_VHOST_USER
    190     case VHOST_BACKEND_TYPE_USER:
    191         dev->vhost_ops = &user_ops;
    192         break;
    193 #endif
    194 #ifdef CONFIG_VHOST_VDPA
    195     case VHOST_BACKEND_TYPE_VDPA:
    196         dev->vhost_ops = &vdpa_ops;
    197         break;
    198 #endif
    199     default:
    200         error_report("Unknown vhost backend type");
    201         r = -1;
    202     }
    203 
    204     return r;
    205 }
    206 
    207 static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
    208 {
    209     Error *err = NULL;
    210     struct vhost_log *log;
    211     uint64_t logsize = size * sizeof(*(log->log));
    212     int fd = -1;
    213 
    214     log = g_new0(struct vhost_log, 1);
    215     if (share) {
    216         log->log = qemu_memfd_alloc("vhost-log", logsize,
    217                                     F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
    218                                     &fd, &err);
    219         if (err) {
    220             error_report_err(err);
    221             g_free(log);
    222             return NULL;
    223         }
    224         memset(log->log, 0, logsize);
    225     } else {
    226         log->log = g_malloc0(logsize);
    227     }
    228 
    229     log->size = size;
    230     log->refcnt = 1;
    231     log->fd = fd;
    232 
    233     return log;
    234 }
    235 
    236 static struct vhost_log *vhost_log_get(uint64_t size, bool share)
    237 {
    238     struct vhost_log *log = share ? vhost_log_shm : vhost_log;
    239 
    240     if (!log || log->size != size) {
    241         log = vhost_log_alloc(size, share);
    242         if (share) {
    243             vhost_log_shm = log;
    244         } else {
    245             vhost_log = log;
    246         }
    247     } else {
    248         ++log->refcnt;
    249     }
    250 
    251     return log;
    252 }
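
/*
 * Note that the log is shared rather than per-device: every vhost
 * device takes a reference on the global vhost_log (or vhost_log_shm
 * when the backend requires a shared, memfd-backed log) and drops it
 * again in vhost_log_put().  Asking for a different size allocates a
 * fresh log and repoints the global at it; earlier holders keep using
 * their old log until they release it.
 */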
    253 
    254 static void vhost_log_put(struct vhost_dev *dev, bool sync)
    255 {
    256     struct vhost_log *log = dev->log;
    257 
    258     if (!log) {
    259         return;
    260     }
    261 
    262     --log->refcnt;
    263     if (log->refcnt == 0) {
    264         /* Sync only the range covered by the old log */
    265         if (dev->log_size && sync) {
    266             vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
    267         }
    268 
    269         if (vhost_log == log) {
    270             g_free(log->log);
    271             vhost_log = NULL;
    272         } else if (vhost_log_shm == log) {
    273             qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
    274                             log->fd);
    275             vhost_log_shm = NULL;
    276         }
    277 
    278         g_free(log);
    279     }
    280 
    281     dev->log = NULL;
    282     dev->log_size = 0;
    283 }
    284 
    285 static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
    286 {
    287     return dev->vhost_ops->vhost_requires_shm_log &&
    288            dev->vhost_ops->vhost_requires_shm_log(dev);
    289 }
    290 
    291 static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
    292 {
    293     struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
    294     uint64_t log_base = (uintptr_t)log->log;
    295     int r;
    296 
    297     /* Inform the backend of the log switch; this must be done before
    298        releasing the current log, to ensure no logging is lost */
    299     r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log);
    300     if (r < 0) {
    301         VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
    302     }
    303 
    304     vhost_log_put(dev, true);
    305     dev->log = log;
    306     dev->log_size = size;
    307 }
    308 
    309 static bool vhost_dev_has_iommu(struct vhost_dev *dev)
    310 {
    311     VirtIODevice *vdev = dev->vdev;
    312 
    313     /*
    314      * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend supports
    315      * the incremental memory mapping API via the IOTLB API. For platforms
    316      * that do not have an IOMMU, there's no need to enable this feature,
    317      * which may cause unnecessary IOTLB miss/update transactions.
    318      */
    319     if (vdev) {
    320         return virtio_bus_device_iommu_enabled(vdev) &&
    321             virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
    322     } else {
    323         return false;
    324     }
    325 }
    326 
    327 static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
    328                               hwaddr *plen, bool is_write)
    329 {
    330     if (!vhost_dev_has_iommu(dev)) {
    331         return cpu_physical_memory_map(addr, plen, is_write);
    332     } else {
    333         return (void *)(uintptr_t)addr;
    334     }
    335 }
    336 
    337 static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
    338                                hwaddr len, int is_write,
    339                                hwaddr access_len)
    340 {
    341     if (!vhost_dev_has_iommu(dev)) {
    342         cpu_physical_memory_unmap(buffer, len, is_write, access_len);
    343     }
    344 }
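
/*
 * With a vIOMMU in front of the device, the ring addresses handed to
 * the backend are IOVAs rather than host pointers: vhost_memory_map()
 * above returns the IOVA unchanged and the backend resolves it on
 * demand through the IOTLB miss path (vhost_device_iotlb_miss() below),
 * so there is nothing for vhost_memory_unmap() to undo in that case.
 */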
    345 
    346 static int vhost_verify_ring_part_mapping(void *ring_hva,
    347                                           uint64_t ring_gpa,
    348                                           uint64_t ring_size,
    349                                           void *reg_hva,
    350                                           uint64_t reg_gpa,
    351                                           uint64_t reg_size)
    352 {
    353     uint64_t hva_ring_offset;
    354     uint64_t ring_last = range_get_last(ring_gpa, ring_size);
    355     uint64_t reg_last = range_get_last(reg_gpa, reg_size);
    356 
    357     if (ring_last < reg_gpa || ring_gpa > reg_last) {
    358         return 0;
    359     }
    360     /* check that the whole ring is mapped */
    361     if (ring_last > reg_last) {
    362         return -ENOMEM;
    363     }
    364     /* check that ring's MemoryRegion wasn't replaced */
    365     hva_ring_offset = ring_gpa - reg_gpa;
    366     if (ring_hva != reg_hva + hva_ring_offset) {
    367         return -EBUSY;
    368     }
    369 
    370     return 0;
    371 }
    372 
    373 static int vhost_verify_ring_mappings(struct vhost_dev *dev,
    374                                       void *reg_hva,
    375                                       uint64_t reg_gpa,
    376                                       uint64_t reg_size)
    377 {
    378     int i, j;
    379     int r = 0;
    380     const char *part_name[] = {
    381         "descriptor table",
    382         "available ring",
    383         "used ring"
    384     };
    385 
    386     if (vhost_dev_has_iommu(dev)) {
    387         return 0;
    388     }
    389 
    390     for (i = 0; i < dev->nvqs; ++i) {
    391         struct vhost_virtqueue *vq = dev->vqs + i;
    392 
    393         if (vq->desc_phys == 0) {
    394             continue;
    395         }
    396 
    397         j = 0;
    398         r = vhost_verify_ring_part_mapping(
    399                 vq->desc, vq->desc_phys, vq->desc_size,
    400                 reg_hva, reg_gpa, reg_size);
    401         if (r) {
    402             break;
    403         }
    404 
    405         j++;
    406         r = vhost_verify_ring_part_mapping(
    407                 vq->avail, vq->avail_phys, vq->avail_size,
    408                 reg_hva, reg_gpa, reg_size);
    409         if (r) {
    410             break;
    411         }
    412 
    413         j++;
    414         r = vhost_verify_ring_part_mapping(
    415                 vq->used, vq->used_phys, vq->used_size,
    416                 reg_hva, reg_gpa, reg_size);
    417         if (r) {
    418             break;
    419         }
    420     }
    421 
    422     if (r == -ENOMEM) {
    423         error_report("Unable to map %s for ring %d", part_name[j], i);
    424     } else if (r == -EBUSY) {
    425         error_report("%s relocated for ring %d", part_name[j], i);
    426     }
    427     return r;
    428 }
    429 
    430 /*
    431  * vhost_section: identify sections needed for vhost access
    432  *
    433  * We only care about RAM sections here (where virtqueue and guest
    434  * internals accessed by virtio might live). If we find one, we still
    435  * allow the backend to potentially filter it out of our list.
    436  */
    437 static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
    438 {
    439     MemoryRegion *mr = section->mr;
    440 
    441     if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
    442         uint8_t dirty_mask = memory_region_get_dirty_log_mask(mr);
    443         uint8_t handled_dirty;
    444 
    445         /*
    446          * Kernel-based vhost doesn't handle any block that is doing
    447          * dirty-tracking other than migration, for which it has
    448          * specific logging support. However, for TCG the kernel never
    449          * gets involved anyway, so we can also ignore its
    450          * self-modifying code detection flags. However, a vhost-user
    451          * client could still confuse a TCG guest if it rewrites
    452          * executable memory that has already been translated.
    453          */
    454         handled_dirty = (1 << DIRTY_MEMORY_MIGRATION) |
    455             (1 << DIRTY_MEMORY_CODE);
    456 
    457         if (dirty_mask & ~handled_dirty) {
    458             trace_vhost_reject_section(mr->name, 1);
    459             return false;
    460         }
    461 
    462         if (dev->vhost_ops->vhost_backend_mem_section_filter &&
    463             !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {
    464             trace_vhost_reject_section(mr->name, 2);
    465             return false;
    466         }
    467 
    468         trace_vhost_section(mr->name);
    469         return true;
    470     } else {
    471         trace_vhost_reject_section(mr->name, 3);
    472         return false;
    473     }
    474 }
    475 
    476 static void vhost_begin(MemoryListener *listener)
    477 {
    478     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
    479                                          memory_listener);
    480     dev->tmp_sections = NULL;
    481     dev->n_tmp_sections = 0;
    482 }
    483 
    484 static void vhost_commit(MemoryListener *listener)
    485 {
    486     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
    487                                          memory_listener);
    488     MemoryRegionSection *old_sections;
    489     int n_old_sections;
    490     uint64_t log_size;
    491     size_t regions_size;
    492     int r;
    493     int i;
    494     bool changed = false;
    495 
    496     /* Note we can be called before the device is started, but then
    497      * starting the device calls set_mem_table, so we need to have
    498      * built the data structures.
    499      */
    500     old_sections = dev->mem_sections;
    501     n_old_sections = dev->n_mem_sections;
    502     dev->mem_sections = dev->tmp_sections;
    503     dev->n_mem_sections = dev->n_tmp_sections;
    504 
    505     if (dev->n_mem_sections != n_old_sections) {
    506         changed = true;
    507     } else {
    508         /* Same size, let's check the contents */
    509         for (int i = 0; i < n_old_sections; i++) {
    510             if (!MemoryRegionSection_eq(&old_sections[i],
    511                                         &dev->mem_sections[i])) {
    512                 changed = true;
    513                 break;
    514             }
    515         }
    516     }
    517 
    518     trace_vhost_commit(dev->started, changed);
    519     if (!changed) {
    520         goto out;
    521     }
    522 
    523     /* Rebuild the regions list from the new sections list */
    524     regions_size = offsetof(struct vhost_memory, regions) +
    525                        dev->n_mem_sections * sizeof dev->mem->regions[0];
    526     dev->mem = g_realloc(dev->mem, regions_size);
    527     dev->mem->nregions = dev->n_mem_sections;
    528     used_memslots = dev->mem->nregions;
    529     for (i = 0; i < dev->n_mem_sections; i++) {
    530         struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
    531         struct MemoryRegionSection *mrs = dev->mem_sections + i;
    532 
    533         cur_vmr->guest_phys_addr = mrs->offset_within_address_space;
    534         cur_vmr->memory_size     = int128_get64(mrs->size);
    535         cur_vmr->userspace_addr  =
    536             (uintptr_t)memory_region_get_ram_ptr(mrs->mr) +
    537             mrs->offset_within_region;
    538         cur_vmr->flags_padding   = 0;
    539     }
    540 
    541     if (!dev->started) {
    542         goto out;
    543     }
    544 
    545     for (i = 0; i < dev->mem->nregions; i++) {
    546         if (vhost_verify_ring_mappings(dev,
    547                        (void *)(uintptr_t)dev->mem->regions[i].userspace_addr,
    548                        dev->mem->regions[i].guest_phys_addr,
    549                        dev->mem->regions[i].memory_size)) {
    550             error_report("Verify ring failure on region %d", i);
    551             abort();
    552         }
    553     }
    554 
    555     if (!dev->log_enabled) {
    556         r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
    557         if (r < 0) {
    558             VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
    559         }
    560         goto out;
    561     }
    562     log_size = vhost_get_log_size(dev);
    563     /* We allocate an extra 4K bytes of log
    564      * to reduce the number of reallocations. */
    565 #define VHOST_LOG_BUFFER (0x1000 / sizeof *dev->log)
    566     /* To log more, must increase log size before table update. */
    567     if (dev->log_size < log_size) {
    568         vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER);
    569     }
    570     r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem);
    571     if (r < 0) {
    572         VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
    573     }
    574     /* To log less, can only decrease log size after table update. */
    575     if (dev->log_size > log_size + VHOST_LOG_BUFFER) {
    576         vhost_dev_log_resize(dev, log_size);
    577     }
    578 
    579 out:
    580     /* Unref the old list of sections; this must happen _after_ the
    581      * vhost_set_mem_table to ensure the client isn't still using the
    582      * section we're about to unref.
    583      */
    584     while (n_old_sections--) {
    585         memory_region_unref(old_sections[n_old_sections].mr);
    586     }
    587     g_free(old_sections);
    588     return;
    589 }
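
/*
 * The grow-before / shrink-after ordering above matters because, while
 * the set_mem_table call is in flight, the backend may still log
 * against whichever layout it currently holds: the log has to be large
 * enough for both the old and the new region set, which is exactly what
 * growing first and only shrinking after the table update guarantees.
 */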
    590 
    591 /* Adds the section data to the tmp_sections list.
    592  * It relies on the listener calling us in memory address order,
    593  * and for each region (via the _add and _nop methods), so that
    594  * neighbouring sections can be joined.
    595  */
    596 static void vhost_region_add_section(struct vhost_dev *dev,
    597                                      MemoryRegionSection *section)
    598 {
    599     bool need_add = true;
    600     uint64_t mrs_size = int128_get64(section->size);
    601     uint64_t mrs_gpa = section->offset_within_address_space;
    602     uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
    603                          section->offset_within_region;
    604     RAMBlock *mrs_rb = section->mr->ram_block;
    605 
    606     trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
    607                                    mrs_host);
    608 
    609     if (dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
    610         /* Round the section to its page size */
    611         /* First align the start down to a page boundary */
    612         size_t mrs_page = qemu_ram_pagesize(mrs_rb);
    613         uint64_t alignage = mrs_host & (mrs_page - 1);
    614         if (alignage) {
    615             mrs_host -= alignage;
    616             mrs_size += alignage;
    617             mrs_gpa  -= alignage;
    618         }
    619         /* Now align the size up to a page boundary */
    620         alignage = mrs_size & (mrs_page - 1);
    621         if (alignage) {
    622             mrs_size += mrs_page - alignage;
    623         }
    624         trace_vhost_region_add_section_aligned(section->mr->name, mrs_gpa,
    625                                                mrs_size, mrs_host);
    626     }
    627 
    628     if (dev->n_tmp_sections) {
    629         /* Since we already have at least one section, let's see if
    630          * this extends it; since we're scanning in order, we only
    631          * have to look at the last one, and the FlatView that calls
    632          * us shouldn't have overlaps.
    633          */
    634         MemoryRegionSection *prev_sec = dev->tmp_sections +
    635                                                (dev->n_tmp_sections - 1);
    636         uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
    637         uint64_t prev_size = int128_get64(prev_sec->size);
    638         uint64_t prev_gpa_end   = range_get_last(prev_gpa_start, prev_size);
    639         uint64_t prev_host_start =
    640                         (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
    641                         prev_sec->offset_within_region;
    642         uint64_t prev_host_end   = range_get_last(prev_host_start, prev_size);
    643 
    644         if (mrs_gpa <= (prev_gpa_end + 1)) {
    645             /* OK, looks like overlapping/intersecting - it's possible that
    646              * the rounding to page sizes has made them overlap, but they should
    647              * match up in the same RAMBlock if they do.
    648              */
    649             if (mrs_gpa < prev_gpa_start) {
    650                 error_report("%s:Section '%s' rounded to %"PRIx64
    651                              " prior to previous '%s' %"PRIx64,
    652                              __func__, section->mr->name, mrs_gpa,
    653                              prev_sec->mr->name, prev_gpa_start);
    654                 /* A way to cleanly fail here would be better */
    655                 return;
    656             }
    657             /* Offset from the start of the previous GPA to this GPA */
    658             size_t offset = mrs_gpa - prev_gpa_start;
    659 
    660             if (prev_host_start + offset == mrs_host &&
    661                 section->mr == prev_sec->mr &&
    662                 (!dev->vhost_ops->vhost_backend_can_merge ||
    663                  dev->vhost_ops->vhost_backend_can_merge(dev,
    664                     mrs_host, mrs_size,
    665                     prev_host_start, prev_size))) {
    666                 uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
    667                 need_add = false;
    668                 prev_sec->offset_within_address_space =
    669                     MIN(prev_gpa_start, mrs_gpa);
    670                 prev_sec->offset_within_region =
    671                     MIN(prev_host_start, mrs_host) -
    672                     (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
    673                 prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
    674                                                mrs_host));
    675                 trace_vhost_region_add_section_merge(section->mr->name,
    676                                         int128_get64(prev_sec->size),
    677                                         prev_sec->offset_within_address_space,
    678                                         prev_sec->offset_within_region);
    679             } else {
    680                 /* adjoining regions are fine, but overlapping ones with
    681                  * different blocks/offsets shouldn't happen
    682                  */
    683                 if (mrs_gpa != prev_gpa_end + 1) {
    684                     error_report("%s: Overlapping but not coherent sections "
    685                                  "at %"PRIx64,
    686                                  __func__, mrs_gpa);
    687                     return;
    688                 }
    689             }
    690         }
    691     }
    692 
    693     if (need_add) {
    694         ++dev->n_tmp_sections;
    695         dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
    696                                     dev->n_tmp_sections);
    697         dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
    698         /* The flatview isn't stable and we don't use it; making it NULL
    699          * means we can memcmp the list.
    700          */
    701         dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
    702         memory_region_ref(section->mr);
    703     }
    704 }
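
/*
 * One illustrative case for the merge above: a RAM MemoryRegion that
 * the flat view has split into two sections (for example around a small
 * hole) can, after the vhost-user page-size rounding, become contiguous
 * again in both GPA and HVA.  The second section is then folded into
 * the first instead of being appended, keeping dev->mem->nregions, and
 * therefore the backend's memslot usage, down.
 */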
    705 
    706 /* Used for both add and nop callbacks */
    707 static void vhost_region_addnop(MemoryListener *listener,
    708                                 MemoryRegionSection *section)
    709 {
    710     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
    711                                          memory_listener);
    712 
    713     if (!vhost_section(dev, section)) {
    714         return;
    715     }
    716     vhost_region_add_section(dev, section);
    717 }
    718 
    719 static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
    720 {
    721     struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n);
    722     struct vhost_dev *hdev = iommu->hdev;
    723     hwaddr iova = iotlb->iova + iommu->iommu_offset;
    724 
    725     if (vhost_backend_invalidate_device_iotlb(hdev, iova,
    726                                               iotlb->addr_mask + 1)) {
    727         error_report("Fail to invalidate device iotlb");
    728     }
    729 }
    730 
    731 static void vhost_iommu_region_add(MemoryListener *listener,
    732                                    MemoryRegionSection *section)
    733 {
    734     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
    735                                          iommu_listener);
    736     struct vhost_iommu *iommu;
    737     Int128 end;
    738     int iommu_idx;
    739     IOMMUMemoryRegion *iommu_mr;
    740     int ret;
    741 
    742     if (!memory_region_is_iommu(section->mr)) {
    743         return;
    744     }
    745 
    746     iommu_mr = IOMMU_MEMORY_REGION(section->mr);
    747 
    748     iommu = g_malloc0(sizeof(*iommu));
    749     end = int128_add(int128_make64(section->offset_within_region),
    750                      section->size);
    751     end = int128_sub(end, int128_one());
    752     iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
    753                                                    MEMTXATTRS_UNSPECIFIED);
    754     iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify,
    755                         IOMMU_NOTIFIER_DEVIOTLB_UNMAP,
    756                         section->offset_within_region,
    757                         int128_get64(end),
    758                         iommu_idx);
    759     iommu->mr = section->mr;
    760     iommu->iommu_offset = section->offset_within_address_space -
    761                           section->offset_within_region;
    762     iommu->hdev = dev;
    763     ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
    764     if (ret) {
    765         /*
    766          * Some vIOMMUs do not support dev-iotlb yet.  If so, try to use the
    767          * UNMAP legacy message
    768          */
    769         iommu->n.notifier_flags = IOMMU_NOTIFIER_UNMAP;
    770         memory_region_register_iommu_notifier(section->mr, &iommu->n,
    771                                               &error_fatal);
    772     }
    773     QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next);
    774     /* TODO: can replay help performance here? */
    775 }
    776 
    777 static void vhost_iommu_region_del(MemoryListener *listener,
    778                                    MemoryRegionSection *section)
    779 {
    780     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
    781                                          iommu_listener);
    782     struct vhost_iommu *iommu;
    783 
    784     if (!memory_region_is_iommu(section->mr)) {
    785         return;
    786     }
    787 
    788     QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) {
    789         if (iommu->mr == section->mr &&
    790             iommu->n.start == section->offset_within_region) {
    791             memory_region_unregister_iommu_notifier(iommu->mr,
    792                                                     &iommu->n);
    793             QLIST_REMOVE(iommu, iommu_next);
    794             g_free(iommu);
    795             break;
    796         }
    797     }
    798 }
    799 
    800 static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
    801                                     struct vhost_virtqueue *vq,
    802                                     unsigned idx, bool enable_log)
    803 {
    804     struct vhost_vring_addr addr;
    805     int r;
    806     memset(&addr, 0, sizeof(struct vhost_vring_addr));
    807 
    808     if (dev->vhost_ops->vhost_vq_get_addr) {
    809         r = dev->vhost_ops->vhost_vq_get_addr(dev, &addr, vq);
    810         if (r < 0) {
    811             VHOST_OPS_DEBUG(r, "vhost_vq_get_addr failed");
    812             return r;
    813         }
    814     } else {
    815         addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc;
    816         addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail;
    817         addr.used_user_addr = (uint64_t)(unsigned long)vq->used;
    818     }
    819     addr.index = idx;
    820     addr.log_guest_addr = vq->used_phys;
    821     addr.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0;
    822     r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr);
    823     if (r < 0) {
    824         VHOST_OPS_DEBUG(r, "vhost_set_vring_addr failed");
    825     }
    826     return r;
    827 }
    828 
    829 static int vhost_dev_set_features(struct vhost_dev *dev,
    830                                   bool enable_log)
    831 {
    832     uint64_t features = dev->acked_features;
    833     int r;
    834     if (enable_log) {
    835         features |= 0x1ULL << VHOST_F_LOG_ALL;
    836     }
    837     if (!vhost_dev_has_iommu(dev)) {
    838         features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM);
    839     }
    840     if (dev->vhost_ops->vhost_force_iommu) {
    841         if (dev->vhost_ops->vhost_force_iommu(dev) == true) {
    842             features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM;
    843        }
    844     }
    845     r = dev->vhost_ops->vhost_set_features(dev, features);
    846     if (r < 0) {
    847         VHOST_OPS_DEBUG(r, "vhost_set_features failed");
    848         goto out;
    849     }
    850     if (dev->vhost_ops->vhost_set_backend_cap) {
    851         r = dev->vhost_ops->vhost_set_backend_cap(dev);
    852         if (r < 0) {
    853             VHOST_OPS_DEBUG(r, "vhost_set_backend_cap failed");
    854             goto out;
    855         }
    856     }
    857 
    858 out:
    859     return r;
    860 }
    861 
    862 static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
    863 {
    864     int r, i, idx;
    865     hwaddr addr;
    866 
    867     r = vhost_dev_set_features(dev, enable_log);
    868     if (r < 0) {
    869         goto err_features;
    870     }
    871     for (i = 0; i < dev->nvqs; ++i) {
    872         idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
    873         addr = virtio_queue_get_desc_addr(dev->vdev, idx);
    874         if (!addr) {
    875             /*
    876              * The queue might not be ready for start. If that is
    877              * the case, there is no reason to continue the process.
    878              * Similar logic is used by the vhost_virtqueue_start()
    879              * routine.
    880              */
    881             continue;
    882         }
    883         r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
    884                                      enable_log);
    885         if (r < 0) {
    886             goto err_vq;
    887         }
    888     }
    889     return 0;
    890 err_vq:
    891     for (; i >= 0; --i) {
    892         idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i);
    893         addr = virtio_queue_get_desc_addr(dev->vdev, idx);
    894         if (!addr) {
    895             continue;
    896         }
    897         vhost_virtqueue_set_addr(dev, dev->vqs + i, idx,
    898                                  dev->log_enabled);
    899     }
    900     vhost_dev_set_features(dev, dev->log_enabled);
    901 err_features:
    902     return r;
    903 }
    904 
    905 static int vhost_migration_log(MemoryListener *listener, bool enable)
    906 {
    907     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
    908                                          memory_listener);
    909     int r;
    910     if (enable == dev->log_enabled) {
    911         return 0;
    912     }
    913     if (!dev->started) {
    914         dev->log_enabled = enable;
    915         return 0;
    916     }
    917 
    918     r = 0;
    919     if (!enable) {
    920         r = vhost_dev_set_log(dev, false);
    921         if (r < 0) {
    922             goto check_dev_state;
    923         }
    924         vhost_log_put(dev, false);
    925     } else {
    926         vhost_dev_log_resize(dev, vhost_get_log_size(dev));
    927         r = vhost_dev_set_log(dev, true);
    928         if (r < 0) {
    929             goto check_dev_state;
    930         }
    931     }
    932 
    933 check_dev_state:
    934     dev->log_enabled = enable;
    935     /*
    936      * vhost-user-* devices could change their state during log
    937      * initialization due to disconnect. So check dev state after
    938      * vhost communication.
    939      */
    940     if (!dev->started) {
    941         /*
    942          * Since the device is in the stopped state, it is okay for
    943          * migration. Return success.
    944          */
    945         r = 0;
    946     }
    947     if (r) {
    948         /* An error occurred. */
    949         dev->log_enabled = false;
    950     }
    951 
    952     return r;
    953 }
    954 
    955 static void vhost_log_global_start(MemoryListener *listener)
    956 {
    957     int r;
    958 
    959     r = vhost_migration_log(listener, true);
    960     if (r < 0) {
    961         abort();
    962     }
    963 }
    964 
    965 static void vhost_log_global_stop(MemoryListener *listener)
    966 {
    967     int r;
    968 
    969     r = vhost_migration_log(listener, false);
    970     if (r < 0) {
    971         abort();
    972     }
    973 }
    974 
    975 static void vhost_log_start(MemoryListener *listener,
    976                             MemoryRegionSection *section,
    977                             int old, int new)
    978 {
    979     /* FIXME: implement */
    980 }
    981 
    982 static void vhost_log_stop(MemoryListener *listener,
    983                            MemoryRegionSection *section,
    984                            int old, int new)
    985 {
    986     /* FIXME: implement */
    987 }
    988 
    989 /* The vhost driver natively knows how to handle the vrings of
    990  * non-cross-endian legacy devices and modern devices. Only legacy devices
    991  * exposed to a bi-endian guest may require the vhost driver to use a
    992  * specific endianness.
    993  */
    994 static inline bool vhost_needs_vring_endian(VirtIODevice *vdev)
    995 {
    996     if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
    997         return false;
    998     }
    999 #if HOST_BIG_ENDIAN
   1000     return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE;
   1001 #else
   1002     return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG;
   1003 #endif
   1004 }
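
/*
 * Spelled out, the check above reduces to:
 *   VIRTIO 1.0 device                                -> false
 *   legacy device seen as big-endian on a LE host    -> true
 *   legacy device seen as little-endian on a BE host -> true
 *   everything else                                  -> false
 * i.e. the vring-endian call below is only needed when a legacy
 * device's endianness differs from the host's native one (VIRTIO 1.0
 * vrings are always little-endian).
 */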
   1005 
   1006 static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
   1007                                                    bool is_big_endian,
   1008                                                    int vhost_vq_index)
   1009 {
   1010     int r;
   1011     struct vhost_vring_state s = {
   1012         .index = vhost_vq_index,
   1013         .num = is_big_endian
   1014     };
   1015 
   1016     r = dev->vhost_ops->vhost_set_vring_endian(dev, &s);
   1017     if (r < 0) {
   1018         VHOST_OPS_DEBUG(r, "vhost_set_vring_endian failed");
   1019     }
   1020     return r;
   1021 }
   1022 
   1023 static int vhost_memory_region_lookup(struct vhost_dev *hdev,
   1024                                       uint64_t gpa, uint64_t *uaddr,
   1025                                       uint64_t *len)
   1026 {
   1027     int i;
   1028 
   1029     for (i = 0; i < hdev->mem->nregions; i++) {
   1030         struct vhost_memory_region *reg = hdev->mem->regions + i;
   1031 
   1032         if (gpa >= reg->guest_phys_addr &&
   1033             reg->guest_phys_addr + reg->memory_size > gpa) {
   1034             *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr;
   1035             *len = reg->guest_phys_addr + reg->memory_size - gpa;
   1036             return 0;
   1037         }
   1038     }
   1039 
   1040     return -EFAULT;
   1041 }
   1042 
   1043 int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
   1044 {
   1045     IOMMUTLBEntry iotlb;
   1046     uint64_t uaddr, len;
   1047     int ret = -EFAULT;
   1048 
   1049     RCU_READ_LOCK_GUARD();
   1050 
   1051     trace_vhost_iotlb_miss(dev, 1);
   1052 
   1053     iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
   1054                                           iova, write,
   1055                                           MEMTXATTRS_UNSPECIFIED);
   1056     if (iotlb.target_as != NULL) {
   1057         ret = vhost_memory_region_lookup(dev, iotlb.translated_addr,
   1058                                          &uaddr, &len);
   1059         if (ret) {
   1060             trace_vhost_iotlb_miss(dev, 3);
   1061             error_report("Fail to lookup the translated address "
   1062                          "%"PRIx64, iotlb.translated_addr);
   1063             goto out;
   1064         }
   1065 
   1066         len = MIN(iotlb.addr_mask + 1, len);
   1067         iova = iova & ~iotlb.addr_mask;
   1068 
   1069         ret = vhost_backend_update_device_iotlb(dev, iova, uaddr,
   1070                                                 len, iotlb.perm);
   1071         if (ret) {
   1072             trace_vhost_iotlb_miss(dev, 4);
   1073             error_report("Fail to update device iotlb");
   1074             goto out;
   1075         }
   1076     }
   1077 
   1078     trace_vhost_iotlb_miss(dev, 2);
   1079 
   1080 out:
   1081     return ret;
   1082 }
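
/*
 * A sketch of the round trip implemented above: the backend hits an
 * IOVA it has no translation for and reports a miss; QEMU resolves the
 * IOVA through the virtio device's DMA address space, looks up the
 * userspace address backing the translated GPA, and pushes an IOTLB
 * update (iova, uaddr, len, perm) back to the backend so it can retry
 * the access.  Invalidations travel the other way via
 * vhost_iommu_unmap_notify() above.
 */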
   1083 
   1084 int vhost_virtqueue_start(struct vhost_dev *dev,
   1085                           struct VirtIODevice *vdev,
   1086                           struct vhost_virtqueue *vq,
   1087                           unsigned idx)
   1088 {
   1089     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
   1090     VirtioBusState *vbus = VIRTIO_BUS(qbus);
   1091     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
   1092     hwaddr s, l, a;
   1093     int r;
   1094     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
   1095     struct vhost_vring_file file = {
   1096         .index = vhost_vq_index
   1097     };
   1098     struct vhost_vring_state state = {
   1099         .index = vhost_vq_index
   1100     };
   1101     struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
   1102 
   1103     a = virtio_queue_get_desc_addr(vdev, idx);
   1104     if (a == 0) {
   1105         /* Queue might not be ready for start */
   1106         return 0;
   1107     }
   1108 
   1109     vq->num = state.num = virtio_queue_get_num(vdev, idx);
   1110     r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
   1111     if (r) {
   1112         VHOST_OPS_DEBUG(r, "vhost_set_vring_num failed");
   1113         return r;
   1114     }
   1115 
   1116     state.num = virtio_queue_get_last_avail_idx(vdev, idx);
   1117     r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
   1118     if (r) {
   1119         VHOST_OPS_DEBUG(r, "vhost_set_vring_base failed");
   1120         return r;
   1121     }
   1122 
   1123     if (vhost_needs_vring_endian(vdev)) {
   1124         r = vhost_virtqueue_set_vring_endian_legacy(dev,
   1125                                                     virtio_is_big_endian(vdev),
   1126                                                     vhost_vq_index);
   1127         if (r) {
   1128             return r;
   1129         }
   1130     }
   1131 
   1132     vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx);
   1133     vq->desc_phys = a;
   1134     vq->desc = vhost_memory_map(dev, a, &l, false);
   1135     if (!vq->desc || l != s) {
   1136         r = -ENOMEM;
   1137         goto fail_alloc_desc;
   1138     }
   1139     vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
   1140     vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
   1141     vq->avail = vhost_memory_map(dev, a, &l, false);
   1142     if (!vq->avail || l != s) {
   1143         r = -ENOMEM;
   1144         goto fail_alloc_avail;
   1145     }
   1146     vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
   1147     vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
   1148     vq->used = vhost_memory_map(dev, a, &l, true);
   1149     if (!vq->used || l != s) {
   1150         r = -ENOMEM;
   1151         goto fail_alloc_used;
   1152     }
   1153 
   1154     r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
   1155     if (r < 0) {
   1156         goto fail_alloc;
   1157     }
   1158 
   1159     file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
   1160     r = dev->vhost_ops->vhost_set_vring_kick(dev, &file);
   1161     if (r) {
   1162         VHOST_OPS_DEBUG(r, "vhost_set_vring_kick failed");
   1163         goto fail_kick;
   1164     }
   1165 
   1166     /* Clear and discard previous events if any. */
   1167     event_notifier_test_and_clear(&vq->masked_notifier);
   1168 
   1169     /* Init vring in unmasked state, unless guest_notifier_mask
   1170      * will do it later.
   1171      */
   1172     if (!vdev->use_guest_notifier_mask) {
   1173         /* TODO: check and handle errors. */
   1174         vhost_virtqueue_mask(dev, vdev, idx, false);
   1175     }
   1176 
   1177     if (k->query_guest_notifiers &&
   1178         k->query_guest_notifiers(qbus->parent) &&
   1179         virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) {
   1180         file.fd = -1;
   1181         r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
   1182         if (r) {
   1183             goto fail_vector;
   1184         }
   1185     }
   1186 
   1187     return 0;
   1188 
   1189 fail_vector:
   1190 fail_kick:
   1191 fail_alloc:
   1192     vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
   1193                        0, 0);
   1194 fail_alloc_used:
   1195     vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
   1196                        0, 0);
   1197 fail_alloc_avail:
   1198     vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
   1199                        0, 0);
   1200 fail_alloc_desc:
   1201     return r;
   1202 }
   1203 
   1204 void vhost_virtqueue_stop(struct vhost_dev *dev,
   1205                           struct VirtIODevice *vdev,
   1206                           struct vhost_virtqueue *vq,
   1207                           unsigned idx)
   1208 {
   1209     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
   1210     struct vhost_vring_state state = {
   1211         .index = vhost_vq_index,
   1212     };
   1213     int r;
   1214 
   1215     if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
   1216         /* Don't stop a virtqueue that might not have been started */
   1217         return;
   1218     }
   1219 
   1220     r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
   1221     if (r < 0) {
   1222         VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
   1223         /* Connection to the backend is broken, so let's sync internal
   1224          * last avail idx to the device used idx.
   1225          */
   1226         virtio_queue_restore_last_avail_idx(vdev, idx);
   1227     } else {
   1228         virtio_queue_set_last_avail_idx(vdev, idx, state.num);
   1229     }
   1230     virtio_queue_invalidate_signalled_used(vdev, idx);
   1231     virtio_queue_update_used_idx(vdev, idx);
   1232 
   1233     /* In the cross-endian case, we need to reset the vring endianness to
   1234      * native, as legacy devices expect by default.
   1235      */
   1236     if (vhost_needs_vring_endian(vdev)) {
   1237         vhost_virtqueue_set_vring_endian_legacy(dev,
   1238                                                 !virtio_is_big_endian(vdev),
   1239                                                 vhost_vq_index);
   1240     }
   1241 
   1242     vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
   1243                        1, virtio_queue_get_used_size(vdev, idx));
   1244     vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
   1245                        0, virtio_queue_get_avail_size(vdev, idx));
   1246     vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
   1247                        0, virtio_queue_get_desc_size(vdev, idx));
   1248 }
   1249 
   1250 static void vhost_eventfd_add(MemoryListener *listener,
   1251                               MemoryRegionSection *section,
   1252                               bool match_data, uint64_t data, EventNotifier *e)
   1253 {
   1254 }
   1255 
   1256 static void vhost_eventfd_del(MemoryListener *listener,
   1257                               MemoryRegionSection *section,
   1258                               bool match_data, uint64_t data, EventNotifier *e)
   1259 {
   1260 }
   1261 
   1262 static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
   1263                                                 int n, uint32_t timeout)
   1264 {
   1265     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
   1266     struct vhost_vring_state state = {
   1267         .index = vhost_vq_index,
   1268         .num = timeout,
   1269     };
   1270     int r;
   1271 
   1272     if (!dev->vhost_ops->vhost_set_vring_busyloop_timeout) {
   1273         return -EINVAL;
   1274     }
   1275 
   1276     r = dev->vhost_ops->vhost_set_vring_busyloop_timeout(dev, &state);
   1277     if (r) {
   1278         VHOST_OPS_DEBUG(r, "vhost_set_vring_busyloop_timeout failed");
   1279         return r;
   1280     }
   1281 
   1282     return 0;
   1283 }
   1284 
   1285 static void vhost_virtqueue_error_notifier(EventNotifier *n)
   1286 {
   1287     struct vhost_virtqueue *vq = container_of(n, struct vhost_virtqueue,
   1288                                               error_notifier);
   1289     struct vhost_dev *dev = vq->dev;
   1290     int index = vq - dev->vqs;
   1291 
   1292     if (event_notifier_test_and_clear(n) && dev->vdev) {
   1293         VHOST_OPS_DEBUG(-EINVAL,  "vhost vring error in virtqueue %d",
   1294                         dev->vq_index + index);
   1295     }
   1296 }
   1297 
   1298 static int vhost_virtqueue_init(struct vhost_dev *dev,
   1299                                 struct vhost_virtqueue *vq, int n)
   1300 {
   1301     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n);
   1302     struct vhost_vring_file file = {
   1303         .index = vhost_vq_index,
   1304     };
   1305     int r = event_notifier_init(&vq->masked_notifier, 0);
   1306     if (r < 0) {
   1307         return r;
   1308     }
   1309 
   1310     file.fd = event_notifier_get_wfd(&vq->masked_notifier);
   1311     r = dev->vhost_ops->vhost_set_vring_call(dev, &file);
   1312     if (r) {
   1313         VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
   1314         goto fail_call;
   1315     }
   1316 
   1317     vq->dev = dev;
   1318 
   1319     if (dev->vhost_ops->vhost_set_vring_err) {
   1320         r = event_notifier_init(&vq->error_notifier, 0);
   1321         if (r < 0) {
   1322             goto fail_call;
   1323         }
   1324 
   1325         file.fd = event_notifier_get_fd(&vq->error_notifier);
   1326         r = dev->vhost_ops->vhost_set_vring_err(dev, &file);
   1327         if (r) {
   1328             VHOST_OPS_DEBUG(r, "vhost_set_vring_err failed");
   1329             goto fail_err;
   1330         }
   1331 
   1332         event_notifier_set_handler(&vq->error_notifier,
   1333                                    vhost_virtqueue_error_notifier);
   1334     }
   1335 
   1336     return 0;
   1337 
   1338 fail_err:
   1339     event_notifier_cleanup(&vq->error_notifier);
   1340 fail_call:
   1341     event_notifier_cleanup(&vq->masked_notifier);
   1342     return r;
   1343 }
   1344 
   1345 static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
   1346 {
   1347     event_notifier_cleanup(&vq->masked_notifier);
   1348     if (vq->dev->vhost_ops->vhost_set_vring_err) {
   1349         event_notifier_set_handler(&vq->error_notifier, NULL);
   1350         event_notifier_cleanup(&vq->error_notifier);
   1351     }
   1352 }
   1353 
   1354 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
   1355                    VhostBackendType backend_type, uint32_t busyloop_timeout,
   1356                    Error **errp)
   1357 {
   1358     uint64_t features;
   1359     int i, r, n_initialized_vqs = 0;
   1360 
   1361     hdev->vdev = NULL;
   1362     hdev->migration_blocker = NULL;
   1363 
   1364     r = vhost_set_backend_type(hdev, backend_type);
   1365     assert(r >= 0);
   1366 
   1367     r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp);
   1368     if (r < 0) {
   1369         goto fail;
   1370     }
   1371 
   1372     r = hdev->vhost_ops->vhost_set_owner(hdev);
   1373     if (r < 0) {
   1374         error_setg_errno(errp, -r, "vhost_set_owner failed");
   1375         goto fail;
   1376     }
   1377 
   1378     r = hdev->vhost_ops->vhost_get_features(hdev, &features);
   1379     if (r < 0) {
   1380         error_setg_errno(errp, -r, "vhost_get_features failed");
   1381         goto fail;
   1382     }
   1383 
   1384     for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
   1385         r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
   1386         if (r < 0) {
   1387             error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i);
   1388             goto fail;
   1389         }
   1390     }
   1391 
   1392     if (busyloop_timeout) {
   1393         for (i = 0; i < hdev->nvqs; ++i) {
   1394             r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
   1395                                                      busyloop_timeout);
   1396             if (r < 0) {
   1397                 error_setg_errno(errp, -r, "Failed to set busyloop timeout");
   1398                 goto fail_busyloop;
   1399             }
   1400         }
   1401     }
   1402 
   1403     hdev->features = features;
   1404 
   1405     hdev->memory_listener = (MemoryListener) {
   1406         .name = "vhost",
   1407         .begin = vhost_begin,
   1408         .commit = vhost_commit,
   1409         .region_add = vhost_region_addnop,
   1410         .region_nop = vhost_region_addnop,
   1411         .log_start = vhost_log_start,
   1412         .log_stop = vhost_log_stop,
   1413         .log_sync = vhost_log_sync,
   1414         .log_global_start = vhost_log_global_start,
   1415         .log_global_stop = vhost_log_global_stop,
   1416         .eventfd_add = vhost_eventfd_add,
   1417         .eventfd_del = vhost_eventfd_del,
   1418         .priority = 10
   1419     };
   1420 
   1421     hdev->iommu_listener = (MemoryListener) {
   1422         .name = "vhost-iommu",
   1423         .region_add = vhost_iommu_region_add,
   1424         .region_del = vhost_iommu_region_del,
   1425     };
   1426 
   1427     if (hdev->migration_blocker == NULL) {
   1428         if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
   1429             error_setg(&hdev->migration_blocker,
   1430                        "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
   1431         } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_alloc_check()) {
   1432             error_setg(&hdev->migration_blocker,
   1433                        "Migration disabled: failed to allocate shared memory");
   1434         }
   1435     }
   1436 
   1437     if (hdev->migration_blocker != NULL) {
   1438         r = migrate_add_blocker(hdev->migration_blocker, errp);
   1439         if (r < 0) {
   1440             error_free(hdev->migration_blocker);
   1441             goto fail_busyloop;
   1442         }
   1443     }
   1444 
   1445     hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
   1446     hdev->n_mem_sections = 0;
   1447     hdev->mem_sections = NULL;
   1448     hdev->log = NULL;
   1449     hdev->log_size = 0;
   1450     hdev->log_enabled = false;
   1451     hdev->started = false;
   1452     memory_listener_register(&hdev->memory_listener, &address_space_memory);
   1453     QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
   1454 
   1455     if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) {
   1456         error_setg(errp, "vhost backend memory slots limit is less"
   1457                    " than the current number of memory slots");
   1458         r = -EINVAL;
   1459         goto fail_busyloop;
   1460     }
   1461 
   1462     return 0;
   1463 
   1464 fail_busyloop:
   1465     if (busyloop_timeout) {
   1466         while (--i >= 0) {
   1467             vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
   1468         }
   1469     }
   1470 fail:
   1471     hdev->nvqs = n_initialized_vqs;
   1472     vhost_dev_cleanup(hdev);
   1473     return r;
   1474 }
   1475 
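        /* Release everything acquired by vhost_dev_init().  Also used on its
         * error paths, where hdev->nvqs has been trimmed to the number of
         * successfully initialised virtqueues. */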
   1476 void vhost_dev_cleanup(struct vhost_dev *hdev)
   1477 {
   1478     int i;
   1479 
   1480     trace_vhost_dev_cleanup(hdev);
   1481 
   1482     for (i = 0; i < hdev->nvqs; ++i) {
   1483         vhost_virtqueue_cleanup(hdev->vqs + i);
   1484     }
   1485     if (hdev->mem) {
   1486         /* these are only safe after a successful init */
   1487         memory_listener_unregister(&hdev->memory_listener);
   1488         QLIST_REMOVE(hdev, entry);
   1489     }
   1490     if (hdev->migration_blocker) {
   1491         migrate_del_blocker(hdev->migration_blocker);
   1492         error_free(hdev->migration_blocker);
   1493     }
   1494     g_free(hdev->mem);
   1495     g_free(hdev->mem_sections);
   1496     if (hdev->vhost_ops) {
   1497         hdev->vhost_ops->vhost_backend_cleanup(hdev);
   1498     }
   1499     assert(!hdev->log);
   1500 
   1501     memset(hdev, 0, sizeof(struct vhost_dev));
   1502 }
   1503 
   1504 /* Stop processing guest IO notifications in QEMU.
   1505  * Start processing them in the vhost backend instead.
   1506  */
   1507 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
   1508 {
   1509     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
   1510     int i, r, e;
   1511 
   1512     /* We will pass the notifiers to the kernel; make sure that QEMU
   1513      * doesn't interfere.
   1514      */
   1515     r = virtio_device_grab_ioeventfd(vdev);
   1516     if (r < 0) {
   1517         error_report("binding does not support host notifiers");
   1518         goto fail;
   1519     }
   1520 
   1521     for (i = 0; i < hdev->nvqs; ++i) {
   1522         r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
   1523                                          true);
   1524         if (r < 0) {
   1525             error_report("vhost VQ %d notifier binding failed: %d", i, -r);
   1526             goto fail_vq;
   1527         }
   1528     }
   1529 
   1530     return 0;
   1531 fail_vq:
   1532     while (--i >= 0) {
   1533         e = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
   1534                                          false);
   1535         if (e < 0) {
   1536             error_report("vhost VQ %d notifier cleanup error: %d", i, -e);
   1537         }
   1538         assert(e >= 0);
   1539         virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
   1540     }
   1541     virtio_device_release_ioeventfd(vdev);
   1542 fail:
   1543     return r;
   1544 }
   1545 
   1546 /* Stop processing guest IO notifications in vhost.
   1547  * Start processing them in QEMU.
   1548  * This might actually run the QEMU handlers right away,
   1549  * so virtio in QEMU must be completely set up when this is called.
   1550  */
   1551 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
   1552 {
   1553     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
   1554     int i, r;
   1555 
   1556     for (i = 0; i < hdev->nvqs; ++i) {
   1557         r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
   1558                                          false);
   1559         if (r < 0) {
   1560             error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
   1561         }
   1562         assert(r >= 0);
   1563         virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
   1564     }
   1565     virtio_device_release_ioeventfd(vdev);
   1566 }
   1567 
   1568 /* Test and clear event pending status.
   1569  * Should be called after unmask to avoid losing events.
   1570  */
   1571 bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
   1572 {
   1573     struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
   1574     assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
   1575     return event_notifier_test_and_clear(&vq->masked_notifier);
   1576 }
   1577 
   1578 /* Mask/unmask events from this vq. */
   1579 void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
   1580                          bool mask)
   1581 {
   1582     struct VirtQueue *vvq = virtio_get_queue(vdev, n);
   1583     int r, index = n - hdev->vq_index;
   1584     struct vhost_vring_file file;
   1585 
   1586     /* should only be called after backend is connected */
   1587     assert(hdev->vhost_ops);
   1588 
   1589     if (mask) {
   1590         assert(vdev->use_guest_notifier_mask);
   1591         file.fd = event_notifier_get_wfd(&hdev->vqs[index].masked_notifier);
   1592     } else {
   1593         file.fd = event_notifier_get_wfd(virtio_queue_get_guest_notifier(vvq));
   1594     }
   1595 
   1596     file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
   1597     r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
   1598     if (r < 0) {
   1599         VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
   1600     }
   1601 }
   1602 
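        /* Return @features with every bit named in @feature_bits cleared unless
         * the backend also advertised it in hdev->features.  @feature_bits is an
         * array of bit numbers terminated by VHOST_INVALID_FEATURE_BIT, e.g.
         * (hypothetical caller-side table):
         *
         *     static const int my_feature_bits[] = {
         *         VIRTIO_F_VERSION_1,
         *         VHOST_INVALID_FEATURE_BIT
         *     };
         */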
   1603 uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
   1604                             uint64_t features)
   1605 {
   1606     const int *bit = feature_bits;
   1607     while (*bit != VHOST_INVALID_FEATURE_BIT) {
   1608         uint64_t bit_mask = (1ULL << *bit);
   1609         if (!(hdev->features & bit_mask)) {
   1610             features &= ~bit_mask;
   1611         }
   1612         bit++;
   1613     }
   1614     return features;
   1615 }
   1616 
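        /* Latch into hdev->acked_features every bit of @features that appears in
         * the @feature_bits list. */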
   1617 void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
   1618                         uint64_t features)
   1619 {
   1620     const int *bit = feature_bits;
   1621     while (*bit != VHOST_INVALID_FEATURE_BIT) {
   1622         uint64_t bit_mask = (1ULL << *bit);
   1623         if (features & bit_mask) {
   1624             hdev->acked_features |= bit_mask;
   1625         }
   1626         bit++;
   1627     }
   1628 }
   1629 
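        /* Read the device config space via the backend's vhost_get_config hook;
         * backends without one get -ENOSYS. */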
   1630 int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
   1631                          uint32_t config_len, Error **errp)
   1632 {
   1633     assert(hdev->vhost_ops);
   1634 
   1635     if (hdev->vhost_ops->vhost_get_config) {
   1636         return hdev->vhost_ops->vhost_get_config(hdev, config, config_len,
   1637                                                  errp);
   1638     }
   1639 
   1640     error_setg(errp, "vhost_get_config not implemented");
   1641     return -ENOSYS;
   1642 }
   1643 
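        /* Write @size bytes of config space at @offset via the backend's
         * vhost_set_config hook; backends without one get -ENOSYS. */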
   1644 int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data,
   1645                          uint32_t offset, uint32_t size, uint32_t flags)
   1646 {
   1647     assert(hdev->vhost_ops);
   1648 
   1649     if (hdev->vhost_ops->vhost_set_config) {
   1650         return hdev->vhost_ops->vhost_set_config(hdev, data, offset,
   1651                                                  size, flags);
   1652     }
   1653 
   1654     return -ENOSYS;
   1655 }
   1656 
   1657 void vhost_dev_set_config_notifier(struct vhost_dev *hdev,
   1658                                    const VhostDevConfigOps *ops)
   1659 {
   1660     hdev->config_ops = ops;
   1661 }
   1662 
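        /* Unmap and close the inflight region, if one was ever allocated. */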
   1663 void vhost_dev_free_inflight(struct vhost_inflight *inflight)
   1664 {
   1665     if (inflight && inflight->addr) {
   1666         qemu_memfd_free(inflight->addr, inflight->size, inflight->fd);
   1667         inflight->addr = NULL;
   1668         inflight->fd = -1;
   1669     }
   1670 }
   1671 
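        /* Allocate a sealed memfd of @new_size bytes for the inflight region;
         * only once that succeeds is the previous mapping released and replaced. */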
   1672 static int vhost_dev_resize_inflight(struct vhost_inflight *inflight,
   1673                                      uint64_t new_size)
   1674 {
   1675     Error *err = NULL;
   1676     int fd = -1;
   1677     void *addr = qemu_memfd_alloc("vhost-inflight", new_size,
   1678                                   F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
   1679                                   &fd, &err);
   1680 
   1681     if (err) {
   1682         error_report_err(err);
   1683         return -ENOMEM;
   1684     }
   1685 
   1686     vhost_dev_free_inflight(inflight);
   1687     inflight->offset = 0;
   1688     inflight->addr = addr;
   1689     inflight->fd = fd;
   1690     inflight->size = new_size;
   1691 
   1692     return 0;
   1693 }
   1694 
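        /* Stream format: a be64 size (0 when no inflight region exists), then a
         * be16 queue size and the raw contents of the region. */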
   1695 void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f)
   1696 {
   1697     if (inflight->addr) {
   1698         qemu_put_be64(f, inflight->size);
   1699         qemu_put_be16(f, inflight->queue_size);
   1700         qemu_put_buffer(f, inflight->addr, inflight->size);
   1701     } else {
   1702         qemu_put_be64(f, 0);
   1703     }
   1704 }
   1705 
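        /* Counterpart of vhost_dev_save_inflight(): a zero size means nothing to
         * restore; otherwise the region is resized if necessary and its contents
         * are read back from the stream. */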
   1706 int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f)
   1707 {
   1708     uint64_t size;
   1709 
   1710     size = qemu_get_be64(f);
   1711     if (!size) {
   1712         return 0;
   1713     }
   1714 
   1715     if (inflight->size != size) {
   1716         int ret = vhost_dev_resize_inflight(inflight, size);
   1717         if (ret < 0) {
   1718             return ret;
   1719         }
   1720     }
   1721     inflight->queue_size = qemu_get_be16(f);
   1722 
   1723     qemu_get_buffer(f, inflight->addr, size);
   1724 
   1725     return 0;
   1726 }
   1727 
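        /* Prepare for an inflight fd exchange: a no-op unless the backend
         * implements both vhost_get_inflight_fd and vhost_set_inflight_fd, in
         * which case the feature bits are (re)negotiated first via
         * vhost_dev_set_features(). */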
   1728 int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
   1729 {
   1730     int r;
   1731 
   1732     if (hdev->vhost_ops->vhost_get_inflight_fd == NULL ||
   1733         hdev->vhost_ops->vhost_set_inflight_fd == NULL) {
   1734         return 0;
   1735     }
   1736 
   1737     hdev->vdev = vdev;
   1738 
   1739     r = vhost_dev_set_features(hdev, hdev->log_enabled);
   1740     if (r < 0) {
   1741         VHOST_OPS_DEBUG(r, "vhost_dev_prepare_inflight failed");
   1742         return r;
   1743     }
   1744 
   1745     return 0;
   1746 }
   1747 
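        /* Hand an already allocated inflight region to the backend; a no-op when
         * the backend lacks vhost_set_inflight_fd or no region exists. */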
   1748 int vhost_dev_set_inflight(struct vhost_dev *dev,
   1749                            struct vhost_inflight *inflight)
   1750 {
   1751     int r;
   1752 
   1753     if (dev->vhost_ops->vhost_set_inflight_fd && inflight->addr) {
   1754         r = dev->vhost_ops->vhost_set_inflight_fd(dev, inflight);
   1755         if (r) {
   1756             VHOST_OPS_DEBUG(r, "vhost_set_inflight_fd failed");
   1757             return r;
   1758         }
   1759     }
   1760 
   1761     return 0;
   1762 }
   1763 
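        /* Ask the backend for an inflight region sized for queues of @queue_size
         * entries; a no-op for backends without vhost_get_inflight_fd. */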
   1764 int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size,
   1765                            struct vhost_inflight *inflight)
   1766 {
   1767     int r;
   1768 
   1769     if (dev->vhost_ops->vhost_get_inflight_fd) {
   1770         r = dev->vhost_ops->vhost_get_inflight_fd(dev, queue_size, inflight);
   1771         if (r) {
   1772             VHOST_OPS_DEBUG(r, "vhost_get_inflight_fd failed");
   1773             return r;
   1774         }
   1775     }
   1776 
   1777     return 0;
   1778 }
   1779 
   1780 static int vhost_dev_set_vring_enable(struct vhost_dev *hdev, int enable)
   1781 {
   1782     if (!hdev->vhost_ops->vhost_set_vring_enable) {
   1783         return 0;
   1784     }
   1785 
   1786     /*
   1787      * For vhost-user devices, if VHOST_USER_F_PROTOCOL_FEATURES has not
   1788      * been negotiated, the rings start directly in the enabled state, and
   1789      * the .vhost_set_vring_enable callback will fail since
   1790      * VHOST_USER_SET_VRING_ENABLE is not supported.
   1791      */
   1792     if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER &&
   1793         !virtio_has_feature(hdev->backend_features,
   1794                             VHOST_USER_F_PROTOCOL_FEATURES)) {
   1795         return 0;
   1796     }
   1797 
   1798     return hdev->vhost_ops->vhost_set_vring_enable(hdev, enable);
   1799 }
   1800 
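        /* Typical bring-up/tear-down order for a caller (a sketch, not a strict
         * contract enforced here):
         *
         *     vhost_dev_init(hdev, opaque, backend_type, 0, errp);
         *     vhost_dev_enable_notifiers(hdev, vdev);
         *     vhost_dev_start(hdev, vdev, true);
         *     ...
         *     vhost_dev_stop(hdev, vdev, true);
         *     vhost_dev_disable_notifiers(hdev, vdev);
         *     vhost_dev_cleanup(hdev);
         */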
   1801 /* Host notifiers must be enabled at this point. */
   1802 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
   1803 {
   1804     int i, r;
   1805 
   1806     /* should only be called after backend is connected */
   1807     assert(hdev->vhost_ops);
   1808 
   1809     trace_vhost_dev_start(hdev, vdev->name, vrings);
   1810 
   1811     vdev->vhost_started = true;
   1812     hdev->started = true;
   1813     hdev->vdev = vdev;
   1814 
   1815     r = vhost_dev_set_features(hdev, hdev->log_enabled);
   1816     if (r < 0) {
   1817         goto fail_features;
   1818     }
   1819 
   1820     if (vhost_dev_has_iommu(hdev)) {
   1821         memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
   1822     }
   1823 
   1824     r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
   1825     if (r < 0) {
   1826         VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
   1827         goto fail_mem;
   1828     }
   1829     for (i = 0; i < hdev->nvqs; ++i) {
   1830         r = vhost_virtqueue_start(hdev,
   1831                                   vdev,
   1832                                   hdev->vqs + i,
   1833                                   hdev->vq_index + i);
   1834         if (r < 0) {
   1835             goto fail_vq;
   1836         }
   1837     }
   1838 
   1839     if (hdev->log_enabled) {
   1840         uint64_t log_base;
   1841 
   1842         hdev->log_size = vhost_get_log_size(hdev);
   1843         hdev->log = vhost_log_get(hdev->log_size,
   1844                                   vhost_dev_log_is_shared(hdev));
   1845         log_base = (uintptr_t)hdev->log->log;
   1846         r = hdev->vhost_ops->vhost_set_log_base(hdev,
   1847                                                 hdev->log_size ? log_base : 0,
   1848                                                 hdev->log);
   1849         if (r < 0) {
   1850             VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
   1851             goto fail_log;
   1852         }
   1853     }
   1854     if (vrings) {
   1855         r = vhost_dev_set_vring_enable(hdev, true);
   1856         if (r) {
   1857             goto fail_log;
   1858         }
   1859     }
   1860     if (hdev->vhost_ops->vhost_dev_start) {
   1861         r = hdev->vhost_ops->vhost_dev_start(hdev, true);
   1862         if (r) {
   1863             goto fail_start;
   1864         }
   1865     }
   1866     if (vhost_dev_has_iommu(hdev) &&
   1867         hdev->vhost_ops->vhost_set_iotlb_callback) {
   1868         hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
   1869 
   1870         /* Update used ring information for IOTLB to work correctly;
   1871          * the vhost-kernel code requires this. */
   1872         for (i = 0; i < hdev->nvqs; ++i) {
   1873             struct vhost_virtqueue *vq = hdev->vqs + i;
   1874             vhost_device_iotlb_miss(hdev, vq->used_phys, true);
   1875         }
   1876     }
   1877     return 0;
   1878 fail_start:
   1879     if (vrings) {
   1880         vhost_dev_set_vring_enable(hdev, false);
   1881     }
   1882 fail_log:
   1883     vhost_log_put(hdev, false);
   1884 fail_vq:
   1885     while (--i >= 0) {
   1886         vhost_virtqueue_stop(hdev,
   1887                              vdev,
   1888                              hdev->vqs + i,
   1889                              hdev->vq_index + i);
   1890     }
   1891 
   1892 fail_mem:
   1893 fail_features:
   1894     vdev->vhost_started = false;
   1895     hdev->started = false;
   1896     return r;
   1897 }
   1898 
   1899 /* Host notifiers must be enabled at this point. */
   1900 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
   1901 {
   1902     int i;
   1903 
   1904     /* should only be called after backend is connected */
   1905     assert(hdev->vhost_ops);
   1906 
   1907     trace_vhost_dev_stop(hdev, vdev->name, vrings);
   1908 
   1909     if (hdev->vhost_ops->vhost_dev_start) {
   1910         hdev->vhost_ops->vhost_dev_start(hdev, false);
   1911     }
   1912     if (vrings) {
   1913         vhost_dev_set_vring_enable(hdev, false);
   1914     }
   1915     for (i = 0; i < hdev->nvqs; ++i) {
   1916         vhost_virtqueue_stop(hdev,
   1917                              vdev,
   1918                              hdev->vqs + i,
   1919                              hdev->vq_index + i);
   1920     }
   1921 
   1922     if (vhost_dev_has_iommu(hdev)) {
   1923         if (hdev->vhost_ops->vhost_set_iotlb_callback) {
   1924             hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
   1925         }
   1926         memory_listener_unregister(&hdev->iommu_listener);
   1927     }
   1928     vhost_log_put(hdev, true);
   1929     hdev->started = false;
   1930     vdev->vhost_started = false;
   1931     hdev->vdev = NULL;
   1932 }
   1933 
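        /* Forward a vhost_vring_file (queue index + backend fd) to the backend's
         * vhost_net_set_backend hook; backends without one get -ENOSYS. */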
   1934 int vhost_net_set_backend(struct vhost_dev *hdev,
   1935                           struct vhost_vring_file *file)
   1936 {
   1937     if (hdev->vhost_ops->vhost_net_set_backend) {
   1938         return hdev->vhost_ops->vhost_net_set_backend(hdev, file);
   1939     }
   1940 
   1941     return -ENOSYS;
   1942 }