qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

vmbus.c (75419B)


      1 /*
      2  * QEMU Hyper-V VMBus
      3  *
      4  * Copyright (c) 2017-2018 Virtuozzo International GmbH.
      5  *
      6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
      7  * See the COPYING file in the top-level directory.
      8  */
      9 
     10 #include "qemu/osdep.h"
     11 #include "qemu/error-report.h"
     12 #include "qemu/main-loop.h"
     13 #include "qapi/error.h"
     14 #include "migration/vmstate.h"
     15 #include "hw/qdev-properties.h"
     16 #include "hw/qdev-properties-system.h"
     17 #include "hw/hyperv/hyperv.h"
     18 #include "hw/hyperv/vmbus.h"
     19 #include "hw/hyperv/vmbus-bridge.h"
     20 #include "hw/sysbus.h"
     21 #include "cpu.h"
     22 #include "trace.h"
     23 
     24 enum {
     25     VMGPADL_INIT,
     26     VMGPADL_ALIVE,
     27     VMGPADL_TEARINGDOWN,
     28     VMGPADL_TORNDOWN,
     29 };
     30 
     31 struct VMBusGpadl {
     32     /* GPADL id */
     33     uint32_t id;
     34     /* associated channel id (rudimentary?) */
     35     uint32_t child_relid;
     36 
     37     /* number of pages in the GPADL as declared in GPADL_HEADER message */
     38     uint32_t num_gfns;
     39     /*
     40      * Due to limited message size, GPADL may not fit fully in a single
     41      * GPADL_HEADER message, and is further populated using GPADL_BODY
     42      * messages.  @seen_gfns is the number of pages seen so far; once it
     43      * reaches @num_gfns, the GPADL is ready to use.
     44      */
     45     uint32_t seen_gfns;
     46     /* array of GFNs (of size @num_gfns once allocated) */
     47     uint64_t *gfns;
     48 
     49     uint8_t state;
     50 
     51     QTAILQ_ENTRY(VMBusGpadl) link;
     52     VMBus *vmbus;
     53     unsigned refcount;
     54 };
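
/*
 * Example (hypothetical handler; only the @seen_gfns/@num_gfns accounting
 * mirrors this file): a GPADL declares @num_gfns pages in GPADL_HEADER and
 * is then filled in batches until complete.
 */
#if 0   /* illustration only */
static void example_absorb_gfns(VMBusGpadl *gpadl,
                                const uint64_t *gfns, uint32_t count)
{
    /* copy the next batch of guest page frame numbers into the GPADL ... */
    while (count-- && gpadl->seen_gfns < gpadl->num_gfns) {
        gpadl->gfns[gpadl->seen_gfns++] = *gfns++;
    }
    /* ... which becomes usable once all declared pages have been seen */
    if (gpadl->seen_gfns == gpadl->num_gfns) {  /* i.e. gpadl_full() */
        gpadl->state = VMGPADL_ALIVE;
    }
}
#endif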
     55 
     56 /*
     57  * Wrap sequential read from / write to GPADL.
     58  */
     59 typedef struct GpadlIter {
     60     VMBusGpadl *gpadl;
     61     AddressSpace *as;
     62     DMADirection dir;
     63     /* offset into GPADL where the next i/o will be performed */
     64     uint32_t off;
     65     /*
     66      * Cached mapping of the currently accessed page, up to page boundary.
     67      * Updated lazily on i/o.
     68      * Note: MemoryRegionCache cannot be used here because pages in the GPADL
     69      * are non-contiguous and may belong to different memory regions.
     70      */
     71     void *map;
     72     /* offset after last i/o (i.e. not affected by seek) */
     73     uint32_t last_off;
     74     /*
     75      * Indicator that the iterator is active and may have a cached mapping.
     76      * Makes it possible to enforce bracketing of all i/o (which may create
     77      * cached mappings) and thus rule out mapping leaks.
     78      */
     79     bool active;
     80 } GpadlIter;
     81 
     82 /*
     83  * Ring buffer.  There are two of them, sitting in the same GPADL, for each
     84  * channel.
     85  * Each ring buffer consists of a set of pages, with the first page containing
     86  * the ring buffer header, and the remaining pages being for data packets.
     87  */
     88 typedef struct VMBusRingBufCommon {
     89     AddressSpace *as;
     90     /* GPA of the ring buffer header */
     91     dma_addr_t rb_addr;
     92     /* start and length of the ring buffer data area within GPADL */
     93     uint32_t base;
     94     uint32_t len;
     95 
     96     GpadlIter iter;
     97 } VMBusRingBufCommon;
     98 
     99 typedef struct VMBusSendRingBuf {
    100     VMBusRingBufCommon common;
    101     /* current write index, to be committed at the end of send */
    102     uint32_t wr_idx;
    103     /* write index at the start of send */
    104     uint32_t last_wr_idx;
    105     /* space to be requested from the guest */
    106     uint32_t wanted;
    107     /* space reserved for planned sends */
    108     uint32_t reserved;
    109     /* last seen read index */
    110     uint32_t last_seen_rd_idx;
    111 } VMBusSendRingBuf;
    112 
    113 typedef struct VMBusRecvRingBuf {
    114     VMBusRingBufCommon common;
    115     /* current read index, to be committed at the end of receive */
    116     uint32_t rd_idx;
    117     /* read index at the start of receive */
    118     uint32_t last_rd_idx;
    119     /* last seen write index */
    120     uint32_t last_seen_wr_idx;
    121 } VMBusRecvRingBuf;
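
/*
 * Example layout (hypothetical page counts): with ringbuf_send_offset == 3
 * and num_gfns == 6, the channel's single ring-buffer GPADL is split as
 *
 *   page 0       guest->host ring header   (QEMU's recv side)
 *   pages 1..2   guest->host ring data
 *   page 3       host->guest ring header   (QEMU's send side)
 *   pages 4..5   host->guest ring data
 *
 * matching ringbufs_init() below, which carves [0, ringbuf_send_offset) and
 * [ringbuf_send_offset, num_gfns) out of the same GPADL.
 */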
    122 
    123 
    124 enum {
    125     VMOFFER_INIT,
    126     VMOFFER_SENDING,
    127     VMOFFER_SENT,
    128 };
    129 
    130 enum {
    131     VMCHAN_INIT,
    132     VMCHAN_OPENING,
    133     VMCHAN_OPEN,
    134 };
    135 
    136 struct VMBusChannel {
    137     VMBusDevice *dev;
    138 
    139     /* channel id */
    140     uint32_t id;
    141     /*
    142      * subchannel index within the device; subchannel #0 is "primary" and
    143      * always exists
    144      */
    145     uint16_t subchan_idx;
    146     uint32_t open_id;
    147     /* VP_INDEX of the vCPU to notify with (synthetic) interrupts */
    148     uint32_t target_vp;
    149     /* GPADL id to use for the ring buffers */
    150     uint32_t ringbuf_gpadl;
    151     /* start (in pages) of the send ring buffer within @ringbuf_gpadl */
    152     uint32_t ringbuf_send_offset;
    153 
    154     uint8_t offer_state;
    155     uint8_t state;
    156     bool is_open;
    157 
    158     /* main device worker; copied from the device class */
    159     VMBusChannelNotifyCb notify_cb;
    160     /*
    161      * guest->host notifications, either sent directly or dispatched via
    162      * interrupt page (older VMBus)
    163      */
    164     EventNotifier notifier;
    165 
    166     VMBus *vmbus;
    167     /*
    168      * SINT route to signal with host->guest notifications; may be shared with
    169      * the main VMBus SINT route
    170      */
    171     HvSintRoute *notify_route;
    172     VMBusGpadl *gpadl;
    173 
    174     VMBusSendRingBuf send_ringbuf;
    175     VMBusRecvRingBuf recv_ringbuf;
    176 
    177     QTAILQ_ENTRY(VMBusChannel) link;
    178 };
    179 
    180 /*
    181  * Hyper-V spec mandates that every message port has 16 buffers, which means
    182  * that the guest can post up to this many messages without blocking.
    183  * Therefore a queue for incoming messages has to be provided.
    184  * For outgoing (i.e. host->guest) messages there's no queue; the VMBus just
    185  * doesn't transition to a new state until the message is known to have been
    186  * successfully delivered to the respective SynIC message slot.
    187  */
    188 #define HV_MSG_QUEUE_LEN     16
    189 
    190 /* Hyper-V devices never use channel #0.  Must be something special. */
    191 #define VMBUS_FIRST_CHANID      1
    192 /* Each channel occupies one bit within a single event page sint slot. */
    193 #define VMBUS_CHANID_COUNT      (HV_EVENT_FLAGS_COUNT - VMBUS_FIRST_CHANID)
    194 /* Leave a few connection numbers for other purposes. */
    195 #define VMBUS_CHAN_CONNECTION_OFFSET     16
    196 
    197 /*
    198  * Since the success or failure of sending a message is reported
    199  * asynchronously, the VMBus state machine has effectively two entry points:
    200  * vmbus_run and vmbus_msg_cb (the latter is called when the host->guest
    201  * message delivery status becomes known).  Both are run as oneshot BHs on the
    202  * main aio context, ensuring serialization.
    203  */
    204 enum {
    205     VMBUS_LISTEN,
    206     VMBUS_HANDSHAKE,
    207     VMBUS_OFFER,
    208     VMBUS_CREATE_GPADL,
    209     VMBUS_TEARDOWN_GPADL,
    210     VMBUS_OPEN_CHANNEL,
    211     VMBUS_UNLOAD,
    212     VMBUS_STATE_MAX
    213 };
    214 
    215 struct VMBus {
    216     BusState parent;
    217 
    218     uint8_t state;
    219     /* protection against recursive aio_poll (see vmbus_run) */
    220     bool in_progress;
    221     /* whether there's a message being delivered to the guest */
    222     bool msg_in_progress;
    223     uint32_t version;
    224     /* VP_INDEX of the vCPU to send messages and interrupts to */
    225     uint32_t target_vp;
    226     HvSintRoute *sint_route;
    227     /*
    228      * interrupt page for older protocol versions; newer ones use SynIC event
    229      * flags directly
    230      */
    231     hwaddr int_page_gpa;
    232 
    233     DECLARE_BITMAP(chanid_bitmap, VMBUS_CHANID_COUNT);
    234 
    235     /* incoming message queue */
    236     struct hyperv_post_message_input rx_queue[HV_MSG_QUEUE_LEN];
    237     uint8_t rx_queue_head;
    238     uint8_t rx_queue_size;
    239     QemuMutex rx_queue_lock;
    240 
    241     QTAILQ_HEAD(, VMBusGpadl) gpadl_list;
    242     QTAILQ_HEAD(, VMBusChannel) channel_list;
    243 
    244     /*
    245      * guest->host notifications for older VMBus, to be dispatched via
    246      * interrupt page
    247      */
    248     EventNotifier notifier;
    249 };
    250 
    251 static bool gpadl_full(VMBusGpadl *gpadl)
    252 {
    253     return gpadl->seen_gfns == gpadl->num_gfns;
    254 }
    255 
    256 static VMBusGpadl *create_gpadl(VMBus *vmbus, uint32_t id,
    257                                 uint32_t child_relid, uint32_t num_gfns)
    258 {
    259     VMBusGpadl *gpadl = g_new0(VMBusGpadl, 1);
    260 
    261     gpadl->id = id;
    262     gpadl->child_relid = child_relid;
    263     gpadl->num_gfns = num_gfns;
    264     gpadl->gfns = g_new(uint64_t, num_gfns);
    265     QTAILQ_INSERT_HEAD(&vmbus->gpadl_list, gpadl, link);
    266     gpadl->vmbus = vmbus;
    267     gpadl->refcount = 1;
    268     return gpadl;
    269 }
    270 
    271 static void free_gpadl(VMBusGpadl *gpadl)
    272 {
    273     QTAILQ_REMOVE(&gpadl->vmbus->gpadl_list, gpadl, link);
    274     g_free(gpadl->gfns);
    275     g_free(gpadl);
    276 }
    277 
    278 static VMBusGpadl *find_gpadl(VMBus *vmbus, uint32_t gpadl_id)
    279 {
    280     VMBusGpadl *gpadl;
    281     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
    282         if (gpadl->id == gpadl_id) {
    283             return gpadl;
    284         }
    285     }
    286     return NULL;
    287 }
    288 
    289 VMBusGpadl *vmbus_get_gpadl(VMBusChannel *chan, uint32_t gpadl_id)
    290 {
    291     VMBusGpadl *gpadl = find_gpadl(chan->vmbus, gpadl_id);
    292     if (!gpadl || !gpadl_full(gpadl)) {
    293         return NULL;
    294     }
    295     gpadl->refcount++;
    296     return gpadl;
    297 }
    298 
    299 void vmbus_put_gpadl(VMBusGpadl *gpadl)
    300 {
    301     if (!gpadl) {
    302         return;
    303     }
    304     if (--gpadl->refcount) {
    305         return;
    306     }
    307     free_gpadl(gpadl);
    308 }
    309 
    310 uint32_t vmbus_gpadl_len(VMBusGpadl *gpadl)
    311 {
    312     return gpadl->num_gfns * TARGET_PAGE_SIZE;
    313 }
    314 
    315 static void gpadl_iter_init(GpadlIter *iter, VMBusGpadl *gpadl,
    316                             AddressSpace *as, DMADirection dir)
    317 {
    318     iter->gpadl = gpadl;
    319     iter->as = as;
    320     iter->dir = dir;
    321     iter->active = false;
    322 }
    323 
    324 static inline void gpadl_iter_cache_unmap(GpadlIter *iter)
    325 {
    326     uint32_t map_start_in_page = (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
    327     uint32_t io_end_in_page = ((iter->last_off - 1) & ~TARGET_PAGE_MASK) + 1;
    328 
    329     /* a mapping is only created for a non-zero amount of i/o */
    330     assert(iter->last_off > 0);
    331     assert(map_start_in_page < io_end_in_page);
    332 
    333     dma_memory_unmap(iter->as, iter->map, TARGET_PAGE_SIZE - map_start_in_page,
    334                      iter->dir, io_end_in_page - map_start_in_page);
    335 }
    336 
    337 /*
    338  * Copy exactly @len bytes between the GPADL pointed to by @iter and @buf.
    339  * The direction of the copy is determined by @iter->dir.
    340  * The caller must ensure the operation overflows neither @buf nor the GPADL
    341  * (there's an assert for the latter).
    342  * Reuse the currently mapped page in the GPADL if possible.
    343  */
    344 static ssize_t gpadl_iter_io(GpadlIter *iter, void *buf, uint32_t len)
    345 {
    346     ssize_t ret = len;
    347 
    348     assert(iter->active);
    349 
    350     while (len) {
    351         uint32_t off_in_page = iter->off & ~TARGET_PAGE_MASK;
    352         uint32_t pgleft = TARGET_PAGE_SIZE - off_in_page;
    353         uint32_t cplen = MIN(pgleft, len);
    354         void *p;
    355 
    356         /* try to reuse the cached mapping */
    357         if (iter->map) {
    358             uint32_t map_start_in_page =
    359                 (uintptr_t)iter->map & ~TARGET_PAGE_MASK;
    360             uint32_t off_base = iter->off & ~TARGET_PAGE_MASK;
    361             uint32_t mapped_base = (iter->last_off - 1) & ~TARGET_PAGE_MASK;
    362             if (off_base != mapped_base || off_in_page < map_start_in_page) {
    363                 gpadl_iter_cache_unmap(iter);
    364                 iter->map = NULL;
    365             }
    366         }
    367 
    368         if (!iter->map) {
    369             dma_addr_t maddr;
    370             dma_addr_t mlen = pgleft;
    371             uint32_t idx = iter->off >> TARGET_PAGE_BITS;
    372             assert(idx < iter->gpadl->num_gfns);
    373 
    374             maddr = (iter->gpadl->gfns[idx] << TARGET_PAGE_BITS) | off_in_page;
    375 
    376             iter->map = dma_memory_map(iter->as, maddr, &mlen, iter->dir,
    377                                        MEMTXATTRS_UNSPECIFIED);
    378             if (mlen != pgleft) {
    379                 dma_memory_unmap(iter->as, iter->map, mlen, iter->dir, 0);
    380                 iter->map = NULL;
    381                 return -EFAULT;
    382             }
    383         }
    384 
    385         p = (void *)(uintptr_t)(((uintptr_t)iter->map & TARGET_PAGE_MASK) |
    386                 off_in_page);
    387         if (iter->dir == DMA_DIRECTION_FROM_DEVICE) {
    388             memcpy(p, buf, cplen);
    389         } else {
    390             memcpy(buf, p, cplen);
    391         }
    392 
    393         buf += cplen;
    394         len -= cplen;
    395         iter->off += cplen;
    396         iter->last_off = iter->off;
    397     }
    398 
    399     return ret;
    400 }
    401 
    402 /*
    403  * Position the iterator @iter at new offset @new_off.
    404  * If this results in the cached mapping being unusable with the new offset,
    405  * unmap it.
    406  */
    407 static inline void gpadl_iter_seek(GpadlIter *iter, uint32_t new_off)
    408 {
    409     assert(iter->active);
    410     iter->off = new_off;
    411 }
    412 
    413 /*
    414  * Start a series of i/o on the GPADL.
    415  * After this i/o and seek operations on @iter become legal.
    416  */
    417 static inline void gpadl_iter_start_io(GpadlIter *iter)
    418 {
    419     assert(!iter->active);
    420     /* mapping is cached lazily on i/o */
    421     iter->map = NULL;
    422     iter->active = true;
    423 }
    424 
    425 /*
    426  * End the earlier started series of i/o on the GPADL and release the cached
    427  * mapping if any.
    428  */
    429 static inline void gpadl_iter_end_io(GpadlIter *iter)
    430 {
    431     assert(iter->active);
    432 
    433     if (iter->map) {
    434         gpadl_iter_cache_unmap(iter);
    435     }
    436 
    437     iter->active = false;
    438 }
    439 
    440 static void vmbus_resched(VMBus *vmbus);
    441 static void vmbus_msg_cb(void *data, int status);
    442 
    443 ssize_t vmbus_iov_to_gpadl(VMBusChannel *chan, VMBusGpadl *gpadl, uint32_t off,
    444                            const struct iovec *iov, size_t iov_cnt)
    445 {
    446     GpadlIter iter;
    447     size_t i;
    448     ssize_t ret = 0;
    449 
    450     gpadl_iter_init(&iter, gpadl, chan->dev->dma_as,
    451                     DMA_DIRECTION_FROM_DEVICE);
    452     gpadl_iter_start_io(&iter);
    453     gpadl_iter_seek(&iter, off);
    454     for (i = 0; i < iov_cnt; i++) {
    455         ret = gpadl_iter_io(&iter, iov[i].iov_base, iov[i].iov_len);
    456         if (ret < 0) {
    457             goto out;
    458         }
    459     }
    460 out:
    461     gpadl_iter_end_io(&iter);
    462     return ret;
    463 }
    464 
    465 int vmbus_map_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
    466                   unsigned iov_cnt, size_t len, size_t off)
    467 {
    468     int ret_cnt = 0, ret;
    469     unsigned i;
    470     QEMUSGList *sgl = &req->sgl;
    471     ScatterGatherEntry *sg = sgl->sg;
    472 
    473     for (i = 0; i < sgl->nsg; i++) {
    474         if (sg[i].len > off) {
    475             break;
    476         }
    477         off -= sg[i].len;
    478     }
    479     for (; len && i < sgl->nsg; i++) {
    480         dma_addr_t mlen = MIN(sg[i].len - off, len);
    481         dma_addr_t addr = sg[i].base + off;
    482         len -= mlen;
    483         off = 0;
    484 
    485         for (; mlen; ret_cnt++) {
    486             dma_addr_t l = mlen;
    487             dma_addr_t a = addr;
    488 
    489             if (ret_cnt == iov_cnt) {
    490                 ret = -ENOBUFS;
    491                 goto err;
    492             }
    493 
    494             iov[ret_cnt].iov_base = dma_memory_map(sgl->as, a, &l, dir,
    495                                                    MEMTXATTRS_UNSPECIFIED);
    496             if (!l) {
    497                 ret = -EFAULT;
    498                 goto err;
    499             }
    500             iov[ret_cnt].iov_len = l;
    501             addr += l;
    502             mlen -= l;
    503         }
    504     }
    505 
    506     return ret_cnt;
    507 err:
    508     vmbus_unmap_sgl(req, dir, iov, ret_cnt, 0);
    509     return ret;
    510 }
    511 
    512 void vmbus_unmap_sgl(VMBusChanReq *req, DMADirection dir, struct iovec *iov,
    513                      unsigned iov_cnt, size_t accessed)
    514 {
    515     QEMUSGList *sgl = &req->sgl;
    516     unsigned i;
    517 
    518     for (i = 0; i < iov_cnt; i++) {
    519         size_t acsd = MIN(accessed, iov[i].iov_len);
    520         dma_memory_unmap(sgl->as, iov[i].iov_base, iov[i].iov_len, dir, acsd);
    521         accessed -= acsd;
    522     }
    523 }
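
/*
 * Example (hypothetical device code; the iovec count and sizes are arbitrary
 * assumptions): map a 4KiB window at offset 512 of the guest buffer described
 * by req->sgl, consume it, and unmap.
 */
#if 0   /* illustration only */
static void example_read_guest_buffer(VMBusChanReq *req)
{
    struct iovec iov[8];
    int n = vmbus_map_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, ARRAY_SIZE(iov),
                          4096, 512);
    if (n < 0) {
        return;     /* -ENOBUFS or -EFAULT; nothing is left mapped */
    }
    /* ... consume iov[0..n-1], e.g. with iov_to_buf() ... */
    vmbus_unmap_sgl(req, DMA_DIRECTION_TO_DEVICE, iov, n, 4096);
}
#endif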
    524 
    525 static const VMStateDescription vmstate_gpadl = {
    526     .name = "vmbus/gpadl",
    527     .version_id = 0,
    528     .minimum_version_id = 0,
    529     .fields = (VMStateField[]) {
    530         VMSTATE_UINT32(id, VMBusGpadl),
    531         VMSTATE_UINT32(child_relid, VMBusGpadl),
    532         VMSTATE_UINT32(num_gfns, VMBusGpadl),
    533         VMSTATE_UINT32(seen_gfns, VMBusGpadl),
    534         VMSTATE_VARRAY_UINT32_ALLOC(gfns, VMBusGpadl, num_gfns, 0,
    535                                     vmstate_info_uint64, uint64_t),
    536         VMSTATE_UINT8(state, VMBusGpadl),
    537         VMSTATE_END_OF_LIST()
    538     }
    539 };
    540 
    541 /*
    542  * Wrap the index into a ring buffer of @len bytes.
    543  * @idx is assumed not to exceed twice the size of the ringbuffer, so only
    544  * a single wraparound is considered.
    545  */
    546 static inline uint32_t rb_idx_wrap(uint32_t idx, uint32_t len)
    547 {
    548     if (idx >= len) {
    549         idx -= len;
    550     }
    551     return idx;
    552 }
    553 
    554 /*
    555  * Circular difference between two indices into a ring buffer of @len bytes.
    556  * @allow_catchup - whether @idx1 may catch up with @idx2; e.g. the read index
    557  * may catch up with the write index but not vice versa.
    558  */
    559 static inline uint32_t rb_idx_delta(uint32_t idx1, uint32_t idx2, uint32_t len,
    560                                     bool allow_catchup)
    561 {
    562     return rb_idx_wrap(idx2 + len - idx1 - !allow_catchup, len);
    563 }
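
/*
 * Worked example (hypothetical values): in a ring of len == 0x1000,
 *
 *   rb_idx_delta(0x0ff0, 0x0010, 0x1000, true)  == 0x020
 *   rb_idx_delta(0x0010, 0x0010, 0x1000, true)  == 0      (caught up: empty)
 *   rb_idx_delta(0x0010, 0x0010, 0x1000, false) == 0xfff  (one byte short)
 *
 * i.e. with @allow_catchup equal indices mean an empty span, while without it
 * they mean "one byte short of full", which is how the write index is kept
 * from ever catching up with the read index.
 */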
    564 
    565 static vmbus_ring_buffer *ringbuf_map_hdr(VMBusRingBufCommon *ringbuf)
    566 {
    567     vmbus_ring_buffer *rb;
    568     dma_addr_t mlen = sizeof(*rb);
    569 
    570     rb = dma_memory_map(ringbuf->as, ringbuf->rb_addr, &mlen,
    571                         DMA_DIRECTION_FROM_DEVICE, MEMTXATTRS_UNSPECIFIED);
    572     if (mlen != sizeof(*rb)) {
    573         dma_memory_unmap(ringbuf->as, rb, mlen,
    574                          DMA_DIRECTION_FROM_DEVICE, 0);
    575         return NULL;
    576     }
    577     return rb;
    578 }
    579 
    580 static void ringbuf_unmap_hdr(VMBusRingBufCommon *ringbuf,
    581                               vmbus_ring_buffer *rb, bool dirty)
    582 {
    583     assert(rb);
    584 
    585     dma_memory_unmap(ringbuf->as, rb, sizeof(*rb), DMA_DIRECTION_FROM_DEVICE,
    586                      dirty ? sizeof(*rb) : 0);
    587 }
    588 
    589 static void ringbuf_init_common(VMBusRingBufCommon *ringbuf, VMBusGpadl *gpadl,
    590                                 AddressSpace *as, DMADirection dir,
    591                                 uint32_t begin, uint32_t end)
    592 {
    593     ringbuf->as = as;
    594     ringbuf->rb_addr = gpadl->gfns[begin] << TARGET_PAGE_BITS;
    595     ringbuf->base = (begin + 1) << TARGET_PAGE_BITS;
    596     ringbuf->len = (end - begin - 1) << TARGET_PAGE_BITS;
    597     gpadl_iter_init(&ringbuf->iter, gpadl, as, dir);
    598 }
    599 
    600 static int ringbufs_init(VMBusChannel *chan)
    601 {
    602     vmbus_ring_buffer *rb;
    603     VMBusSendRingBuf *send_ringbuf = &chan->send_ringbuf;
    604     VMBusRecvRingBuf *recv_ringbuf = &chan->recv_ringbuf;
    605 
    606     if (chan->ringbuf_send_offset <= 1 ||
    607         chan->gpadl->num_gfns <= chan->ringbuf_send_offset + 1) {
    608         return -EINVAL;
    609     }
    610 
    611     ringbuf_init_common(&recv_ringbuf->common, chan->gpadl, chan->dev->dma_as,
    612                         DMA_DIRECTION_TO_DEVICE, 0, chan->ringbuf_send_offset);
    613     ringbuf_init_common(&send_ringbuf->common, chan->gpadl, chan->dev->dma_as,
    614                         DMA_DIRECTION_FROM_DEVICE, chan->ringbuf_send_offset,
    615                         chan->gpadl->num_gfns);
    616     send_ringbuf->wanted = 0;
    617     send_ringbuf->reserved = 0;
    618 
    619     rb = ringbuf_map_hdr(&recv_ringbuf->common);
    620     if (!rb) {
    621         return -EFAULT;
    622     }
    623     recv_ringbuf->rd_idx = recv_ringbuf->last_rd_idx = rb->read_index;
    624     ringbuf_unmap_hdr(&recv_ringbuf->common, rb, false);
    625 
    626     rb = ringbuf_map_hdr(&send_ringbuf->common);
    627     if (!rb) {
    628         return -EFAULT;
    629     }
    630     send_ringbuf->wr_idx = send_ringbuf->last_wr_idx = rb->write_index;
    631     send_ringbuf->last_seen_rd_idx = rb->read_index;
    632     rb->feature_bits |= VMBUS_RING_BUFFER_FEAT_PENDING_SZ;
    633     ringbuf_unmap_hdr(&send_ringbuf->common, rb, true);
    634 
    635     if (recv_ringbuf->rd_idx >= recv_ringbuf->common.len ||
    636         send_ringbuf->wr_idx >= send_ringbuf->common.len) {
    637         return -EOVERFLOW;
    638     }
    639 
    640     return 0;
    641 }
    642 
    643 /*
    644  * Perform io between the GPADL-backed ringbuffer @ringbuf and @buf, wrapping
    645  * around if needed.
    646  * @len is assumed not to exceed the size of the ringbuffer, so only a single
    647  * wraparound is considered.
    648  */
    649 static ssize_t ringbuf_io(VMBusRingBufCommon *ringbuf, void *buf, uint32_t len)
    650 {
    651     ssize_t ret1 = 0, ret2 = 0;
    652     uint32_t remain = ringbuf->len + ringbuf->base - ringbuf->iter.off;
    653 
    654     if (len >= remain) {
    655         ret1 = gpadl_iter_io(&ringbuf->iter, buf, remain);
    656         if (ret1 < 0) {
    657             return ret1;
    658         }
    659         gpadl_iter_seek(&ringbuf->iter, ringbuf->base);
    660         buf += remain;
    661         len -= remain;
    662     }
    663     ret2 = gpadl_iter_io(&ringbuf->iter, buf, len);
    664     if (ret2 < 0) {
    665         return ret2;
    666     }
    667     return ret1 + ret2;
    668 }
    669 
    670 /*
    671  * Position the circular iterator within @ringbuf to offset @new_off, wrapping
    672  * around if needed.
    673  * @new_off is assumed not to exceed twice the size of the ringbuffer, so only
    674  * a single wraparound is considered.
    675  */
    676 static inline void ringbuf_seek(VMBusRingBufCommon *ringbuf, uint32_t new_off)
    677 {
    678     gpadl_iter_seek(&ringbuf->iter,
    679                     ringbuf->base + rb_idx_wrap(new_off, ringbuf->len));
    680 }
    681 
    682 static inline uint32_t ringbuf_tell(VMBusRingBufCommon *ringbuf)
    683 {
    684     return ringbuf->iter.off - ringbuf->base;
    685 }
    686 
    687 static inline void ringbuf_start_io(VMBusRingBufCommon *ringbuf)
    688 {
    689     gpadl_iter_start_io(&ringbuf->iter);
    690 }
    691 
    692 static inline void ringbuf_end_io(VMBusRingBufCommon *ringbuf)
    693 {
    694     gpadl_iter_end_io(&ringbuf->iter);
    695 }
    696 
    697 VMBusDevice *vmbus_channel_device(VMBusChannel *chan)
    698 {
    699     return chan->dev;
    700 }
    701 
    702 VMBusChannel *vmbus_device_channel(VMBusDevice *dev, uint32_t chan_idx)
    703 {
    704     if (chan_idx >= dev->num_channels) {
    705         return NULL;
    706     }
    707     return &dev->channels[chan_idx];
    708 }
    709 
    710 uint32_t vmbus_channel_idx(VMBusChannel *chan)
    711 {
    712     return chan - chan->dev->channels;
    713 }
    714 
    715 void vmbus_channel_notify_host(VMBusChannel *chan)
    716 {
    717     event_notifier_set(&chan->notifier);
    718 }
    719 
    720 bool vmbus_channel_is_open(VMBusChannel *chan)
    721 {
    722     return chan->is_open;
    723 }
    724 
    725 /*
    726  * Notify the guest side about the data to work on in the channel ring buffer.
    727  * The notification is done by signaling a dedicated per-channel SynIC event
    728  * flag (more recent guests) or setting a bit in the interrupt page and firing
    729  * the VMBus SINT (older guests).
    730  */
    731 static int vmbus_channel_notify_guest(VMBusChannel *chan)
    732 {
    733     int res = 0;
    734     unsigned long *int_map, mask;
    735     unsigned idx;
    736     hwaddr addr = chan->vmbus->int_page_gpa;
    737     hwaddr len = TARGET_PAGE_SIZE / 2, dirty = 0;
    738 
    739     trace_vmbus_channel_notify_guest(chan->id);
    740 
    741     if (!addr) {
    742         return hyperv_set_event_flag(chan->notify_route, chan->id);
    743     }
    744 
    745     int_map = cpu_physical_memory_map(addr, &len, 1);
    746     if (len != TARGET_PAGE_SIZE / 2) {
    747         res = -ENXIO;
    748         goto unmap;
    749     }
    750 
    751     idx = BIT_WORD(chan->id);
    752     mask = BIT_MASK(chan->id);
    753     if ((qatomic_fetch_or(&int_map[idx], mask) & mask) != mask) {
    754         res = hyperv_sint_route_set_sint(chan->notify_route);
    755         dirty = len;
    756     }
    757 
    758 unmap:
    759     cpu_physical_memory_unmap(int_map, len, 1, dirty);
    760     return res;
    761 }
    762 
    763 #define VMBUS_PKT_TRAILER      sizeof(uint64_t)
    764 
    765 static uint32_t vmbus_pkt_hdr_set_offsets(vmbus_packet_hdr *hdr,
    766                                           uint32_t desclen, uint32_t msglen)
    767 {
    768     hdr->offset_qwords = sizeof(*hdr) / sizeof(uint64_t) +
    769         DIV_ROUND_UP(desclen, sizeof(uint64_t));
    770     hdr->len_qwords = hdr->offset_qwords +
    771         DIV_ROUND_UP(msglen, sizeof(uint64_t));
    772     return hdr->len_qwords * sizeof(uint64_t) + VMBUS_PKT_TRAILER;
    773 }
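
/*
 * Worked example (assuming the 16-byte vmbus_packet_hdr of vmbus-proto.h):
 * for desclen == 16 and msglen == 20,
 *
 *   offset_qwords = 16/8 + DIV_ROUND_UP(16, 8) = 2 + 2 = 4
 *   len_qwords    = 4 + DIV_ROUND_UP(20, 8)    = 4 + 3 = 7
 *   return value  = 7 * 8 + VMBUS_PKT_TRAILER  = 64 bytes
 *
 * so both the descriptor area and the message are padded to qword boundaries
 * and the 8-byte trailer is included in the total.
 */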
    774 
    775 /*
    776  * Simplified ring buffer operation with paired barriers annotations in the
    777  * producer and consumer loops:
    778  *
    779  * producer                           * consumer
    780  * ~~~~~~~~                           * ~~~~~~~~
    781  * write pending_send_sz              * read write_index
    782  * smp_mb                       [A]   * smp_mb                       [C]
    783  * read read_index                    * read packet
    784  * smp_mb                       [B]   * read/write out-of-band data
    785  * read/write out-of-band data        * smp_mb                       [B]
    786  * write packet                       * write read_index
    787  * smp_mb                       [C]   * smp_mb                       [A]
    788  * write write_index                  * read pending_send_sz
    789  * smp_wmb                      [D]   * smp_rmb                      [D]
    790  * write pending_send_sz              * read write_index
    791  * ...                                * ...
    792  */
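
/*
 * In this file the producer column corresponds to vmbus_channel_reserve(),
 * vmbus_channel_send() and ringbuf_send_update_idx(), and the consumer column
 * to vmbus_channel_recv_start()/_peek()/_pop()/_done(); the [A]..[D] labels
 * match the smp_*() annotations in those functions.
 */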
    793 
    794 static inline uint32_t ringbuf_send_avail(VMBusSendRingBuf *ringbuf)
    795 {
    796     /* don't trust guest data */
    797     if (ringbuf->last_seen_rd_idx >= ringbuf->common.len) {
    798         return 0;
    799     }
    800     return rb_idx_delta(ringbuf->wr_idx, ringbuf->last_seen_rd_idx,
    801                         ringbuf->common.len, false);
    802 }
    803 
    804 static ssize_t ringbuf_send_update_idx(VMBusChannel *chan)
    805 {
    806     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    807     vmbus_ring_buffer *rb;
    808     uint32_t written;
    809 
    810     written = rb_idx_delta(ringbuf->last_wr_idx, ringbuf->wr_idx,
    811                            ringbuf->common.len, true);
    812     if (!written) {
    813         return 0;
    814     }
    815 
    816     rb = ringbuf_map_hdr(&ringbuf->common);
    817     if (!rb) {
    818         return -EFAULT;
    819     }
    820 
    821     ringbuf->reserved -= written;
    822 
    823     /* prevent reorder with the data operation and packet write */
    824     smp_mb();                   /* barrier pair [C] */
    825     rb->write_index = ringbuf->wr_idx;
    826 
    827     /*
    828      * If the producer earlier indicated that it wants to be notified when the
    829      * consumer frees a certain amount of space in the ring buffer, that amount
    830      * is reduced by the size of the completed write.
    831      */
    832     if (ringbuf->wanted) {
    833         /* otherwise reservation would fail */
    834         assert(ringbuf->wanted < written);
    835         ringbuf->wanted -= written;
    836         /* prevent reorder with write_index write */
    837         smp_wmb();              /* barrier pair [D] */
    838         rb->pending_send_sz = ringbuf->wanted;
    839     }
    840 
    841     /* prevent reorder with write_index or pending_send_sz write */
    842     smp_mb();                   /* barrier pair [A] */
    843     ringbuf->last_seen_rd_idx = rb->read_index;
    844 
    845     /*
    846      * The consumer may have missed the reduction of pending_send_sz and
    847      * skipped the notification, so re-check the blocking condition and, if
    848      * it no longer holds, ensure another iteration is processed by
    849      * simulating the consumer's notification.
    850      */
    851     if (ringbuf_send_avail(ringbuf) >= ringbuf->wanted) {
    852         vmbus_channel_notify_host(chan);
    853     }
    854 
    855     /* skip notification at the consumer's request */
    856     if (rb->interrupt_mask) {
    857         goto out;
    858     }
    859 
    860     /*
    861      * The consumer hasn't caught up with the producer's previous state, so it's
    862      * not blocked.
    863      * (last_seen_rd_idx comes from the guest but it's safe to use w/o
    864      * validation here as it only affects notification.)
    865      */
    866     if (rb_idx_delta(ringbuf->last_seen_rd_idx, ringbuf->wr_idx,
    867                      ringbuf->common.len, true) > written) {
    868         goto out;
    869     }
    870 
    871     vmbus_channel_notify_guest(chan);
    872 out:
    873     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
    874     ringbuf->last_wr_idx = ringbuf->wr_idx;
    875     return written;
    876 }
    877 
    878 int vmbus_channel_reserve(VMBusChannel *chan,
    879                           uint32_t desclen, uint32_t msglen)
    880 {
    881     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    882     vmbus_ring_buffer *rb = NULL;
    883     vmbus_packet_hdr hdr;
    884     uint32_t needed = ringbuf->reserved +
    885         vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
    886 
    887     /* avoid touching the guest memory if possible */
    888     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
    889         goto success;
    890     }
    891 
    892     rb = ringbuf_map_hdr(&ringbuf->common);
    893     if (!rb) {
    894         return -EFAULT;
    895     }
    896 
    897     /* fetch read index from guest memory and try again */
    898     ringbuf->last_seen_rd_idx = rb->read_index;
    899 
    900     if (likely(needed <= ringbuf_send_avail(ringbuf))) {
    901         goto success;
    902     }
    903 
    904     rb->pending_send_sz = needed;
    905 
    906     /*
    907      * The consumer may have made progress and freed up some space before
    908      * seeing updated pending_send_sz, so re-read read_index (preventing
    909      * reorder with the pending_send_sz write) and try again.
    910      */
    911     smp_mb();                   /* barrier pair [A] */
    912     ringbuf->last_seen_rd_idx = rb->read_index;
    913 
    914     if (needed > ringbuf_send_avail(ringbuf)) {
    915         goto out;
    916     }
    917 
    918 success:
    919     ringbuf->reserved = needed;
    920     needed = 0;
    921 
    922     /* clear pending_send_sz if it was set */
    923     if (ringbuf->wanted) {
    924         if (!rb) {
    925             rb = ringbuf_map_hdr(&ringbuf->common);
    926             if (!rb) {
    927                 /* failure to clear pending_send_sz is non-fatal */
    928                 goto out;
    929             }
    930         }
    931 
    932         rb->pending_send_sz = 0;
    933     }
    934 
    935     /* prevent reorder of the following data operation with read_index read */
    936     smp_mb();                   /* barrier pair [B] */
    937 
    938 out:
    939     if (rb) {
    940         ringbuf_unmap_hdr(&ringbuf->common, rb, ringbuf->wanted == needed);
    941     }
    942     ringbuf->wanted = needed;
    943     return needed ? -ENOSPC : 0;
    944 }
    945 
    946 ssize_t vmbus_channel_send(VMBusChannel *chan, uint16_t pkt_type,
    947                            void *desc, uint32_t desclen,
    948                            void *msg, uint32_t msglen,
    949                            bool need_comp, uint64_t transaction_id)
    950 {
    951     ssize_t ret = 0;
    952     vmbus_packet_hdr hdr;
    953     uint32_t totlen;
    954     VMBusSendRingBuf *ringbuf = &chan->send_ringbuf;
    955 
    956     if (!vmbus_channel_is_open(chan)) {
    957         return -EINVAL;
    958     }
    959 
    960     totlen = vmbus_pkt_hdr_set_offsets(&hdr, desclen, msglen);
    961     hdr.type = pkt_type;
    962     hdr.flags = need_comp ? VMBUS_PACKET_FLAG_REQUEST_COMPLETION : 0;
    963     hdr.transaction_id = transaction_id;
    964 
    965     assert(totlen <= ringbuf->reserved);
    966 
    967     ringbuf_start_io(&ringbuf->common);
    968     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx);
    969     ret = ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr));
    970     if (ret < 0) {
    971         goto out;
    972     }
    973     if (desclen) {
    974         assert(desc);
    975         ret = ringbuf_io(&ringbuf->common, desc, desclen);
    976         if (ret < 0) {
    977             goto out;
    978         }
    979         ringbuf_seek(&ringbuf->common,
    980                      ringbuf->wr_idx + hdr.offset_qwords * sizeof(uint64_t));
    981     }
    982     ret = ringbuf_io(&ringbuf->common, msg, msglen);
    983     if (ret < 0) {
    984         goto out;
    985     }
    986     ringbuf_seek(&ringbuf->common, ringbuf->wr_idx + totlen);
    987     ringbuf->wr_idx = ringbuf_tell(&ringbuf->common);
    988     ret = 0;
    989 out:
    990     ringbuf_end_io(&ringbuf->common);
    991     if (ret) {
    992         return ret;
    993     }
    994     return ringbuf_send_update_idx(chan);
    995 }
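
/*
 * Example (hypothetical caller): space must be reserved before sending, so a
 * typical host->guest inband message looks like the following.
 */
#if 0   /* illustration only */
static ssize_t example_send_inband(VMBusChannel *chan,
                                   void *msg, uint32_t msglen)
{
    int ret = vmbus_channel_reserve(chan, 0, msglen);
    if (ret < 0) {
        return ret;     /* -ENOSPC: retry once the guest frees up space */
    }
    return vmbus_channel_send(chan, VMBUS_PACKET_DATA_INBAND, NULL, 0,
                              msg, msglen, false, 0 /* transaction id */);
}
#endif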
    996 
    997 ssize_t vmbus_channel_send_completion(VMBusChanReq *req,
    998                                       void *msg, uint32_t msglen)
    999 {
   1000     assert(req->need_comp);
   1001     return vmbus_channel_send(req->chan, VMBUS_PACKET_COMP, NULL, 0,
   1002                               msg, msglen, false, req->transaction_id);
   1003 }
   1004 
   1005 static int sgl_from_gpa_ranges(QEMUSGList *sgl, VMBusDevice *dev,
   1006                                VMBusRingBufCommon *ringbuf, uint32_t len)
   1007 {
   1008     int ret;
   1009     vmbus_pkt_gpa_direct hdr;
   1010     hwaddr curaddr = 0;
   1011     hwaddr curlen = 0;
   1012     int num;
   1013 
   1014     if (len < sizeof(hdr)) {
   1015         return -EIO;
   1016     }
   1017     ret = ringbuf_io(ringbuf, &hdr, sizeof(hdr));
   1018     if (ret < 0) {
   1019         return ret;
   1020     }
   1021     len -= sizeof(hdr);
   1022 
   1023     num = (len - hdr.rangecount * sizeof(vmbus_gpa_range)) / sizeof(uint64_t);
   1024     if (num < 0) {
   1025         return -EIO;
   1026     }
   1027     qemu_sglist_init(sgl, DEVICE(dev), num, ringbuf->as);
   1028 
   1029     for (; hdr.rangecount; hdr.rangecount--) {
   1030         vmbus_gpa_range range;
   1031 
   1032         if (len < sizeof(range)) {
   1033             goto eio;
   1034         }
   1035         ret = ringbuf_io(ringbuf, &range, sizeof(range));
   1036         if (ret < 0) {
   1037             goto err;
   1038         }
   1039         len -= sizeof(range);
   1040 
   1041         if (range.byte_offset & TARGET_PAGE_MASK) {
   1042             goto eio;
   1043         }
   1044 
   1045         for (; range.byte_count; range.byte_offset = 0) {
   1046             uint64_t paddr;
   1047             uint32_t plen = MIN(range.byte_count,
   1048                                 TARGET_PAGE_SIZE - range.byte_offset);
   1049 
   1050             if (len < sizeof(uint64_t)) {
   1051                 goto eio;
   1052             }
   1053             ret = ringbuf_io(ringbuf, &paddr, sizeof(paddr));
   1054             if (ret < 0) {
   1055                 goto err;
   1056             }
   1057             len -= sizeof(uint64_t);
   1058             paddr <<= TARGET_PAGE_BITS;
   1059             paddr |= range.byte_offset;
   1060             range.byte_count -= plen;
   1061 
   1062             if (curaddr + curlen == paddr) {
   1063                 /* consecutive fragments - join */
   1064                 curlen += plen;
   1065             } else {
   1066                 if (curlen) {
   1067                     qemu_sglist_add(sgl, curaddr, curlen);
   1068                 }
   1069 
   1070                 curaddr = paddr;
   1071                 curlen = plen;
   1072             }
   1073         }
   1074     }
   1075 
   1076     if (curlen) {
   1077         qemu_sglist_add(sgl, curaddr, curlen);
   1078     }
   1079 
   1080     return 0;
   1081 eio:
   1082     ret = -EIO;
   1083 err:
   1084     qemu_sglist_destroy(sgl);
   1085     return ret;
   1086 }
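
/*
 * Wire layout parsed above (everything after the vmbus_packet_hdr up to
 * hdr.offset_qwords is the descriptor area):
 *
 *   vmbus_pkt_gpa_direct (carries rangecount)
 *   rangecount times:  vmbus_gpa_range { byte_count, byte_offset }
 *                      followed by the PFNs covering byte_count bytes
 *
 * Consecutive physical fragments are merged into a single sgl entry.
 */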
   1087 
   1088 static VMBusChanReq *vmbus_alloc_req(VMBusChannel *chan,
   1089                                      uint32_t size, uint16_t pkt_type,
   1090                                      uint32_t msglen, uint64_t transaction_id,
   1091                                      bool need_comp)
   1092 {
   1093     VMBusChanReq *req;
   1094     uint32_t msgoff = QEMU_ALIGN_UP(size, __alignof__(*req->msg));
   1095     uint32_t totlen = msgoff + msglen;
   1096 
   1097     req = g_malloc0(totlen);
   1098     req->chan = chan;
   1099     req->pkt_type = pkt_type;
   1100     req->msg = (void *)req + msgoff;
   1101     req->msglen = msglen;
   1102     req->transaction_id = transaction_id;
   1103     req->need_comp = need_comp;
   1104     return req;
   1105 }
   1106 
   1107 int vmbus_channel_recv_start(VMBusChannel *chan)
   1108 {
   1109     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1110     vmbus_ring_buffer *rb;
   1111 
   1112     rb = ringbuf_map_hdr(&ringbuf->common);
   1113     if (!rb) {
   1114         return -EFAULT;
   1115     }
   1116     ringbuf->last_seen_wr_idx = rb->write_index;
   1117     ringbuf_unmap_hdr(&ringbuf->common, rb, false);
   1118 
   1119     if (ringbuf->last_seen_wr_idx >= ringbuf->common.len) {
   1120         return -EOVERFLOW;
   1121     }
   1122 
   1123     /* prevent reorder of the following data operation with write_index read */
   1124     smp_mb();                   /* barrier pair [C] */
   1125     return 0;
   1126 }
   1127 
   1128 void *vmbus_channel_recv_peek(VMBusChannel *chan, uint32_t size)
   1129 {
   1130     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1131     vmbus_packet_hdr hdr = {};
   1132     VMBusChanReq *req;
   1133     uint32_t avail;
   1134     uint32_t totlen, pktlen, msglen, msgoff, desclen;
   1135 
   1136     assert(size >= sizeof(*req));
   1137 
   1138     /* safe as last_seen_wr_idx is validated in vmbus_channel_recv_start */
   1139     avail = rb_idx_delta(ringbuf->rd_idx, ringbuf->last_seen_wr_idx,
   1140                          ringbuf->common.len, true);
   1141     if (avail < sizeof(hdr)) {
   1142         return NULL;
   1143     }
   1144 
   1145     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx);
   1146     if (ringbuf_io(&ringbuf->common, &hdr, sizeof(hdr)) < 0) {
   1147         return NULL;
   1148     }
   1149 
   1150     pktlen = hdr.len_qwords * sizeof(uint64_t);
   1151     totlen = pktlen + VMBUS_PKT_TRAILER;
   1152     if (totlen > avail) {
   1153         return NULL;
   1154     }
   1155 
   1156     msgoff = hdr.offset_qwords * sizeof(uint64_t);
   1157     if (msgoff > pktlen || msgoff < sizeof(hdr)) {
   1158         error_report("%s: malformed packet: %u %u", __func__, msgoff, pktlen);
   1159         return NULL;
   1160     }
   1161 
   1162     msglen = pktlen - msgoff;
   1163 
   1164     req = vmbus_alloc_req(chan, size, hdr.type, msglen, hdr.transaction_id,
   1165                           hdr.flags & VMBUS_PACKET_FLAG_REQUEST_COMPLETION);
   1166 
   1167     switch (hdr.type) {
   1168     case VMBUS_PACKET_DATA_USING_GPA_DIRECT:
   1169         desclen = msgoff - sizeof(hdr);
   1170         if (sgl_from_gpa_ranges(&req->sgl, chan->dev, &ringbuf->common,
   1171                                 desclen) < 0) {
   1172             error_report("%s: failed to convert GPA ranges to SGL", __func__);
   1173             goto free_req;
   1174         }
   1175         break;
   1176     case VMBUS_PACKET_DATA_INBAND:
   1177     case VMBUS_PACKET_COMP:
   1178         break;
   1179     default:
   1180         error_report("%s: unexpected msg type: %x", __func__, hdr.type);
   1181         goto free_req;
   1182     }
   1183 
   1184     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + msgoff);
   1185     if (ringbuf_io(&ringbuf->common, req->msg, msglen) < 0) {
   1186         goto free_req;
   1187     }
   1188     ringbuf_seek(&ringbuf->common, ringbuf->rd_idx + totlen);
   1189 
   1190     return req;
   1191 free_req:
   1192     vmbus_free_req(req);
   1193     return NULL;
   1194 }
   1195 
   1196 void vmbus_channel_recv_pop(VMBusChannel *chan)
   1197 {
   1198     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1199     ringbuf->rd_idx = ringbuf_tell(&ringbuf->common);
   1200 }
   1201 
   1202 ssize_t vmbus_channel_recv_done(VMBusChannel *chan)
   1203 {
   1204     VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1205     vmbus_ring_buffer *rb;
   1206     uint32_t read;
   1207 
   1208     read = rb_idx_delta(ringbuf->last_rd_idx, ringbuf->rd_idx,
   1209                         ringbuf->common.len, true);
   1210     if (!read) {
   1211         return 0;
   1212     }
   1213 
   1214     rb = ringbuf_map_hdr(&ringbuf->common);
   1215     if (!rb) {
   1216         return -EFAULT;
   1217     }
   1218 
   1219     /* prevent reorder with the data operation and packet read */
   1220     smp_mb();                   /* barrier pair [B] */
   1221     rb->read_index = ringbuf->rd_idx;
   1222 
   1223     /* prevent reorder of the following pending_send_sz read */
   1224     smp_mb();                   /* barrier pair [A] */
   1225 
   1226     if (rb->interrupt_mask) {
   1227         goto out;
   1228     }
   1229 
   1230     if (rb->feature_bits & VMBUS_RING_BUFFER_FEAT_PENDING_SZ) {
   1231         uint32_t wr_idx, wr_avail;
   1232         uint32_t wanted = rb->pending_send_sz;
   1233 
   1234         if (!wanted) {
   1235             goto out;
   1236         }
   1237 
   1238         /* prevent reorder with pending_send_sz read */
   1239         smp_rmb();              /* barrier pair [D] */
   1240         wr_idx = rb->write_index;
   1241 
   1242         wr_avail = rb_idx_delta(wr_idx, ringbuf->rd_idx, ringbuf->common.len,
   1243                                 true);
   1244 
   1245         /* the producer wasn't blocked on the consumer state */
   1246         if (wr_avail >= read + wanted) {
   1247             goto out;
   1248         }
   1249         /* there's not enough space for the producer to make progress */
   1250         if (wr_avail < wanted) {
   1251             goto out;
   1252         }
   1253     }
   1254 
   1255     vmbus_channel_notify_guest(chan);
   1256 out:
   1257     ringbuf_unmap_hdr(&ringbuf->common, rb, true);
   1258     ringbuf->last_rd_idx = ringbuf->rd_idx;
   1259     return read;
   1260 }
   1261 
   1262 void vmbus_free_req(void *req)
   1263 {
   1264     VMBusChanReq *r = req;
   1265 
   1266     if (!req) {
   1267         return;
   1268     }
   1269 
   1270     if (r->sgl.dev) {
   1271         qemu_sglist_destroy(&r->sgl);
   1272     }
   1273     g_free(req);
   1274 }
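
/*
 * Example (hypothetical device worker; MyReq and the completion payload are
 * assumptions): the receive API above is meant to be driven from the
 * channel's notify_cb roughly as follows.
 */
#if 0   /* illustration only */
typedef struct MyReq {
    VMBusChanReq vmreq;     /* must be the first member */
} MyReq;

static void example_notify_cb(VMBusChannel *chan)
{
    uint64_t status = 0;    /* hypothetical completion payload */

    if (vmbus_channel_recv_start(chan)) {
        return;
    }
    for (;;) {
        MyReq *req = vmbus_channel_recv_peek(chan, sizeof(*req));
        if (!req) {
            break;
        }
        vmbus_channel_recv_pop(chan);
        /* ... act on req->vmreq.msg and req->vmreq.sgl here ... */
        if (req->vmreq.need_comp &&
            vmbus_channel_reserve(chan, 0, sizeof(status)) == 0) {
            vmbus_channel_send_completion(&req->vmreq, &status,
                                          sizeof(status));
        }
        vmbus_free_req(req);
    }
    vmbus_channel_recv_done(chan);  /* commit read_index, maybe notify guest */
}
#endif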
   1275 
   1276 static void channel_event_cb(EventNotifier *e)
   1277 {
   1278     VMBusChannel *chan = container_of(e, VMBusChannel, notifier);
   1279     if (event_notifier_test_and_clear(e)) {
   1280         /*
   1281          * All receives are supposed to happen within the device worker, so
   1282          * bracket it with ringbuf_start/end_io on the receive ringbuffer, and
   1283          * potentially reuse the cached mapping throughout the worker.
   1284          * Can't do this for sends as they may happen outside the device
   1285          * worker.
   1286          */
   1287         VMBusRecvRingBuf *ringbuf = &chan->recv_ringbuf;
   1288         ringbuf_start_io(&ringbuf->common);
   1289         chan->notify_cb(chan);
   1290         ringbuf_end_io(&ringbuf->common);
   1291 
   1292     }
   1293 }
   1294 
   1295 static int alloc_chan_id(VMBus *vmbus)
   1296 {
   1297     int ret;
   1298 
   1299     ret = find_next_zero_bit(vmbus->chanid_bitmap, VMBUS_CHANID_COUNT, 0);
   1300     if (ret == VMBUS_CHANID_COUNT) {
   1301         return -ENOMEM;
   1302     }
   1303     return ret + VMBUS_FIRST_CHANID;
   1304 }
   1305 
   1306 static int register_chan_id(VMBusChannel *chan)
   1307 {
   1308     return test_and_set_bit(chan->id - VMBUS_FIRST_CHANID,
   1309                             chan->vmbus->chanid_bitmap) ? -EEXIST : 0;
   1310 }
   1311 
   1312 static void unregister_chan_id(VMBusChannel *chan)
   1313 {
   1314     clear_bit(chan->id - VMBUS_FIRST_CHANID, chan->vmbus->chanid_bitmap);
   1315 }
   1316 
   1317 static uint32_t chan_connection_id(VMBusChannel *chan)
   1318 {
   1319     return VMBUS_CHAN_CONNECTION_OFFSET + chan->id;
   1320 }
   1321 
   1322 static void init_channel(VMBus *vmbus, VMBusDevice *dev, VMBusDeviceClass *vdc,
   1323                          VMBusChannel *chan, uint16_t idx, Error **errp)
   1324 {
   1325     int res;
   1326 
   1327     chan->dev = dev;
   1328     chan->notify_cb = vdc->chan_notify_cb;
   1329     chan->subchan_idx = idx;
   1330     chan->vmbus = vmbus;
   1331 
   1332     res = alloc_chan_id(vmbus);
   1333     if (res < 0) {
   1334         error_setg(errp, "no spare channel id");
   1335         return;
   1336     }
   1337     chan->id = res;
   1338     register_chan_id(chan);
   1339 
   1340     /*
   1341      * The guest drivers depend on the device subchannels (idx #1+) to be
   1342      * offered after the primary channel (idx #0) of that device.  To ensure
   1343      * that, record the channels on the channel list in the order they appear
   1344      * within the device.
   1345      */
   1346     QTAILQ_INSERT_TAIL(&vmbus->channel_list, chan, link);
   1347 }
   1348 
   1349 static void deinit_channel(VMBusChannel *chan)
   1350 {
   1351     assert(chan->state == VMCHAN_INIT);
   1352     QTAILQ_REMOVE(&chan->vmbus->channel_list, chan, link);
   1353     unregister_chan_id(chan);
   1354 }
   1355 
   1356 static void create_channels(VMBus *vmbus, VMBusDevice *dev, Error **errp)
   1357 {
   1358     uint16_t i;
   1359     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(dev);
   1360     Error *err = NULL;
   1361 
   1362     dev->num_channels = vdc->num_channels ? vdc->num_channels(dev) : 1;
   1363     if (dev->num_channels < 1) {
   1364         error_setg(errp, "invalid #channels: %u", dev->num_channels);
   1365         return;
   1366     }
   1367 
   1368     dev->channels = g_new0(VMBusChannel, dev->num_channels);
   1369     for (i = 0; i < dev->num_channels; i++) {
   1370         init_channel(vmbus, dev, vdc, &dev->channels[i], i, &err);
   1371         if (err) {
   1372             goto err_init;
   1373         }
   1374     }
   1375 
   1376     return;
   1377 
   1378 err_init:
   1379     while (i--) {
   1380         deinit_channel(&dev->channels[i]);
   1381     }
   1382     error_propagate(errp, err);
   1383 }
   1384 
   1385 static void free_channels(VMBusDevice *dev)
   1386 {
   1387     uint16_t i;
   1388     for (i = 0; i < dev->num_channels; i++) {
   1389         deinit_channel(&dev->channels[i]);
   1390     }
   1391     g_free(dev->channels);
   1392 }
   1393 
   1394 static HvSintRoute *make_sint_route(VMBus *vmbus, uint32_t vp_index)
   1395 {
   1396     VMBusChannel *chan;
   1397 
   1398     if (vp_index == vmbus->target_vp) {
   1399         hyperv_sint_route_ref(vmbus->sint_route);
   1400         return vmbus->sint_route;
   1401     }
   1402 
   1403     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1404         if (chan->target_vp == vp_index && vmbus_channel_is_open(chan)) {
   1405             hyperv_sint_route_ref(chan->notify_route);
   1406             return chan->notify_route;
   1407         }
   1408     }
   1409 
   1410     return hyperv_sint_route_new(vp_index, VMBUS_SINT, NULL, NULL);
   1411 }
   1412 
   1413 static void open_channel(VMBusChannel *chan)
   1414 {
   1415     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
   1416 
   1417     chan->gpadl = vmbus_get_gpadl(chan, chan->ringbuf_gpadl);
   1418     if (!chan->gpadl) {
   1419         return;
   1420     }
   1421 
   1422     if (ringbufs_init(chan)) {
   1423         goto put_gpadl;
   1424     }
   1425 
   1426     if (event_notifier_init(&chan->notifier, 0)) {
   1427         goto put_gpadl;
   1428     }
   1429 
   1430     event_notifier_set_handler(&chan->notifier, channel_event_cb);
   1431 
   1432     if (hyperv_set_event_flag_handler(chan_connection_id(chan),
   1433                                       &chan->notifier)) {
   1434         goto cleanup_notifier;
   1435     }
   1436 
   1437     chan->notify_route = make_sint_route(chan->vmbus, chan->target_vp);
   1438     if (!chan->notify_route) {
   1439         goto clear_event_flag_handler;
   1440     }
   1441 
   1442     if (vdc->open_channel && vdc->open_channel(chan)) {
   1443         goto unref_sint_route;
   1444     }
   1445 
   1446     chan->is_open = true;
   1447     return;
   1448 
   1449 unref_sint_route:
   1450     hyperv_sint_route_unref(chan->notify_route);
   1451 clear_event_flag_handler:
   1452     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
   1453 cleanup_notifier:
   1454     event_notifier_set_handler(&chan->notifier, NULL);
   1455     event_notifier_cleanup(&chan->notifier);
   1456 put_gpadl:
   1457     vmbus_put_gpadl(chan->gpadl);
   1458 }
   1459 
   1460 static void close_channel(VMBusChannel *chan)
   1461 {
   1462     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
   1463 
   1464     if (!chan->is_open) {
   1465         return;
   1466     }
   1467 
   1468     if (vdc->close_channel) {
   1469         vdc->close_channel(chan);
   1470     }
   1471 
   1472     hyperv_sint_route_unref(chan->notify_route);
   1473     hyperv_set_event_flag_handler(chan_connection_id(chan), NULL);
   1474     event_notifier_set_handler(&chan->notifier, NULL);
   1475     event_notifier_cleanup(&chan->notifier);
   1476     vmbus_put_gpadl(chan->gpadl);
   1477     chan->is_open = false;
   1478 }
   1479 
   1480 static int channel_post_load(void *opaque, int version_id)
   1481 {
   1482     VMBusChannel *chan = opaque;
   1483 
   1484     return register_chan_id(chan);
   1485 }
   1486 
   1487 static const VMStateDescription vmstate_channel = {
   1488     .name = "vmbus/channel",
   1489     .version_id = 0,
   1490     .minimum_version_id = 0,
   1491     .post_load = channel_post_load,
   1492     .fields = (VMStateField[]) {
   1493         VMSTATE_UINT32(id, VMBusChannel),
   1494         VMSTATE_UINT16(subchan_idx, VMBusChannel),
   1495         VMSTATE_UINT32(open_id, VMBusChannel),
   1496         VMSTATE_UINT32(target_vp, VMBusChannel),
   1497         VMSTATE_UINT32(ringbuf_gpadl, VMBusChannel),
   1498         VMSTATE_UINT32(ringbuf_send_offset, VMBusChannel),
   1499         VMSTATE_UINT8(offer_state, VMBusChannel),
   1500         VMSTATE_UINT8(state, VMBusChannel),
   1501         VMSTATE_END_OF_LIST()
   1502     }
   1503 };
   1504 
   1505 static VMBusChannel *find_channel(VMBus *vmbus, uint32_t id)
   1506 {
   1507     VMBusChannel *chan;
   1508     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1509         if (chan->id == id) {
   1510             return chan;
   1511         }
   1512     }
   1513     return NULL;
   1514 }
   1515 
   1516 static int enqueue_incoming_message(VMBus *vmbus,
   1517                                     const struct hyperv_post_message_input *msg)
   1518 {
   1519     int ret = 0;
   1520     uint8_t idx, prev_size;
   1521 
   1522     qemu_mutex_lock(&vmbus->rx_queue_lock);
   1523 
   1524     if (vmbus->rx_queue_size == HV_MSG_QUEUE_LEN) {
   1525         ret = -ENOBUFS;
   1526         goto out;
   1527     }
   1528 
   1529     prev_size = vmbus->rx_queue_size;
   1530     idx = (vmbus->rx_queue_head + vmbus->rx_queue_size) % HV_MSG_QUEUE_LEN;
   1531     memcpy(&vmbus->rx_queue[idx], msg, sizeof(*msg));
   1532     vmbus->rx_queue_size++;
   1533 
   1534     /* only need to resched if the queue was empty before */
   1535     if (!prev_size) {
   1536         vmbus_resched(vmbus);
   1537     }
   1538 out:
   1539     qemu_mutex_unlock(&vmbus->rx_queue_lock);
   1540     return ret;
   1541 }
   1542 
   1543 static uint16_t vmbus_recv_message(const struct hyperv_post_message_input *msg,
   1544                                    void *data)
   1545 {
   1546     VMBus *vmbus = data;
   1547     struct vmbus_message_header *vmbus_msg;
   1548 
   1549     if (msg->message_type != HV_MESSAGE_VMBUS) {
   1550         return HV_STATUS_INVALID_HYPERCALL_INPUT;
   1551     }
   1552 
   1553     if (msg->payload_size < sizeof(struct vmbus_message_header)) {
   1554         return HV_STATUS_INVALID_HYPERCALL_INPUT;
   1555     }
   1556 
   1557     vmbus_msg = (struct vmbus_message_header *)msg->payload;
   1558 
   1559     trace_vmbus_recv_message(vmbus_msg->message_type, msg->payload_size);
   1560 
   1561     if (vmbus_msg->message_type == VMBUS_MSG_INVALID ||
   1562         vmbus_msg->message_type >= VMBUS_MSG_COUNT) {
   1563         error_report("vmbus: unknown message type %#x",
   1564                      vmbus_msg->message_type);
   1565         return HV_STATUS_INVALID_HYPERCALL_INPUT;
   1566     }
   1567 
   1568     if (enqueue_incoming_message(vmbus, msg)) {
   1569         return HV_STATUS_INSUFFICIENT_BUFFERS;
   1570     }
   1571     return HV_STATUS_SUCCESS;
   1572 }
   1573 
   1574 static bool vmbus_initialized(VMBus *vmbus)
   1575 {
   1576     return vmbus->version > 0 && vmbus->version <= VMBUS_VERSION_CURRENT;
   1577 }
   1578 
   1579 static void vmbus_reset_all(VMBus *vmbus)
   1580 {
   1581     qbus_reset_all(BUS(vmbus));
   1582 }
   1583 
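        /*
         * Post a single message to the guest via the SINT route.  Only one
         * message may be in flight at a time: msg_in_progress remains set
         * until vmbus_msg_cb() reports the delivery status, which is what
         * paces the offer/GPADL/channel state machines below.
         */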
   1584 static void post_msg(VMBus *vmbus, void *msgdata, uint32_t msglen)
   1585 {
   1586     int ret;
   1587     struct hyperv_message msg = {
   1588         .header.message_type = HV_MESSAGE_VMBUS,
   1589     };
   1590 
   1591     assert(!vmbus->msg_in_progress);
   1592     assert(msglen <= sizeof(msg.payload));
   1593     assert(msglen >= sizeof(struct vmbus_message_header));
   1594 
   1595     vmbus->msg_in_progress = true;
   1596 
   1597     trace_vmbus_post_msg(((struct vmbus_message_header *)msgdata)->message_type,
   1598                          msglen);
   1599 
   1600     memcpy(msg.payload, msgdata, msglen);
   1601     msg.header.payload_size = ROUND_UP(msglen, VMBUS_MESSAGE_SIZE_ALIGN);
   1602 
   1603     ret = hyperv_post_msg(vmbus->sint_route, &msg);
   1604     if (ret == 0 || ret == -EAGAIN) {
   1605         return;
   1606     }
   1607 
   1608     error_report("message delivery fatal failure: %d; aborting vmbus", ret);
   1609     vmbus_reset_all(vmbus);
   1610 }
   1611 
   1612 static int vmbus_init(VMBus *vmbus)
   1613 {
   1614     if (vmbus->target_vp != (uint32_t)-1) {
   1615         vmbus->sint_route = hyperv_sint_route_new(vmbus->target_vp, VMBUS_SINT,
   1616                                                   vmbus_msg_cb, vmbus);
   1617         if (!vmbus->sint_route) {
   1618             error_report("failed to set up SINT route");
   1619             return -ENOMEM;
   1620         }
   1621     }
   1622     return 0;
   1623 }
   1624 
   1625 static void vmbus_deinit(VMBus *vmbus)
   1626 {
   1627     VMBusGpadl *gpadl, *tmp_gpadl;
   1628     VMBusChannel *chan;
   1629 
   1630     QTAILQ_FOREACH_SAFE(gpadl, &vmbus->gpadl_list, link, tmp_gpadl) {
   1631         if (gpadl->state == VMGPADL_TORNDOWN) {
   1632             continue;
   1633         }
   1634         vmbus_put_gpadl(gpadl);
   1635     }
   1636 
   1637     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1638         chan->offer_state = VMOFFER_INIT;
   1639     }
   1640 
   1641     hyperv_sint_route_unref(vmbus->sint_route);
   1642     vmbus->sint_route = NULL;
   1643     vmbus->int_page_gpa = 0;
   1644     vmbus->target_vp = (uint32_t)-1;
   1645     vmbus->version = 0;
   1646     vmbus->state = VMBUS_LISTEN;
   1647     vmbus->msg_in_progress = false;
   1648 }
   1649 
   1650 static void handle_initiate_contact(VMBus *vmbus,
   1651                                     vmbus_message_initiate_contact *msg,
   1652                                     uint32_t msglen)
   1653 {
   1654     if (msglen < sizeof(*msg)) {
   1655         return;
   1656     }
   1657 
   1658     trace_vmbus_initiate_contact(msg->version_requested >> 16,
   1659                                  msg->version_requested & 0xffff,
   1660                                  msg->target_vcpu, msg->monitor_page1,
   1661                                  msg->monitor_page2, msg->interrupt_page);
   1662 
   1663     /*
   1664      * Reset vmbus on INITIATE_CONTACT regardless of its previous state.
   1665      * Useful, in particular, with vmbus-aware BIOS which can't shut vmbus down
   1666      * before handing over to OS loader.
   1667      */
   1668     vmbus_reset_all(vmbus);
   1669 
   1670     vmbus->target_vp = msg->target_vcpu;
   1671     vmbus->version = msg->version_requested;
   1672     if (vmbus->version < VMBUS_VERSION_WIN8) {
   1673         /* Linux passes the interrupt page even when it doesn't need it */
   1674         vmbus->int_page_gpa = msg->interrupt_page;
   1675     }
   1676     vmbus->state = VMBUS_HANDSHAKE;
   1677 
   1678     if (vmbus_init(vmbus)) {
   1679         error_report("failed to init vmbus; aborting");
   1680         vmbus_deinit(vmbus);
   1681         return;
   1682     }
   1683 }
   1684 
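        /*
         * Reply to INITIATE_CONTACT.  version_supported is effectively a
         * boolean: it is zero if the version requested by the guest fell
         * outside the range accepted by vmbus_initialized().
         */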
   1685 static void send_handshake(VMBus *vmbus)
   1686 {
   1687     struct vmbus_message_version_response msg = {
   1688         .header.message_type = VMBUS_MSG_VERSION_RESPONSE,
   1689         .version_supported = vmbus_initialized(vmbus),
   1690     };
   1691 
   1692     post_msg(vmbus, &msg, sizeof(msg));
   1693 }
   1694 
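        /*
         * Channel offers are delivered one message at a time: each channel's
         * offer_state goes VMOFFER_INIT -> VMOFFER_SENDING -> VMOFFER_SENT,
         * with complete_offer() marking the next candidate once the previous
         * offer is known to be delivered.  An ALLOFFERS_DELIVERED terminator
         * follows the last offer.
         */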
   1695 static void handle_request_offers(VMBus *vmbus, void *msgdata, uint32_t msglen)
   1696 {
   1697     VMBusChannel *chan;
   1698 
   1699     if (!vmbus_initialized(vmbus)) {
   1700         return;
   1701     }
   1702 
   1703     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1704         if (chan->offer_state == VMOFFER_INIT) {
   1705             chan->offer_state = VMOFFER_SENDING;
   1706             break;
   1707         }
   1708     }
   1709 
   1710     vmbus->state = VMBUS_OFFER;
   1711 }
   1712 
   1713 static void send_offer(VMBus *vmbus)
   1714 {
   1715     VMBusChannel *chan;
   1716     struct vmbus_message_header alloffers_msg = {
   1717         .message_type = VMBUS_MSG_ALLOFFERS_DELIVERED,
   1718     };
   1719 
   1720     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1721         if (chan->offer_state == VMOFFER_SENDING) {
   1722             VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(chan->dev);
   1723             /* Hyper-V wants LE GUIDs */
   1724             QemuUUID classid = qemu_uuid_bswap(vdc->classid);
   1725             QemuUUID instanceid = qemu_uuid_bswap(chan->dev->instanceid);
   1726             struct vmbus_message_offer_channel msg = {
   1727                 .header.message_type = VMBUS_MSG_OFFERCHANNEL,
   1728                 .child_relid = chan->id,
   1729                 .connection_id = chan_connection_id(chan),
   1730                 .channel_flags = vdc->channel_flags,
   1731                 .mmio_size_mb = vdc->mmio_size_mb,
   1732                 .sub_channel_index = vmbus_channel_idx(chan),
   1733                 .interrupt_flags = VMBUS_OFFER_INTERRUPT_DEDICATED,
   1734             };
   1735 
   1736             memcpy(msg.type_uuid, &classid, sizeof(classid));
   1737             memcpy(msg.instance_uuid, &instanceid, sizeof(instanceid));
   1738 
   1739             trace_vmbus_send_offer(chan->id, chan->dev);
   1740 
   1741             post_msg(vmbus, &msg, sizeof(msg));
   1742             return;
   1743         }
   1744     }
   1745 
   1746     /* no more offers, send terminator message */
   1747     trace_vmbus_terminate_offers();
   1748     post_msg(vmbus, &alloffers_msg, sizeof(alloffers_msg));
   1749 }
   1750 
   1751 static bool complete_offer(VMBus *vmbus)
   1752 {
   1753     VMBusChannel *chan;
   1754 
   1755     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1756         if (chan->offer_state == VMOFFER_SENDING) {
   1757             chan->offer_state = VMOFFER_SENT;
   1758             goto next_offer;
   1759         }
   1760     }
   1761     /*
   1762      * no transitioning channels found, so this completes the terminator
   1763      * message and vmbus can move on to the next state
   1764      */
   1765     return true;
   1766 
   1767 next_offer:
   1768     /* try to mark another channel for offering */
   1769     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1770         if (chan->offer_state == VMOFFER_INIT) {
   1771             chan->offer_state = VMOFFER_SENDING;
   1772             break;
   1773         }
   1774     }
   1775     /*
   1776      * an offer has just been sent, so there are more offers or the
   1777      * terminator still to send; no state transition for vmbus yet
   1778      */
   1779     return false;
   1780 }
   1781 
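        /*
         * GPADL_HEADER carries the total page count and as many PFNs as fit
         * in one message; the remainder arrives in GPADL_BODY messages.
         * num_gfns is recovered from the range buffer length: e.g. for a
         * single 3-page range, range_buflen == sizeof(range[0]) +
         * 3 * sizeof(pfn), giving num_gfns == 3.
         */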
   1783 static void handle_gpadl_header(VMBus *vmbus, vmbus_message_gpadl_header *msg,
   1784                                 uint32_t msglen)
   1785 {
   1786     VMBusGpadl *gpadl;
   1787     uint32_t num_gfns, i;
   1788 
   1789     /* must include at least one gpa range */
   1790     if (msglen < sizeof(*msg) + sizeof(msg->range[0]) ||
   1791         !vmbus_initialized(vmbus)) {
   1792         return;
   1793     }
   1794 
   1795     num_gfns = (msg->range_buflen - msg->rangecount * sizeof(msg->range[0])) /
   1796                sizeof(msg->range[0].pfn_array[0]);
   1797 
   1798     trace_vmbus_gpadl_header(msg->gpadl_id, num_gfns);
   1799 
   1800     /*
   1801      * In theory the GPADL_HEADER message can define a GPADL with multiple GPA
   1802      * ranges each with arbitrary size and alignment.  However in practice only
   1803      * single-range page-aligned GPADLs have been observed so just ignore
   1804      * anything else and simplify things greatly.
   1805      */
   1806     if (msg->rangecount != 1 || msg->range[0].byte_offset ||
   1807         (msg->range[0].byte_count != (num_gfns << TARGET_PAGE_BITS))) {
   1808         return;
   1809     }
   1810 
   1811     /* ignore requests to create already existing GPADLs */
   1812     if (find_gpadl(vmbus, msg->gpadl_id)) {
   1813         return;
   1814     }
   1815 
   1816     gpadl = create_gpadl(vmbus, msg->gpadl_id, msg->child_relid, num_gfns);
   1817 
   1818     for (i = 0; i < num_gfns &&
   1819          (void *)&msg->range[0].pfn_array[i + 1] <= (void *)msg + msglen;
   1820          i++) {
   1821         gpadl->gfns[gpadl->seen_gfns++] = msg->range[0].pfn_array[i];
   1822     }
   1823 
   1824     if (gpadl_full(gpadl)) {
   1825         vmbus->state = VMBUS_CREATE_GPADL;
   1826     }
   1827 }
   1828 
   1829 static void handle_gpadl_body(VMBus *vmbus, vmbus_message_gpadl_body *msg,
   1830                               uint32_t msglen)
   1831 {
   1832     VMBusGpadl *gpadl;
   1833     uint32_t num_gfns_left, i;
   1834 
   1835     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   1836         return;
   1837     }
   1838 
   1839     trace_vmbus_gpadl_body(msg->gpadl_id);
   1840 
   1841     gpadl = find_gpadl(vmbus, msg->gpadl_id);
   1842     if (!gpadl) {
   1843         return;
   1844     }
   1845 
   1846     num_gfns_left = gpadl->num_gfns - gpadl->seen_gfns;
   1847     assert(num_gfns_left);
   1848 
   1849     for (i = 0; i < num_gfns_left &&
   1850          (void *)&msg->pfn_array[i + 1] <= (void *)msg + msglen; i++) {
   1851         gpadl->gfns[gpadl->seen_gfns++] = msg->pfn_array[i];
   1852     }
   1853 
   1854     if (gpadl_full(gpadl)) {
   1855         vmbus->state = VMBUS_CREATE_GPADL;
   1856     }
   1857 }
   1858 
   1859 static void send_create_gpadl(VMBus *vmbus)
   1860 {
   1861     VMBusGpadl *gpadl;
   1862 
   1863     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   1864         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
   1865             struct vmbus_message_gpadl_created msg = {
   1866                 .header.message_type = VMBUS_MSG_GPADL_CREATED,
   1867                 .gpadl_id = gpadl->id,
   1868                 .child_relid = gpadl->child_relid,
   1869             };
   1870 
   1871             trace_vmbus_gpadl_created(gpadl->id);
   1872             post_msg(vmbus, &msg, sizeof(msg));
   1873             return;
   1874         }
   1875     }
   1876 
   1877     assert(false);
   1878 }
   1879 
   1880 static bool complete_create_gpadl(VMBus *vmbus)
   1881 {
   1882     VMBusGpadl *gpadl;
   1883 
   1884     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   1885         if (gpadl_full(gpadl) && gpadl->state == VMGPADL_INIT) {
   1886             gpadl->state = VMGPADL_ALIVE;
   1887 
   1888             return true;
   1889         }
   1890     }
   1891 
   1892     assert(false);
   1893     return false;
   1894 }
   1895 
   1896 static void handle_gpadl_teardown(VMBus *vmbus,
   1897                                   vmbus_message_gpadl_teardown *msg,
   1898                                   uint32_t msglen)
   1899 {
   1900     VMBusGpadl *gpadl;
   1901 
   1902     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   1903         return;
   1904     }
   1905 
   1906     trace_vmbus_gpadl_teardown(msg->gpadl_id);
   1907 
   1908     gpadl = find_gpadl(vmbus, msg->gpadl_id);
   1909     if (!gpadl || gpadl->state == VMGPADL_TORNDOWN) {
   1910         return;
   1911     }
   1912 
   1913     gpadl->state = VMGPADL_TEARINGDOWN;
   1914     vmbus->state = VMBUS_TEARDOWN_GPADL;
   1915 }
   1916 
   1917 static void send_teardown_gpadl(VMBus *vmbus)
   1918 {
   1919     VMBusGpadl *gpadl;
   1920 
   1921     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   1922         if (gpadl->state == VMGPADL_TEARINGDOWN) {
   1923             struct vmbus_message_gpadl_torndown msg = {
   1924                 .header.message_type = VMBUS_MSG_GPADL_TORNDOWN,
   1925                 .gpadl_id = gpadl->id,
   1926             };
   1927 
   1928             trace_vmbus_gpadl_torndown(gpadl->id);
   1929             post_msg(vmbus, &msg, sizeof(msg));
   1930             return;
   1931         }
   1932     }
   1933 
   1934     assert(false);
   1935 }
   1936 
   1937 static bool complete_teardown_gpadl(VMBus *vmbus)
   1938 {
   1939     VMBusGpadl *gpadl;
   1940 
   1941     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   1942         if (gpadl->state == VMGPADL_TEARINGDOWN) {
   1943             gpadl->state = VMGPADL_TORNDOWN;
   1944             vmbus_put_gpadl(gpadl);
   1945             return true;
   1946         }
   1947     }
   1948 
   1949     assert(false);
   1950     return false;
   1951 }
   1952 
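        /*
         * OPENCHANNEL: the guest supplies the GPADL containing both ring
         * buffers and the offset of the send ring within it.  The outcome is
         * reported back asynchronously via OPENCHANNEL_RESULT, with status
         * derived from vmbus_channel_is_open().
         */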
   1953 static void handle_open_channel(VMBus *vmbus, vmbus_message_open_channel *msg,
   1954                                 uint32_t msglen)
   1955 {
   1956     VMBusChannel *chan;
   1957 
   1958     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   1959         return;
   1960     }
   1961 
   1962     trace_vmbus_open_channel(msg->child_relid, msg->ring_buffer_gpadl_id,
   1963                              msg->target_vp);
   1964     chan = find_channel(vmbus, msg->child_relid);
   1965     if (!chan || chan->state != VMCHAN_INIT) {
   1966         return;
   1967     }
   1968 
   1969     chan->ringbuf_gpadl = msg->ring_buffer_gpadl_id;
   1970     chan->ringbuf_send_offset = msg->ring_buffer_offset;
   1971     chan->target_vp = msg->target_vp;
   1972     chan->open_id = msg->open_id;
   1973 
   1974     open_channel(chan);
   1975 
   1976     chan->state = VMCHAN_OPENING;
   1977     vmbus->state = VMBUS_OPEN_CHANNEL;
   1978 }
   1979 
   1980 static void send_open_channel(VMBus *vmbus)
   1981 {
   1982     VMBusChannel *chan;
   1983 
   1984     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   1985         if (chan->state == VMCHAN_OPENING) {
   1986             struct vmbus_message_open_result msg = {
   1987                 .header.message_type = VMBUS_MSG_OPENCHANNEL_RESULT,
   1988                 .child_relid = chan->id,
   1989                 .open_id = chan->open_id,
   1990                 .status = !vmbus_channel_is_open(chan),
   1991             };
   1992 
   1993             trace_vmbus_channel_open(chan->id, msg.status);
   1994             post_msg(vmbus, &msg, sizeof(msg));
   1995             return;
   1996         }
   1997     }
   1998 
   1999     assert(false);
   2000 }
   2001 
   2002 static bool complete_open_channel(VMBus *vmbus)
   2003 {
   2004     VMBusChannel *chan;
   2005 
   2006     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2007         if (chan->state == VMCHAN_OPENING) {
   2008             if (vmbus_channel_is_open(chan)) {
   2009                 chan->state = VMCHAN_OPEN;
   2010                 /*
   2011                  * simulate guest notification of ringbuffer space made
   2012                  * available, for the channel protocols where the host
   2013                  * initiates the communication
   2014                  */
   2015                 vmbus_channel_notify_host(chan);
   2016             } else {
   2017                 chan->state = VMCHAN_INIT;
   2018             }
   2019             return true;
   2020         }
   2021     }
   2022 
   2023     assert(false);
   2024     return false;
   2025 }
   2026 
   2027 static void vdev_reset_on_close(VMBusDevice *vdev)
   2028 {
   2029     uint16_t i;
   2030 
   2031     for (i = 0; i < vdev->num_channels; i++) {
   2032         if (vmbus_channel_is_open(&vdev->channels[i])) {
   2033             return;
   2034         }
   2035     }
   2036 
   2037     /* all channels closed -- reset device */
   2038     qdev_reset_all(DEVICE(vdev));
   2039 }
   2040 
   2041 static void handle_close_channel(VMBus *vmbus, vmbus_message_close_channel *msg,
   2042                                  uint32_t msglen)
   2043 {
   2044     VMBusChannel *chan;
   2045 
   2046     if (msglen < sizeof(*msg) || !vmbus_initialized(vmbus)) {
   2047         return;
   2048     }
   2049 
   2050     trace_vmbus_close_channel(msg->child_relid);
   2051 
   2052     chan = find_channel(vmbus, msg->child_relid);
   2053     if (!chan) {
   2054         return;
   2055     }
   2056 
   2057     close_channel(chan);
   2058     chan->state = VMCHAN_INIT;
   2059 
   2060     vdev_reset_on_close(chan->dev);
   2061 }
   2062 
   2063 static void handle_unload(VMBus *vmbus, void *msg, uint32_t msglen)
   2064 {
   2065     vmbus->state = VMBUS_UNLOAD;
   2066 }
   2067 
   2068 static void send_unload(VMBus *vmbus)
   2069 {
   2070     vmbus_message_header msg = {
   2071         .message_type = VMBUS_MSG_UNLOAD_RESPONSE,
   2072     };
   2073 
   2074     qemu_mutex_lock(&vmbus->rx_queue_lock);
   2075     vmbus->rx_queue_size = 0;
   2076     qemu_mutex_unlock(&vmbus->rx_queue_lock);
   2077 
   2078     post_msg(vmbus, &msg, sizeof(msg));
   2080 }
   2081 
   2082 static bool complete_unload(VMBus *vmbus)
   2083 {
   2084     vmbus_reset_all(vmbus);
   2085     return true;
   2086 }
   2087 
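        /*
         * Dequeue and dispatch exactly one queued message, then reschedule so
         * that any remaining ones are handled on subsequent bottom-half runs.
         * Note that a message too short to hold a header is still consumed
         * from the queue.
         */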
   2088 static void process_message(VMBus *vmbus)
   2089 {
   2090     struct hyperv_post_message_input *hv_msg;
   2091     struct vmbus_message_header *msg;
   2092     void *msgdata;
   2093     uint32_t msglen;
   2094 
   2095     qemu_mutex_lock(&vmbus->rx_queue_lock);
   2096 
   2097     if (!vmbus->rx_queue_size) {
   2098         goto unlock;
   2099     }
   2100 
   2101     hv_msg = &vmbus->rx_queue[vmbus->rx_queue_head];
   2102     msglen = hv_msg->payload_size;
   2103     if (msglen < sizeof(*msg)) {
   2104         goto out;
   2105     }
   2106     msgdata = hv_msg->payload;
   2107     msg = (struct vmbus_message_header *)msgdata;
   2108 
   2109     trace_vmbus_process_incoming_message(msg->message_type);
   2110 
   2111     switch (msg->message_type) {
   2112     case VMBUS_MSG_INITIATE_CONTACT:
   2113         handle_initiate_contact(vmbus, msgdata, msglen);
   2114         break;
   2115     case VMBUS_MSG_REQUESTOFFERS:
   2116         handle_request_offers(vmbus, msgdata, msglen);
   2117         break;
   2118     case VMBUS_MSG_GPADL_HEADER:
   2119         handle_gpadl_header(vmbus, msgdata, msglen);
   2120         break;
   2121     case VMBUS_MSG_GPADL_BODY:
   2122         handle_gpadl_body(vmbus, msgdata, msglen);
   2123         break;
   2124     case VMBUS_MSG_GPADL_TEARDOWN:
   2125         handle_gpadl_teardown(vmbus, msgdata, msglen);
   2126         break;
   2127     case VMBUS_MSG_OPENCHANNEL:
   2128         handle_open_channel(vmbus, msgdata, msglen);
   2129         break;
   2130     case VMBUS_MSG_CLOSECHANNEL:
   2131         handle_close_channel(vmbus, msgdata, msglen);
   2132         break;
   2133     case VMBUS_MSG_UNLOAD:
   2134         handle_unload(vmbus, msgdata, msglen);
   2135         break;
   2136     default:
   2137         error_report("unknown message type %#x", msg->message_type);
   2138         break;
   2139     }
   2140 
   2141 out:
   2142     vmbus->rx_queue_size--;
   2143     vmbus->rx_queue_head++;
   2144     vmbus->rx_queue_head %= HV_MSG_QUEUE_LEN;
   2145 
   2146     vmbus_resched(vmbus);
   2147 unlock:
   2148     qemu_mutex_unlock(&vmbus->rx_queue_lock);
   2149 }
   2150 
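        /*
         * Per-state handlers of the vmbus state machine.  .run is invoked
         * from the bottom half to (re)send the reply pending in that state;
         * .complete, if present, is invoked from vmbus_msg_cb() once delivery
         * of that reply is acknowledged, and returns true when vmbus may go
         * back to VMBUS_LISTEN.  E.g. a GPADL_TEARDOWN request moves vmbus to
         * VMBUS_TEARDOWN_GPADL, send_teardown_gpadl() posts GPADL_TORNDOWN,
         * and complete_teardown_gpadl() drops the GPADL and listens again.
         */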
   2151 static const struct {
   2152     void (*run)(VMBus *vmbus);
   2153     bool (*complete)(VMBus *vmbus);
   2154 } state_runner[] = {
   2155     [VMBUS_LISTEN]         = {process_message,     NULL},
   2156     [VMBUS_HANDSHAKE]      = {send_handshake,      NULL},
   2157     [VMBUS_OFFER]          = {send_offer,          complete_offer},
   2158     [VMBUS_CREATE_GPADL]   = {send_create_gpadl,   complete_create_gpadl},
   2159     [VMBUS_TEARDOWN_GPADL] = {send_teardown_gpadl, complete_teardown_gpadl},
   2160     [VMBUS_OPEN_CHANNEL]   = {send_open_channel,   complete_open_channel},
   2161     [VMBUS_UNLOAD]         = {send_unload,         complete_unload},
   2162 };
   2163 
   2164 static void vmbus_do_run(VMBus *vmbus)
   2165 {
   2166     if (vmbus->msg_in_progress) {
   2167         return;
   2168     }
   2169 
   2170     assert(vmbus->state < VMBUS_STATE_MAX);
   2171     assert(state_runner[vmbus->state].run);
   2172     state_runner[vmbus->state].run(vmbus);
   2173 }
   2174 
   2175 static void vmbus_run(void *opaque)
   2176 {
   2177     VMBus *vmbus = opaque;
   2178 
   2179     /* make sure no recursion happens (e.g. due to recursive aio_poll()) */
   2180     if (vmbus->in_progress) {
   2181         return;
   2182     }
   2183 
   2184     vmbus->in_progress = true;
   2185     /*
   2186      * FIXME: if vmbus_resched() is called from within vmbus_do_run(), it
   2187      * should go *after* the code that can result in aio_poll; otherwise
   2188      * reschedules can be missed.  No idea how to enforce that.
   2189      */
   2190     vmbus_do_run(vmbus);
   2191     vmbus->in_progress = false;
   2192 }
   2193 
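        /*
         * Completion callback for a message posted via the SINT route.
         * -EAGAIN means delivery couldn't complete this time; the vmbus state
         * is left unchanged so that the rescheduled bottom half re-runs the
         * same .run handler and re-posts the message.  Any other failure is
         * fatal and resets the whole bus.
         */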
   2194 static void vmbus_msg_cb(void *data, int status)
   2195 {
   2196     VMBus *vmbus = data;
   2197     bool (*complete)(VMBus *vmbus);
   2198 
   2199     assert(vmbus->msg_in_progress);
   2200 
   2201     trace_vmbus_msg_cb(status);
   2202 
   2203     if (status == -EAGAIN) {
   2204         goto out;
   2205     }
   2206     if (status) {
   2207         error_report("message delivery fatal failure: %d; aborting vmbus",
   2208                      status);
   2209         vmbus_reset_all(vmbus);
   2210         return;
   2211     }
   2212 
   2213     assert(vmbus->state < VMBUS_STATE_MAX);
   2214     complete = state_runner[vmbus->state].complete;
   2215     if (!complete || complete(vmbus)) {
   2216         vmbus->state = VMBUS_LISTEN;
   2217     }
   2218 out:
   2219     vmbus->msg_in_progress = false;
   2220     vmbus_resched(vmbus);
   2221 }
   2222 
   2223 static void vmbus_resched(VMBus *vmbus)
   2224 {
   2225     aio_bh_schedule_oneshot(qemu_get_aio_context(), vmbus_run, vmbus);
   2226 }
   2227 
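        /*
         * Handler for guest-to-host notifications through the legacy
         * (pre-Win8) interrupt page: the second half of the page is a bitmap
         * of pending channel interrupts indexed by child relid, whose bits
         * are atomically cleared as the corresponding channels are notified.
         */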
   2228 static void vmbus_signal_event(EventNotifier *e)
   2229 {
   2230     VMBusChannel *chan;
   2231     VMBus *vmbus = container_of(e, VMBus, notifier);
   2232     unsigned long *int_map;
   2233     hwaddr addr, len;
   2234     bool is_dirty = false;
   2235 
   2236     if (!event_notifier_test_and_clear(e)) {
   2237         return;
   2238     }
   2239 
   2240     trace_vmbus_signal_event();
   2241 
   2242     if (!vmbus->int_page_gpa) {
   2243         return;
   2244     }
   2245 
   2246     addr = vmbus->int_page_gpa + TARGET_PAGE_SIZE / 2;
   2247     len = TARGET_PAGE_SIZE / 2;
   2248     int_map = cpu_physical_memory_map(addr, &len, 1);
   2249     if (len != TARGET_PAGE_SIZE / 2) {
   2250         goto unmap;
   2251     }
   2252 
   2253     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2254         if (bitmap_test_and_clear_atomic(int_map, chan->id, 1)) {
   2255             if (!vmbus_channel_is_open(chan)) {
   2256                 continue;
   2257             }
   2258             vmbus_channel_notify_host(chan);
   2259             is_dirty = true;
   2260         }
   2261     }
   2262 
   2263 unmap:
   2264     cpu_physical_memory_unmap(int_map, len, 1, is_dirty);
   2265 }
   2266 
   2267 static void vmbus_dev_realize(DeviceState *dev, Error **errp)
   2268 {
   2269     VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2270     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2271     VMBus *vmbus = VMBUS(qdev_get_parent_bus(dev));
   2272     BusChild *child;
   2273     Error *err = NULL;
   2274     char idstr[UUID_FMT_LEN + 1];
   2275 
   2276     assert(!qemu_uuid_is_null(&vdev->instanceid));
   2277 
   2278     if (!qemu_uuid_is_null(&vdc->instanceid)) {
   2279         /* Class wants to only have a single instance with a fixed UUID */
   2280         if (!qemu_uuid_is_equal(&vdev->instanceid, &vdc->instanceid)) {
   2281             error_setg(&err, "instance id can't be changed");
   2282             goto error_out;
   2283         }
   2284     }
   2285 
   2286     /* Check for instance id collision for this class id */
   2287     QTAILQ_FOREACH(child, &BUS(vmbus)->children, sibling) {
   2288         VMBusDevice *child_dev = VMBUS_DEVICE(child->child);
   2289 
   2290         if (child_dev == vdev) {
   2291             continue;
   2292         }
   2293 
   2294         if (qemu_uuid_is_equal(&child_dev->instanceid, &vdev->instanceid)) {
   2295             qemu_uuid_unparse(&vdev->instanceid, idstr);
   2296             error_setg(&err, "duplicate vmbus device instance id %s", idstr);
   2297             goto error_out;
   2298         }
   2299     }
   2300 
   2301     vdev->dma_as = &address_space_memory;
   2302 
   2303     create_channels(vmbus, vdev, &err);
   2304     if (err) {
   2305         goto error_out;
   2306     }
   2307 
   2308     if (vdc->vmdev_realize) {
   2309         vdc->vmdev_realize(vdev, &err);
   2310         if (err) {
   2311             goto err_vdc_realize;
   2312         }
   2313     }
   2314     return;
   2315 
   2316 err_vdc_realize:
   2317     free_channels(vdev);
   2318 error_out:
   2319     error_propagate(errp, err);
   2320 }
   2321 
   2322 static void vmbus_dev_reset(DeviceState *dev)
   2323 {
   2324     uint16_t i;
   2325     VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2326     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2327 
   2328     if (vdev->channels) {
   2329         for (i = 0; i < vdev->num_channels; i++) {
   2330             VMBusChannel *chan = &vdev->channels[i];
   2331             close_channel(chan);
   2332             chan->state = VMCHAN_INIT;
   2333         }
   2334     }
   2335 
   2336     if (vdc->vmdev_reset) {
   2337         vdc->vmdev_reset(vdev);
   2338     }
   2339 }
   2340 
   2341 static void vmbus_dev_unrealize(DeviceState *dev)
   2342 {
   2343     VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2344     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2345 
   2346     if (vdc->vmdev_unrealize) {
   2347         vdc->vmdev_unrealize(vdev);
   2348     }
   2349     free_channels(vdev);
   2350 }
   2351 
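        /*
         * "instanceid" is the only common property; it is expected on the
         * command line unless the device class pins a fixed UUID, e.g.
         * (assuming a hypothetical device type "hv-foo"):
         *   -device hv-foo,instanceid=f8e65716-3cb3-4a06-9a60-1889c5cccab5
         */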
   2352 static Property vmbus_dev_props[] = {
   2353     DEFINE_PROP_UUID("instanceid", VMBusDevice, instanceid),
   2354     DEFINE_PROP_END_OF_LIST()
   2355 };
   2356 
   2358 static void vmbus_dev_class_init(ObjectClass *klass, void *data)
   2359 {
   2360     DeviceClass *kdev = DEVICE_CLASS(klass);
   2361     device_class_set_props(kdev, vmbus_dev_props);
   2362     kdev->bus_type = TYPE_VMBUS;
   2363     kdev->realize = vmbus_dev_realize;
   2364     kdev->unrealize = vmbus_dev_unrealize;
   2365     kdev->reset = vmbus_dev_reset;
   2366 }
   2367 
   2368 static void vmbus_dev_instance_init(Object *obj)
   2369 {
   2370     VMBusDevice *vdev = VMBUS_DEVICE(obj);
   2371     VMBusDeviceClass *vdc = VMBUS_DEVICE_GET_CLASS(vdev);
   2372 
   2373     if (!qemu_uuid_is_null(&vdc->instanceid)) {
   2374         /* Class wants to only have a single instance with a fixed UUID */
   2375         vdev->instanceid = vdc->instanceid;
   2376     }
   2377 }
   2378 
   2379 const VMStateDescription vmstate_vmbus_dev = {
   2380     .name = TYPE_VMBUS_DEVICE,
   2381     .version_id = 0,
   2382     .minimum_version_id = 0,
   2383     .fields = (VMStateField[]) {
   2384         VMSTATE_UINT8_ARRAY(instanceid.data, VMBusDevice, 16),
   2385         VMSTATE_UINT16(num_channels, VMBusDevice),
   2386         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(channels, VMBusDevice,
   2387                                              num_channels, vmstate_channel,
   2388                                              VMBusChannel),
   2389         VMSTATE_END_OF_LIST()
   2390     }
   2391 };
   2392 
   2393 /* vmbus generic device base */
   2394 static const TypeInfo vmbus_dev_type_info = {
   2395     .name = TYPE_VMBUS_DEVICE,
   2396     .parent = TYPE_DEVICE,
   2397     .abstract = true,
   2398     .instance_size = sizeof(VMBusDevice),
   2399     .class_size = sizeof(VMBusDeviceClass),
   2400     .class_init = vmbus_dev_class_init,
   2401     .instance_init = vmbus_dev_instance_init,
   2402 };
   2403 
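        /*
         * Bus realize: register with the synic layer on the two fixed VMBus
         * connection IDs -- one for messages (feeding vmbus_recv_message())
         * and one for event flags (feeding vmbus_signal_event()) -- with
         * goto-based unwinding in reverse order on failure.
         */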
   2404 static void vmbus_realize(BusState *bus, Error **errp)
   2405 {
   2406     int ret = 0;
   2407     Error *local_err = NULL;
   2408     VMBus *vmbus = VMBUS(bus);
   2409 
   2410     qemu_mutex_init(&vmbus->rx_queue_lock);
   2411 
   2412     QTAILQ_INIT(&vmbus->gpadl_list);
   2413     QTAILQ_INIT(&vmbus->channel_list);
   2414 
   2415     ret = hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID,
   2416                                  vmbus_recv_message, vmbus);
   2417     if (ret != 0) {
   2418         error_setg(&local_err, "hyperv set message handler failed: %d", ret);
   2419         goto error_out;
   2420     }
   2421 
   2422     ret = event_notifier_init(&vmbus->notifier, 0);
   2423     if (ret != 0) {
   2424         error_setg(&local_err, "event notifier failed to init with %d", ret);
   2425         goto remove_msg_handler;
   2426     }
   2427 
   2428     event_notifier_set_handler(&vmbus->notifier, vmbus_signal_event);
   2429     ret = hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID,
   2430                                         &vmbus->notifier);
   2431     if (ret != 0) {
   2432         error_setg(&local_err, "hyperv set event handler failed with %d", ret);
   2433         goto clear_event_notifier;
   2434     }
   2435 
   2436     return;
   2437 
   2438 clear_event_notifier:
   2439     event_notifier_cleanup(&vmbus->notifier);
   2440 remove_msg_handler:
   2441     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
   2442 error_out:
   2443     qemu_mutex_destroy(&vmbus->rx_queue_lock);
   2444     error_propagate(errp, local_err);
   2445 }
   2446 
   2447 static void vmbus_unrealize(BusState *bus)
   2448 {
   2449     VMBus *vmbus = VMBUS(bus);
   2450 
   2451     hyperv_set_msg_handler(VMBUS_MESSAGE_CONNECTION_ID, NULL, NULL);
   2452     hyperv_set_event_flag_handler(VMBUS_EVENT_CONNECTION_ID, NULL);
   2453     event_notifier_cleanup(&vmbus->notifier);
   2454 
   2455     qemu_mutex_destroy(&vmbus->rx_queue_lock);
   2456 }
   2457 
   2458 static void vmbus_reset(BusState *bus)
   2459 {
   2460     vmbus_deinit(VMBUS(bus));
   2461 }
   2462 
   2463 static char *vmbus_get_dev_path(DeviceState *dev)
   2464 {
   2465     BusState *bus = qdev_get_parent_bus(dev);
   2466     return qdev_get_dev_path(bus->parent);
   2467 }
   2468 
   2469 static char *vmbus_get_fw_dev_path(DeviceState *dev)
   2470 {
   2471     VMBusDevice *vdev = VMBUS_DEVICE(dev);
   2472     char uuid[UUID_FMT_LEN + 1];
   2473 
   2474     qemu_uuid_unparse(&vdev->instanceid, uuid);
   2475     return g_strdup_printf("%s@%s", qdev_fw_name(dev), uuid);
   2476 }
   2477 
   2478 static void vmbus_class_init(ObjectClass *klass, void *data)
   2479 {
   2480     BusClass *k = BUS_CLASS(klass);
   2481 
   2482     k->get_dev_path = vmbus_get_dev_path;
   2483     k->get_fw_dev_path = vmbus_get_fw_dev_path;
   2484     k->realize = vmbus_realize;
   2485     k->unrealize = vmbus_unrealize;
   2486     k->reset = vmbus_reset;
   2487 }
   2488 
   2489 static int vmbus_pre_load(void *opaque)
   2490 {
   2491     VMBusChannel *chan;
   2492     VMBus *vmbus = VMBUS(opaque);
   2493 
   2494     /*
   2495      * channel IDs allocated by the source will come in the migration stream
   2496      * for each channel, so clean up the ones allocated at realize
   2497      */
   2498     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2499         unregister_chan_id(chan);
   2500     }
   2501 
   2502     return 0;
   2503 }
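
        /*
         * Re-establish runtime state that isn't migrated: the SINT route,
         * GPADL back-pointers and refcounts, and the ring-buffer state of
         * channels that were open on the source.  Both sides are then nudged
         * in case a notification was lost around the migration point.
         */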
   2504 static int vmbus_post_load(void *opaque, int version_id)
   2505 {
   2506     int ret;
   2507     VMBus *vmbus = VMBUS(opaque);
   2508     VMBusGpadl *gpadl;
   2509     VMBusChannel *chan;
   2510 
   2511     ret = vmbus_init(vmbus);
   2512     if (ret) {
   2513         return ret;
   2514     }
   2515 
   2516     QTAILQ_FOREACH(gpadl, &vmbus->gpadl_list, link) {
   2517         gpadl->vmbus = vmbus;
   2518         gpadl->refcount = 1;
   2519     }
   2520 
   2521     /*
   2522      * reopening channels depends on initialized vmbus so it's done here
   2523      * instead of channel_post_load()
   2524      */
   2525     QTAILQ_FOREACH(chan, &vmbus->channel_list, link) {
   2527         if (chan->state == VMCHAN_OPENING || chan->state == VMCHAN_OPEN) {
   2528             open_channel(chan);
   2529         }
   2530 
   2531         if (chan->state != VMCHAN_OPEN) {
   2532             continue;
   2533         }
   2534 
   2535         if (!vmbus_channel_is_open(chan)) {
   2536             /* reopen failed, abort loading */
   2537             return -1;
   2538         }
   2539 
   2540         /* resume processing on the guest side if it missed the notification */
   2541         hyperv_sint_route_set_sint(chan->notify_route);
   2542         /* ditto on the host side */
   2543         vmbus_channel_notify_host(chan);
   2544     }
   2545 
   2546     vmbus_resched(vmbus);
   2547     return 0;
   2548 }
   2549 
   2550 static const VMStateDescription vmstate_post_message_input = {
   2551     .name = "vmbus/hyperv_post_message_input",
   2552     .version_id = 0,
   2553     .minimum_version_id = 0,
   2554     .fields = (VMStateField[]) {
   2555         /*
   2556          * skip connection_id and message_type as they are validated before
   2557          * queueing and ignored on dequeueing
   2558          */
   2559         VMSTATE_UINT32(payload_size, struct hyperv_post_message_input),
   2560         VMSTATE_UINT8_ARRAY(payload, struct hyperv_post_message_input,
   2561                             HV_MESSAGE_PAYLOAD_SIZE),
   2562         VMSTATE_END_OF_LIST()
   2563     }
   2564 };
   2565 
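        /*
         * The rx queue is migrated only as a subsection, i.e. only when it is
         * non-empty at the point the vmbus state is saved.
         */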
   2566 static bool vmbus_rx_queue_needed(void *opaque)
   2567 {
   2568     VMBus *vmbus = VMBUS(opaque);
   2569     return vmbus->rx_queue_size;
   2570 }
   2571 
   2572 static const VMStateDescription vmstate_rx_queue = {
   2573     .name = "vmbus/rx_queue",
   2574     .version_id = 0,
   2575     .minimum_version_id = 0,
   2576     .needed = vmbus_rx_queue_needed,
   2577     .fields = (VMStateField[]) {
   2578         VMSTATE_UINT8(rx_queue_head, VMBus),
   2579         VMSTATE_UINT8(rx_queue_size, VMBus),
   2580         VMSTATE_STRUCT_ARRAY(rx_queue, VMBus,
   2581                              HV_MSG_QUEUE_LEN, 0,
   2582                              vmstate_post_message_input,
   2583                              struct hyperv_post_message_input),
   2584         VMSTATE_END_OF_LIST()
   2585     }
   2586 };
   2587 
   2588 static const VMStateDescription vmstate_vmbus = {
   2589     .name = TYPE_VMBUS,
   2590     .version_id = 0,
   2591     .minimum_version_id = 0,
   2592     .pre_load = vmbus_pre_load,
   2593     .post_load = vmbus_post_load,
   2594     .fields = (VMStateField[]) {
   2595         VMSTATE_UINT8(state, VMBus),
   2596         VMSTATE_UINT32(version, VMBus),
   2597         VMSTATE_UINT32(target_vp, VMBus),
   2598         VMSTATE_UINT64(int_page_gpa, VMBus),
   2599         VMSTATE_QTAILQ_V(gpadl_list, VMBus, 0,
   2600                          vmstate_gpadl, VMBusGpadl, link),
   2601         VMSTATE_END_OF_LIST()
   2602     },
   2603     .subsections = (const VMStateDescription * []) {
   2604         &vmstate_rx_queue,
   2605         NULL
   2606     }
   2607 };
   2608 
   2609 static const TypeInfo vmbus_type_info = {
   2610     .name = TYPE_VMBUS,
   2611     .parent = TYPE_BUS,
   2612     .instance_size = sizeof(VMBus),
   2613     .class_init = vmbus_class_init,
   2614 };
   2615 
   2616 static void vmbus_bridge_realize(DeviceState *dev, Error **errp)
   2617 {
   2618     VMBusBridge *bridge = VMBUS_BRIDGE(dev);
   2619 
   2620     /*
   2621      * at this point at least one vmbus bridge is being realized, so
   2622      * vmbus_bridge_find() can only return NULL if the bridge is not unique
   2623      */
   2624     if (!vmbus_bridge_find()) {
   2625         error_setg(errp, "there can be at most one %s in the system",
   2626                    TYPE_VMBUS_BRIDGE);
   2627         return;
   2628     }
   2629 
   2630     if (!hyperv_is_synic_enabled()) {
   2631         error_setg(errp, "VMBus requires usable Hyper-V SynIC and VP_INDEX");
   2632         return;
   2633     }
   2634 
   2635     bridge->bus = VMBUS(qbus_new(TYPE_VMBUS, dev, "vmbus"));
   2636 }
   2637 
   2638 static char *vmbus_bridge_ofw_unit_address(const SysBusDevice *dev)
   2639 {
   2640     /* there can be only one VMBus */
   2641     return g_strdup("0");
   2642 }
   2643 
   2644 static const VMStateDescription vmstate_vmbus_bridge = {
   2645     .name = TYPE_VMBUS_BRIDGE,
   2646     .version_id = 0,
   2647     .minimum_version_id = 0,
   2648     .fields = (VMStateField[]) {
   2649         VMSTATE_STRUCT_POINTER(bus, VMBusBridge, vmstate_vmbus, VMBus),
   2650         VMSTATE_END_OF_LIST()
   2651     },
   2652 };
   2653 
   2654 static Property vmbus_bridge_props[] = {
   2655     DEFINE_PROP_UINT8("irq", VMBusBridge, irq, 7),
   2656     DEFINE_PROP_END_OF_LIST()
   2657 };
   2658 
   2659 static void vmbus_bridge_class_init(ObjectClass *klass, void *data)
   2660 {
   2661     DeviceClass *k = DEVICE_CLASS(klass);
   2662     SysBusDeviceClass *sk = SYS_BUS_DEVICE_CLASS(klass);
   2663 
   2664     k->realize = vmbus_bridge_realize;
   2665     k->fw_name = "vmbus";
   2666     sk->explicit_ofw_unit_address = vmbus_bridge_ofw_unit_address;
   2667     set_bit(DEVICE_CATEGORY_BRIDGE, k->categories);
   2668     k->vmsd = &vmstate_vmbus_bridge;
   2669     device_class_set_props(k, vmbus_bridge_props);
   2670     /* override SysBusDevice's default */
   2671     k->user_creatable = true;
   2672 }
   2673 
   2674 static const TypeInfo vmbus_bridge_type_info = {
   2675     .name = TYPE_VMBUS_BRIDGE,
   2676     .parent = TYPE_SYS_BUS_DEVICE,
   2677     .instance_size = sizeof(VMBusBridge),
   2678     .class_init = vmbus_bridge_class_init,
   2679 };
   2680 
   2681 static void vmbus_register_types(void)
   2682 {
   2683     type_register_static(&vmbus_bridge_type_info);
   2684     type_register_static(&vmbus_dev_type_info);
   2685     type_register_static(&vmbus_type_info);
   2686 }
   2687 
   2688 type_init(vmbus_register_types)