qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

vhost-vdpa.c (21743B)
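
This file implements the vhost-vdpa network backend: it exposes a vDPA
device's datapath queue pairs and, when shadow virtqueues (SVQ) are enabled,
a shadowed control virtqueue to QEMU's virtio-net frontend. A typical
invocation (a sketch; the device path and ids are examples) pairs the netdev
with a virtio-net device:

    qemu-system-x86_64 ... \
        -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0 \
        -device virtio-net-pci,netdev=vdpa0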


/*
 * vhost-vdpa.c
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "hw/virtio/vhost.h"

/* TODO: add multiqueue support here */
typedef struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    VHostNetState *vhost_net;

    /* Control commands shadow buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;

    bool started;
} VhostVDPAState;

const int vdpa_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_NET_F_CTRL_RX,
    VIRTIO_NET_F_CTRL_RX_EXTRA,
    VIRTIO_NET_F_CTRL_VLAN,
    VIRTIO_NET_F_CTRL_MAC_ADDR,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_MQ,
    VIRTIO_NET_F_CTRL_VQ,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_ANNOUNCE,
    VIRTIO_NET_F_STATUS,
    VHOST_INVALID_FEATURE_BIT
};
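
/*
 * Note: vhost_net's feature translation walks this array and stops at the
 * VHOST_INVALID_FEATURE_BIT sentinel, so any new bit must be added before
 * the terminator.
 */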

/** Supported device-specific feature bits with SVQ */
static const uint64_t vdpa_svq_device_features =
    BIT_ULL(VIRTIO_NET_F_CSUM) |
    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
    BIT_ULL(VIRTIO_NET_F_MTU) |
    BIT_ULL(VIRTIO_NET_F_MAC) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
    BIT_ULL(VIRTIO_NET_F_STATUS) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
    BIT_ULL(VIRTIO_NET_F_MQ) |
    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
    BIT_ULL(VIRTIO_NET_F_STANDBY);

VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return s->vhost_net;
}

static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
{
    uint32_t device_id;
    int ret;
    struct vhost_dev *hdev;

    hdev = (struct vhost_dev *)&net->dev;
    ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
    if (device_id != VIRTIO_ID_NET) {
        return -ENOTSUP;
    }
    return ret;
}

static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque      = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;
err_check:
    vhost_net_cleanup(net);
    g_free(net);
err_init:
    return -1;
}

static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_dev *dev = &s->vhost_net->dev;

    qemu_vfree(s->cvq_cmd_out_buffer);
    qemu_vfree(s->status);
    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
    }
    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.device_fd >= 0) {
        qemu_close(s->vhost_vdpa.device_fd);
        s->vhost_vdpa.device_fd = -1;
    }
}

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    return true;
}

static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    uint64_t features = 0;
    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
    features = vhost_net_get_features(s->vhost_net, features);
    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
}

static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

/** Dummy receive in case qemu falls back to userland tap networking */
static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    return size;
}

static NetClientInfo net_vhost_vdpa_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
    VhostIOVATree *tree = v->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        return;
    }

    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
    if (unlikely(r != 0)) {
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, *map);
}

static size_t vhost_vdpa_net_cvq_cmd_len(void)
{
    /*
     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
     * The in buffer is always 1 byte, so it always fits here.
     */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}
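
/*
 * Worked example (illustrative; the exact numbers come from the virtio and
 * QEMU headers): sizeof(struct virtio_net_ctrl_hdr) is 2 bytes, each
 * struct virtio_net_ctrl_mac header is 4 bytes, and with
 * MAC_TABLE_ENTRIES == 64 and ETH_ALEN == 6 the longest command is
 * 2 + 2 * 4 + 64 * 6 = 394 bytes, which the page-length helper below rounds
 * up to a single host page.
 */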
    263 
static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
}

/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                  bool write)
{
    DMAMap map = {};
    int r;

    map.translated_addr = (hwaddr)(uintptr_t)buf;
    map.size = size - 1;
    map.perm = write ? IOMMU_RW : IOMMU_RO;
    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
    if (unlikely(r != IOVA_OK)) {
        error_report("Cannot map injected element");
        return r;
    }

    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
                           !write);
    if (unlikely(r < 0)) {
        goto dma_map_err;
    }

    return 0;

dma_map_err:
    vhost_iova_tree_remove(v->iova_tree, map);
    return r;
}
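
/*
 * Note that DMAMap.size is an inclusive length: a one-page buffer is stored
 * as size == page_size - 1 above, and vhost_vdpa_cvq_unmap_buf() undoes the
 * mapping with map->size + 1 accordingly.
 */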
    297 
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    if (!s->vhost_vdpa.shadow_vqs_enabled) {
        return 0;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                               vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(r < 0)) {
        return r;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
                               vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(r < 0)) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
    }

    return r;
}

static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
    }
}

static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
                                      size_t in_len)
{
    /* Buffers for the device */
    const struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
        .iov_len = out_len,
    };
    const struct iovec in = {
        .iov_base = s->status,
        .iov_len = sizeof(virtio_net_ctrl_ack),
    };
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int r;

    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
    if (unlikely(r != 0)) {
        if (unlikely(r == -ENOSPC)) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                          __func__);
        }
        return r;
    }

    /*
     * We can poll here, since we've held the BQL from the time we sent the
     * descriptor. Also, we need to take the answer before SVQ pulls by
     * itself, when the BQL is released.
     */
    return vhost_svq_poll(svq);
}

static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
                                       uint8_t cmd, const void *data,
                                       size_t data_size)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = class,
        .cmd = cmd,
    };

    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));

    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);

    return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
                                  sizeof(virtio_net_ctrl_ack));
}
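
/*
 * A minimal sketch of how another state-restore helper could sit on top of
 * vhost_vdpa_net_load_cmd(); the helper name is hypothetical and the block
 * is compiled out, but the VIRTIO_NET_CTRL_RX_PROMISC command and the error
 * handling mirror vhost_vdpa_net_load_mac() below.
 */
#if 0
static int vhost_vdpa_net_load_promisc_sketch(VhostVDPAState *s, uint8_t on)
{
    ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_RX,
                                                  VIRTIO_NET_CTRL_RX_PROMISC,
                                                  &on, sizeof(on));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}
#endif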

static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
{
    uint64_t features = n->parent_obj.guest_features;
    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                                  n->mac, sizeof(n->mac));
        if (unlikely(dev_written < 0)) {
            return dev_written;
        }

        return *s->status != VIRTIO_NET_OK;
    }

    return 0;
}

static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n)
{
    struct virtio_net_ctrl_mq mq;
    uint64_t features = n->parent_obj.guest_features;
    ssize_t dev_written;

    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
        return 0;
    }

    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
                                          sizeof(mq));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}

static int vhost_vdpa_net_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    const VirtIONet *n;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    n = VIRTIO_NET(v->dev->vdev);
    r = vhost_vdpa_net_load_mac(s, n);
    if (unlikely(r < 0)) {
        return r;
    }
    r = vhost_vdpa_net_load_mq(s, n);
    if (unlikely(r)) {
        return r;
    }

    return 0;
}

static NetClientInfo net_vhost_vdpa_cvq_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_cvq_start,
    .load = vhost_vdpa_net_load,
    .stop = vhost_vdpa_net_cvq_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs.
 */
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                            VirtQueueElement *elem,
                                            void *opaque)
{
    VhostVDPAState *s = opaque;
    size_t in_len;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    /* Out buffer sent to both the vdpa device and the device model */
    struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
    };
    /* in buffer used for device model */
    const struct iovec in = {
        .iov_base = &status,
        .iov_len = sizeof(status),
    };
    ssize_t dev_written = -EINVAL;

    out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                             s->cvq_cmd_out_buffer,
                             vhost_vdpa_net_cvq_cmd_len());
    dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
    if (unlikely(dev_written < 0)) {
        goto out;
    }

    if (unlikely(dev_written < sizeof(status))) {
        error_report("Insufficient written data (%zd)", dev_written);
        goto out;
    }

    if (*s->status != VIRTIO_NET_OK) {
        /* status is still VIRTIO_NET_ERR here, so the guest sees the error */
        goto out;
    }

    status = VIRTIO_NET_ERR;
    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
    if (status != VIRTIO_NET_OK) {
        error_report("Bad CVQ processing in model");
    }

out:
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    if (unlikely(in_len < sizeof(status))) {
        error_report("Bad device CVQ written length");
    }
    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
    g_free(elem);
    return dev_written < 0 ? dev_written : 0;
}

static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
};

static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                           const char *device,
                                           const char *name,
                                           int vdpa_device_fd,
                                           int queue_pair_index,
                                           int nvqs,
                                           bool is_datapath,
                                           bool svq,
                                           VhostIOVATree *iova_tree)
{
    NetClientState *nc = NULL;
    VhostVDPAState *s;
    int ret = 0;
    assert(name);
    if (is_datapath) {
        nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                 name);
    } else {
        nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                         device, name);
    }
    qemu_set_info_str(nc, TYPE_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    s->vhost_vdpa.device_fd = vdpa_device_fd;
    s->vhost_vdpa.index = queue_pair_index;
    s->vhost_vdpa.shadow_vqs_enabled = svq;
    s->vhost_vdpa.iova_tree = iova_tree;
    if (!is_datapath) {
        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
                                            vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
        s->status = qemu_memalign(qemu_real_host_page_size(),
                                  vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());

        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;
    }
    ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
    if (ret) {
        qemu_del_net_client(nc);
        return NULL;
    }
    return nc;
}

static int vhost_vdpa_get_iova_range(int fd,
                                     struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
{
    int ret = ioctl(fd, VHOST_GET_FEATURES, features);
    if (unlikely(ret < 0)) {
        error_setg_errno(errp, errno,
                         "Failed to query features from vhost-vDPA device");
    }
    return ret;
}

static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
                                          int *has_cvq, Error **errp)
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config = NULL;
    __virtio16 *max_queue_pairs;
    int ret;

    if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
        *has_cvq = 1;
    } else {
        *has_cvq = 0;
    }

    if (features & (1 << VIRTIO_NET_F_MQ)) {
        config = g_malloc0(config_size + sizeof(*max_queue_pairs));
        config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
        config->len = sizeof(*max_queue_pairs);

        ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
        if (ret) {
            ret = -errno;
            error_setg(errp, "Failed to get config from vhost-vDPA device");
            return ret;
        }

        max_queue_pairs = (__virtio16 *)&config->buf;

        return lduw_le_p(max_queue_pairs);
    }

    return 1;
}
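
/*
 * The same VHOST_VDPA_GET_CONFIG pattern works for other config-space
 * fields. A compiled-out sketch (the helper name is hypothetical, not part
 * of this file) that reads the device MAC:
 */
#if 0
static int vhost_vdpa_get_mac_sketch(int fd, uint8_t mac[ETH_ALEN])
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config =
        g_malloc0(config_size + ETH_ALEN);

    config->off = offsetof(struct virtio_net_config, mac);
    config->len = ETH_ALEN;
    if (ioctl(fd, VHOST_VDPA_GET_CONFIG, config)) {
        return -errno;
    }
    memcpy(mac, config->buf, ETH_ALEN);
    return 0;
}
#endif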

int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    const NetdevVhostVDPAOptions *opts;
    uint64_t features;
    int vdpa_device_fd;
    g_autofree NetClientState **ncs = NULL;
    g_autoptr(VhostIOVATree) iova_tree = NULL;
    NetClientState *nc;
    int queue_pairs, r, i = 0, has_cvq = 0;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    opts = &netdev->u.vhost_vdpa;
    if (!opts->has_vhostdev && !opts->has_vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
        return -1;
    }

    if (opts->has_vhostdev && opts->has_vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
        return -1;
    }

    if (opts->has_vhostdev) {
        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
        if (vdpa_device_fd == -1) {
            return -errno;
        }
    } else {
        /* has_vhostfd */
        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
        if (vdpa_device_fd == -1) {
            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
            return -1;
        }
    }

    r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
    if (unlikely(r < 0)) {
        goto err;
    }

    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
                                                 &has_cvq, errp);
    if (queue_pairs < 0) {
        qemu_close(vdpa_device_fd);
        return queue_pairs;
    }

    if (opts->x_svq) {
        struct vhost_vdpa_iova_range iova_range;

        uint64_t invalid_dev_features =
            features & ~vdpa_svq_device_features &
            /* Transport features are all accepted at this point */
            ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
                             VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);

        if (invalid_dev_features) {
            error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
                       invalid_dev_features);
            goto err_svq;
        }

        vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
    }

    ncs = g_malloc0(sizeof(*ncs) * queue_pairs);

    for (i = 0; i < queue_pairs; i++) {
        ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                     vdpa_device_fd, i, 2, true, opts->x_svq,
                                     iova_tree);
        if (!ncs[i]) {
            goto err;
        }
    }

    if (has_cvq) {
        nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                 vdpa_device_fd, i, 1, false,
                                 opts->x_svq, iova_tree);
        if (!nc) {
            goto err;
        }
    }

    /* iova_tree ownership belongs to the last NetClientState */
    g_steal_pointer(&iova_tree);
    return 0;

err:
    if (i) {
        for (i--; i >= 0; i--) {
            qemu_del_net_client(ncs[i]);
        }
    }

err_svq:
    qemu_close(vdpa_device_fd);

    return -1;
}