qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

vhost-user-blk.c (16631B)


      1 /*
      2  * vhost-user-blk sample application
      3  *
      4  * Copyright (c) 2017 Intel Corporation. All rights reserved.
      5  *
      6  * Author:
      7  *  Changpeng Liu <changpeng.liu@intel.com>
      8  *
      9  * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
     10  * implementation by:
     11  *  Felipe Franciosi <felipe@nutanix.com>
     12  *  Anthony Liguori <aliguori@us.ibm.com>
     13  *
     14  * This work is licensed under the terms of the GNU GPL, version 2 only.
     15  * See the COPYING file in the top-level directory.
     16  */
     17 
     18 #include "qemu/osdep.h"
     19 #include "standard-headers/linux/virtio_blk.h"
     20 #include "libvhost-user-glib.h"
     21 
     22 #if defined(__linux__)
     23 #include <linux/fs.h>
     24 #include <sys/ioctl.h>
     25 #endif
     26 
/* Queue count passed to vug_init() when the device is set up in main(). */
enum {
    VHOST_USER_BLK_MAX_QUEUES = 8,
};
     30 
/*
 * Trailer of every request: the request handler points this at the last
 * device-writable descriptor and stores the VIRTIO_BLK_S_* result in it.
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
     34 
/* vhost user block device */
typedef struct VubDev {
    /* Embedded glib device; callbacks recover VubDev via container_of() */
    VugDev parent;
    /* Descriptor of the backing file or block device, -1 when not open */
    int blk_fd;
    /* Configuration space copied out by vub_get_config() */
    struct virtio_blk_config blkcfg;
    /* When true, VIRTIO_BLK_F_RO is added to the offered features */
    bool enable_ro;
    /* Backing file path; vub_new() stores the caller's pointer, no copy */
    char *blk_name;
    /* Main loop run by main() and stopped by vub_panic_cb() */
    GMainLoop *loop;
} VubDev;
     44 
/* State of one in-flight request; released by vub_req_complete() */
typedef struct VubReq {
    /* Element popped from the virtqueue; freed on completion */
    VuVirtqElement *elem;
    /* First sector of the transfer; multiplied by 512 to get a file offset */
    int64_t sector_num;
    /* Payload byte count; completion reports size + 1 (status byte) */
    size_t size;
    /* Status trailer located in the last in-descriptor */
    struct virtio_blk_inhdr *in;
    /* Request header located in the first out-descriptor */
    struct virtio_blk_outhdr *out;
    /* Owning device */
    VubDev *vdev_blk;
    /* Queue the element was popped from and is pushed back to */
    struct VuVirtq *vq;
} VubReq;
     54 
     55 /* refer util/iov.c */
     56 static size_t vub_iov_size(const struct iovec *iov,
     57                               const unsigned int iov_cnt)
     58 {
     59     size_t len;
     60     unsigned int i;
     61 
     62     len = 0;
     63     for (i = 0; i < iov_cnt; i++) {
     64         len += iov[i].iov_len;
     65     }
     66     return len;
     67 }
     68 
     69 static size_t vub_iov_to_buf(const struct iovec *iov,
     70                              const unsigned int iov_cnt, void *buf)
     71 {
     72     size_t len;
     73     unsigned int i;
     74 
     75     len = 0;
     76     for (i = 0; i < iov_cnt; i++) {
     77         memcpy(buf + len,  iov[i].iov_base, iov[i].iov_len);
     78         len += iov[i].iov_len;
     79     }
     80     return len;
     81 }
     82 
     83 static void vub_panic_cb(VuDev *vu_dev, const char *buf)
     84 {
     85     VugDev *gdev;
     86     VubDev *vdev_blk;
     87 
     88     assert(vu_dev);
     89 
     90     gdev = container_of(vu_dev, VugDev, parent);
     91     vdev_blk = container_of(gdev, VubDev, parent);
     92     if (buf) {
     93         g_warning("vu_panic: %s", buf);
     94     }
     95 
     96     g_main_loop_quit(vdev_blk->loop);
     97 }
     98 
     99 static void vub_req_complete(VubReq *req)
    100 {
    101     VugDev *gdev = &req->vdev_blk->parent;
    102     VuDev *vu_dev = &gdev->parent;
    103 
    104     /* IO size with 1 extra status byte */
    105     vu_queue_push(vu_dev, req->vq, req->elem,
    106                   req->size + 1);
    107     vu_queue_notify(vu_dev, req->vq);
    108 
    109     g_free(req->elem);
    110     g_free(req);
    111 }
    112 
    113 static int vub_open(const char *file_name, bool wce)
    114 {
    115     int fd;
    116     int flags = O_RDWR;
    117 
    118     if (!wce) {
    119         flags |= O_DIRECT;
    120     }
    121 
    122     fd = open(file_name, flags);
    123     if (fd < 0) {
    124         fprintf(stderr, "Cannot open file %s, %s\n", file_name,
    125                 strerror(errno));
    126         return -1;
    127     }
    128 
    129     return fd;
    130 }
    131 
    132 static ssize_t
    133 vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
    134 {
    135     VubDev *vdev_blk = req->vdev_blk;
    136     ssize_t rc;
    137 
    138     if (!iovcnt) {
    139         fprintf(stderr, "Invalid Read IOV count\n");
    140         return -1;
    141     }
    142 
    143     req->size = vub_iov_size(iov, iovcnt);
    144     rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    145     if (rc < 0) {
    146         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
    147                 vdev_blk->blk_name, req->sector_num, req->size,
    148                 strerror(errno));
    149         return -1;
    150     }
    151 
    152     return rc;
    153 }
    154 
    155 static ssize_t
    156 vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
    157 {
    158     VubDev *vdev_blk = req->vdev_blk;
    159     ssize_t rc;
    160 
    161     if (!iovcnt) {
    162         fprintf(stderr, "Invalid Write IOV count\n");
    163         return -1;
    164     }
    165 
    166     req->size = vub_iov_size(iov, iovcnt);
    167     rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
    168     if (rc < 0) {
    169         fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
    170                 vdev_blk->blk_name, req->sector_num, req->size,
    171                 strerror(errno));
    172         return -1;
    173     }
    174 
    175     return rc;
    176 }
    177 
    178 static int
    179 vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
    180                          uint32_t type)
    181 {
    182     struct virtio_blk_discard_write_zeroes *desc;
    183     ssize_t size;
    184     void *buf;
    185 
    186     size = vub_iov_size(iov, iovcnt);
    187     if (size != sizeof(*desc)) {
    188         fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc));
    189         return -1;
    190     }
    191     buf = g_new0(char, size);
    192     vub_iov_to_buf(iov, iovcnt, buf);
    193 
    194     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
    195     VubDev *vdev_blk = req->vdev_blk;
    196     desc = (struct virtio_blk_discard_write_zeroes *)buf;
    197     uint64_t range[2] = { le64toh(desc->sector) << 9,
    198                           le32toh(desc->num_sectors) << 9 };
    199     if (type == VIRTIO_BLK_T_DISCARD) {
    200         if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
    201             g_free(buf);
    202             return 0;
    203         }
    204     } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
    205         if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
    206             g_free(buf);
    207             return 0;
    208         }
    209     }
    210     #endif
    211 
    212     g_free(buf);
    213     return -1;
    214 }
    215 
    216 static void
    217 vub_flush(VubReq *req)
    218 {
    219     VubDev *vdev_blk = req->vdev_blk;
    220 
    221     fdatasync(vdev_blk->blk_fd);
    222 }
    223 
    224 static int vub_virtio_process_req(VubDev *vdev_blk,
    225                                      VuVirtq *vq)
    226 {
    227     VugDev *gdev = &vdev_blk->parent;
    228     VuDev *vu_dev = &gdev->parent;
    229     VuVirtqElement *elem;
    230     uint32_t type;
    231     unsigned in_num;
    232     unsigned out_num;
    233     VubReq *req;
    234 
    235     elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
    236     if (!elem) {
    237         return -1;
    238     }
    239 
    240     /* refer to hw/block/virtio_blk.c */
    241     if (elem->out_num < 1 || elem->in_num < 1) {
    242         fprintf(stderr, "virtio-blk request missing headers\n");
    243         g_free(elem);
    244         return -1;
    245     }
    246 
    247     req = g_new0(VubReq, 1);
    248     req->vdev_blk = vdev_blk;
    249     req->vq = vq;
    250     req->elem = elem;
    251 
    252     in_num = elem->in_num;
    253     out_num = elem->out_num;
    254 
    255     /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
    256     if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
    257         fprintf(stderr, "Invalid outhdr size\n");
    258         goto err;
    259     }
    260     req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
    261     out_num--;
    262 
    263     if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
    264         fprintf(stderr, "Invalid inhdr size\n");
    265         goto err;
    266     }
    267     req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
    268     in_num--;
    269 
    270     type = le32toh(req->out->type);
    271     switch (type & ~VIRTIO_BLK_T_BARRIER) {
    272     case VIRTIO_BLK_T_IN:
    273     case VIRTIO_BLK_T_OUT: {
    274         ssize_t ret = 0;
    275         bool is_write = type & VIRTIO_BLK_T_OUT;
    276         req->sector_num = le64toh(req->out->sector);
    277         if (is_write) {
    278             ret  = vub_writev(req, &elem->out_sg[1], out_num);
    279         } else {
    280             ret = vub_readv(req, &elem->in_sg[0], in_num);
    281         }
    282         if (ret >= 0) {
    283             req->in->status = VIRTIO_BLK_S_OK;
    284         } else {
    285             req->in->status = VIRTIO_BLK_S_IOERR;
    286         }
    287         vub_req_complete(req);
    288         break;
    289     }
    290     case VIRTIO_BLK_T_FLUSH:
    291         vub_flush(req);
    292         req->in->status = VIRTIO_BLK_S_OK;
    293         vub_req_complete(req);
    294         break;
    295     case VIRTIO_BLK_T_GET_ID: {
    296         size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
    297                           VIRTIO_BLK_ID_BYTES);
    298         snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
    299         req->in->status = VIRTIO_BLK_S_OK;
    300         req->size = elem->in_sg[0].iov_len;
    301         vub_req_complete(req);
    302         break;
    303     }
    304     case VIRTIO_BLK_T_DISCARD:
    305     case VIRTIO_BLK_T_WRITE_ZEROES: {
    306         int rc;
    307         rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
    308         if (rc == 0) {
    309             req->in->status = VIRTIO_BLK_S_OK;
    310         } else {
    311             req->in->status = VIRTIO_BLK_S_IOERR;
    312         }
    313         vub_req_complete(req);
    314         break;
    315     }
    316     default:
    317         req->in->status = VIRTIO_BLK_S_UNSUPP;
    318         vub_req_complete(req);
    319         break;
    320     }
    321 
    322     return 0;
    323 
    324 err:
    325     g_free(elem);
    326     g_free(req);
    327     return -1;
    328 }
    329 
    330 static void vub_process_vq(VuDev *vu_dev, int idx)
    331 {
    332     VugDev *gdev;
    333     VubDev *vdev_blk;
    334     VuVirtq *vq;
    335     int ret;
    336 
    337     gdev = container_of(vu_dev, VugDev, parent);
    338     vdev_blk = container_of(gdev, VubDev, parent);
    339     assert(vdev_blk);
    340 
    341     vq = vu_get_queue(vu_dev, idx);
    342     assert(vq);
    343 
    344     while (1) {
    345         ret = vub_virtio_process_req(vdev_blk, vq);
    346         if (ret) {
    347             break;
    348         }
    349     }
    350 }
    351 
    352 static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
    353 {
    354     VuVirtq *vq;
    355 
    356     assert(vu_dev);
    357 
    358     vq = vu_get_queue(vu_dev, idx);
    359     vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
    360 }
    361 
    362 static uint64_t
    363 vub_get_features(VuDev *dev)
    364 {
    365     uint64_t features;
    366     VugDev *gdev;
    367     VubDev *vdev_blk;
    368 
    369     gdev = container_of(dev, VugDev, parent);
    370     vdev_blk = container_of(gdev, VubDev, parent);
    371 
    372     features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
    373                1ull << VIRTIO_BLK_F_SEG_MAX |
    374                1ull << VIRTIO_BLK_F_TOPOLOGY |
    375                1ull << VIRTIO_BLK_F_BLK_SIZE |
    376                1ull << VIRTIO_BLK_F_FLUSH |
    377                #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
    378                1ull << VIRTIO_BLK_F_DISCARD |
    379                1ull << VIRTIO_BLK_F_WRITE_ZEROES |
    380                #endif
    381                1ull << VIRTIO_BLK_F_CONFIG_WCE;
    382 
    383     if (vdev_blk->enable_ro) {
    384         features |= 1ull << VIRTIO_BLK_F_RO;
    385     }
    386 
    387     return features;
    388 }
    389 
    390 static uint64_t
    391 vub_get_protocol_features(VuDev *dev)
    392 {
    393     return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
    394            1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
    395 }
    396 
    397 static int
    398 vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
    399 {
    400     VugDev *gdev;
    401     VubDev *vdev_blk;
    402 
    403     if (len > sizeof(struct virtio_blk_config)) {
    404         return -1;
    405     }
    406 
    407     gdev = container_of(vu_dev, VugDev, parent);
    408     vdev_blk = container_of(gdev, VubDev, parent);
    409     memcpy(config, &vdev_blk->blkcfg, len);
    410 
    411     return 0;
    412 }
    413 
    414 static int
    415 vub_set_config(VuDev *vu_dev, const uint8_t *data,
    416                uint32_t offset, uint32_t size, uint32_t flags)
    417 {
    418     VugDev *gdev;
    419     VubDev *vdev_blk;
    420     uint8_t wce;
    421     int fd;
    422 
    423     /* don't support live migration */
    424     if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
    425         return -1;
    426     }
    427 
    428     gdev = container_of(vu_dev, VugDev, parent);
    429     vdev_blk = container_of(gdev, VubDev, parent);
    430 
    431     if (offset != offsetof(struct virtio_blk_config, wce) ||
    432         size != 1) {
    433         return -1;
    434     }
    435 
    436     wce = *data;
    437     if (wce == vdev_blk->blkcfg.wce) {
    438         /* Do nothing as same with old configuration */
    439         return 0;
    440     }
    441 
    442     vdev_blk->blkcfg.wce = wce;
    443     fprintf(stdout, "Write Cache Policy Changed\n");
    444     if (vdev_blk->blk_fd >= 0) {
    445         close(vdev_blk->blk_fd);
    446         vdev_blk->blk_fd = -1;
    447     }
    448 
    449     fd = vub_open(vdev_blk->blk_name, wce);
    450     if (fd < 0) {
    451         fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
    452         vdev_blk->blk_fd = -1;
    453         return -1;
    454     }
    455     vdev_blk->blk_fd = fd;
    456 
    457     return 0;
    458 }
    459 
    460 static const VuDevIface vub_iface = {
    461     .get_features = vub_get_features,
    462     .queue_set_started = vub_queue_set_started,
    463     .get_protocol_features = vub_get_protocol_features,
    464     .get_config = vub_get_config,
    465     .set_config = vub_set_config,
    466 };
    467 
    468 static int unix_sock_new(char *unix_fn)
    469 {
    470     int sock;
    471     struct sockaddr_un un;
    472     size_t len;
    473 
    474     assert(unix_fn);
    475 
    476     sock = socket(AF_UNIX, SOCK_STREAM, 0);
    477     if (sock < 0) {
    478         perror("socket");
    479         return -1;
    480     }
    481 
    482     un.sun_family = AF_UNIX;
    483     (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
    484     len = sizeof(un.sun_family) + strlen(un.sun_path);
    485 
    486     (void)unlink(unix_fn);
    487     if (bind(sock, (struct sockaddr *)&un, len) < 0) {
    488         perror("bind");
    489         goto fail;
    490     }
    491 
    492     if (listen(sock, 1) < 0) {
    493         perror("listen");
    494         goto fail;
    495     }
    496 
    497     return sock;
    498 
    499 fail:
    500     (void)close(sock);
    501 
    502     return -1;
    503 }
    504 
    505 static void vub_free(struct VubDev *vdev_blk)
    506 {
    507     if (!vdev_blk) {
    508         return;
    509     }
    510 
    511     g_main_loop_unref(vdev_blk->loop);
    512     if (vdev_blk->blk_fd >= 0) {
    513         close(vdev_blk->blk_fd);
    514     }
    515     g_free(vdev_blk);
    516 }
    517 
    518 static uint32_t
    519 vub_get_blocksize(int fd)
    520 {
    521     uint32_t blocksize = 512;
    522 
    523 #if defined(__linux__) && defined(BLKSSZGET)
    524     if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
    525         return blocksize;
    526     }
    527 #endif
    528 
    529     return blocksize;
    530 }
    531 
    532 static void
    533 vub_initialize_config(int fd, struct virtio_blk_config *config)
    534 {
    535     off64_t capacity;
    536 
    537     capacity = lseek64(fd, 0, SEEK_END);
    538     config->capacity = capacity >> 9;
    539     config->blk_size = vub_get_blocksize(fd);
    540     config->size_max = 65536;
    541     config->seg_max = 128 - 2;
    542     config->min_io_size = 1;
    543     config->opt_io_size = 1;
    544     config->num_queues = 1;
    545     #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
    546     config->max_discard_sectors = 32768;
    547     config->max_discard_seg = 1;
    548     config->discard_sector_alignment = config->blk_size >> 9;
    549     config->max_write_zeroes_sectors = 32768;
    550     config->max_write_zeroes_seg = 1;
    551     #endif
    552 }
    553 
    554 static VubDev *
    555 vub_new(char *blk_file)
    556 {
    557     VubDev *vdev_blk;
    558 
    559     vdev_blk = g_new0(VubDev, 1);
    560     vdev_blk->loop = g_main_loop_new(NULL, FALSE);
    561     vdev_blk->blk_fd = vub_open(blk_file, 0);
    562     if (vdev_blk->blk_fd  < 0) {
    563         fprintf(stderr, "Error to open block device %s\n", blk_file);
    564         vub_free(vdev_blk);
    565         return NULL;
    566     }
    567     vdev_blk->enable_ro = false;
    568     vdev_blk->blkcfg.wce = 0;
    569     vdev_blk->blk_name = blk_file;
    570 
    571     /* fill virtio_blk_config with block parameters */
    572     vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
    573 
    574     return vdev_blk;
    575 }
    576 
/* Command-line option storage, filled in by g_option_context_parse() */

/* Inherited listening socket descriptor; negative means not given */
static int opt_fdnum = -1;
/* Path of the UNIX socket to create and listen on */
static char *opt_socket_path;
/* Backing file or block device path (required) */
static char *opt_blk_file;
/* Print the capabilities JSON and exit */
static gboolean opt_print_caps;
/* Advertise the device as read-only */
static gboolean opt_read_only;

/* Option table registered with the GOptionContext in main() */
static GOptionEntry entries[] = {
    { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
      "Print capabilities", NULL },
    { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
      "Use inherited fd socket", "FDNUM" },
    { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
      "Use UNIX socket path", "PATH" },
    {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
     "block device or file path", "PATH"},
    { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
      "Enable read-only", NULL },
    { NULL, },
};
    596 
    597 int main(int argc, char **argv)
    598 {
    599     int lsock = -1, csock = -1;
    600     VubDev *vdev_blk = NULL;
    601     GError *error = NULL;
    602     GOptionContext *context;
    603 
    604     context = g_option_context_new(NULL);
    605     g_option_context_add_main_entries(context, entries, NULL);
    606     if (!g_option_context_parse(context, &argc, &argv, &error)) {
    607         g_printerr("Option parsing failed: %s\n", error->message);
    608         exit(EXIT_FAILURE);
    609     }
    610     if (opt_print_caps) {
    611         g_print("{\n");
    612         g_print("  \"type\": \"block\",\n");
    613         g_print("  \"features\": [\n");
    614         g_print("    \"read-only\",\n");
    615         g_print("    \"blk-file\"\n");
    616         g_print("  ]\n");
    617         g_print("}\n");
    618         exit(EXIT_SUCCESS);
    619     }
    620 
    621     if (!opt_blk_file) {
    622         g_print("%s\n", g_option_context_get_help(context, true, NULL));
    623         exit(EXIT_FAILURE);
    624     }
    625 
    626     if (opt_socket_path) {
    627         lsock = unix_sock_new(opt_socket_path);
    628         if (lsock < 0) {
    629             exit(EXIT_FAILURE);
    630         }
    631     } else if (opt_fdnum < 0) {
    632         g_print("%s\n", g_option_context_get_help(context, true, NULL));
    633         exit(EXIT_FAILURE);
    634     } else {
    635         lsock = opt_fdnum;
    636     }
    637 
    638     csock = accept(lsock, NULL, NULL);
    639     if (csock < 0) {
    640         g_printerr("Accept error %s\n", strerror(errno));
    641         exit(EXIT_FAILURE);
    642     }
    643 
    644     vdev_blk = vub_new(opt_blk_file);
    645     if (!vdev_blk) {
    646         exit(EXIT_FAILURE);
    647     }
    648     if (opt_read_only) {
    649         vdev_blk->enable_ro = true;
    650     }
    651 
    652     if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
    653                   vub_panic_cb, &vub_iface)) {
    654         g_printerr("Failed to initialize libvhost-user-glib\n");
    655         exit(EXIT_FAILURE);
    656     }
    657 
    658     g_main_loop_run(vdev_blk->loop);
    659     g_main_loop_unref(vdev_blk->loop);
    660     g_option_context_free(context);
    661     vug_deinit(&vdev_blk->parent);
    662     vub_free(vdev_blk);
    663     if (csock >= 0) {
    664         close(csock);
    665     }
    666     if (lsock >= 0) {
    667         close(lsock);
    668     }
    669     g_free(opt_socket_path);
    670     g_free(opt_blk_file);
    671 
    672     return 0;
    673 }