qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

vduse-blk.c (11318B)


      1 /*
      2  * Export QEMU block device via VDUSE
      3  *
      4  * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights reserved.
      5  *
      6  * Author:
      7  *   Xie Yongji <xieyongji@bytedance.com>
      8  *
      9  * This work is licensed under the terms of the GNU GPL, version 2 or
     10  * later.  See the COPYING file in the top-level directory.
     11  */
     12 
     13 #include <sys/eventfd.h>
     14 
     15 #include "qemu/osdep.h"
     16 #include "qapi/error.h"
     17 #include "block/export.h"
     18 #include "qemu/error-report.h"
     19 #include "util/block-helpers.h"
     20 #include "subprojects/libvduse/libvduse.h"
     21 #include "virtio-blk-handler.h"
     22 
     23 #include "standard-headers/linux/virtio_blk.h"
     24 
/* Number of virtqueues used when the "num-queues" option is not given. */
#define VDUSE_DEFAULT_NUM_QUEUE 1
/* Virtqueue size used when the "queue-size" option is not given. */
#define VDUSE_DEFAULT_QUEUE_SIZE 256
     27 
/* Per-export state of a vduse-blk block export. */
typedef struct VduseBlkExport {
    /* Generic export state; container_of() maps it back to this struct. */
    BlockExport export;
    /* State handed to virtio_blk_process_req() for every request. */
    VirtioBlkHandler handler;
    /* Device returned by vduse_dev_create(). */
    VduseDev *dev;
    /* Number of virtqueues set up on dev. */
    uint16_t num_queues;
    /* Path given to vduse_set_reconnect_log_file(); released with g_free(). */
    char *recon_file;
    /*
     * Requests whose coroutine has been started but has not finished yet;
     * vduse_blk_detach_ctx() waits for this to drop to zero.
     */
    unsigned int inflight;
} VduseBlkExport;
     36 
/*
 * One virtio-blk request popped from a virtqueue.  Instances come from
 * vduse_queue_pop(vq, sizeof(VduseBlkReq)) and are released with free().
 */
typedef struct VduseBlkReq {
    /*
     * Descriptor element of the request.
     * NOTE(review): presumably has to stay the first member because the
     * object is allocated by vduse_queue_pop() -- confirm against libvduse.
     */
    VduseVirtqElement elem;
    /* Virtqueue the request was popped from and is completed on. */
    VduseVirtq *vq;
} VduseBlkReq;
     41 
     42 static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
     43 {
     44     vblk_exp->inflight++;
     45 }
     46 
     47 static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
     48 {
     49     if (--vblk_exp->inflight == 0) {
     50         aio_wait_kick();
     51     }
     52 }
     53 
     54 static void vduse_blk_req_complete(VduseBlkReq *req, size_t in_len)
     55 {
     56     vduse_queue_push(req->vq, &req->elem, in_len);
     57     vduse_queue_notify(req->vq);
     58 
     59     free(req);
     60 }
     61 
     62 static void coroutine_fn vduse_blk_virtio_process_req(void *opaque)
     63 {
     64     VduseBlkReq *req = opaque;
     65     VduseVirtq *vq = req->vq;
     66     VduseDev *dev = vduse_queue_get_dev(vq);
     67     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
     68     VirtioBlkHandler *handler = &vblk_exp->handler;
     69     VduseVirtqElement *elem = &req->elem;
     70     struct iovec *in_iov = elem->in_sg;
     71     struct iovec *out_iov = elem->out_sg;
     72     unsigned in_num = elem->in_num;
     73     unsigned out_num = elem->out_num;
     74     int in_len;
     75 
     76     in_len = virtio_blk_process_req(handler, in_iov,
     77                                     out_iov, in_num, out_num);
     78     if (in_len < 0) {
     79         free(req);
     80         return;
     81     }
     82 
     83     vduse_blk_req_complete(req, in_len);
     84     vduse_blk_inflight_dec(vblk_exp);
     85 }
     86 
     87 static void vduse_blk_vq_handler(VduseDev *dev, VduseVirtq *vq)
     88 {
     89     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
     90 
     91     while (1) {
     92         VduseBlkReq *req;
     93 
     94         req = vduse_queue_pop(vq, sizeof(VduseBlkReq));
     95         if (!req) {
     96             break;
     97         }
     98         req->vq = vq;
     99 
    100         Coroutine *co =
    101             qemu_coroutine_create(vduse_blk_virtio_process_req, req);
    102 
    103         vduse_blk_inflight_inc(vblk_exp);
    104         qemu_coroutine_enter(co);
    105     }
    106 }
    107 
    108 static void on_vduse_vq_kick(void *opaque)
    109 {
    110     VduseVirtq *vq = opaque;
    111     VduseDev *dev = vduse_queue_get_dev(vq);
    112     int fd = vduse_queue_get_fd(vq);
    113     eventfd_t kick_data;
    114 
    115     if (eventfd_read(fd, &kick_data) == -1) {
    116         error_report("failed to read data from eventfd");
    117         return;
    118     }
    119 
    120     vduse_blk_vq_handler(dev, vq);
    121 }
    122 
    123 static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
    124 {
    125     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
    126 
    127     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
    128                        true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
    129     /* Make sure we don't miss any kick afer reconnecting */
    130     eventfd_write(vduse_queue_get_fd(vq), 1);
    131 }
    132 
    133 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
    134 {
    135     VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
    136 
    137     aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
    138                        true, NULL, NULL, NULL, NULL, NULL);
    139 }
    140 
/* Queue enable/disable callbacks passed to vduse_dev_create(). */
static const VduseOps vduse_blk_ops = {
    .enable_queue = vduse_blk_enable_queue,
    .disable_queue = vduse_blk_disable_queue,
};
    145 
    146 static void on_vduse_dev_kick(void *opaque)
    147 {
    148     VduseDev *dev = opaque;
    149 
    150     vduse_dev_handler(dev);
    151 }
    152 
    153 static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
    154 {
    155     int i;
    156 
    157     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
    158                        true, on_vduse_dev_kick, NULL, NULL, NULL,
    159                        vblk_exp->dev);
    160 
    161     for (i = 0; i < vblk_exp->num_queues; i++) {
    162         VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
    163         int fd = vduse_queue_get_fd(vq);
    164 
    165         if (fd < 0) {
    166             continue;
    167         }
    168         aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
    169                            on_vduse_vq_kick, NULL, NULL, NULL, vq);
    170     }
    171 }
    172 
    173 static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
    174 {
    175     int i;
    176 
    177     for (i = 0; i < vblk_exp->num_queues; i++) {
    178         VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
    179         int fd = vduse_queue_get_fd(vq);
    180 
    181         if (fd < 0) {
    182             continue;
    183         }
    184         aio_set_fd_handler(vblk_exp->export.ctx, fd,
    185                            true, NULL, NULL, NULL, NULL, NULL);
    186     }
    187     aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
    188                        true, NULL, NULL, NULL, NULL, NULL);
    189 
    190     AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
    191 }
    192 
    193 
    194 static void blk_aio_attached(AioContext *ctx, void *opaque)
    195 {
    196     VduseBlkExport *vblk_exp = opaque;
    197 
    198     vblk_exp->export.ctx = ctx;
    199     vduse_blk_attach_ctx(vblk_exp, ctx);
    200 }
    201 
    202 static void blk_aio_detach(void *opaque)
    203 {
    204     VduseBlkExport *vblk_exp = opaque;
    205 
    206     vduse_blk_detach_ctx(vblk_exp);
    207     vblk_exp->export.ctx = NULL;
    208 }
    209 
    210 static void vduse_blk_resize(void *opaque)
    211 {
    212     BlockExport *exp = opaque;
    213     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
    214     struct virtio_blk_config config;
    215 
    216     config.capacity =
    217             cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
    218     vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
    219                             offsetof(struct virtio_blk_config, capacity),
    220                             (char *)&config.capacity);
    221 }
    222 
/* Block backend callbacks installed with blk_set_dev_ops() at creation. */
static const BlockDevOps vduse_block_ops = {
    .resize_cb = vduse_blk_resize,
};
    226 
    227 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
    228                                 Error **errp)
    229 {
    230     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
    231     BlockExportOptionsVduseBlk *vblk_opts = &opts->u.vduse_blk;
    232     uint64_t logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
    233     uint16_t num_queues = VDUSE_DEFAULT_NUM_QUEUE;
    234     uint16_t queue_size = VDUSE_DEFAULT_QUEUE_SIZE;
    235     Error *local_err = NULL;
    236     struct virtio_blk_config config = { 0 };
    237     uint64_t features;
    238     int i, ret;
    239 
    240     if (vblk_opts->has_num_queues) {
    241         num_queues = vblk_opts->num_queues;
    242         if (num_queues == 0) {
    243             error_setg(errp, "num-queues must be greater than 0");
    244             return -EINVAL;
    245         }
    246     }
    247 
    248     if (vblk_opts->has_queue_size) {
    249         queue_size = vblk_opts->queue_size;
    250         if (queue_size <= 2 || !is_power_of_2(queue_size) ||
    251             queue_size > VIRTQUEUE_MAX_SIZE) {
    252             error_setg(errp, "queue-size is invalid");
    253             return -EINVAL;
    254         }
    255     }
    256 
    257     if (vblk_opts->has_logical_block_size) {
    258         logical_block_size = vblk_opts->logical_block_size;
    259         check_block_size(exp->id, "logical-block-size", logical_block_size,
    260                          &local_err);
    261         if (local_err) {
    262             error_propagate(errp, local_err);
    263             return -EINVAL;
    264         }
    265     }
    266     vblk_exp->num_queues = num_queues;
    267     vblk_exp->handler.blk = exp->blk;
    268     vblk_exp->handler.serial = g_strdup(vblk_opts->has_serial ?
    269                                         vblk_opts->serial : "");
    270     vblk_exp->handler.logical_block_size = logical_block_size;
    271     vblk_exp->handler.writable = opts->writable;
    272 
    273     config.capacity =
    274             cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
    275     config.seg_max = cpu_to_le32(queue_size - 2);
    276     config.min_io_size = cpu_to_le16(1);
    277     config.opt_io_size = cpu_to_le32(1);
    278     config.num_queues = cpu_to_le16(num_queues);
    279     config.blk_size = cpu_to_le32(logical_block_size);
    280     config.max_discard_sectors = cpu_to_le32(VIRTIO_BLK_MAX_DISCARD_SECTORS);
    281     config.max_discard_seg = cpu_to_le32(1);
    282     config.discard_sector_alignment =
    283         cpu_to_le32(logical_block_size >> VIRTIO_BLK_SECTOR_BITS);
    284     config.max_write_zeroes_sectors =
    285         cpu_to_le32(VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS);
    286     config.max_write_zeroes_seg = cpu_to_le32(1);
    287 
    288     features = vduse_get_virtio_features() |
    289                (1ULL << VIRTIO_BLK_F_SEG_MAX) |
    290                (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
    291                (1ULL << VIRTIO_BLK_F_BLK_SIZE) |
    292                (1ULL << VIRTIO_BLK_F_FLUSH) |
    293                (1ULL << VIRTIO_BLK_F_DISCARD) |
    294                (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
    295 
    296     if (num_queues > 1) {
    297         features |= 1ULL << VIRTIO_BLK_F_MQ;
    298     }
    299     if (!opts->writable) {
    300         features |= 1ULL << VIRTIO_BLK_F_RO;
    301     }
    302 
    303     vblk_exp->dev = vduse_dev_create(vblk_opts->name, VIRTIO_ID_BLOCK, 0,
    304                                      features, num_queues,
    305                                      sizeof(struct virtio_blk_config),
    306                                      (char *)&config, &vduse_blk_ops,
    307                                      vblk_exp);
    308     if (!vblk_exp->dev) {
    309         error_setg(errp, "failed to create vduse device");
    310         ret = -ENOMEM;
    311         goto err_dev;
    312     }
    313 
    314     vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
    315                                            g_get_tmp_dir(), vblk_opts->name);
    316     if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
    317         error_setg(errp, "failed to set reconnect log file");
    318         ret = -EINVAL;
    319         goto err;
    320     }
    321 
    322     for (i = 0; i < num_queues; i++) {
    323         vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
    324     }
    325 
    326     aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
    327                        on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
    328 
    329     blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
    330                                  vblk_exp);
    331 
    332     blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
    333 
    334     return 0;
    335 err:
    336     vduse_dev_destroy(vblk_exp->dev);
    337     g_free(vblk_exp->recon_file);
    338 err_dev:
    339     g_free(vblk_exp->handler.serial);
    340     return ret;
    341 }
    342 
    343 static void vduse_blk_exp_delete(BlockExport *exp)
    344 {
    345     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
    346     int ret;
    347 
    348     blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
    349                                     vblk_exp);
    350     blk_set_dev_ops(exp->blk, NULL, NULL);
    351     ret = vduse_dev_destroy(vblk_exp->dev);
    352     if (ret != -EBUSY) {
    353         unlink(vblk_exp->recon_file);
    354     }
    355     g_free(vblk_exp->recon_file);
    356     g_free(vblk_exp->handler.serial);
    357 }
    358 
    359 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
    360 {
    361     VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
    362 
    363     aio_context_acquire(vblk_exp->export.ctx);
    364     vduse_blk_detach_ctx(vblk_exp);
    365     aio_context_acquire(vblk_exp->export.ctx);
    366 }
    367 
/*
 * Block export driver for BLOCK_EXPORT_TYPE_VDUSE_BLK.  Each export
 * instance is a VduseBlkExport (see instance_size).
 */
const BlockExportDriver blk_exp_vduse_blk = {
    .type               = BLOCK_EXPORT_TYPE_VDUSE_BLK,
    .instance_size      = sizeof(VduseBlkExport),
    .create             = vduse_blk_exp_create,
    .delete             = vduse_blk_exp_delete,
    .request_shutdown   = vduse_blk_exp_request_shutdown,
};