qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

blklogwrites.c (16333B)


      1 /*
      2  * Write logging blk driver based on blkverify and blkdebug.
      3  *
      4  * Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com>
      5  * Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com>
      6  * Copyright (c) 2018 Ari Sundholm <ari@tuxera.com>
      7  *
      8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
      9  * See the COPYING file in the top-level directory.
     10  */
     11 
     12 #include "qemu/osdep.h"
     13 #include "qapi/error.h"
     14 #include "qemu/sockets.h" /* for EINPROGRESS on Windows */
     15 #include "block/block_int.h"
     16 #include "qapi/qmp/qdict.h"
     17 #include "qapi/qmp/qstring.h"
     18 #include "qemu/cutils.h"
     19 #include "qemu/module.h"
     20 #include "qemu/option.h"
     21 
     22 /* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */
     23 
     24 #define LOG_FLUSH_FLAG   (1 << 0)
     25 #define LOG_FUA_FLAG     (1 << 1)
     26 #define LOG_DISCARD_FLAG (1 << 2)
     27 #define LOG_MARK_FLAG    (1 << 3)
     28 #define LOG_FLAG_MASK    (LOG_FLUSH_FLAG \
     29                          | LOG_FUA_FLAG \
     30                          | LOG_DISCARD_FLAG \
     31                          | LOG_MARK_FLAG)
     32 
     33 #define WRITE_LOG_VERSION 1ULL
     34 #define WRITE_LOG_MAGIC 0x6a736677736872ULL
     35 
     36 /* All fields are little-endian. */
     37 struct log_write_super {
     38     uint64_t magic;
     39     uint64_t version;
     40     uint64_t nr_entries;
     41     uint32_t sectorsize;
     42 } QEMU_PACKED;
     43 
     44 struct log_write_entry {
     45     uint64_t sector;
     46     uint64_t nr_sectors;
     47     uint64_t flags;
     48     uint64_t data_len;
     49 } QEMU_PACKED;
     50 
     51 /* End of disk format structures. */
     52 
     53 typedef struct {
     54     BdrvChild *log_file;
     55     uint32_t sectorsize;
     56     uint32_t sectorbits;
     57     uint64_t cur_log_sector;
     58     uint64_t nr_entries;
     59     uint64_t update_interval;
     60 } BDRVBlkLogWritesState;
     61 
     62 static QemuOptsList runtime_opts = {
     63     .name = "blklogwrites",
     64     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
     65     .desc = {
     66         {
     67             .name = "log-append",
     68             .type = QEMU_OPT_BOOL,
     69             .help = "Append to an existing log",
     70         },
     71         {
     72             .name = "log-sector-size",
     73             .type = QEMU_OPT_SIZE,
     74             .help = "Log sector size",
     75         },
     76         {
     77             .name = "log-super-update-interval",
     78             .type = QEMU_OPT_NUMBER,
     79             .help = "Log superblock update interval (# of write requests)",
     80         },
     81         { /* end of list */ }
     82     },
     83 };
     84 
     85 static inline uint32_t blk_log_writes_log2(uint32_t value)
     86 {
     87     assert(value > 0);
     88     return 31 - clz32(value);
     89 }
     90 
     91 static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)
     92 {
     93     return is_power_of_2(sector_size) &&
     94         sector_size >= sizeof(struct log_write_super) &&
     95         sector_size >= sizeof(struct log_write_entry) &&
     96         sector_size < (1ull << 24);
     97 }
     98 
     99 static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
    100                                                    uint32_t sector_size,
    101                                                    uint64_t nr_entries,
    102                                                    Error **errp)
    103 {
    104     uint64_t cur_sector = 1;
    105     uint64_t cur_idx = 0;
    106     uint32_t sector_bits = blk_log_writes_log2(sector_size);
    107     struct log_write_entry cur_entry;
    108 
    109     while (cur_idx < nr_entries) {
    110         int read_ret = bdrv_pread(log, cur_sector << sector_bits,
    111                                   sizeof(cur_entry), &cur_entry, 0);
    112         if (read_ret < 0) {
    113             error_setg_errno(errp, -read_ret,
    114                              "Failed to read log entry %"PRIu64, cur_idx);
    115             return (uint64_t)-1ull;
    116         }
    117 
    118         if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {
    119             error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,
    120                        le64_to_cpu(cur_entry.flags), cur_idx);
    121             return (uint64_t)-1ull;
    122         }
    123 
    124         /* Account for the sector of the entry itself */
    125         ++cur_sector;
    126 
    127         /*
    128          * Account for the data of the write.
    129          * For discards, this data is not present.
    130          */
    131         if (!(cur_entry.flags & cpu_to_le64(LOG_DISCARD_FLAG))) {
    132             cur_sector += le64_to_cpu(cur_entry.nr_sectors);
    133         }
    134 
    135         ++cur_idx;
    136     }
    137 
    138     return cur_sector;
    139 }
    140 
    141 static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
    142                                Error **errp)
    143 {
    144     BDRVBlkLogWritesState *s = bs->opaque;
    145     QemuOpts *opts;
    146     Error *local_err = NULL;
    147     int ret;
    148     uint64_t log_sector_size;
    149     bool log_append;
    150 
    151     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
    152     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
    153         ret = -EINVAL;
    154         goto fail;
    155     }
    156 
    157     /* Open the file */
    158     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
    159     if (ret < 0) {
    160         goto fail;
    161     }
    162 
    163     /* Open the log file */
    164     s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_of_bds,
    165                                   BDRV_CHILD_METADATA, false, errp);
    166     if (!s->log_file) {
    167         ret = -EINVAL;
    168         goto fail;
    169     }
    170 
    171     log_append = qemu_opt_get_bool(opts, "log-append", false);
    172 
    173     if (log_append) {
    174         struct log_write_super log_sb = { 0, 0, 0, 0 };
    175 
    176         if (qemu_opt_find(opts, "log-sector-size")) {
    177             ret = -EINVAL;
    178             error_setg(errp, "log-append and log-sector-size are mutually "
    179                        "exclusive");
    180             goto fail_log;
    181         }
    182 
    183         /* Read log superblock or fake one for an empty log */
    184         if (!bdrv_getlength(s->log_file->bs)) {
    185             log_sb.magic      = cpu_to_le64(WRITE_LOG_MAGIC);
    186             log_sb.version    = cpu_to_le64(WRITE_LOG_VERSION);
    187             log_sb.nr_entries = cpu_to_le64(0);
    188             log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
    189         } else {
    190             ret = bdrv_pread(s->log_file, 0, sizeof(log_sb), &log_sb, 0);
    191             if (ret < 0) {
    192                 error_setg_errno(errp, -ret, "Could not read log superblock");
    193                 goto fail_log;
    194             }
    195         }
    196 
    197         if (log_sb.magic != cpu_to_le64(WRITE_LOG_MAGIC)) {
    198             ret = -EINVAL;
    199             error_setg(errp, "Invalid log superblock magic");
    200             goto fail_log;
    201         }
    202 
    203         if (log_sb.version != cpu_to_le64(WRITE_LOG_VERSION)) {
    204             ret = -EINVAL;
    205             error_setg(errp, "Unsupported log version %"PRIu64,
    206                        le64_to_cpu(log_sb.version));
    207             goto fail_log;
    208         }
    209 
    210         log_sector_size = le32_to_cpu(log_sb.sectorsize);
    211         s->cur_log_sector = 1;
    212         s->nr_entries = 0;
    213 
    214         if (blk_log_writes_sector_size_valid(log_sector_size)) {
    215             s->cur_log_sector =
    216                 blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,
    217                                     le64_to_cpu(log_sb.nr_entries), &local_err);
    218             if (local_err) {
    219                 ret = -EINVAL;
    220                 error_propagate(errp, local_err);
    221                 goto fail_log;
    222             }
    223 
    224             s->nr_entries = le64_to_cpu(log_sb.nr_entries);
    225         }
    226     } else {
    227         log_sector_size = qemu_opt_get_size(opts, "log-sector-size",
    228                                             BDRV_SECTOR_SIZE);
    229         s->cur_log_sector = 1;
    230         s->nr_entries = 0;
    231     }
    232 
    233     if (!blk_log_writes_sector_size_valid(log_sector_size)) {
    234         ret = -EINVAL;
    235         error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size);
    236         goto fail_log;
    237     }
    238 
    239     s->sectorsize = log_sector_size;
    240     s->sectorbits = blk_log_writes_log2(log_sector_size);
    241     s->update_interval = qemu_opt_get_number(opts, "log-super-update-interval",
    242                                              4096);
    243     if (!s->update_interval) {
    244         ret = -EINVAL;
    245         error_setg(errp, "Invalid log superblock update interval %"PRIu64,
    246                    s->update_interval);
    247         goto fail_log;
    248     }
    249 
    250     ret = 0;
    251 fail_log:
    252     if (ret < 0) {
    253         bdrv_unref_child(bs, s->log_file);
    254         s->log_file = NULL;
    255     }
    256 fail:
    257     qemu_opts_del(opts);
    258     return ret;
    259 }
    260 
    261 static void blk_log_writes_close(BlockDriverState *bs)
    262 {
    263     BDRVBlkLogWritesState *s = bs->opaque;
    264 
    265     bdrv_unref_child(bs, s->log_file);
    266     s->log_file = NULL;
    267 }
    268 
    269 static int64_t blk_log_writes_getlength(BlockDriverState *bs)
    270 {
    271     return bdrv_getlength(bs->file->bs);
    272 }
    273 
    274 static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c,
    275                                       BdrvChildRole role,
    276                                       BlockReopenQueue *ro_q,
    277                                       uint64_t perm, uint64_t shrd,
    278                                       uint64_t *nperm, uint64_t *nshrd)
    279 {
    280     if (!c) {
    281         *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
    282         *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
    283         return;
    284     }
    285 
    286     bdrv_default_perms(bs, c, role, ro_q, perm, shrd,
    287                        nperm, nshrd);
    288 }
    289 
    290 static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
    291 {
    292     BDRVBlkLogWritesState *s = bs->opaque;
    293     bs->bl.request_alignment = s->sectorsize;
    294 }
    295 
    296 static int coroutine_fn
    297 blk_log_writes_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
    298                          QEMUIOVector *qiov, BdrvRequestFlags flags)
    299 {
    300     return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
    301 }
    302 
    303 typedef struct BlkLogWritesFileReq {
    304     BlockDriverState *bs;
    305     uint64_t offset;
    306     uint64_t bytes;
    307     int file_flags;
    308     QEMUIOVector *qiov;
    309     int (*func)(struct BlkLogWritesFileReq *r);
    310     int file_ret;
    311 } BlkLogWritesFileReq;
    312 
    313 typedef struct {
    314     BlockDriverState *bs;
    315     QEMUIOVector *qiov;
    316     struct log_write_entry entry;
    317     uint64_t zero_size;
    318     int log_ret;
    319 } BlkLogWritesLogReq;
    320 
    321 static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
    322 {
    323     BDRVBlkLogWritesState *s = lr->bs->opaque;
    324     uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
    325 
    326     s->nr_entries++;
    327     s->cur_log_sector +=
    328             ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;
    329 
    330     lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
    331                                   lr->qiov, 0);
    332 
    333     /* Logging for the "write zeroes" operation */
    334     if (lr->log_ret == 0 && lr->zero_size) {
    335         cur_log_offset = s->cur_log_sector << s->sectorbits;
    336         s->cur_log_sector +=
    337                 ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;
    338 
    339         lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
    340                                             lr->zero_size, 0);
    341     }
    342 
    343     /* Update super block on flush or every update interval */
    344     if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG)
    345         || (s->nr_entries % s->update_interval == 0)))
    346     {
    347         struct log_write_super super = {
    348             .magic      = cpu_to_le64(WRITE_LOG_MAGIC),
    349             .version    = cpu_to_le64(WRITE_LOG_VERSION),
    350             .nr_entries = cpu_to_le64(s->nr_entries),
    351             .sectorsize = cpu_to_le32(s->sectorsize),
    352         };
    353         void *zeroes = g_malloc0(s->sectorsize - sizeof(super));
    354         QEMUIOVector qiov;
    355 
    356         qemu_iovec_init(&qiov, 2);
    357         qemu_iovec_add(&qiov, &super, sizeof(super));
    358         qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super));
    359 
    360         lr->log_ret =
    361             bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0);
    362         if (lr->log_ret == 0) {
    363             lr->log_ret = bdrv_co_flush(s->log_file->bs);
    364         }
    365         qemu_iovec_destroy(&qiov);
    366         g_free(zeroes);
    367     }
    368 }
    369 
    370 static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
    371 {
    372     fr->file_ret = fr->func(fr);
    373 }
    374 
    375 static int coroutine_fn
    376 blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
    377                       QEMUIOVector *qiov, int flags,
    378                       int (*file_func)(BlkLogWritesFileReq *r),
    379                       uint64_t entry_flags, bool is_zero_write)
    380 {
    381     QEMUIOVector log_qiov;
    382     size_t niov = qiov ? qiov->niov : 0;
    383     BDRVBlkLogWritesState *s = bs->opaque;
    384     BlkLogWritesFileReq fr = {
    385         .bs         = bs,
    386         .offset     = offset,
    387         .bytes      = bytes,
    388         .file_flags = flags,
    389         .qiov       = qiov,
    390         .func       = file_func,
    391     };
    392     BlkLogWritesLogReq lr = {
    393         .bs             = bs,
    394         .qiov           = &log_qiov,
    395         .entry = {
    396             .sector     = cpu_to_le64(offset >> s->sectorbits),
    397             .nr_sectors = cpu_to_le64(bytes >> s->sectorbits),
    398             .flags      = cpu_to_le64(entry_flags),
    399             .data_len   = 0,
    400         },
    401         .zero_size = is_zero_write ? bytes : 0,
    402     };
    403     void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry));
    404 
    405     assert((1 << s->sectorbits) == s->sectorsize);
    406     assert(bs->bl.request_alignment == s->sectorsize);
    407     assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
    408     assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
    409 
    410     qemu_iovec_init(&log_qiov, niov + 2);
    411     qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry));
    412     qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry));
    413     if (qiov) {
    414         qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size);
    415     }
    416 
    417     blk_log_writes_co_do_file(&fr);
    418     blk_log_writes_co_do_log(&lr);
    419 
    420     qemu_iovec_destroy(&log_qiov);
    421     g_free(zeroes);
    422 
    423     if (lr.log_ret < 0) {
    424         return lr.log_ret;
    425     }
    426 
    427     return fr.file_ret;
    428 }
    429 
    430 static int coroutine_fn
    431 blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr)
    432 {
    433     return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes,
    434                            fr->qiov, fr->file_flags);
    435 }
    436 
    437 static int coroutine_fn
    438 blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr)
    439 {
    440     return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes,
    441                                  fr->file_flags);
    442 }
    443 
    444 static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
    445 {
    446     return bdrv_co_flush(fr->bs->file->bs);
    447 }
    448 
    449 static int coroutine_fn
    450 blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr)
    451 {
    452     return bdrv_co_pdiscard(fr->bs->file, fr->offset, fr->bytes);
    453 }
    454 
    455 static int coroutine_fn
    456 blk_log_writes_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
    457                           QEMUIOVector *qiov, BdrvRequestFlags flags)
    458 {
    459     return blk_log_writes_co_log(bs, offset, bytes, qiov, flags,
    460                                  blk_log_writes_co_do_file_pwritev, 0, false);
    461 }
    462 
    463 static int coroutine_fn
    464 blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
    465                                 int64_t bytes, BdrvRequestFlags flags)
    466 {
    467     return blk_log_writes_co_log(bs, offset, bytes, NULL, flags,
    468                                  blk_log_writes_co_do_file_pwrite_zeroes, 0,
    469                                  true);
    470 }
    471 
    472 static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
    473 {
    474     return blk_log_writes_co_log(bs, 0, 0, NULL, 0,
    475                                  blk_log_writes_co_do_file_flush,
    476                                  LOG_FLUSH_FLAG, false);
    477 }
    478 
    479 static int coroutine_fn
    480 blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
    481 {
    482     return blk_log_writes_co_log(bs, offset, bytes, NULL, 0,
    483                                  blk_log_writes_co_do_file_pdiscard,
    484                                  LOG_DISCARD_FLAG, false);
    485 }
    486 
    487 static const char *const blk_log_writes_strong_runtime_opts[] = {
    488     "log-append",
    489     "log-sector-size",
    490 
    491     NULL
    492 };
    493 
    494 static BlockDriver bdrv_blk_log_writes = {
    495     .format_name            = "blklogwrites",
    496     .instance_size          = sizeof(BDRVBlkLogWritesState),
    497 
    498     .bdrv_open              = blk_log_writes_open,
    499     .bdrv_close             = blk_log_writes_close,
    500     .bdrv_getlength         = blk_log_writes_getlength,
    501     .bdrv_child_perm        = blk_log_writes_child_perm,
    502     .bdrv_refresh_limits    = blk_log_writes_refresh_limits,
    503 
    504     .bdrv_co_preadv         = blk_log_writes_co_preadv,
    505     .bdrv_co_pwritev        = blk_log_writes_co_pwritev,
    506     .bdrv_co_pwrite_zeroes  = blk_log_writes_co_pwrite_zeroes,
    507     .bdrv_co_flush_to_disk  = blk_log_writes_co_flush_to_disk,
    508     .bdrv_co_pdiscard       = blk_log_writes_co_pdiscard,
    509 
    510     .is_filter              = true,
    511     .strong_runtime_opts    = blk_log_writes_strong_runtime_opts,
    512 };
    513 
    514 static void bdrv_blk_log_writes_init(void)
    515 {
    516     bdrv_register(&bdrv_blk_log_writes);
    517 }
    518 
    519 block_init(bdrv_blk_log_writes_init);