qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

commit.c (17193B)


      1 /*
      2  * Live block commit
      3  *
      4  * Copyright Red Hat, Inc. 2012
      5  *
      6  * Authors:
      7  *  Jeff Cody   <jcody@redhat.com>
      8  *  Based on stream.c by Stefan Hajnoczi
      9  *
     10  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
     11  * See the COPYING.LIB file in the top-level directory.
     12  *
     13  */
     14 
     15 #include "qemu/osdep.h"
     16 #include "qemu/cutils.h"
     17 #include "trace.h"
     18 #include "block/block_int.h"
     19 #include "block/blockjob_int.h"
     20 #include "qapi/error.h"
     21 #include "qapi/qmp/qerror.h"
     22 #include "qemu/ratelimit.h"
     23 #include "qemu/memalign.h"
     24 #include "sysemu/block-backend.h"
     25 
     26 enum {
     27     /*
     28      * Size of data buffer for populating the image file.  This should be large
     29      * enough to process multiple clusters in a single call, so that populating
     30      * contiguous regions of the image is efficient.
     31      */
     32     COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
     33 };
     34 
/* State of one live commit job, created by commit_start(). */
typedef struct CommitBlockJob {
    /* Embedded block job; callbacks recover the CommitBlockJob from its
     * common.job member via container_of() */
    BlockJob common;
    /* commit_top filter node that commit_start() appends above top */
    BlockDriverState *commit_top_bs;
    /* BlockBackend attached to the top node; commit_run() reads from it */
    BlockBackend *top;
    /* BlockBackend attached to the base node; commit_run() writes to it.
     * Set to NULL by commit_prepare() after it has been unreferenced. */
    BlockBackend *base;
    /* The base node itself */
    BlockDriverState *base_bs;
    /* Result of bdrv_find_overlay(top, base); passed to
     * bdrv_is_allocated_above() in commit_run() */
    BlockDriverState *base_overlay;
    /* Error policy handed to block_job_error_action() */
    BlockdevOnError on_error;
    /* True if base was read-only when the job was started; commit_clean()
     * then reopens it read-only again */
    bool base_read_only;
    /* True while the backing chain from commit_top_bs to base is frozen */
    bool chain_frozen;
    /* g_strdup() copy of the backing_file_str argument of commit_start();
     * passed to bdrv_drop_intermediate(), freed in commit_clean() */
    char *backing_file_str;
} CommitBlockJob;
     47 
     48 static int commit_prepare(Job *job)
     49 {
     50     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
     51 
     52     bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
     53     s->chain_frozen = false;
     54 
     55     /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
     56      * the normal backing chain can be restored. */
     57     blk_unref(s->base);
     58     s->base = NULL;
     59 
     60     /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
     61      * identically. Further work is needed to disambiguate these cases. */
     62     return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
     63                                   s->backing_file_str);
     64 }
     65 
     66 static void commit_abort(Job *job)
     67 {
     68     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
     69     BlockDriverState *top_bs = blk_bs(s->top);
     70 
     71     if (s->chain_frozen) {
     72         bdrv_unfreeze_backing_chain(s->commit_top_bs, s->base_bs);
     73     }
     74 
     75     /* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
     76     bdrv_ref(top_bs);
     77     bdrv_ref(s->commit_top_bs);
     78 
     79     if (s->base) {
     80         blk_unref(s->base);
     81     }
     82 
     83     /* free the blockers on the intermediate nodes so that bdrv_replace_nodes
     84      * can succeed */
     85     block_job_remove_all_bdrv(&s->common);
     86 
     87     /* If bdrv_drop_intermediate() failed (or was not invoked), remove the
     88      * commit filter driver from the backing chain now. Do this as the final
     89      * step so that the 'consistent read' permission can be granted.
     90      *
     91      * XXX Can (or should) we somehow keep 'consistent read' blocked even
     92      * after the failed/cancelled commit job is gone? If we already wrote
     93      * something to base, the intermediate images aren't valid any more. */
     94     bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
     95                       &error_abort);
     96 
     97     bdrv_unref(s->commit_top_bs);
     98     bdrv_unref(top_bs);
     99 }
    100 
    101 static void commit_clean(Job *job)
    102 {
    103     CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
    104 
    105     /* restore base open flags here if appropriate (e.g., change the base back
    106      * to r/o). These reopens do not need to be atomic, since we won't abort
    107      * even on failure here */
    108     if (s->base_read_only) {
    109         bdrv_reopen_set_read_only(s->base_bs, true, NULL);
    110     }
    111 
    112     g_free(s->backing_file_str);
    113     blk_unref(s->top);
    114 }
    115 
/*
 * .run callback of the commit job.
 *
 * Walks s->top from offset 0 to its length.  Each iteration asks
 * bdrv_is_allocated_above() about at most COMMIT_BUFFER_SIZE bytes; when the
 * answer is positive, the reported n bytes are read from s->top and written
 * to s->base at the same offset.  If base is shorter than top, it is
 * truncated up to top's length before the loop starts.
 *
 * Returns 0 when the loop completes or the job is cancelled.  Returns the
 * failing call's return value if a length query or the truncate fails, or if
 * an I/O error occurs and block_job_error_action() answers
 * BLOCK_ERROR_ACTION_REPORT.  @errp is not written by this function.
 */
static int coroutine_fn commit_run(Job *job, Error **errp)
{
    CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
    int64_t offset;
    uint64_t delay_ns = 0;
    int ret = 0;
    int64_t n = 0; /* bytes */
    QEMU_AUTO_VFREE void *buf = NULL;
    int64_t len, base_len;

    len = blk_getlength(s->top);
    if (len < 0) {
        return len;
    }
    /* The amount of work to do is the length of the top image */
    job_progress_set_remaining(&s->common.job, len);

    base_len = blk_getlength(s->base);
    if (base_len < 0) {
        return base_len;
    }

    /* Grow base so that every offset of top can be written to it */
    if (base_len < len) {
        ret = blk_co_truncate(s->base, len, false, PREALLOC_MODE_OFF, 0, NULL);
        if (ret) {
            return ret;
        }
    }

    buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);

    for (offset = 0; offset < len; offset += n) {
        bool copy;
        /* Stays true unless the write to base is the call that fails */
        bool error_in_source = true;

        /* Note that even when no rate limit is applied we need to yield
         * with no pending I/O here so that bdrv_drain_all() returns.
         */
        job_sleep_ns(&s->common.job, delay_ns);
        if (job_is_cancelled(&s->common.job)) {
            break;
        }
        /* Copy if allocated above the base */
        ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true,
                                      offset, COMMIT_BUFFER_SIZE, &n);
        copy = (ret > 0);
        trace_commit_one_iteration(s, offset, n, ret);
        if (copy) {
            assert(n < SIZE_MAX);

            ret = blk_co_pread(s->top, offset, n, buf, 0);
            if (ret >= 0) {
                ret = blk_co_pwrite(s->base, offset, n, buf, 0);
                if (ret < 0) {
                    error_in_source = false;
                }
            }
        }
        /* Covers a failed allocation query as well as failed read/write */
        if (ret < 0) {
            BlockErrorAction action =
                block_job_error_action(&s->common, s->on_error,
                                       error_in_source, -ret);
            if (action == BLOCK_ERROR_ACTION_REPORT) {
                return ret;
            } else {
                /* Retry the same offset: the loop increment adds n == 0 */
                n = 0;
                continue;
            }
        }
        /* Publish progress */
        job_progress_update(&s->common.job, n);

        /* Only ranges that were actually copied count against the rate limit */
        if (copy) {
            delay_ns = block_job_ratelimit_get_delay(&s->common, n);
        } else {
            delay_ns = 0;
        }
    }

    return 0;
}
    196 
    197 static const BlockJobDriver commit_job_driver = {
    198     .job_driver = {
    199         .instance_size = sizeof(CommitBlockJob),
    200         .job_type      = JOB_TYPE_COMMIT,
    201         .free          = block_job_free,
    202         .user_resume   = block_job_user_resume,
    203         .run           = commit_run,
    204         .prepare       = commit_prepare,
    205         .abort         = commit_abort,
    206         .clean         = commit_clean
    207     },
    208 };
    209 
    210 static int coroutine_fn bdrv_commit_top_preadv(BlockDriverState *bs,
    211     int64_t offset, int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
    212 {
    213     return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
    214 }
    215 
    216 static void bdrv_commit_top_refresh_filename(BlockDriverState *bs)
    217 {
    218     pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
    219             bs->backing->bs->filename);
    220 }
    221 
    222 static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
    223                                        BdrvChildRole role,
    224                                        BlockReopenQueue *reopen_queue,
    225                                        uint64_t perm, uint64_t shared,
    226                                        uint64_t *nperm, uint64_t *nshared)
    227 {
    228     *nperm = 0;
    229     *nshared = BLK_PERM_ALL;
    230 }
    231 
    232 /* Dummy node that provides consistent read to its users without requiring it
    233  * from its backing file and that allows writes on the backing file chain. */
    234 static BlockDriver bdrv_commit_top = {
    235     .format_name                = "commit_top",
    236     .bdrv_co_preadv             = bdrv_commit_top_preadv,
    237     .bdrv_refresh_filename      = bdrv_commit_top_refresh_filename,
    238     .bdrv_child_perm            = bdrv_commit_top_child_perm,
    239 
    240     .is_filter                  = true,
    241     .filtered_child_is_backing  = true,
    242 };
    243 
/*
 * Create and start a block job that commits @top into @base.
 *
 * @job_id:           passed through to block_job_create()
 * @bs:               node the block job is created on; must differ from @top
 *                    (asserted)
 * @base:             node that receives the data; reopened read-write first
 *                    if it is read-only
 * @top:              node to commit; a commit_top filter node is appended
 *                    above it
 * @creation_flags:   passed through to block_job_create()
 * @speed:            passed through to block_job_create()
 * @on_error:         stored in the job, used by commit_run() on I/O errors
 * @backing_file_str: copied into the job; commit_prepare() hands it to
 *                    bdrv_drop_intermediate()
 * @filter_node_name: node name for the commit_top filter node; if NULL the
 *                    filter node is marked implicit
 * @errp:             receives the error on failure
 *
 * On success the job has been started with job_start().  On failure after
 * the job object exists, the steps taken so far are undone at the fail label.
 */
void commit_start(const char *job_id, BlockDriverState *bs,
                  BlockDriverState *base, BlockDriverState *top,
                  int creation_flags, int64_t speed,
                  BlockdevOnError on_error, const char *backing_file_str,
                  const char *filter_node_name, Error **errp)
{
    CommitBlockJob *s;
    BlockDriverState *iter;
    BlockDriverState *commit_top_bs = NULL;
    BlockDriverState *filtered_base;
    int64_t base_size, top_size;
    uint64_t base_perms, iter_shared_perms;
    int ret;

    GLOBAL_STATE_CODE();

    assert(top != bs);
    /* top and base must not resolve to the same node once filters are skipped */
    if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
        error_setg(errp, "Invalid files for merge: top and base are the same");
        return;
    }

    base_size = bdrv_getlength(base);
    if (base_size < 0) {
        error_setg_errno(errp, -base_size, "Could not inquire base image size");
        return;
    }

    top_size = bdrv_getlength(top);
    if (top_size < 0) {
        error_setg_errno(errp, -top_size, "Could not inquire top image size");
        return;
    }

    /* RESIZE on base is only requested when base is smaller than top */
    base_perms = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
    if (base_size < top_size) {
        base_perms |= BLK_PERM_RESIZE;
    }

    s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
                         speed, creation_flags, NULL, NULL, errp);
    if (!s) {
        return;
    }

    /* convert base to r/w, if necessary */
    s->base_read_only = bdrv_is_read_only(base);
    if (s->base_read_only) {
        if (bdrv_reopen_set_read_only(base, false, errp) != 0) {
            goto fail;
        }
    }

    /* Insert commit_top block node above top, so we can block consistent read
     * on the backing chain below it */
    commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, filter_node_name, 0,
                                         errp);
    if (commit_top_bs == NULL) {
        goto fail;
    }
    if (!filter_node_name) {
        commit_top_bs->implicit = true;
    }

    /* So that we can always drop this node */
    commit_top_bs->never_freeze = true;

    commit_top_bs->total_sectors = top->total_sectors;

    ret = bdrv_append(commit_top_bs, top, errp);
    bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
    if (ret < 0) {
        /* Clear the pointer so the fail path does not touch the node */
        commit_top_bs = NULL;
        goto fail;
    }

    s->commit_top_bs = commit_top_bs;

    /*
     * Block all nodes between top and base, because they will
     * disappear from the chain after this operation.
     * Note that this assumes that the user is fine with removing all
     * nodes (including R/W filters) between top and base.  Assuring
     * this is the responsibility of the interface (i.e. whoever calls
     * commit_start()).
     */
    s->base_overlay = bdrv_find_overlay(top, base);
    assert(s->base_overlay);

    /*
     * The topmost node with
     * bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base)
     */
    filtered_base = bdrv_cow_bs(s->base_overlay);
    assert(bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base));

    /*
     * XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
     * at s->base (if writes are blocked for a node, they are also blocked
     * for its backing file). The other options would be a second filter
     * driver above s->base.
     */
    iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;

    /* Register every node from top down to, but excluding, base with the job */
    for (iter = top; iter != base; iter = bdrv_filter_or_cow_bs(iter)) {
        if (iter == filtered_base) {
            /*
             * From here on, all nodes are filters on the base.  This
             * allows us to share BLK_PERM_CONSISTENT_READ.
             */
            iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
        }

        ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
                                 iter_shared_perms, errp);
        if (ret < 0) {
            goto fail;
        }
    }

    if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
        goto fail;
    }
    s->chain_frozen = true;

    ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
    if (ret < 0) {
        goto fail;
    }

    /* BlockBackend used by commit_run() to write to (and maybe resize) base */
    s->base = blk_new(s->common.job.aio_context,
                      base_perms,
                      BLK_PERM_CONSISTENT_READ
                      | BLK_PERM_WRITE_UNCHANGED);
    ret = blk_insert_bs(s->base, base, errp);
    if (ret < 0) {
        goto fail;
    }
    blk_set_disable_request_queuing(s->base, true);
    s->base_bs = base;

    /* Required permissions are already taken with block_job_add_bdrv() */
    s->top = blk_new(s->common.job.aio_context, 0, BLK_PERM_ALL);
    ret = blk_insert_bs(s->top, top, errp);
    if (ret < 0) {
        goto fail;
    }
    blk_set_disable_request_queuing(s->top, true);

    s->backing_file_str = g_strdup(backing_file_str);
    s->on_error = on_error;

    trace_commit_start(bs, base, top, s);
    job_start(&s->common.job);
    return;

fail:
    /* Undo, in this order, whatever had been set up before the failure */
    if (s->chain_frozen) {
        bdrv_unfreeze_backing_chain(commit_top_bs, base);
    }
    if (s->base) {
        blk_unref(s->base);
    }
    if (s->top) {
        blk_unref(s->top);
    }
    if (s->base_read_only) {
        bdrv_reopen_set_read_only(base, true, NULL);
    }
    job_early_fail(&s->common.job);
    /* commit_top_bs has to be replaced after deleting the block job,
     * otherwise this would fail because of lack of permissions. */
    if (commit_top_bs) {
        bdrv_replace_node(commit_top_bs, top, &error_abort);
    }
}
    420 
    421 
/* Size of the bounce buffer, and largest chunk per iteration, in bdrv_commit() */
#define COMMIT_BUF_SIZE (2048 * BDRV_SECTOR_SIZE)
    423 
    424 /* commit COW file into the raw image */
    425 int bdrv_commit(BlockDriverState *bs)
    426 {
    427     BlockBackend *src, *backing;
    428     BlockDriverState *backing_file_bs = NULL;
    429     BlockDriverState *commit_top_bs = NULL;
    430     BlockDriver *drv = bs->drv;
    431     AioContext *ctx;
    432     int64_t offset, length, backing_length;
    433     int ro;
    434     int64_t n;
    435     int ret = 0;
    436     QEMU_AUTO_VFREE uint8_t *buf = NULL;
    437     Error *local_err = NULL;
    438 
    439     GLOBAL_STATE_CODE();
    440 
    441     if (!drv)
    442         return -ENOMEDIUM;
    443 
    444     backing_file_bs = bdrv_cow_bs(bs);
    445 
    446     if (!backing_file_bs) {
    447         return -ENOTSUP;
    448     }
    449 
    450     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
    451         bdrv_op_is_blocked(backing_file_bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL))
    452     {
    453         return -EBUSY;
    454     }
    455 
    456     ro = bdrv_is_read_only(backing_file_bs);
    457 
    458     if (ro) {
    459         if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {
    460             return -EACCES;
    461         }
    462     }
    463 
    464     ctx = bdrv_get_aio_context(bs);
    465     /* WRITE_UNCHANGED is required for bdrv_make_empty() */
    466     src = blk_new(ctx, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
    467                   BLK_PERM_ALL);
    468     backing = blk_new(ctx, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL);
    469 
    470     ret = blk_insert_bs(src, bs, &local_err);
    471     if (ret < 0) {
    472         error_report_err(local_err);
    473         goto ro_cleanup;
    474     }
    475 
    476     /* Insert commit_top block node above backing, so we can write to it */
    477     commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
    478                                          &local_err);
    479     if (commit_top_bs == NULL) {
    480         error_report_err(local_err);
    481         goto ro_cleanup;
    482     }
    483 
    484     bdrv_set_backing_hd(commit_top_bs, backing_file_bs, &error_abort);
    485     bdrv_set_backing_hd(bs, commit_top_bs, &error_abort);
    486 
    487     ret = blk_insert_bs(backing, backing_file_bs, &local_err);
    488     if (ret < 0) {
    489         error_report_err(local_err);
    490         goto ro_cleanup;
    491     }
    492 
    493     length = blk_getlength(src);
    494     if (length < 0) {
    495         ret = length;
    496         goto ro_cleanup;
    497     }
    498 
    499     backing_length = blk_getlength(backing);
    500     if (backing_length < 0) {
    501         ret = backing_length;
    502         goto ro_cleanup;
    503     }
    504 
    505     /* If our top snapshot is larger than the backing file image,
    506      * grow the backing file image if possible.  If not possible,
    507      * we must return an error */
    508     if (length > backing_length) {
    509         ret = blk_truncate(backing, length, false, PREALLOC_MODE_OFF, 0,
    510                            &local_err);
    511         if (ret < 0) {
    512             error_report_err(local_err);
    513             goto ro_cleanup;
    514         }
    515     }
    516 
    517     /* blk_try_blockalign() for src will choose an alignment that works for
    518      * backing as well, so no need to compare the alignment manually. */
    519     buf = blk_try_blockalign(src, COMMIT_BUF_SIZE);
    520     if (buf == NULL) {
    521         ret = -ENOMEM;
    522         goto ro_cleanup;
    523     }
    524 
    525     for (offset = 0; offset < length; offset += n) {
    526         ret = bdrv_is_allocated(bs, offset, COMMIT_BUF_SIZE, &n);
    527         if (ret < 0) {
    528             goto ro_cleanup;
    529         }
    530         if (ret) {
    531             ret = blk_pread(src, offset, n, buf, 0);
    532             if (ret < 0) {
    533                 goto ro_cleanup;
    534             }
    535 
    536             ret = blk_pwrite(backing, offset, n, buf, 0);
    537             if (ret < 0) {
    538                 goto ro_cleanup;
    539             }
    540         }
    541     }
    542 
    543     ret = blk_make_empty(src, NULL);
    544     /* Ignore -ENOTSUP */
    545     if (ret < 0 && ret != -ENOTSUP) {
    546         goto ro_cleanup;
    547     }
    548 
    549     blk_flush(src);
    550 
    551     /*
    552      * Make sure all data we wrote to the backing device is actually
    553      * stable on disk.
    554      */
    555     blk_flush(backing);
    556 
    557     ret = 0;
    558 ro_cleanup:
    559     blk_unref(backing);
    560     if (bdrv_cow_bs(bs) != backing_file_bs) {
    561         bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
    562     }
    563     bdrv_unref(commit_top_bs);
    564     blk_unref(src);
    565 
    566     if (ro) {
    567         /* ignoring error return here */
    568         bdrv_reopen_set_read_only(backing_file_bs, true, NULL);
    569     }
    570 
    571     return ret;
    572 }