qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

test-bdrv-drain.c (72651B)


      1 /*
      2  * Block node draining tests
      3  *
      4  * Copyright (c) 2017 Kevin Wolf <kwolf@redhat.com>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22  * THE SOFTWARE.
     23  */
     24 
     25 #include "qemu/osdep.h"
     26 #include "block/block.h"
     27 #include "block/blockjob_int.h"
     28 #include "sysemu/block-backend.h"
     29 #include "qapi/error.h"
     30 #include "qemu/main-loop.h"
     31 #include "iothread.h"
     32 
     33 static QemuEvent done_event;
     34 
     35 typedef struct BDRVTestState {
     36     int drain_count;
     37     AioContext *bh_indirection_ctx;
     38     bool sleep_in_drain_begin;
     39 } BDRVTestState;
     40 
     41 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
     42 {
     43     BDRVTestState *s = bs->opaque;
     44     s->drain_count++;
     45     if (s->sleep_in_drain_begin) {
     46         qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
     47     }
     48 }
     49 
     50 static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
     51 {
     52     BDRVTestState *s = bs->opaque;
     53     s->drain_count--;
     54 }
     55 
     56 static void bdrv_test_close(BlockDriverState *bs)
     57 {
     58     BDRVTestState *s = bs->opaque;
     59     g_assert_cmpint(s->drain_count, >, 0);
     60 }
     61 
     62 static void co_reenter_bh(void *opaque)
     63 {
     64     aio_co_wake(opaque);
     65 }
     66 
     67 static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
     68                                             int64_t offset, int64_t bytes,
     69                                             QEMUIOVector *qiov,
     70                                             BdrvRequestFlags flags)
     71 {
     72     BDRVTestState *s = bs->opaque;
     73 
     74     /* We want this request to stay until the polling loop in drain waits for
     75      * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
     76      * first and polls its result, too, but it shouldn't accidentally complete
     77      * this request yet. */
     78     qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
     79 
     80     if (s->bh_indirection_ctx) {
     81         aio_bh_schedule_oneshot(s->bh_indirection_ctx, co_reenter_bh,
     82                                 qemu_coroutine_self());
     83         qemu_coroutine_yield();
     84     }
     85 
     86     return 0;
     87 }
     88 
     89 static int bdrv_test_change_backing_file(BlockDriverState *bs,
     90                                          const char *backing_file,
     91                                          const char *backing_fmt)
     92 {
     93     return 0;
     94 }
     95 
     96 static BlockDriver bdrv_test = {
     97     .format_name            = "test",
     98     .instance_size          = sizeof(BDRVTestState),
     99     .supports_backing       = true,
    100 
    101     .bdrv_close             = bdrv_test_close,
    102     .bdrv_co_preadv         = bdrv_test_co_preadv,
    103 
    104     .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
    105     .bdrv_co_drain_end      = bdrv_test_co_drain_end,
    106 
    107     .bdrv_child_perm        = bdrv_default_perms,
    108 
    109     .bdrv_change_backing_file = bdrv_test_change_backing_file,
    110 };
    111 
    112 static void aio_ret_cb(void *opaque, int ret)
    113 {
    114     int *aio_ret = opaque;
    115     *aio_ret = ret;
    116 }
    117 
    118 typedef struct CallInCoroutineData {
    119     void (*entry)(void);
    120     bool done;
    121 } CallInCoroutineData;
    122 
    123 static coroutine_fn void call_in_coroutine_entry(void *opaque)
    124 {
    125     CallInCoroutineData *data = opaque;
    126 
    127     data->entry();
    128     data->done = true;
    129 }
    130 
    131 static void call_in_coroutine(void (*entry)(void))
    132 {
    133     Coroutine *co;
    134     CallInCoroutineData data = {
    135         .entry  = entry,
    136         .done   = false,
    137     };
    138 
    139     co = qemu_coroutine_create(call_in_coroutine_entry, &data);
    140     qemu_coroutine_enter(co);
    141     while (!data.done) {
    142         aio_poll(qemu_get_aio_context(), true);
    143     }
    144 }
    145 
    146 enum drain_type {
    147     BDRV_DRAIN_ALL,
    148     BDRV_DRAIN,
    149     BDRV_SUBTREE_DRAIN,
    150     DRAIN_TYPE_MAX,
    151 };
    152 
    153 static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
    154 {
    155     switch (drain_type) {
    156     case BDRV_DRAIN_ALL:        bdrv_drain_all_begin(); break;
    157     case BDRV_DRAIN:            bdrv_drained_begin(bs); break;
    158     case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_begin(bs); break;
    159     default:                    g_assert_not_reached();
    160     }
    161 }
    162 
    163 static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
    164 {
    165     switch (drain_type) {
    166     case BDRV_DRAIN_ALL:        bdrv_drain_all_end(); break;
    167     case BDRV_DRAIN:            bdrv_drained_end(bs); break;
    168     case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_end(bs); break;
    169     default:                    g_assert_not_reached();
    170     }
    171 }
    172 
    173 static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs)
    174 {
    175     if (drain_type != BDRV_DRAIN_ALL) {
    176         aio_context_acquire(bdrv_get_aio_context(bs));
    177     }
    178     do_drain_begin(drain_type, bs);
    179     if (drain_type != BDRV_DRAIN_ALL) {
    180         aio_context_release(bdrv_get_aio_context(bs));
    181     }
    182 }
    183 
    184 static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs)
    185 {
    186     if (drain_type != BDRV_DRAIN_ALL) {
    187         aio_context_acquire(bdrv_get_aio_context(bs));
    188     }
    189     do_drain_end(drain_type, bs);
    190     if (drain_type != BDRV_DRAIN_ALL) {
    191         aio_context_release(bdrv_get_aio_context(bs));
    192     }
    193 }
    194 
    195 static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
    196 {
    197     BlockBackend *blk;
    198     BlockDriverState *bs, *backing;
    199     BDRVTestState *s, *backing_s;
    200     BlockAIOCB *acb;
    201     int aio_ret;
    202 
    203     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
    204 
    205     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    206     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
    207                               &error_abort);
    208     s = bs->opaque;
    209     blk_insert_bs(blk, bs, &error_abort);
    210 
    211     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
    212     backing_s = backing->opaque;
    213     bdrv_set_backing_hd(bs, backing, &error_abort);
    214 
    215     /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
    216     g_assert_cmpint(s->drain_count, ==, 0);
    217     g_assert_cmpint(backing_s->drain_count, ==, 0);
    218 
    219     do_drain_begin(drain_type, bs);
    220 
    221     g_assert_cmpint(s->drain_count, ==, 1);
    222     g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
    223 
    224     do_drain_end(drain_type, bs);
    225 
    226     g_assert_cmpint(s->drain_count, ==, 0);
    227     g_assert_cmpint(backing_s->drain_count, ==, 0);
    228 
    229     /* Now do the same while a request is pending */
    230     aio_ret = -EINPROGRESS;
    231     acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
    232     g_assert(acb != NULL);
    233     g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
    234 
    235     g_assert_cmpint(s->drain_count, ==, 0);
    236     g_assert_cmpint(backing_s->drain_count, ==, 0);
    237 
    238     do_drain_begin(drain_type, bs);
    239 
    240     g_assert_cmpint(aio_ret, ==, 0);
    241     g_assert_cmpint(s->drain_count, ==, 1);
    242     g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
    243 
    244     do_drain_end(drain_type, bs);
    245 
    246     g_assert_cmpint(s->drain_count, ==, 0);
    247     g_assert_cmpint(backing_s->drain_count, ==, 0);
    248 
    249     bdrv_unref(backing);
    250     bdrv_unref(bs);
    251     blk_unref(blk);
    252 }
    253 
    254 static void test_drv_cb_drain_all(void)
    255 {
    256     test_drv_cb_common(BDRV_DRAIN_ALL, true);
    257 }
    258 
    259 static void test_drv_cb_drain(void)
    260 {
    261     test_drv_cb_common(BDRV_DRAIN, false);
    262 }
    263 
    264 static void test_drv_cb_drain_subtree(void)
    265 {
    266     test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
    267 }
    268 
    269 static void test_drv_cb_co_drain_all(void)
    270 {
    271     call_in_coroutine(test_drv_cb_drain_all);
    272 }
    273 
    274 static void test_drv_cb_co_drain(void)
    275 {
    276     call_in_coroutine(test_drv_cb_drain);
    277 }
    278 
    279 static void test_drv_cb_co_drain_subtree(void)
    280 {
    281     call_in_coroutine(test_drv_cb_drain_subtree);
    282 }
    283 
    284 static void test_quiesce_common(enum drain_type drain_type, bool recursive)
    285 {
    286     BlockBackend *blk;
    287     BlockDriverState *bs, *backing;
    288 
    289     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    290     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
    291                               &error_abort);
    292     blk_insert_bs(blk, bs, &error_abort);
    293 
    294     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
    295     bdrv_set_backing_hd(bs, backing, &error_abort);
    296 
    297     g_assert_cmpint(bs->quiesce_counter, ==, 0);
    298     g_assert_cmpint(backing->quiesce_counter, ==, 0);
    299 
    300     do_drain_begin(drain_type, bs);
    301 
    302     g_assert_cmpint(bs->quiesce_counter, ==, 1);
    303     g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
    304 
    305     do_drain_end(drain_type, bs);
    306 
    307     g_assert_cmpint(bs->quiesce_counter, ==, 0);
    308     g_assert_cmpint(backing->quiesce_counter, ==, 0);
    309 
    310     bdrv_unref(backing);
    311     bdrv_unref(bs);
    312     blk_unref(blk);
    313 }
    314 
    315 static void test_quiesce_drain_all(void)
    316 {
    317     test_quiesce_common(BDRV_DRAIN_ALL, true);
    318 }
    319 
    320 static void test_quiesce_drain(void)
    321 {
    322     test_quiesce_common(BDRV_DRAIN, false);
    323 }
    324 
    325 static void test_quiesce_drain_subtree(void)
    326 {
    327     test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
    328 }
    329 
    330 static void test_quiesce_co_drain_all(void)
    331 {
    332     call_in_coroutine(test_quiesce_drain_all);
    333 }
    334 
    335 static void test_quiesce_co_drain(void)
    336 {
    337     call_in_coroutine(test_quiesce_drain);
    338 }
    339 
    340 static void test_quiesce_co_drain_subtree(void)
    341 {
    342     call_in_coroutine(test_quiesce_drain_subtree);
    343 }
    344 
    345 static void test_nested(void)
    346 {
    347     BlockBackend *blk;
    348     BlockDriverState *bs, *backing;
    349     BDRVTestState *s, *backing_s;
    350     enum drain_type outer, inner;
    351 
    352     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    353     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
    354                               &error_abort);
    355     s = bs->opaque;
    356     blk_insert_bs(blk, bs, &error_abort);
    357 
    358     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
    359     backing_s = backing->opaque;
    360     bdrv_set_backing_hd(bs, backing, &error_abort);
    361 
    362     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
    363         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
    364             int backing_quiesce = (outer != BDRV_DRAIN) +
    365                                   (inner != BDRV_DRAIN);
    366 
    367             g_assert_cmpint(bs->quiesce_counter, ==, 0);
    368             g_assert_cmpint(backing->quiesce_counter, ==, 0);
    369             g_assert_cmpint(s->drain_count, ==, 0);
    370             g_assert_cmpint(backing_s->drain_count, ==, 0);
    371 
    372             do_drain_begin(outer, bs);
    373             do_drain_begin(inner, bs);
    374 
    375             g_assert_cmpint(bs->quiesce_counter, ==, 2);
    376             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
    377             g_assert_cmpint(s->drain_count, ==, 2);
    378             g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
    379 
    380             do_drain_end(inner, bs);
    381             do_drain_end(outer, bs);
    382 
    383             g_assert_cmpint(bs->quiesce_counter, ==, 0);
    384             g_assert_cmpint(backing->quiesce_counter, ==, 0);
    385             g_assert_cmpint(s->drain_count, ==, 0);
    386             g_assert_cmpint(backing_s->drain_count, ==, 0);
    387         }
    388     }
    389 
    390     bdrv_unref(backing);
    391     bdrv_unref(bs);
    392     blk_unref(blk);
    393 }
    394 
    395 static void test_multiparent(void)
    396 {
    397     BlockBackend *blk_a, *blk_b;
    398     BlockDriverState *bs_a, *bs_b, *backing;
    399     BDRVTestState *a_s, *b_s, *backing_s;
    400 
    401     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    402     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
    403                                 &error_abort);
    404     a_s = bs_a->opaque;
    405     blk_insert_bs(blk_a, bs_a, &error_abort);
    406 
    407     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    408     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
    409                                 &error_abort);
    410     b_s = bs_b->opaque;
    411     blk_insert_bs(blk_b, bs_b, &error_abort);
    412 
    413     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
    414     backing_s = backing->opaque;
    415     bdrv_set_backing_hd(bs_a, backing, &error_abort);
    416     bdrv_set_backing_hd(bs_b, backing, &error_abort);
    417 
    418     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
    419     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
    420     g_assert_cmpint(backing->quiesce_counter, ==, 0);
    421     g_assert_cmpint(a_s->drain_count, ==, 0);
    422     g_assert_cmpint(b_s->drain_count, ==, 0);
    423     g_assert_cmpint(backing_s->drain_count, ==, 0);
    424 
    425     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
    426 
    427     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
    428     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
    429     g_assert_cmpint(backing->quiesce_counter, ==, 1);
    430     g_assert_cmpint(a_s->drain_count, ==, 1);
    431     g_assert_cmpint(b_s->drain_count, ==, 1);
    432     g_assert_cmpint(backing_s->drain_count, ==, 1);
    433 
    434     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
    435 
    436     g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
    437     g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
    438     g_assert_cmpint(backing->quiesce_counter, ==, 2);
    439     g_assert_cmpint(a_s->drain_count, ==, 2);
    440     g_assert_cmpint(b_s->drain_count, ==, 2);
    441     g_assert_cmpint(backing_s->drain_count, ==, 2);
    442 
    443     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
    444 
    445     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
    446     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
    447     g_assert_cmpint(backing->quiesce_counter, ==, 1);
    448     g_assert_cmpint(a_s->drain_count, ==, 1);
    449     g_assert_cmpint(b_s->drain_count, ==, 1);
    450     g_assert_cmpint(backing_s->drain_count, ==, 1);
    451 
    452     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
    453 
    454     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
    455     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
    456     g_assert_cmpint(backing->quiesce_counter, ==, 0);
    457     g_assert_cmpint(a_s->drain_count, ==, 0);
    458     g_assert_cmpint(b_s->drain_count, ==, 0);
    459     g_assert_cmpint(backing_s->drain_count, ==, 0);
    460 
    461     bdrv_unref(backing);
    462     bdrv_unref(bs_a);
    463     bdrv_unref(bs_b);
    464     blk_unref(blk_a);
    465     blk_unref(blk_b);
    466 }
    467 
    468 static void test_graph_change_drain_subtree(void)
    469 {
    470     BlockBackend *blk_a, *blk_b;
    471     BlockDriverState *bs_a, *bs_b, *backing;
    472     BDRVTestState *a_s, *b_s, *backing_s;
    473 
    474     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    475     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
    476                                 &error_abort);
    477     a_s = bs_a->opaque;
    478     blk_insert_bs(blk_a, bs_a, &error_abort);
    479 
    480     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    481     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
    482                                 &error_abort);
    483     b_s = bs_b->opaque;
    484     blk_insert_bs(blk_b, bs_b, &error_abort);
    485 
    486     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
    487     backing_s = backing->opaque;
    488     bdrv_set_backing_hd(bs_a, backing, &error_abort);
    489 
    490     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
    491     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
    492     g_assert_cmpint(backing->quiesce_counter, ==, 0);
    493     g_assert_cmpint(a_s->drain_count, ==, 0);
    494     g_assert_cmpint(b_s->drain_count, ==, 0);
    495     g_assert_cmpint(backing_s->drain_count, ==, 0);
    496 
    497     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
    498     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
    499     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
    500     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
    501     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
    502 
    503     bdrv_set_backing_hd(bs_b, backing, &error_abort);
    504     g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
    505     g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
    506     g_assert_cmpint(backing->quiesce_counter, ==, 5);
    507     g_assert_cmpint(a_s->drain_count, ==, 5);
    508     g_assert_cmpint(b_s->drain_count, ==, 5);
    509     g_assert_cmpint(backing_s->drain_count, ==, 5);
    510 
    511     bdrv_set_backing_hd(bs_b, NULL, &error_abort);
    512     g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
    513     g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
    514     g_assert_cmpint(backing->quiesce_counter, ==, 3);
    515     g_assert_cmpint(a_s->drain_count, ==, 3);
    516     g_assert_cmpint(b_s->drain_count, ==, 2);
    517     g_assert_cmpint(backing_s->drain_count, ==, 3);
    518 
    519     bdrv_set_backing_hd(bs_b, backing, &error_abort);
    520     g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
    521     g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
    522     g_assert_cmpint(backing->quiesce_counter, ==, 5);
    523     g_assert_cmpint(a_s->drain_count, ==, 5);
    524     g_assert_cmpint(b_s->drain_count, ==, 5);
    525     g_assert_cmpint(backing_s->drain_count, ==, 5);
    526 
    527     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
    528     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
    529     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
    530     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
    531     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
    532 
    533     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
    534     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
    535     g_assert_cmpint(backing->quiesce_counter, ==, 0);
    536     g_assert_cmpint(a_s->drain_count, ==, 0);
    537     g_assert_cmpint(b_s->drain_count, ==, 0);
    538     g_assert_cmpint(backing_s->drain_count, ==, 0);
    539 
    540     bdrv_unref(backing);
    541     bdrv_unref(bs_a);
    542     bdrv_unref(bs_b);
    543     blk_unref(blk_a);
    544     blk_unref(blk_b);
    545 }
    546 
    547 static void test_graph_change_drain_all(void)
    548 {
    549     BlockBackend *blk_a, *blk_b;
    550     BlockDriverState *bs_a, *bs_b;
    551     BDRVTestState *a_s, *b_s;
    552 
    553     /* Create node A with a BlockBackend */
    554     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    555     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
    556                                 &error_abort);
    557     a_s = bs_a->opaque;
    558     blk_insert_bs(blk_a, bs_a, &error_abort);
    559 
    560     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
    561     g_assert_cmpint(a_s->drain_count, ==, 0);
    562 
    563     /* Call bdrv_drain_all_begin() */
    564     bdrv_drain_all_begin();
    565 
    566     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
    567     g_assert_cmpint(a_s->drain_count, ==, 1);
    568 
    569     /* Create node B with a BlockBackend */
    570     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    571     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
    572                                 &error_abort);
    573     b_s = bs_b->opaque;
    574     blk_insert_bs(blk_b, bs_b, &error_abort);
    575 
    576     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
    577     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
    578     g_assert_cmpint(a_s->drain_count, ==, 1);
    579     g_assert_cmpint(b_s->drain_count, ==, 1);
    580 
    581     /* Unref and finally delete node A */
    582     blk_unref(blk_a);
    583 
    584     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
    585     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
    586     g_assert_cmpint(a_s->drain_count, ==, 1);
    587     g_assert_cmpint(b_s->drain_count, ==, 1);
    588 
    589     bdrv_unref(bs_a);
    590 
    591     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
    592     g_assert_cmpint(b_s->drain_count, ==, 1);
    593 
    594     /* End the drained section */
    595     bdrv_drain_all_end();
    596 
    597     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
    598     g_assert_cmpint(b_s->drain_count, ==, 0);
    599     g_assert_cmpint(qemu_get_aio_context()->external_disable_cnt, ==, 0);
    600 
    601     bdrv_unref(bs_b);
    602     blk_unref(blk_b);
    603 }
    604 
    605 struct test_iothread_data {
    606     BlockDriverState *bs;
    607     enum drain_type drain_type;
    608     int *aio_ret;
    609 };
    610 
    611 static void test_iothread_drain_entry(void *opaque)
    612 {
    613     struct test_iothread_data *data = opaque;
    614 
    615     aio_context_acquire(bdrv_get_aio_context(data->bs));
    616     do_drain_begin(data->drain_type, data->bs);
    617     g_assert_cmpint(*data->aio_ret, ==, 0);
    618     do_drain_end(data->drain_type, data->bs);
    619     aio_context_release(bdrv_get_aio_context(data->bs));
    620 
    621     qemu_event_set(&done_event);
    622 }
    623 
    624 static void test_iothread_aio_cb(void *opaque, int ret)
    625 {
    626     int *aio_ret = opaque;
    627     *aio_ret = ret;
    628     qemu_event_set(&done_event);
    629 }
    630 
    631 static void test_iothread_main_thread_bh(void *opaque)
    632 {
    633     struct test_iothread_data *data = opaque;
    634 
    635     /* Test that the AioContext is not yet locked in a random BH that is
    636      * executed during drain, otherwise this would deadlock. */
    637     aio_context_acquire(bdrv_get_aio_context(data->bs));
    638     bdrv_flush(data->bs);
    639     aio_context_release(bdrv_get_aio_context(data->bs));
    640 }
    641 
    642 /*
    643  * Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
    644  * The request involves a BH on iothread 2 before it can complete.
    645  *
    646  * @drain_thread = 0 means that do_drain_begin/end are called from the main
    647  * thread, @drain_thread = 1 means that they are called from iothread 1. Drain
    648  * for this BDS cannot be called from iothread 2 because only the main thread
    649  * may do cross-AioContext polling.
    650  */
    651 static void test_iothread_common(enum drain_type drain_type, int drain_thread)
    652 {
    653     BlockBackend *blk;
    654     BlockDriverState *bs;
    655     BDRVTestState *s;
    656     BlockAIOCB *acb;
    657     int aio_ret;
    658     struct test_iothread_data data;
    659 
    660     IOThread *a = iothread_new();
    661     IOThread *b = iothread_new();
    662     AioContext *ctx_a = iothread_get_aio_context(a);
    663     AioContext *ctx_b = iothread_get_aio_context(b);
    664 
    665     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
    666 
    667     /* bdrv_drain_all() may only be called from the main loop thread */
    668     if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) {
    669         goto out;
    670     }
    671 
    672     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    673     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
    674                               &error_abort);
    675     s = bs->opaque;
    676     blk_insert_bs(blk, bs, &error_abort);
    677     blk_set_disable_request_queuing(blk, true);
    678 
    679     blk_set_aio_context(blk, ctx_a, &error_abort);
    680     aio_context_acquire(ctx_a);
    681 
    682     s->bh_indirection_ctx = ctx_b;
    683 
    684     aio_ret = -EINPROGRESS;
    685     qemu_event_reset(&done_event);
    686 
    687     if (drain_thread == 0) {
    688         acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret);
    689     } else {
    690         acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
    691     }
    692     g_assert(acb != NULL);
    693     g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
    694 
    695     aio_context_release(ctx_a);
    696 
    697     data = (struct test_iothread_data) {
    698         .bs         = bs,
    699         .drain_type = drain_type,
    700         .aio_ret    = &aio_ret,
    701     };
    702 
    703     switch (drain_thread) {
    704     case 0:
    705         if (drain_type != BDRV_DRAIN_ALL) {
    706             aio_context_acquire(ctx_a);
    707         }
    708 
    709         aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data);
    710 
    711         /* The request is running on the IOThread a. Draining its block device
    712          * will make sure that it has completed as far as the BDS is concerned,
    713          * but the drain in this thread can continue immediately after
    714          * bdrv_dec_in_flight() and aio_ret might be assigned only slightly
    715          * later. */
    716         do_drain_begin(drain_type, bs);
    717         g_assert_cmpint(bs->in_flight, ==, 0);
    718 
    719         if (drain_type != BDRV_DRAIN_ALL) {
    720             aio_context_release(ctx_a);
    721         }
    722         qemu_event_wait(&done_event);
    723         if (drain_type != BDRV_DRAIN_ALL) {
    724             aio_context_acquire(ctx_a);
    725         }
    726 
    727         g_assert_cmpint(aio_ret, ==, 0);
    728         do_drain_end(drain_type, bs);
    729 
    730         if (drain_type != BDRV_DRAIN_ALL) {
    731             aio_context_release(ctx_a);
    732         }
    733         break;
    734     case 1:
    735         aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data);
    736         qemu_event_wait(&done_event);
    737         break;
    738     default:
    739         g_assert_not_reached();
    740     }
    741 
    742     aio_context_acquire(ctx_a);
    743     blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
    744     aio_context_release(ctx_a);
    745 
    746     bdrv_unref(bs);
    747     blk_unref(blk);
    748 
    749 out:
    750     iothread_join(a);
    751     iothread_join(b);
    752 }
    753 
    754 static void test_iothread_drain_all(void)
    755 {
    756     test_iothread_common(BDRV_DRAIN_ALL, 0);
    757     test_iothread_common(BDRV_DRAIN_ALL, 1);
    758 }
    759 
    760 static void test_iothread_drain(void)
    761 {
    762     test_iothread_common(BDRV_DRAIN, 0);
    763     test_iothread_common(BDRV_DRAIN, 1);
    764 }
    765 
    766 static void test_iothread_drain_subtree(void)
    767 {
    768     test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
    769     test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
    770 }
    771 
    772 
    773 typedef struct TestBlockJob {
    774     BlockJob common;
    775     BlockDriverState *bs;
    776     int run_ret;
    777     int prepare_ret;
    778     bool running;
    779     bool should_complete;
    780 } TestBlockJob;
    781 
    782 static int test_job_prepare(Job *job)
    783 {
    784     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
    785 
    786     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
    787     bdrv_flush(s->bs);
    788     return s->prepare_ret;
    789 }
    790 
    791 static void test_job_commit(Job *job)
    792 {
    793     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
    794 
    795     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
    796     bdrv_flush(s->bs);
    797 }
    798 
    799 static void test_job_abort(Job *job)
    800 {
    801     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
    802 
    803     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
    804     bdrv_flush(s->bs);
    805 }
    806 
    807 static int coroutine_fn test_job_run(Job *job, Error **errp)
    808 {
    809     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
    810 
    811     /* We are running the actual job code past the pause point in
    812      * job_co_entry(). */
    813     s->running = true;
    814 
    815     job_transition_to_ready(&s->common.job);
    816     while (!s->should_complete) {
    817         /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
    818          * emulate some actual activity (probably some I/O) here so that drain
    819          * has to wait for this activity to stop. */
    820         qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
    821 
    822         job_pause_point(&s->common.job);
    823     }
    824 
    825     return s->run_ret;
    826 }
    827 
    828 static void test_job_complete(Job *job, Error **errp)
    829 {
    830     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
    831     s->should_complete = true;
    832 }
    833 
    834 BlockJobDriver test_job_driver = {
    835     .job_driver = {
    836         .instance_size  = sizeof(TestBlockJob),
    837         .free           = block_job_free,
    838         .user_resume    = block_job_user_resume,
    839         .run            = test_job_run,
    840         .complete       = test_job_complete,
    841         .prepare        = test_job_prepare,
    842         .commit         = test_job_commit,
    843         .abort          = test_job_abort,
    844     },
    845 };
    846 
    847 enum test_job_result {
    848     TEST_JOB_SUCCESS,
    849     TEST_JOB_FAIL_RUN,
    850     TEST_JOB_FAIL_PREPARE,
    851 };
    852 
    853 enum test_job_drain_node {
    854     TEST_JOB_DRAIN_SRC,
    855     TEST_JOB_DRAIN_SRC_CHILD,
    856     TEST_JOB_DRAIN_SRC_PARENT,
    857 };
    858 
/*
 * Check that a running TestBlockJob is paused while a node is drained and
 * resumes afterwards, then complete the job and check its result.
 *
 * The graph is "source-overlay" -> "source" -> "source-backing" (each being
 * the backing node of the one before), plus a separate "target" node that is
 * added to the job.  The job itself is created on "source".
 *
 * @drain_type:   kind of drain passed to do_drain_{begin,end}_unlocked()
 * @use_iothread: if true, the source BlockBackend is moved to the AioContext
 *                of a new IOThread before the job is created
 * @result:       selects whether run_ret or prepare_ret of the job is set to
 *                -EIO; job completion is expected to return 0 for
 *                TEST_JOB_SUCCESS and -EIO otherwise
 * @drain_node:   selects which node of the source chain is drained in the
 *                first drain section (the second section drains "target")
 */
static void test_blockjob_common_drain_node(enum drain_type drain_type,
                                            bool use_iothread,
                                            enum test_job_result result,
                                            enum test_job_drain_node drain_node)
{
    BlockBackend *blk_src, *blk_target;
    BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs;
    BlockJob *job;
    TestBlockJob *tjob;
    IOThread *iothread = NULL;
    AioContext *ctx;
    int ret;

    src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
                               &error_abort);
    src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing",
                                       BDRV_O_RDWR, &error_abort);
    src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay",
                                       BDRV_O_RDWR, &error_abort);

    /* Build the chain; our own references to src and src_backing are dropped
     * right after each node has been attached as a backing node */
    bdrv_set_backing_hd(src_overlay, src, &error_abort);
    bdrv_unref(src);
    bdrv_set_backing_hd(src, src_backing, &error_abort);
    bdrv_unref(src_backing);

    blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    blk_insert_bs(blk_src, src_overlay, &error_abort);

    /* Pick the node for the first drain section */
    switch (drain_node) {
    case TEST_JOB_DRAIN_SRC:
        drain_bs = src;
        break;
    case TEST_JOB_DRAIN_SRC_CHILD:
        drain_bs = src_backing;
        break;
    case TEST_JOB_DRAIN_SRC_PARENT:
        drain_bs = src_overlay;
        break;
    default:
        g_assert_not_reached();
    }

    /* ctx is the context acquired around job creation and final cleanup */
    if (use_iothread) {
        iothread = iothread_new();
        ctx = iothread_get_aio_context(iothread);
        blk_set_aio_context(blk_src, ctx, &error_abort);
    } else {
        ctx = qemu_get_aio_context();
    }

    /* The target gets its own BlockBackend, created in the main context */
    target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
                                  &error_abort);
    blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    blk_insert_bs(blk_target, target, &error_abort);
    blk_set_allow_aio_context_change(blk_target, true);

    aio_context_acquire(ctx);
    tjob = block_job_create("job0", &test_job_driver, NULL, src,
                            0, BLK_PERM_ALL,
                            0, 0, NULL, NULL, &error_abort);
    tjob->bs = src;
    job = &tjob->common;
    block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);

    /* Configure the failure, if any, that the job should report */
    switch (result) {
    case TEST_JOB_SUCCESS:
        break;
    case TEST_JOB_FAIL_RUN:
        tjob->run_ret = -EIO;
        break;
    case TEST_JOB_FAIL_PREPARE:
        tjob->prepare_ret = -EIO;
        break;
    }
    aio_context_release(ctx);

    job_start(&job->job);

    if (use_iothread) {
        /* job_co_entry() is run in the I/O thread, wait for the actual job
         * code to start (we don't want to catch the job in the pause point in
         * job_co_entry()). */
        while (!tjob->running) {
            aio_poll(qemu_get_aio_context(), false);
        }
    }

    /* Before any drain: job is running, not paused and busy */
    WITH_JOB_LOCK_GUARD() {
        g_assert_cmpint(job->job.pause_count, ==, 0);
        g_assert_false(job->job.paused);
        g_assert_true(tjob->running);
        g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
    }

    /* First drain section: the selected node of the source chain */
    do_drain_begin_unlocked(drain_type, drain_bs);

    WITH_JOB_LOCK_GUARD() {
        if (drain_type == BDRV_DRAIN_ALL) {
            /* bdrv_drain_all() drains both src and target */
            g_assert_cmpint(job->job.pause_count, ==, 2);
        } else {
            g_assert_cmpint(job->job.pause_count, ==, 1);
        }
        g_assert_true(job->job.paused);
        g_assert_false(job->job.busy); /* The job is paused */
    }

    do_drain_end_unlocked(drain_type, drain_bs);

    if (use_iothread) {
        /*
         * Here we are waiting for the paused status to change,
         * so don't bother protecting the read every time.
         *
         * paused is reset in the I/O thread, wait for it
         */
        while (job->job.paused) {
            aio_poll(qemu_get_aio_context(), false);
        }
    }

    /* After the first drain section the job must be running again */
    WITH_JOB_LOCK_GUARD() {
        g_assert_cmpint(job->job.pause_count, ==, 0);
        g_assert_false(job->job.paused);
        g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
    }

    /* Second drain section: same checks, but draining the target node */
    do_drain_begin_unlocked(drain_type, target);

    WITH_JOB_LOCK_GUARD() {
        if (drain_type == BDRV_DRAIN_ALL) {
            /* bdrv_drain_all() drains both src and target */
            g_assert_cmpint(job->job.pause_count, ==, 2);
        } else {
            g_assert_cmpint(job->job.pause_count, ==, 1);
        }
        g_assert_true(job->job.paused);
        g_assert_false(job->job.busy); /* The job is paused */
    }

    do_drain_end_unlocked(drain_type, target);

    if (use_iothread) {
        /*
         * Here we are waiting for the paused status to change,
         * so don't bother protecting the read every time.
         *
         * paused is reset in the I/O thread, wait for it
         */
        while (job->job.paused) {
            aio_poll(qemu_get_aio_context(), false);
        }
    }

    WITH_JOB_LOCK_GUARD() {
        g_assert_cmpint(job->job.pause_count, ==, 0);
        g_assert_false(job->job.paused);
        g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
    }

    /* Complete the job and compare its return value with the configured
     * result */
    WITH_JOB_LOCK_GUARD() {
        ret = job_complete_sync_locked(&job->job, &error_abort);
    }
    g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));

    /* Move the source back to the main context; the target is expected to
     * be in the main context at this point as well */
    aio_context_acquire(ctx);
    if (use_iothread) {
        blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort);
        assert(blk_get_aio_context(blk_target) == qemu_get_aio_context());
    }
    aio_context_release(ctx);

    blk_unref(blk_src);
    blk_unref(blk_target);
    bdrv_unref(src_overlay);
    bdrv_unref(target);

    if (iothread) {
        iothread_join(iothread);
    }
}
   1040 
   1041 static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
   1042                                  enum test_job_result result)
   1043 {
   1044     test_blockjob_common_drain_node(drain_type, use_iothread, result,
   1045                                     TEST_JOB_DRAIN_SRC);
   1046     test_blockjob_common_drain_node(drain_type, use_iothread, result,
   1047                                     TEST_JOB_DRAIN_SRC_CHILD);
   1048     if (drain_type == BDRV_SUBTREE_DRAIN) {
   1049         test_blockjob_common_drain_node(drain_type, use_iothread, result,
   1050                                         TEST_JOB_DRAIN_SRC_PARENT);
   1051     }
   1052 }
   1053 
   1054 static void test_blockjob_drain_all(void)
   1055 {
   1056     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
   1057 }
   1058 
   1059 static void test_blockjob_drain(void)
   1060 {
   1061     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
   1062 }
   1063 
   1064 static void test_blockjob_drain_subtree(void)
   1065 {
   1066     test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
   1067 }
   1068 
   1069 static void test_blockjob_error_drain_all(void)
   1070 {
   1071     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
   1072     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE);
   1073 }
   1074 
   1075 static void test_blockjob_error_drain(void)
   1076 {
   1077     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN);
   1078     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
   1079 }
   1080 
   1081 static void test_blockjob_error_drain_subtree(void)
   1082 {
   1083     test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
   1084     test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
   1085 }
   1086 
   1087 static void test_blockjob_iothread_drain_all(void)
   1088 {
   1089     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
   1090 }
   1091 
   1092 static void test_blockjob_iothread_drain(void)
   1093 {
   1094     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
   1095 }
   1096 
   1097 static void test_blockjob_iothread_drain_subtree(void)
   1098 {
   1099     test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
   1100 }
   1101 
   1102 static void test_blockjob_iothread_error_drain_all(void)
   1103 {
   1104     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
   1105     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_PREPARE);
   1106 }
   1107 
   1108 static void test_blockjob_iothread_error_drain(void)
   1109 {
   1110     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN);
   1111     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
   1112 }
   1113 
   1114 static void test_blockjob_iothread_error_drain_subtree(void)
   1115 {
   1116     test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
   1117     test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
   1118 }
   1119 
   1120 
/*
 * Driver state of bdrv_test_top_driver nodes.
 */
typedef struct BDRVTestTopState {
    /* Child that bdrv_test_top_co_preadv() forwards read requests to */
    BdrvChild *wait_child;
} BDRVTestTopState;
   1124 
   1125 static void bdrv_test_top_close(BlockDriverState *bs)
   1126 {
   1127     BdrvChild *c, *next_c;
   1128     QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
   1129         bdrv_unref_child(bs, c);
   1130     }
   1131 }
   1132 
   1133 static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs,
   1134                                                 int64_t offset, int64_t bytes,
   1135                                                 QEMUIOVector *qiov,
   1136                                                 BdrvRequestFlags flags)
   1137 {
   1138     BDRVTestTopState *tts = bs->opaque;
   1139     return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags);
   1140 }
   1141 
   1142 static BlockDriver bdrv_test_top_driver = {
   1143     .format_name            = "test_top_driver",
   1144     .instance_size          = sizeof(BDRVTestTopState),
   1145 
   1146     .bdrv_close             = bdrv_test_top_close,
   1147     .bdrv_co_preadv         = bdrv_test_top_co_preadv,
   1148 
   1149     .bdrv_child_perm        = bdrv_default_perms,
   1150 };
   1151 
/*
 * Arguments and completion flag shared between do_test_delete_by_drain()
 * and the test_co_delete_by_drain() coroutine.
 */
typedef struct TestCoDeleteByDrainData {
    /* BlockBackend whose root node the coroutine operates on */
    BlockBackend *blk;
    /* If true, the coroutine detaches all children of the node instead of
     * dropping the BlockBackend reference */
    bool detach_instead_of_delete;
    /* Set by the coroutine once it has finished its work */
    bool done;
} TestCoDeleteByDrainData;
   1157 
   1158 static void coroutine_fn test_co_delete_by_drain(void *opaque)
   1159 {
   1160     TestCoDeleteByDrainData *dbdd = opaque;
   1161     BlockBackend *blk = dbdd->blk;
   1162     BlockDriverState *bs = blk_bs(blk);
   1163     BDRVTestTopState *tts = bs->opaque;
   1164     void *buffer = g_malloc(65536);
   1165     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
   1166 
   1167     /* Pretend some internal write operation from parent to child.
   1168      * Important: We have to read from the child, not from the parent!
   1169      * Draining works by first propagating it all up the tree to the
   1170      * root and then waiting for drainage from root to the leaves
   1171      * (protocol nodes).  If we have a request waiting on the root,
   1172      * everything will be drained before we go back down the tree, but
   1173      * we do not want that.  We want to be in the middle of draining
   1174      * when this following requests returns. */
   1175     bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0);
   1176 
   1177     g_assert_cmpint(bs->refcnt, ==, 1);
   1178 
   1179     if (!dbdd->detach_instead_of_delete) {
   1180         blk_unref(blk);
   1181     } else {
   1182         BdrvChild *c, *next_c;
   1183         QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
   1184             bdrv_unref_child(bs, c);
   1185         }
   1186     }
   1187 
   1188     dbdd->done = true;
   1189     g_free(buffer);
   1190 }
   1191 
/**
 * Test what happens when some BDS has some children, you drain one of
 * them and this results in the BDS being deleted.
 *
 * If @detach_instead_of_delete is set, the BDS is not going to be
 * deleted but will only detach all of its children.
 *
 * @drain_type selects how the drain is performed: BDRV_DRAIN drains the
 * wait child, BDRV_SUBTREE_DRAIN drains the subtree of the top node (only
 * allowed together with @detach_instead_of_delete), and BDRV_DRAIN_ALL
 * drains everything.
 */
static void do_test_delete_by_drain(bool detach_instead_of_delete,
                                    enum drain_type drain_type)
{
    BlockBackend *blk;
    BlockDriverState *bs, *child_bs, *null_bs;
    BDRVTestTopState *tts;
    TestCoDeleteByDrainData dbdd;
    Coroutine *co;

    bs = bdrv_new_open_driver(&bdrv_test_top_driver, "top", BDRV_O_RDWR,
                              &error_abort);
    bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
    tts = bs->opaque;

    /* First child: a null-co node attached before the wait child */
    null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
                        &error_abort);
    bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds,
                      BDRV_CHILD_DATA, &error_abort);

    /* This child will be the one to pass requests through to, and
     * it will stall until a drain occurs */
    child_bs = bdrv_new_open_driver(&bdrv_test, "child", BDRV_O_RDWR,
                                    &error_abort);
    child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
    /* Takes our reference to child_bs */
    tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child",
                                        &child_of_bds,
                                        BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY,
                                        &error_abort);

    /* This child is just there to be deleted
     * (for detach_instead_of_delete == true) */
    null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
                        &error_abort);
    bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA,
                      &error_abort);

    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    blk_insert_bs(blk, bs, &error_abort);

    /* Referenced by blk now */
    bdrv_unref(bs);

    g_assert_cmpint(bs->refcnt, ==, 1);
    g_assert_cmpint(child_bs->refcnt, ==, 1);
    g_assert_cmpint(null_bs->refcnt, ==, 1);


    /* Start the coroutine; it issues the read on the wait child */
    dbdd = (TestCoDeleteByDrainData){
        .blk = blk,
        .detach_instead_of_delete = detach_instead_of_delete,
        .done = false,
    };
    co = qemu_coroutine_create(test_co_delete_by_drain, &dbdd);
    qemu_coroutine_enter(co);

    /* Drain the child while the read operation is still pending.
     * This should result in the operation finishing and
     * test_co_delete_by_drain() resuming.  Thus, @bs will be deleted
     * and the coroutine will exit while this drain operation is still
     * in progress. */
    switch (drain_type) {
    case BDRV_DRAIN:
        /* Hold our own reference to child_bs across the drain */
        bdrv_ref(child_bs);
        bdrv_drain(child_bs);
        bdrv_unref(child_bs);
        break;
    case BDRV_SUBTREE_DRAIN:
        /* Would have to ref/unref bs here for !detach_instead_of_delete, but
         * then the whole test becomes pointless because the graph changes
         * don't occur during the drain any more. */
        assert(detach_instead_of_delete);
        bdrv_subtree_drained_begin(bs);
        bdrv_subtree_drained_end(bs);
        break;
    case BDRV_DRAIN_ALL:
        bdrv_drain_all_begin();
        bdrv_drain_all_end();
        break;
    default:
        g_assert_not_reached();
    }

    /* Wait until the coroutine has reported completion */
    while (!dbdd.done) {
        aio_poll(qemu_get_aio_context(), true);
    }

    if (detach_instead_of_delete) {
        /* Here, the reference has not passed over to the coroutine,
         * so we have to delete the BB ourselves */
        blk_unref(blk);
    }
}
   1292 
   1293 static void test_delete_by_drain(void)
   1294 {
   1295     do_test_delete_by_drain(false, BDRV_DRAIN);
   1296 }
   1297 
   1298 static void test_detach_by_drain_all(void)
   1299 {
   1300     do_test_delete_by_drain(true, BDRV_DRAIN_ALL);
   1301 }
   1302 
   1303 static void test_detach_by_drain(void)
   1304 {
   1305     do_test_delete_by_drain(true, BDRV_DRAIN);
   1306 }
   1307 
   1308 static void test_detach_by_drain_subtree(void)
   1309 {
   1310     do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
   1311 }
   1312 
   1313 
/*
 * Shared state of test_detach_indirect() and its callbacks.
 */
struct detach_by_parent_data {
    /* Parent whose children are changed by detach_indirect_bh() */
    BlockDriverState *parent_b;
    /* Child of parent_b that detach_indirect_bh() removes */
    BdrvChild *child_b;
    /* Node that detach_indirect_bh() attaches to parent_b */
    BlockDriverState *c;
    /* Result of attaching c; written by detach_indirect_bh() */
    BdrvChild *child_c;
    /* If true, detach_by_parent_aio_cb() performs the graph change */
    bool by_parent_cb;
};
/* Single instance used by the callbacks; filled in by test_detach_indirect() */
static struct detach_by_parent_data detach_by_parent_data;
   1322 
   1323 static void detach_indirect_bh(void *opaque)
   1324 {
   1325     struct detach_by_parent_data *data = opaque;
   1326 
   1327     bdrv_unref_child(data->parent_b, data->child_b);
   1328 
   1329     bdrv_ref(data->c);
   1330     data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C",
   1331                                       &child_of_bds, BDRV_CHILD_DATA,
   1332                                       &error_abort);
   1333 }
   1334 
   1335 static void detach_by_parent_aio_cb(void *opaque, int ret)
   1336 {
   1337     struct detach_by_parent_data *data = &detach_by_parent_data;
   1338 
   1339     g_assert_cmpint(ret, ==, 0);
   1340     if (data->by_parent_cb) {
   1341         detach_indirect_bh(data);
   1342     }
   1343 }
   1344 
   1345 static void detach_by_driver_cb_drained_begin(BdrvChild *child)
   1346 {
   1347     aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
   1348                             detach_indirect_bh, &detach_by_parent_data);
   1349     child_of_bds.drained_begin(child);
   1350 }
   1351 
/*
 * Child class for the PA-A edge in the by_parent_cb == false variant of
 * test_detach_indirect(), which initializes it as a copy of child_of_bds
 * with .drained_begin set to detach_by_driver_cb_drained_begin().
 */
static BdrvChildClass detach_by_driver_cb_class;
   1353 
/*
 * Initial graph:
 *
 * PA     PB
 *    \ /   \
 *     A     B     C
 *
 * by_parent_cb == true:  Test that parent callbacks don't poll
 *
 *     PA has a pending read request whose callback changes the child nodes of
 *     PB: It removes B and adds C instead. The subtree of PB is drained, which
 *     will indirectly drain the read request, too.
 *
 * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll
 *
 *     PA's BdrvChildClass has a .drained_begin callback that schedules a BH
 *     that does the same graph change. If bdrv_drain_invoke() calls it, the
 *     state is messed up, but if it is only polled in the single
 *     BDRV_POLL_WHILE() at the end of the drain, this should work fine.
 *
 * In both variants the test checks reference counts, the order of PB's
 * children and the quiesce counters after bdrv_subtree_drained_begin(PB).
 */
static void test_detach_indirect(bool by_parent_cb)
{
    BlockBackend *blk;
    BlockDriverState *parent_a, *parent_b, *a, *b, *c;
    BdrvChild *child_a, *child_b;
    BlockAIOCB *acb;

    /* Empty I/O vector for the zero-length request issued below */
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);

    if (!by_parent_cb) {
        /* Same as child_of_bds, except for the .drained_begin callback */
        detach_by_driver_cb_class = child_of_bds;
        detach_by_driver_cb_class.drained_begin =
            detach_by_driver_cb_drained_begin;
    }

    /* Create all involved nodes */
    parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR,
                                    &error_abort);
    parent_b = bdrv_new_open_driver(&bdrv_test, "parent-b", 0,
                                    &error_abort);

    a = bdrv_new_open_driver(&bdrv_test, "a", BDRV_O_RDWR, &error_abort);
    b = bdrv_new_open_driver(&bdrv_test, "b", BDRV_O_RDWR, &error_abort);
    c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort);

    /* blk is a BB for parent-a */
    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    blk_insert_bs(blk, parent_a, &error_abort);
    bdrv_unref(parent_a);

    /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver
     * callback must not return immediately. */
    if (!by_parent_cb) {
        BDRVTestState *s = parent_a->opaque;
        s->sleep_in_drain_begin = true;
    }

    /* Set child relationships */
    bdrv_ref(b);
    bdrv_ref(a);
    child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds,
                                BDRV_CHILD_DATA, &error_abort);
    child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_of_bds,
                                BDRV_CHILD_COW, &error_abort);

    bdrv_ref(a);
    bdrv_attach_child(parent_a, a, "PA-A",
                      by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class,
                      BDRV_CHILD_DATA, &error_abort);

    /* Reference counts of the initial graph */
    g_assert_cmpint(parent_a->refcnt, ==, 1);
    g_assert_cmpint(parent_b->refcnt, ==, 1);
    g_assert_cmpint(a->refcnt, ==, 3);
    g_assert_cmpint(b->refcnt, ==, 2);
    g_assert_cmpint(c->refcnt, ==, 1);

    /* PB's child list is expected to hold child_a first, then child_b */
    g_assert(QLIST_FIRST(&parent_b->children) == child_a);
    g_assert(QLIST_NEXT(child_a, next) == child_b);
    g_assert(QLIST_NEXT(child_b, next) == NULL);

    /* Start the evil read request */
    detach_by_parent_data = (struct detach_by_parent_data) {
        .parent_b = parent_b,
        .child_b = child_b,
        .c = c,
        .by_parent_cb = by_parent_cb,
    };
    acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
    g_assert(acb != NULL);

    /* Drain and check the expected result */
    bdrv_subtree_drained_begin(parent_b);

    /* The graph change must have happened by now: C is attached to PB */
    g_assert(detach_by_parent_data.child_c != NULL);

    g_assert_cmpint(parent_a->refcnt, ==, 1);
    g_assert_cmpint(parent_b->refcnt, ==, 1);
    g_assert_cmpint(a->refcnt, ==, 3);
    g_assert_cmpint(b->refcnt, ==, 1);
    g_assert_cmpint(c->refcnt, ==, 2);

    /* PB's child list is now expected to hold child_c first, then child_a */
    g_assert(QLIST_FIRST(&parent_b->children) == detach_by_parent_data.child_c);
    g_assert(QLIST_NEXT(detach_by_parent_data.child_c, next) == child_a);
    g_assert(QLIST_NEXT(child_a, next) == NULL);

    /* Everything still connected to PB is quiesced once; B is not */
    g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
    g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
    g_assert_cmpint(a->quiesce_counter, ==, 1);
    g_assert_cmpint(b->quiesce_counter, ==, 0);
    g_assert_cmpint(c->quiesce_counter, ==, 1);

    bdrv_subtree_drained_end(parent_b);

    bdrv_unref(parent_b);
    blk_unref(blk);

    /* Only the references from bdrv_new_open_driver() should be left */
    g_assert_cmpint(a->refcnt, ==, 1);
    g_assert_cmpint(b->refcnt, ==, 1);
    g_assert_cmpint(c->refcnt, ==, 1);
    bdrv_unref(a);
    bdrv_unref(b);
    bdrv_unref(c);
}
   1477 
   1478 static void test_detach_by_parent_cb(void)
   1479 {
   1480     test_detach_indirect(true);
   1481 }
   1482 
   1483 static void test_detach_by_driver_cb(void)
   1484 {
   1485     test_detach_indirect(false);
   1486 }
   1487 
   1488 static void test_append_to_drained(void)
   1489 {
   1490     BlockBackend *blk;
   1491     BlockDriverState *base, *overlay;
   1492     BDRVTestState *base_s, *overlay_s;
   1493 
   1494     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
   1495     base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
   1496     base_s = base->opaque;
   1497     blk_insert_bs(blk, base, &error_abort);
   1498 
   1499     overlay = bdrv_new_open_driver(&bdrv_test, "overlay", BDRV_O_RDWR,
   1500                                    &error_abort);
   1501     overlay_s = overlay->opaque;
   1502 
   1503     do_drain_begin(BDRV_DRAIN, base);
   1504     g_assert_cmpint(base->quiesce_counter, ==, 1);
   1505     g_assert_cmpint(base_s->drain_count, ==, 1);
   1506     g_assert_cmpint(base->in_flight, ==, 0);
   1507 
   1508     bdrv_append(overlay, base, &error_abort);
   1509     g_assert_cmpint(base->in_flight, ==, 0);
   1510     g_assert_cmpint(overlay->in_flight, ==, 0);
   1511 
   1512     g_assert_cmpint(base->quiesce_counter, ==, 1);
   1513     g_assert_cmpint(base_s->drain_count, ==, 1);
   1514     g_assert_cmpint(overlay->quiesce_counter, ==, 1);
   1515     g_assert_cmpint(overlay_s->drain_count, ==, 1);
   1516 
   1517     do_drain_end(BDRV_DRAIN, base);
   1518 
   1519     g_assert_cmpint(base->quiesce_counter, ==, 0);
   1520     g_assert_cmpint(base_s->drain_count, ==, 0);
   1521     g_assert_cmpint(overlay->quiesce_counter, ==, 0);
   1522     g_assert_cmpint(overlay_s->drain_count, ==, 0);
   1523 
   1524     bdrv_unref(overlay);
   1525     bdrv_unref(base);
   1526     blk_unref(blk);
   1527 }
   1528 
   1529 static void test_set_aio_context(void)
   1530 {
   1531     BlockDriverState *bs;
   1532     IOThread *a = iothread_new();
   1533     IOThread *b = iothread_new();
   1534     AioContext *ctx_a = iothread_get_aio_context(a);
   1535     AioContext *ctx_b = iothread_get_aio_context(b);
   1536 
   1537     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
   1538                               &error_abort);
   1539 
   1540     bdrv_drained_begin(bs);
   1541     bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort);
   1542 
   1543     aio_context_acquire(ctx_a);
   1544     bdrv_drained_end(bs);
   1545 
   1546     bdrv_drained_begin(bs);
   1547     bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort);
   1548     aio_context_release(ctx_a);
   1549     aio_context_acquire(ctx_b);
   1550     bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort);
   1551     aio_context_release(ctx_b);
   1552     bdrv_drained_end(bs);
   1553 
   1554     bdrv_unref(bs);
   1555     iothread_join(a);
   1556     iothread_join(b);
   1557 }
   1558 
   1559 
/*
 * State of the block job driven by test_drop_backing_job_driver.
 */
typedef struct TestDropBackingBlockJob {
    /* Embedded block job; callbacks map a Job back to this struct via
     * container_of(job, TestDropBackingBlockJob, common.job) */
    BlockJob common;
    /* Makes test_drop_backing_job_run() leave its loop once true */
    bool should_complete;
    /* Set to true through this pointer by the commit callback */
    bool *did_complete;
    /* Second node whose backing node the commit callback removes */
    BlockDriverState *detach_also;
    /* First node whose backing node the commit callback removes */
    BlockDriverState *bs;
} TestDropBackingBlockJob;
   1567 
   1568 static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp)
   1569 {
   1570     TestDropBackingBlockJob *s =
   1571         container_of(job, TestDropBackingBlockJob, common.job);
   1572 
   1573     while (!s->should_complete) {
   1574         job_sleep_ns(job, 0);
   1575     }
   1576 
   1577     return 0;
   1578 }
   1579 
   1580 static void test_drop_backing_job_commit(Job *job)
   1581 {
   1582     TestDropBackingBlockJob *s =
   1583         container_of(job, TestDropBackingBlockJob, common.job);
   1584 
   1585     bdrv_set_backing_hd(s->bs, NULL, &error_abort);
   1586     bdrv_set_backing_hd(s->detach_also, NULL, &error_abort);
   1587 
   1588     *s->did_complete = true;
   1589 }
   1590 
   1591 static const BlockJobDriver test_drop_backing_job_driver = {
   1592     .job_driver = {
   1593         .instance_size  = sizeof(TestDropBackingBlockJob),
   1594         .free           = block_job_free,
   1595         .user_resume    = block_job_user_resume,
   1596         .run            = test_drop_backing_job_run,
   1597         .commit         = test_drop_backing_job_commit,
   1598     }
   1599 };
   1600 
   1601 /**
   1602  * Creates a child node with three parent nodes on it, and then runs a
   1603  * block job on the final one, parent-node-2.
   1604  *
   1605  * The job is then asked to complete before a section where the child
   1606  * is drained.
   1607  *
   1608  * Ending this section will undrain the child's parents, first
   1609  * parent-node-2, then parent-node-1, then parent-node-0 -- the parent
   1610  * list is in reverse order of how they were added.  Ending the drain
   1611  * on parent-node-2 will resume the job, thus completing it and
   1612  * scheduling job_exit().
   1613  *
   1614  * Ending the drain on parent-node-1 will poll the AioContext, which
   1615  * lets job_exit() and thus test_drop_backing_job_commit() run.  That
   1616  * function first removes the child as parent-node-2's backing file.
   1617  *
   1618  * In old (and buggy) implementations, there are two problems with
   1619  * that:
   1620  * (A) bdrv_drain_invoke() polls for every node that leaves the
   1621  *     drained section.  This means that job_exit() is scheduled
   1622  *     before the child has left the drained section.  Its
   1623  *     quiesce_counter is therefore still 1 when it is removed from
   1624  *     parent-node-2.
   1625  *
   1626  * (B) bdrv_replace_child_noperm() calls drained_end() on the old
   1627  *     child's parents as many times as the child is quiesced.  This
   1628  *     means it will call drained_end() on parent-node-2 once.
   1629  *     Because parent-node-2 is no longer quiesced at this point, this
   1630  *     will fail.
   1631  *
   1632  * bdrv_replace_child_noperm() therefore must call drained_end() on
   1633  * the parent only if it really is still drained because the child is
   1634  * drained.
   1635  *
   1636  * If removing child from parent-node-2 was successful (as it should
   1637  * be), test_drop_backing_job_commit() will then also remove the child
   1638  * from parent-node-0.
   1639  *
   1640  * With an old version of our drain infrastructure ((A) above), that
   1641  * resulted in the following flow:
   1642  *
   1643  * 1. child attempts to leave its drained section.  The call recurses
   1644  *    to its parents.
   1645  *
   1646  * 2. parent-node-2 leaves the drained section.  Polling in
   1647  *    bdrv_drain_invoke() will schedule job_exit().
   1648  *
   1649  * 3. parent-node-1 leaves the drained section.  Polling in
   1650  *    bdrv_drain_invoke() will run job_exit(), thus disconnecting
   1651  *    parent-node-0 from the child node.
   1652  *
   1653  * 4. bdrv_parent_drained_end() uses a QLIST_FOREACH_SAFE() loop to
   1654  *    iterate over the parents.  Thus, it now accesses the BdrvChild
   1655  *    object that used to connect parent-node-0 and the child node.
   1656  *    However, that object no longer exists, so it accesses a dangling
   1657  *    pointer.
   1658  *
   1659  * The solution is to only poll once when running a bdrv_drained_end()
   1660  * operation, specifically at the end when all drained_end()
   1661  * operations for all involved nodes have been scheduled.
   1662  * Note that this also solves (A) above, thus hiding (B).
   1663  */
   1664 static void test_blockjob_commit_by_drained_end(void)
   1665 {
   1666     BlockDriverState *bs_child, *bs_parents[3];
   1667     TestDropBackingBlockJob *job;
   1668     bool job_has_completed = false;
   1669     int i;
   1670 
   1671     bs_child = bdrv_new_open_driver(&bdrv_test, "child-node", BDRV_O_RDWR,
   1672                                     &error_abort);
   1673 
   1674     for (i = 0; i < 3; i++) {
   1675         char name[32];
   1676         snprintf(name, sizeof(name), "parent-node-%i", i);
   1677         bs_parents[i] = bdrv_new_open_driver(&bdrv_test, name, BDRV_O_RDWR,
   1678                                              &error_abort);
   1679         bdrv_set_backing_hd(bs_parents[i], bs_child, &error_abort);
   1680     }
   1681 
   1682     job = block_job_create("job", &test_drop_backing_job_driver, NULL,
   1683                            bs_parents[2], 0, BLK_PERM_ALL, 0, 0, NULL, NULL,
   1684                            &error_abort);
   1685     job->bs = bs_parents[2];
   1686 
   1687     job->detach_also = bs_parents[0];
   1688     job->did_complete = &job_has_completed;
   1689 
   1690     job_start(&job->common.job);
   1691 
   1692     job->should_complete = true;
   1693     bdrv_drained_begin(bs_child);
   1694     g_assert(!job_has_completed);
   1695     bdrv_drained_end(bs_child);
   1696     g_assert(job_has_completed);
   1697 
   1698     bdrv_unref(bs_parents[0]);
   1699     bdrv_unref(bs_parents[1]);
   1700     bdrv_unref(bs_parents[2]);
   1701     bdrv_unref(bs_child);
   1702 }
   1703 
   1704 
   1705 typedef struct TestSimpleBlockJob {
   1706     BlockJob common;
   1707     bool should_complete;
   1708     bool *did_complete;
   1709 } TestSimpleBlockJob;
   1710 
   1711 static int coroutine_fn test_simple_job_run(Job *job, Error **errp)
   1712 {
   1713     TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
   1714 
   1715     while (!s->should_complete) {
   1716         job_sleep_ns(job, 0);
   1717     }
   1718 
   1719     return 0;
   1720 }
   1721 
   1722 static void test_simple_job_clean(Job *job)
   1723 {
   1724     TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
   1725     *s->did_complete = true;
   1726 }
   1727 
   1728 static const BlockJobDriver test_simple_job_driver = {
   1729     .job_driver = {
   1730         .instance_size  = sizeof(TestSimpleBlockJob),
   1731         .free           = block_job_free,
   1732         .user_resume    = block_job_user_resume,
   1733         .run            = test_simple_job_run,
   1734         .clean          = test_simple_job_clean,
   1735     },
   1736 };
   1737 
   1738 static int drop_intermediate_poll_update_filename(BdrvChild *child,
   1739                                                   BlockDriverState *new_base,
   1740                                                   const char *filename,
   1741                                                   Error **errp)
   1742 {
   1743     /*
   1744      * We are free to poll here, which may change the block graph, if
   1745      * it is not drained.
   1746      */
   1747 
   1748     /* If the job is not drained: Complete it, schedule job_exit() */
   1749     aio_poll(qemu_get_current_aio_context(), false);
   1750     /* If the job is not drained: Run job_exit(), finish the job */
   1751     aio_poll(qemu_get_current_aio_context(), false);
   1752 
   1753     return 0;
   1754 }
   1755 
   1756 /**
   1757  * Test a poll in the midst of bdrv_drop_intermediate().
   1758  *
   1759  * bdrv_drop_intermediate() calls BdrvChildClass.update_filename(),
   1760  * which can yield or poll.  This may lead to graph changes, unless
   1761  * the whole subtree in question is drained.
   1762  *
   1763  * We test this on the following graph:
   1764  *
   1765  *                    Job
   1766  *
   1767  *                     |
   1768  *                  job-node
   1769  *                     |
   1770  *                     v
   1771  *
   1772  *                  job-node
   1773  *
   1774  *                     |
   1775  *                  backing
   1776  *                     |
   1777  *                     v
   1778  *
   1779  * node-2 --chain--> node-1 --chain--> node-0
   1780  *
   1781  * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0).
   1782  *
   1783  * This first updates node-2's backing filename by invoking
   1784  * drop_intermediate_poll_update_filename(), which polls twice.  This
   1785  * causes the job to finish, which in turns causes the job-node to be
   1786  * deleted.
   1787  *
   1788  * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it
   1789  * already has a pointer to the BdrvChild edge between job-node and
   1790  * node-1.  When it tries to handle that edge, we probably get a
   1791  * segmentation fault because the object no longer exists.
   1792  *
   1793  *
   1794  * The solution is for bdrv_drop_intermediate() to drain top's
   1795  * subtree.  This prevents graph changes from happening just because
   1796  * BdrvChildClass.update_filename() yields or polls.  Thus, the block
   1797  * job is paused during that drained section and must finish before or
   1798  * after.
   1799  *
   1800  * (In addition, bdrv_replace_child() must keep the job paused.)
   1801  */
   1802 static void test_drop_intermediate_poll(void)
   1803 {
   1804     static BdrvChildClass chain_child_class;
   1805     BlockDriverState *chain[3];
   1806     TestSimpleBlockJob *job;
   1807     BlockDriverState *job_node;
   1808     bool job_has_completed = false;
   1809     int i;
   1810     int ret;
   1811 
   1812     chain_child_class = child_of_bds;
   1813     chain_child_class.update_filename = drop_intermediate_poll_update_filename;
   1814 
   1815     for (i = 0; i < 3; i++) {
   1816         char name[32];
   1817         snprintf(name, 32, "node-%i", i);
   1818 
   1819         chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort);
   1820     }
   1821 
   1822     job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR,
   1823                                     &error_abort);
   1824     bdrv_set_backing_hd(job_node, chain[1], &error_abort);
   1825 
   1826     /*
   1827      * Establish the chain last, so the chain links are the first
   1828      * elements in the BDS.parents lists
   1829      */
   1830     for (i = 0; i < 3; i++) {
   1831         if (i) {
   1832             /* Takes the reference to chain[i - 1] */
   1833             bdrv_attach_child(chain[i], chain[i - 1], "chain",
   1834                               &chain_child_class, BDRV_CHILD_COW, &error_abort);
   1835         }
   1836     }
   1837 
   1838     job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
   1839                            0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
   1840 
   1841     /* The job has a reference now */
   1842     bdrv_unref(job_node);
   1843 
   1844     job->did_complete = &job_has_completed;
   1845 
   1846     job_start(&job->common.job);
   1847     job->should_complete = true;
   1848 
   1849     g_assert(!job_has_completed);
   1850     ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
   1851     g_assert(ret == 0);
   1852     g_assert(job_has_completed);
   1853 
   1854     bdrv_unref(chain[2]);
   1855 }
   1856 
   1857 
   1858 typedef struct BDRVReplaceTestState {
   1859     bool was_drained;
   1860     bool was_undrained;
   1861     bool has_read;
   1862 
   1863     int drain_count;
   1864 
   1865     bool yield_before_read;
   1866     Coroutine *io_co;
   1867     Coroutine *drain_co;
   1868 } BDRVReplaceTestState;
   1869 
   1870 static void bdrv_replace_test_close(BlockDriverState *bs)
   1871 {
   1872 }
   1873 
   1874 /**
   1875  * If @bs has a backing file:
   1876  *   Yield if .yield_before_read is true (and wait for drain_begin to
   1877  *   wake us up).
   1878  *   Forward the read to bs->backing.  Set .has_read to true.
   1879  *   If drain_begin has woken us, wake it in turn.
   1880  *
   1881  * Otherwise:
   1882  *   Set .has_read to true and return success.
   1883  */
   1884 static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
   1885                                                     int64_t offset,
   1886                                                     int64_t bytes,
   1887                                                     QEMUIOVector *qiov,
   1888                                                     BdrvRequestFlags flags)
   1889 {
   1890     BDRVReplaceTestState *s = bs->opaque;
   1891 
   1892     if (bs->backing) {
   1893         int ret;
   1894 
   1895         g_assert(!s->drain_count);
   1896 
   1897         s->io_co = qemu_coroutine_self();
   1898         if (s->yield_before_read) {
   1899             s->yield_before_read = false;
   1900             qemu_coroutine_yield();
   1901         }
   1902         s->io_co = NULL;
   1903 
   1904         ret = bdrv_co_preadv(bs->backing, offset, bytes, qiov, 0);
   1905         s->has_read = true;
   1906 
   1907         /* Wake up drain_co if it runs */
   1908         if (s->drain_co) {
   1909             aio_co_wake(s->drain_co);
   1910         }
   1911 
   1912         return ret;
   1913     }
   1914 
   1915     s->has_read = true;
   1916     return 0;
   1917 }
   1918 
   1919 /**
   1920  * If .drain_count is 0, wake up .io_co if there is one; and set
   1921  * .was_drained.
   1922  * Increment .drain_count.
   1923  */
   1924 static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
   1925 {
   1926     BDRVReplaceTestState *s = bs->opaque;
   1927 
   1928     if (!s->drain_count) {
   1929         /* Keep waking io_co up until it is done */
   1930         s->drain_co = qemu_coroutine_self();
   1931         while (s->io_co) {
   1932             aio_co_wake(s->io_co);
   1933             s->io_co = NULL;
   1934             qemu_coroutine_yield();
   1935         }
   1936         s->drain_co = NULL;
   1937 
   1938         s->was_drained = true;
   1939     }
   1940     s->drain_count++;
   1941 }
   1942 
   1943 /**
   1944  * Reduce .drain_count, set .was_undrained once it reaches 0.
   1945  * If .drain_count reaches 0 and the node has a backing file, issue a
   1946  * read request.
   1947  */
   1948 static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
   1949 {
   1950     BDRVReplaceTestState *s = bs->opaque;
   1951 
   1952     g_assert(s->drain_count > 0);
   1953     if (!--s->drain_count) {
   1954         int ret;
   1955 
   1956         s->was_undrained = true;
   1957 
   1958         if (bs->backing) {
   1959             char data;
   1960             QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
   1961 
   1962             /* Queue a read request post-drain */
   1963             ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
   1964             g_assert(ret >= 0);
   1965         }
   1966     }
   1967 }
   1968 
   1969 static BlockDriver bdrv_replace_test = {
   1970     .format_name            = "replace_test",
   1971     .instance_size          = sizeof(BDRVReplaceTestState),
   1972     .supports_backing       = true,
   1973 
   1974     .bdrv_close             = bdrv_replace_test_close,
   1975     .bdrv_co_preadv         = bdrv_replace_test_co_preadv,
   1976 
   1977     .bdrv_co_drain_begin    = bdrv_replace_test_co_drain_begin,
   1978     .bdrv_co_drain_end      = bdrv_replace_test_co_drain_end,
   1979 
   1980     .bdrv_child_perm        = bdrv_default_perms,
   1981 };
   1982 
   1983 static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque)
   1984 {
   1985     int ret;
   1986     char data;
   1987 
   1988     ret = blk_co_pread(opaque, 0, 1, &data, 0);
   1989     g_assert(ret >= 0);
   1990 }
   1991 
   1992 /**
   1993  * We test two things:
   1994  * (1) bdrv_replace_child_noperm() must not undrain the parent if both
   1995  *     children are drained.
   1996  * (2) bdrv_replace_child_noperm() must never flush I/O requests to a
   1997  *     drained child.  If the old child is drained, it must flush I/O
   1998  *     requests after the new one has been attached.  If the new child
   1999  *     is drained, it must flush I/O requests before the old one is
   2000  *     detached.
   2001  *
   2002  * To do so, we create one parent node and two child nodes; then
   2003  * attach one of the children (old_child_bs) to the parent, then
   2004  * drain both old_child_bs and new_child_bs according to
   2005  * old_drain_count and new_drain_count, respectively, and finally
   2006  * we invoke bdrv_replace_node() to replace old_child_bs by
   2007  * new_child_bs.
   2008  *
   2009  * The test block driver we use here (bdrv_replace_test) has a read
   2010  * function that:
   2011  * - For the parent node, can optionally yield, and then forwards the
   2012  *   read to bdrv_preadv(),
   2013  * - For the child node, just returns immediately.
   2014  *
   2015  * If the read yields, the drain_begin function will wake it up.
   2016  *
   2017  * The drain_end function issues a read on the parent once it is fully
   2018  * undrained (which simulates requests starting to come in again).
   2019  */
   2020 static void do_test_replace_child_mid_drain(int old_drain_count,
   2021                                             int new_drain_count)
   2022 {
   2023     BlockBackend *parent_blk;
   2024     BlockDriverState *parent_bs;
   2025     BlockDriverState *old_child_bs, *new_child_bs;
   2026     BDRVReplaceTestState *parent_s;
   2027     BDRVReplaceTestState *old_child_s, *new_child_s;
   2028     Coroutine *io_co;
   2029     int i;
   2030 
   2031     parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0,
   2032                                      &error_abort);
   2033     parent_s = parent_bs->opaque;
   2034 
   2035     parent_blk = blk_new(qemu_get_aio_context(),
   2036                          BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
   2037     blk_insert_bs(parent_blk, parent_bs, &error_abort);
   2038 
   2039     old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0,
   2040                                         &error_abort);
   2041     new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0,
   2042                                         &error_abort);
   2043     old_child_s = old_child_bs->opaque;
   2044     new_child_s = new_child_bs->opaque;
   2045 
   2046     /* So that we can read something */
   2047     parent_bs->total_sectors = 1;
   2048     old_child_bs->total_sectors = 1;
   2049     new_child_bs->total_sectors = 1;
   2050 
   2051     bdrv_ref(old_child_bs);
   2052     bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
   2053                       BDRV_CHILD_COW, &error_abort);
   2054 
   2055     for (i = 0; i < old_drain_count; i++) {
   2056         bdrv_drained_begin(old_child_bs);
   2057     }
   2058     for (i = 0; i < new_drain_count; i++) {
   2059         bdrv_drained_begin(new_child_bs);
   2060     }
   2061 
   2062     if (!old_drain_count) {
   2063         /*
   2064          * Start a read operation that will yield, so it will not
   2065          * complete before the node is drained.
   2066          */
   2067         parent_s->yield_before_read = true;
   2068         io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co,
   2069                                       parent_blk);
   2070         qemu_coroutine_enter(io_co);
   2071     }
   2072 
   2073     /* If we have started a read operation, it should have yielded */
   2074     g_assert(!parent_s->has_read);
   2075 
   2076     /* Reset drained status so we can see what bdrv_replace_node() does */
   2077     parent_s->was_drained = false;
   2078     parent_s->was_undrained = false;
   2079 
   2080     g_assert(parent_bs->quiesce_counter == old_drain_count);
   2081     bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
   2082     g_assert(parent_bs->quiesce_counter == new_drain_count);
   2083 
   2084     if (!old_drain_count && !new_drain_count) {
   2085         /*
   2086          * From undrained to undrained drains and undrains the parent,
   2087          * because bdrv_replace_node() contains a drained section for
   2088          * @old_child_bs.
   2089          */
   2090         g_assert(parent_s->was_drained && parent_s->was_undrained);
   2091     } else if (!old_drain_count && new_drain_count) {
   2092         /*
   2093          * From undrained to drained should drain the parent and keep
   2094          * it that way.
   2095          */
   2096         g_assert(parent_s->was_drained && !parent_s->was_undrained);
   2097     } else if (old_drain_count && !new_drain_count) {
   2098         /*
   2099          * From drained to undrained should undrain the parent and
   2100          * keep it that way.
   2101          */
   2102         g_assert(!parent_s->was_drained && parent_s->was_undrained);
   2103     } else /* if (old_drain_count && new_drain_count) */ {
   2104         /*
   2105          * From drained to drained must not undrain the parent at any
   2106          * point
   2107          */
   2108         g_assert(!parent_s->was_drained && !parent_s->was_undrained);
   2109     }
   2110 
   2111     if (!old_drain_count || !new_drain_count) {
   2112         /*
   2113          * If !old_drain_count, we have started a read request before
   2114          * bdrv_replace_node().  If !new_drain_count, the parent must
   2115          * have been undrained at some point, and
   2116          * bdrv_replace_test_co_drain_end() starts a read request
   2117          * then.
   2118          */
   2119         g_assert(parent_s->has_read);
   2120     } else {
   2121         /*
   2122          * If the parent was never undrained, there is no way to start
   2123          * a read request.
   2124          */
   2125         g_assert(!parent_s->has_read);
   2126     }
   2127 
   2128     /* A drained child must have not received any request */
   2129     g_assert(!(old_drain_count && old_child_s->has_read));
   2130     g_assert(!(new_drain_count && new_child_s->has_read));
   2131 
   2132     for (i = 0; i < new_drain_count; i++) {
   2133         bdrv_drained_end(new_child_bs);
   2134     }
   2135     for (i = 0; i < old_drain_count; i++) {
   2136         bdrv_drained_end(old_child_bs);
   2137     }
   2138 
   2139     /*
   2140      * By now, bdrv_replace_test_co_drain_end() must have been called
   2141      * at some point while the new child was attached to the parent.
   2142      */
   2143     g_assert(parent_s->has_read);
   2144     g_assert(new_child_s->has_read);
   2145 
   2146     blk_unref(parent_blk);
   2147     bdrv_unref(parent_bs);
   2148     bdrv_unref(old_child_bs);
   2149     bdrv_unref(new_child_bs);
   2150 }
   2151 
   2152 static void test_replace_child_mid_drain(void)
   2153 {
   2154     int old_drain_count, new_drain_count;
   2155 
   2156     for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) {
   2157         for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) {
   2158             do_test_replace_child_mid_drain(old_drain_count, new_drain_count);
   2159         }
   2160     }
   2161 }
   2162 
   2163 int main(int argc, char **argv)
   2164 {
   2165     int ret;
   2166 
   2167     bdrv_init();
   2168     qemu_init_main_loop(&error_abort);
   2169 
   2170     g_test_init(&argc, &argv, NULL);
   2171     qemu_event_init(&done_event, false);
   2172 
   2173     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
   2174     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
   2175     g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
   2176                     test_drv_cb_drain_subtree);
   2177 
   2178     g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
   2179                     test_drv_cb_co_drain_all);
   2180     g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
   2181     g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
   2182                     test_drv_cb_co_drain_subtree);
   2183 
   2184 
   2185     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
   2186     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
   2187     g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
   2188                     test_quiesce_drain_subtree);
   2189 
   2190     g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
   2191                     test_quiesce_co_drain_all);
   2192     g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
   2193     g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
   2194                     test_quiesce_co_drain_subtree);
   2195 
   2196     g_test_add_func("/bdrv-drain/nested", test_nested);
   2197     g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
   2198 
   2199     g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
   2200                     test_graph_change_drain_subtree);
   2201     g_test_add_func("/bdrv-drain/graph-change/drain_all",
   2202                     test_graph_change_drain_all);
   2203 
   2204     g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
   2205     g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
   2206     g_test_add_func("/bdrv-drain/iothread/drain_subtree",
   2207                     test_iothread_drain_subtree);
   2208 
   2209     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
   2210     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
   2211     g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
   2212                     test_blockjob_drain_subtree);
   2213 
   2214     g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
   2215                     test_blockjob_error_drain_all);
   2216     g_test_add_func("/bdrv-drain/blockjob/error/drain",
   2217                     test_blockjob_error_drain);
   2218     g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
   2219                     test_blockjob_error_drain_subtree);
   2220 
   2221     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
   2222                     test_blockjob_iothread_drain_all);
   2223     g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
   2224                     test_blockjob_iothread_drain);
   2225     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
   2226                     test_blockjob_iothread_drain_subtree);
   2227 
   2228     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
   2229                     test_blockjob_iothread_error_drain_all);
   2230     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
   2231                     test_blockjob_iothread_error_drain);
   2232     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
   2233                     test_blockjob_iothread_error_drain_subtree);
   2234 
   2235     g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
   2236     g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
   2237     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
   2238     g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
   2239     g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
   2240     g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
   2241 
   2242     g_test_add_func("/bdrv-drain/attach/drain", test_append_to_drained);
   2243 
   2244     g_test_add_func("/bdrv-drain/set_aio_context", test_set_aio_context);
   2245 
   2246     g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end",
   2247                     test_blockjob_commit_by_drained_end);
   2248 
   2249     g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
   2250                     test_drop_intermediate_poll);
   2251 
   2252     g_test_add_func("/bdrv-drain/replace_child/mid-drain",
   2253                     test_replace_child_mid_drain);
   2254 
   2255     ret = g_test_run();
   2256     qemu_event_destroy(&done_event);
   2257     return ret;
   2258 }