qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

migration.c (27035B)


      1 /*
      2  * Migration support for VFIO devices
      3  *
      4  * Copyright NVIDIA, Inc. 2020
      5  *
      6  * This work is licensed under the terms of the GNU GPL, version 2. See
      7  * the COPYING file in the top-level directory.
      8  */
      9 
     10 #include "qemu/osdep.h"
     11 #include "qemu/main-loop.h"
     12 #include "qemu/cutils.h"
     13 #include <linux/vfio.h>
     14 #include <sys/ioctl.h>
     15 
     16 #include "sysemu/runstate.h"
     17 #include "hw/vfio/vfio-common.h"
     18 #include "migration/migration.h"
     19 #include "migration/vmstate.h"
     20 #include "migration/qemu-file.h"
     21 #include "migration/register.h"
     22 #include "migration/blocker.h"
     23 #include "migration/misc.h"
     24 #include "qapi/error.h"
     25 #include "exec/ramlist.h"
     26 #include "exec/ram_addr.h"
     27 #include "pci.h"
     28 #include "trace.h"
     29 #include "hw/hw.h"
     30 
/*
 * Flags to be used as unique delimiters for VFIO devices in the migration
 * stream. These flags are composed as:
 * 0xffffffff => MSB 32-bit all 1s
 * 0xef10     => Magic ID, represents emulated (virtual) function IO
 * 0x0000     => 16-bits reserved for flags
 *
 * The beginning of state information is marked by _DEV_CONFIG_STATE,
 * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
 * certain state information is marked by _END_OF_STATE.
 */
#define VFIO_MIG_FLAG_END_OF_STATE      (0xffffffffef100001ULL)
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xffffffffef100002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE    (0xffffffffef100004ULL)

/*
 * Running total of device data bytes handed to the migration stream by
 * vfio_save_buffer(). It is reset to zero by the migration state notifier
 * when a migration is cancelled or fails, and is exposed read-only through
 * vfio_mig_bytes_transferred().
 */
static int64_t bytes_transferred;
     48 
     49 static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
     50                                   off_t off, bool iswrite)
     51 {
     52     int ret;
     53 
     54     ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
     55                     pread(vbasedev->fd, val, count, off);
     56     if (ret < count) {
     57         error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
     58                      HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count,
     59                      vbasedev->name, off, strerror(errno));
     60         return (ret < 0) ? ret : -EINVAL;
     61     }
     62     return 0;
     63 }
     64 
     65 static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count,
     66                        off_t off, bool iswrite)
     67 {
     68     int ret, done = 0;
     69     __u8 *tbuf = buf;
     70 
     71     while (count) {
     72         int bytes = 0;
     73 
     74         if (count >= 8 && !(off % 8)) {
     75             bytes = 8;
     76         } else if (count >= 4 && !(off % 4)) {
     77             bytes = 4;
     78         } else if (count >= 2 && !(off % 2)) {
     79             bytes = 2;
     80         } else {
     81             bytes = 1;
     82         }
     83 
     84         ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite);
     85         if (ret) {
     86             return ret;
     87         }
     88 
     89         count -= bytes;
     90         done += bytes;
     91         off += bytes;
     92         tbuf += bytes;
     93     }
     94     return done;
     95 }
     96 
     97 #define vfio_mig_read(f, v, c, o)       vfio_mig_rw(f, (__u8 *)v, c, o, false)
     98 #define vfio_mig_write(f, v, c, o)      vfio_mig_rw(f, (__u8 *)v, c, o, true)
     99 
    100 #define VFIO_MIG_STRUCT_OFFSET(f)       \
    101                                  offsetof(struct vfio_device_migration_info, f)
    102 /*
    103  * Change the device_state register for device @vbasedev. Bits set in @mask
    104  * are preserved, bits set in @value are set, and bits not set in either @mask
    105  * or @value are cleared in device_state. If the register cannot be accessed,
    106  * the resulting state would be invalid, or the device enters an error state,
    107  * an error is returned.
    108  */
    109 
    110 static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
    111                                     uint32_t value)
    112 {
    113     VFIOMigration *migration = vbasedev->migration;
    114     VFIORegion *region = &migration->region;
    115     off_t dev_state_off = region->fd_offset +
    116                           VFIO_MIG_STRUCT_OFFSET(device_state);
    117     uint32_t device_state;
    118     int ret;
    119 
    120     ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
    121                         dev_state_off);
    122     if (ret < 0) {
    123         return ret;
    124     }
    125 
    126     device_state = (device_state & mask) | value;
    127 
    128     if (!VFIO_DEVICE_STATE_VALID(device_state)) {
    129         return -EINVAL;
    130     }
    131 
    132     ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state),
    133                          dev_state_off);
    134     if (ret < 0) {
    135         int rret;
    136 
    137         rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state),
    138                              dev_state_off);
    139 
    140         if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) {
    141             hw_error("%s: Device in error state 0x%x", vbasedev->name,
    142                      device_state);
    143             return rret ? rret : -EIO;
    144         }
    145         return ret;
    146     }
    147 
    148     migration->device_state = device_state;
    149     trace_vfio_migration_set_state(vbasedev->name, device_state);
    150     return 0;
    151 }
    152 
    153 static void *get_data_section_size(VFIORegion *region, uint64_t data_offset,
    154                                    uint64_t data_size, uint64_t *size)
    155 {
    156     void *ptr = NULL;
    157     uint64_t limit = 0;
    158     int i;
    159 
    160     if (!region->mmaps) {
    161         if (size) {
    162             *size = MIN(data_size, region->size - data_offset);
    163         }
    164         return ptr;
    165     }
    166 
    167     for (i = 0; i < region->nr_mmaps; i++) {
    168         VFIOMmap *map = region->mmaps + i;
    169 
    170         if ((data_offset >= map->offset) &&
    171             (data_offset < map->offset + map->size)) {
    172 
    173             /* check if data_offset is within sparse mmap areas */
    174             ptr = map->mmap + data_offset - map->offset;
    175             if (size) {
    176                 *size = MIN(data_size, map->offset + map->size - data_offset);
    177             }
    178             break;
    179         } else if ((data_offset < map->offset) &&
    180                    (!limit || limit > map->offset)) {
    181             /*
    182              * data_offset is not within sparse mmap areas, find size of
    183              * non-mapped area. Check through all list since region->mmaps list
    184              * is not sorted.
    185              */
    186             limit = map->offset;
    187         }
    188     }
    189 
    190     if (!ptr && size) {
    191         *size = limit ? MIN(data_size, limit - data_offset) : data_size;
    192     }
    193     return ptr;
    194 }
    195 
    196 static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size)
    197 {
    198     VFIOMigration *migration = vbasedev->migration;
    199     VFIORegion *region = &migration->region;
    200     uint64_t data_offset = 0, data_size = 0, sz;
    201     int ret;
    202 
    203     ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
    204                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
    205     if (ret < 0) {
    206         return ret;
    207     }
    208 
    209     ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
    210                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
    211     if (ret < 0) {
    212         return ret;
    213     }
    214 
    215     trace_vfio_save_buffer(vbasedev->name, data_offset, data_size,
    216                            migration->pending_bytes);
    217 
    218     qemu_put_be64(f, data_size);
    219     sz = data_size;
    220 
    221     while (sz) {
    222         void *buf;
    223         uint64_t sec_size;
    224         bool buf_allocated = false;
    225 
    226         buf = get_data_section_size(region, data_offset, sz, &sec_size);
    227 
    228         if (!buf) {
    229             buf = g_try_malloc(sec_size);
    230             if (!buf) {
    231                 error_report("%s: Error allocating buffer ", __func__);
    232                 return -ENOMEM;
    233             }
    234             buf_allocated = true;
    235 
    236             ret = vfio_mig_read(vbasedev, buf, sec_size,
    237                                 region->fd_offset + data_offset);
    238             if (ret < 0) {
    239                 g_free(buf);
    240                 return ret;
    241             }
    242         }
    243 
    244         qemu_put_buffer(f, buf, sec_size);
    245 
    246         if (buf_allocated) {
    247             g_free(buf);
    248         }
    249         sz -= sec_size;
    250         data_offset += sec_size;
    251     }
    252 
    253     ret = qemu_file_get_error(f);
    254 
    255     if (!ret && size) {
    256         *size = data_size;
    257     }
    258 
    259     bytes_transferred += data_size;
    260     return ret;
    261 }
    262 
    263 static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
    264                             uint64_t data_size)
    265 {
    266     VFIORegion *region = &vbasedev->migration->region;
    267     uint64_t data_offset = 0, size, report_size;
    268     int ret;
    269 
    270     do {
    271         ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
    272                       region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset));
    273         if (ret < 0) {
    274             return ret;
    275         }
    276 
    277         if (data_offset + data_size > region->size) {
    278             /*
    279              * If data_size is greater than the data section of migration region
    280              * then iterate the write buffer operation. This case can occur if
    281              * size of migration region at destination is smaller than size of
    282              * migration region at source.
    283              */
    284             report_size = size = region->size - data_offset;
    285             data_size -= size;
    286         } else {
    287             report_size = size = data_size;
    288             data_size = 0;
    289         }
    290 
    291         trace_vfio_load_state_device_data(vbasedev->name, data_offset, size);
    292 
    293         while (size) {
    294             void *buf;
    295             uint64_t sec_size;
    296             bool buf_alloc = false;
    297 
    298             buf = get_data_section_size(region, data_offset, size, &sec_size);
    299 
    300             if (!buf) {
    301                 buf = g_try_malloc(sec_size);
    302                 if (!buf) {
    303                     error_report("%s: Error allocating buffer ", __func__);
    304                     return -ENOMEM;
    305                 }
    306                 buf_alloc = true;
    307             }
    308 
    309             qemu_get_buffer(f, buf, sec_size);
    310 
    311             if (buf_alloc) {
    312                 ret = vfio_mig_write(vbasedev, buf, sec_size,
    313                         region->fd_offset + data_offset);
    314                 g_free(buf);
    315 
    316                 if (ret < 0) {
    317                     return ret;
    318                 }
    319             }
    320             size -= sec_size;
    321             data_offset += sec_size;
    322         }
    323 
    324         ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
    325                         region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size));
    326         if (ret < 0) {
    327             return ret;
    328         }
    329     } while (data_size);
    330 
    331     return 0;
    332 }
    333 
    334 static int vfio_update_pending(VFIODevice *vbasedev)
    335 {
    336     VFIOMigration *migration = vbasedev->migration;
    337     VFIORegion *region = &migration->region;
    338     uint64_t pending_bytes = 0;
    339     int ret;
    340 
    341     ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
    342                     region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes));
    343     if (ret < 0) {
    344         migration->pending_bytes = 0;
    345         return ret;
    346     }
    347 
    348     migration->pending_bytes = pending_bytes;
    349     trace_vfio_update_pending(vbasedev->name, pending_bytes);
    350     return 0;
    351 }
    352 
    353 static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
    354 {
    355     VFIODevice *vbasedev = opaque;
    356 
    357     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE);
    358 
    359     if (vbasedev->ops && vbasedev->ops->vfio_save_config) {
    360         vbasedev->ops->vfio_save_config(vbasedev, f);
    361     }
    362 
    363     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    364 
    365     trace_vfio_save_device_config_state(vbasedev->name);
    366 
    367     return qemu_file_get_error(f);
    368 }
    369 
    370 static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
    371 {
    372     VFIODevice *vbasedev = opaque;
    373     uint64_t data;
    374 
    375     if (vbasedev->ops && vbasedev->ops->vfio_load_config) {
    376         int ret;
    377 
    378         ret = vbasedev->ops->vfio_load_config(vbasedev, f);
    379         if (ret) {
    380             error_report("%s: Failed to load device config space",
    381                          vbasedev->name);
    382             return ret;
    383         }
    384     }
    385 
    386     data = qemu_get_be64(f);
    387     if (data != VFIO_MIG_FLAG_END_OF_STATE) {
    388         error_report("%s: Failed loading device config space, "
    389                      "end flag incorrect 0x%"PRIx64, vbasedev->name, data);
    390         return -EINVAL;
    391     }
    392 
    393     trace_vfio_load_device_config_state(vbasedev->name);
    394     return qemu_file_get_error(f);
    395 }
    396 
    397 static void vfio_migration_cleanup(VFIODevice *vbasedev)
    398 {
    399     VFIOMigration *migration = vbasedev->migration;
    400 
    401     if (migration->region.mmaps) {
    402         vfio_region_unmap(&migration->region);
    403     }
    404 }
    405 
    406 /* ---------------------------------------------------------------------- */
    407 
    408 static int vfio_save_setup(QEMUFile *f, void *opaque)
    409 {
    410     VFIODevice *vbasedev = opaque;
    411     VFIOMigration *migration = vbasedev->migration;
    412     int ret;
    413 
    414     trace_vfio_save_setup(vbasedev->name);
    415 
    416     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
    417 
    418     if (migration->region.mmaps) {
    419         /*
    420          * Calling vfio_region_mmap() from migration thread. Memory API called
    421          * from this function require locking the iothread when called from
    422          * outside the main loop thread.
    423          */
    424         qemu_mutex_lock_iothread();
    425         ret = vfio_region_mmap(&migration->region);
    426         qemu_mutex_unlock_iothread();
    427         if (ret) {
    428             error_report("%s: Failed to mmap VFIO migration region: %s",
    429                          vbasedev->name, strerror(-ret));
    430             error_report("%s: Falling back to slow path", vbasedev->name);
    431         }
    432     }
    433 
    434     ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
    435                                    VFIO_DEVICE_STATE_V1_SAVING);
    436     if (ret) {
    437         error_report("%s: Failed to set state SAVING", vbasedev->name);
    438         return ret;
    439     }
    440 
    441     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    442 
    443     ret = qemu_file_get_error(f);
    444     if (ret) {
    445         return ret;
    446     }
    447 
    448     return 0;
    449 }
    450 
    451 static void vfio_save_cleanup(void *opaque)
    452 {
    453     VFIODevice *vbasedev = opaque;
    454 
    455     vfio_migration_cleanup(vbasedev);
    456     trace_vfio_save_cleanup(vbasedev->name);
    457 }
    458 
    459 static void vfio_save_pending(QEMUFile *f, void *opaque,
    460                               uint64_t threshold_size,
    461                               uint64_t *res_precopy_only,
    462                               uint64_t *res_compatible,
    463                               uint64_t *res_postcopy_only)
    464 {
    465     VFIODevice *vbasedev = opaque;
    466     VFIOMigration *migration = vbasedev->migration;
    467     int ret;
    468 
    469     ret = vfio_update_pending(vbasedev);
    470     if (ret) {
    471         return;
    472     }
    473 
    474     *res_precopy_only += migration->pending_bytes;
    475 
    476     trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
    477                             *res_postcopy_only, *res_compatible);
    478 }
    479 
    480 static int vfio_save_iterate(QEMUFile *f, void *opaque)
    481 {
    482     VFIODevice *vbasedev = opaque;
    483     VFIOMigration *migration = vbasedev->migration;
    484     uint64_t data_size;
    485     int ret;
    486 
    487     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
    488 
    489     if (migration->pending_bytes == 0) {
    490         ret = vfio_update_pending(vbasedev);
    491         if (ret) {
    492             return ret;
    493         }
    494 
    495         if (migration->pending_bytes == 0) {
    496             qemu_put_be64(f, 0);
    497             qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    498             /* indicates data finished, goto complete phase */
    499             return 1;
    500         }
    501     }
    502 
    503     ret = vfio_save_buffer(f, vbasedev, &data_size);
    504     if (ret) {
    505         error_report("%s: vfio_save_buffer failed %s", vbasedev->name,
    506                      strerror(errno));
    507         return ret;
    508     }
    509 
    510     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    511 
    512     ret = qemu_file_get_error(f);
    513     if (ret) {
    514         return ret;
    515     }
    516 
    517     /*
    518      * Reset pending_bytes as .save_live_pending is not called during savevm or
    519      * snapshot case, in such case vfio_update_pending() at the start of this
    520      * function updates pending_bytes.
    521      */
    522     migration->pending_bytes = 0;
    523     trace_vfio_save_iterate(vbasedev->name, data_size);
    524     return 0;
    525 }
    526 
    527 static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
    528 {
    529     VFIODevice *vbasedev = opaque;
    530     VFIOMigration *migration = vbasedev->migration;
    531     uint64_t data_size;
    532     int ret;
    533 
    534     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_RUNNING,
    535                                    VFIO_DEVICE_STATE_V1_SAVING);
    536     if (ret) {
    537         error_report("%s: Failed to set state STOP and SAVING",
    538                      vbasedev->name);
    539         return ret;
    540     }
    541 
    542     ret = vfio_update_pending(vbasedev);
    543     if (ret) {
    544         return ret;
    545     }
    546 
    547     while (migration->pending_bytes > 0) {
    548         qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE);
    549         ret = vfio_save_buffer(f, vbasedev, &data_size);
    550         if (ret < 0) {
    551             error_report("%s: Failed to save buffer", vbasedev->name);
    552             return ret;
    553         }
    554 
    555         if (data_size == 0) {
    556             break;
    557         }
    558 
    559         ret = vfio_update_pending(vbasedev);
    560         if (ret) {
    561             return ret;
    562         }
    563     }
    564 
    565     qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
    566 
    567     ret = qemu_file_get_error(f);
    568     if (ret) {
    569         return ret;
    570     }
    571 
    572     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_V1_SAVING, 0);
    573     if (ret) {
    574         error_report("%s: Failed to set state STOPPED", vbasedev->name);
    575         return ret;
    576     }
    577 
    578     trace_vfio_save_complete_precopy(vbasedev->name);
    579     return ret;
    580 }
    581 
    582 static void vfio_save_state(QEMUFile *f, void *opaque)
    583 {
    584     VFIODevice *vbasedev = opaque;
    585     int ret;
    586 
    587     ret = vfio_save_device_config_state(f, opaque);
    588     if (ret) {
    589         error_report("%s: Failed to save device config space",
    590                      vbasedev->name);
    591         qemu_file_set_error(f, ret);
    592     }
    593 }
    594 
    595 static int vfio_load_setup(QEMUFile *f, void *opaque)
    596 {
    597     VFIODevice *vbasedev = opaque;
    598     VFIOMigration *migration = vbasedev->migration;
    599     int ret = 0;
    600 
    601     if (migration->region.mmaps) {
    602         ret = vfio_region_mmap(&migration->region);
    603         if (ret) {
    604             error_report("%s: Failed to mmap VFIO migration region %d: %s",
    605                          vbasedev->name, migration->region.nr,
    606                          strerror(-ret));
    607             error_report("%s: Falling back to slow path", vbasedev->name);
    608         }
    609     }
    610 
    611     ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK,
    612                                    VFIO_DEVICE_STATE_V1_RESUMING);
    613     if (ret) {
    614         error_report("%s: Failed to set state RESUMING", vbasedev->name);
    615         if (migration->region.mmaps) {
    616             vfio_region_unmap(&migration->region);
    617         }
    618     }
    619     return ret;
    620 }
    621 
    622 static int vfio_load_cleanup(void *opaque)
    623 {
    624     VFIODevice *vbasedev = opaque;
    625 
    626     vfio_migration_cleanup(vbasedev);
    627     trace_vfio_load_cleanup(vbasedev->name);
    628     return 0;
    629 }
    630 
    631 static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
    632 {
    633     VFIODevice *vbasedev = opaque;
    634     int ret = 0;
    635     uint64_t data;
    636 
    637     data = qemu_get_be64(f);
    638     while (data != VFIO_MIG_FLAG_END_OF_STATE) {
    639 
    640         trace_vfio_load_state(vbasedev->name, data);
    641 
    642         switch (data) {
    643         case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
    644         {
    645             return vfio_load_device_config_state(f, opaque);
    646         }
    647         case VFIO_MIG_FLAG_DEV_SETUP_STATE:
    648         {
    649             data = qemu_get_be64(f);
    650             if (data == VFIO_MIG_FLAG_END_OF_STATE) {
    651                 return ret;
    652             } else {
    653                 error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64,
    654                              vbasedev->name, data);
    655                 return -EINVAL;
    656             }
    657             break;
    658         }
    659         case VFIO_MIG_FLAG_DEV_DATA_STATE:
    660         {
    661             uint64_t data_size = qemu_get_be64(f);
    662 
    663             if (data_size) {
    664                 ret = vfio_load_buffer(f, vbasedev, data_size);
    665                 if (ret < 0) {
    666                     return ret;
    667                 }
    668             }
    669             break;
    670         }
    671         default:
    672             error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
    673             return -EINVAL;
    674         }
    675 
    676         data = qemu_get_be64(f);
    677         ret = qemu_file_get_error(f);
    678         if (ret) {
    679             return ret;
    680         }
    681     }
    682     return ret;
    683 }
    684 
/*
 * Save/load callbacks for a VFIO device. vfio_migration_init() passes this
 * table to register_savevm_live() together with the device as opaque data.
 */
static SaveVMHandlers savevm_vfio_handlers = {
    .save_setup = vfio_save_setup,
    .save_cleanup = vfio_save_cleanup,
    .save_live_pending = vfio_save_pending,
    .save_live_iterate = vfio_save_iterate,
    .save_live_complete_precopy = vfio_save_complete_precopy,
    .save_state = vfio_save_state,
    .load_setup = vfio_load_setup,
    .load_cleanup = vfio_load_cleanup,
    .load_state = vfio_load_state,
};
    696 
    697 /* ---------------------------------------------------------------------- */
    698 
    699 static void vfio_vmstate_change(void *opaque, bool running, RunState state)
    700 {
    701     VFIODevice *vbasedev = opaque;
    702     VFIOMigration *migration = vbasedev->migration;
    703     uint32_t value, mask;
    704     int ret;
    705 
    706     if (vbasedev->migration->vm_running == running) {
    707         return;
    708     }
    709 
    710     if (running) {
    711         /*
    712          * Here device state can have one of _SAVING, _RESUMING or _STOP bit.
    713          * Transition from _SAVING to _RUNNING can happen if there is migration
    714          * failure, in that case clear _SAVING bit.
    715          * Transition from _RESUMING to _RUNNING occurs during resuming
    716          * phase, in that case clear _RESUMING bit.
    717          * In both the above cases, set _RUNNING bit.
    718          */
    719         mask = ~VFIO_DEVICE_STATE_MASK;
    720         value = VFIO_DEVICE_STATE_V1_RUNNING;
    721     } else {
    722         /*
    723          * Here device state could be either _RUNNING or _SAVING|_RUNNING. Reset
    724          * _RUNNING bit
    725          */
    726         mask = ~VFIO_DEVICE_STATE_V1_RUNNING;
    727 
    728         /*
    729          * When VM state transition to stop for savevm command, device should
    730          * start saving data.
    731          */
    732         if (state == RUN_STATE_SAVE_VM) {
    733             value = VFIO_DEVICE_STATE_V1_SAVING;
    734         } else {
    735             value = 0;
    736         }
    737     }
    738 
    739     ret = vfio_migration_set_state(vbasedev, mask, value);
    740     if (ret) {
    741         /*
    742          * Migration should be aborted in this case, but vm_state_notify()
    743          * currently does not support reporting failures.
    744          */
    745         error_report("%s: Failed to set device state 0x%x", vbasedev->name,
    746                      (migration->device_state & mask) | value);
    747         qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
    748     }
    749     vbasedev->migration->vm_running = running;
    750     trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
    751             (migration->device_state & mask) | value);
    752 }
    753 
    754 static void vfio_migration_state_notifier(Notifier *notifier, void *data)
    755 {
    756     MigrationState *s = data;
    757     VFIOMigration *migration = container_of(notifier, VFIOMigration,
    758                                             migration_state);
    759     VFIODevice *vbasedev = migration->vbasedev;
    760     int ret;
    761 
    762     trace_vfio_migration_state_notifier(vbasedev->name,
    763                                         MigrationStatus_str(s->state));
    764 
    765     switch (s->state) {
    766     case MIGRATION_STATUS_CANCELLING:
    767     case MIGRATION_STATUS_CANCELLED:
    768     case MIGRATION_STATUS_FAILED:
    769         bytes_transferred = 0;
    770         ret = vfio_migration_set_state(vbasedev,
    771                                        ~(VFIO_DEVICE_STATE_V1_SAVING |
    772                                          VFIO_DEVICE_STATE_V1_RESUMING),
    773                                        VFIO_DEVICE_STATE_V1_RUNNING);
    774         if (ret) {
    775             error_report("%s: Failed to set state RUNNING", vbasedev->name);
    776         }
    777     }
    778 }
    779 
    780 static void vfio_migration_exit(VFIODevice *vbasedev)
    781 {
    782     VFIOMigration *migration = vbasedev->migration;
    783 
    784     vfio_region_exit(&migration->region);
    785     vfio_region_finalize(&migration->region);
    786     g_free(vbasedev->migration);
    787     vbasedev->migration = NULL;
    788 }
    789 
    790 static int vfio_migration_init(VFIODevice *vbasedev,
    791                                struct vfio_region_info *info)
    792 {
    793     int ret;
    794     Object *obj;
    795     VFIOMigration *migration;
    796     char id[256] = "";
    797     g_autofree char *path = NULL, *oid = NULL;
    798 
    799     if (!vbasedev->ops->vfio_get_object) {
    800         return -EINVAL;
    801     }
    802 
    803     obj = vbasedev->ops->vfio_get_object(vbasedev);
    804     if (!obj) {
    805         return -EINVAL;
    806     }
    807 
    808     vbasedev->migration = g_new0(VFIOMigration, 1);
    809     vbasedev->migration->device_state = VFIO_DEVICE_STATE_V1_RUNNING;
    810     vbasedev->migration->vm_running = runstate_is_running();
    811 
    812     ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region,
    813                             info->index, "migration");
    814     if (ret) {
    815         error_report("%s: Failed to setup VFIO migration region %d: %s",
    816                      vbasedev->name, info->index, strerror(-ret));
    817         goto err;
    818     }
    819 
    820     if (!vbasedev->migration->region.size) {
    821         error_report("%s: Invalid zero-sized VFIO migration region %d",
    822                      vbasedev->name, info->index);
    823         ret = -EINVAL;
    824         goto err;
    825     }
    826 
    827     migration = vbasedev->migration;
    828     migration->vbasedev = vbasedev;
    829 
    830     oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
    831     if (oid) {
    832         path = g_strdup_printf("%s/vfio", oid);
    833     } else {
    834         path = g_strdup("vfio");
    835     }
    836     strpadcpy(id, sizeof(id), path, '\0');
    837 
    838     register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
    839                          vbasedev);
    840 
    841     migration->vm_state = qdev_add_vm_change_state_handler(vbasedev->dev,
    842                                                            vfio_vmstate_change,
    843                                                            vbasedev);
    844     migration->migration_state.notify = vfio_migration_state_notifier;
    845     add_migration_state_change_notifier(&migration->migration_state);
    846     return 0;
    847 
    848 err:
    849     vfio_migration_exit(vbasedev);
    850     return ret;
    851 }
    852 
    853 /* ---------------------------------------------------------------------- */
    854 
    855 int64_t vfio_mig_bytes_transferred(void)
    856 {
    857     return bytes_transferred;
    858 }
    859 
    860 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
    861 {
    862     VFIOContainer *container = vbasedev->group->container;
    863     struct vfio_region_info *info = NULL;
    864     int ret = -ENOTSUP;
    865 
    866     if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
    867         goto add_blocker;
    868     }
    869 
    870     ret = vfio_get_dev_region_info(vbasedev,
    871                                    VFIO_REGION_TYPE_MIGRATION_DEPRECATED,
    872                                    VFIO_REGION_SUBTYPE_MIGRATION_DEPRECATED,
    873                                    &info);
    874     if (ret) {
    875         goto add_blocker;
    876     }
    877 
    878     ret = vfio_migration_init(vbasedev, info);
    879     if (ret) {
    880         goto add_blocker;
    881     }
    882 
    883     trace_vfio_migration_probe(vbasedev->name, info->index);
    884     g_free(info);
    885     return 0;
    886 
    887 add_blocker:
    888     error_setg(&vbasedev->migration_blocker,
    889                "VFIO device doesn't support migration");
    890     g_free(info);
    891 
    892     ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
    893     if (ret < 0) {
    894         error_free(vbasedev->migration_blocker);
    895         vbasedev->migration_blocker = NULL;
    896     }
    897     return ret;
    898 }
    899 
    900 void vfio_migration_finalize(VFIODevice *vbasedev)
    901 {
    902     if (vbasedev->migration) {
    903         VFIOMigration *migration = vbasedev->migration;
    904 
    905         remove_migration_state_change_notifier(&migration->migration_state);
    906         qemu_del_vm_change_state_handler(migration->vm_state);
    907         unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev);
    908         vfio_migration_exit(vbasedev);
    909     }
    910 
    911     if (vbasedev->migration_blocker) {
    912         migrate_del_blocker(vbasedev->migration_blocker);
    913         error_free(vbasedev->migration_blocker);
    914         vbasedev->migration_blocker = NULL;
    915     }
    916 }