qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

qcow2-snapshot.c (34517B)


      1 /*
      2  * Block driver for the QCOW version 2 format
      3  *
      4  * Copyright (c) 2004-2006 Fabrice Bellard
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22  * THE SOFTWARE.
     23  */
     24 
     25 #include "qemu/osdep.h"
     26 #include "sysemu/block-backend.h"
     27 #include "qapi/error.h"
     28 #include "qcow2.h"
     29 #include "qemu/bswap.h"
     30 #include "qemu/error-report.h"
     31 #include "qemu/cutils.h"
     32 #include "qemu/memalign.h"
     33 
     34 static void qcow2_free_single_snapshot(BlockDriverState *bs, int i)
     35 {
     36     BDRVQcow2State *s = bs->opaque;
     37 
     38     assert(i >= 0 && i < s->nb_snapshots);
     39     g_free(s->snapshots[i].name);
     40     g_free(s->snapshots[i].id_str);
     41     g_free(s->snapshots[i].unknown_extra_data);
     42     memset(&s->snapshots[i], 0, sizeof(s->snapshots[i]));
     43 }
     44 
     45 void qcow2_free_snapshots(BlockDriverState *bs)
     46 {
     47     BDRVQcow2State *s = bs->opaque;
     48     int i;
     49 
     50     for(i = 0; i < s->nb_snapshots; i++) {
     51         qcow2_free_single_snapshot(bs, i);
     52     }
     53     g_free(s->snapshots);
     54     s->snapshots = NULL;
     55     s->nb_snapshots = 0;
     56 }
     57 
     58 /*
         * Read the on-disk snapshot table (s->nb_snapshots entries starting at
         * s->snapshots_offset) into a newly allocated s->snapshots array and
         * set s->snapshots_size to the number of bytes the table covers.
         *
         * Returns 0 on success.  On failure, sets @errp, releases everything
         * read so far through qcow2_free_snapshots() (which also resets
         * s->nb_snapshots to 0) and returns a negative errno value.
         *
         * @nb_clusters_reduced and @extra_data_dropped are only dereferenced on
         * the @repair paths, so they may be NULL when @repair is false.
         *
     59  * If @repair is true, try to repair a broken snapshot table instead
     60  * of just returning an error:
     61  *
     62  * - If the snapshot table was too long, set *nb_clusters_reduced to
     63  *   the number of snapshots removed off the end.
     64  *   The caller will update the on-disk nb_snapshots accordingly;
     65  *   this leaks clusters, but is safe.
     66  *   (The on-disk information must be updated before
     67  *   qcow2_check_refcounts(), because that function relies on
     68  *   s->nb_snapshots to reflect the on-disk value.)
     69  *
     70  * - If there were snapshots with too much extra metadata, increment
     71  *   *extra_data_dropped for each.
     72  *   This requires the caller to eventually rewrite the whole snapshot
     73  *   table, which requires cluster allocation.  Therefore, this should
     74  *   be done only after qcow2_check_refcounts() made sure the refcount
     75  *   structures are valid.
     76  *   (In the meantime, the image is still valid because
     77  *   qcow2_check_refcounts() does not do anything with snapshots'
     78  *   extra data.)
     79  */
     80 static int qcow2_do_read_snapshots(BlockDriverState *bs, bool repair,
     81                                    int *nb_clusters_reduced,
     82                                    int *extra_data_dropped,
     83                                    Error **errp)
     84 {
     85     BDRVQcow2State *s = bs->opaque;
     86     QCowSnapshotHeader h;
     87     QCowSnapshotExtraData extra;
     88     QCowSnapshot *sn;
     89     int i, id_str_size, name_size;
     90     int64_t offset, pre_sn_offset;
     91     uint64_t table_length = 0;
     92     int ret;
     93 
     94     if (!s->nb_snapshots) {
     95         s->snapshots = NULL;
     96         s->snapshots_size = 0;
     97         return 0;
     98     }
     99 
    100     offset = s->snapshots_offset;
    101     s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
    102 
    103     for(i = 0; i < s->nb_snapshots; i++) {
    104         bool truncate_unknown_extra_data = false;
    105 
                /* Remembered so an overhanging entry can be dropped again below */
    106         pre_sn_offset = offset;
    107         table_length = ROUND_UP(table_length, 8);
    108 
    109         /* Read statically sized part of the snapshot header */
    110         offset = ROUND_UP(offset, 8);
    111         ret = bdrv_pread(bs->file, offset, sizeof(h), &h, 0);
    112         if (ret < 0) {
    113             error_setg_errno(errp, -ret, "Failed to read snapshot table");
    114             goto fail;
    115         }
    116 
    117         offset += sizeof(h);
    118         sn = s->snapshots + i;
    119         sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
    120         sn->l1_size = be32_to_cpu(h.l1_size);
    121         sn->vm_state_size = be32_to_cpu(h.vm_state_size);
    122         sn->date_sec = be32_to_cpu(h.date_sec);
    123         sn->date_nsec = be32_to_cpu(h.date_nsec);
    124         sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
    125         sn->extra_data_size = be32_to_cpu(h.extra_data_size);
    126 
    127         id_str_size = be16_to_cpu(h.id_str_size);
    128         name_size = be16_to_cpu(h.name_size);
    129 
    130         if (sn->extra_data_size > QCOW_MAX_SNAPSHOT_EXTRA_DATA) {
    131             if (!repair) {
    132                 ret = -EFBIG;
    133                 error_setg(errp, "Too much extra metadata in snapshot table "
    134                            "entry %i", i);
    135                 error_append_hint(errp, "You can force-remove this extra "
    136                                   "metadata with qemu-img check -r all\n");
    137                 goto fail;
    138             }
    139 
    140             fprintf(stderr, "Discarding too much extra metadata in snapshot "
    141                     "table entry %i (%" PRIu32 " > %u)\n",
    142                     i, sn->extra_data_size, QCOW_MAX_SNAPSHOT_EXTRA_DATA);
    143 
    144             (*extra_data_dropped)++;
    145             truncate_unknown_extra_data = true;
    146         }
    147 
    148         /* Read known extra data */
    149         ret = bdrv_pread(bs->file, offset,
    150                          MIN(sizeof(extra), sn->extra_data_size), &extra, 0);
    151         if (ret < 0) {
    152             error_setg_errno(errp, -ret, "Failed to read snapshot table");
    153             goto fail;
    154         }
    155         offset += MIN(sizeof(extra), sn->extra_data_size);
    156 
                /*
                 * Each known field is only used if the on-disk extra data is long
                 * enough to contain it; otherwise a fallback value applies.
                 */
    157         if (sn->extra_data_size >= endof(QCowSnapshotExtraData,
    158                                          vm_state_size_large)) {
    159             sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
    160         }
    161 
    162         if (sn->extra_data_size >= endof(QCowSnapshotExtraData, disk_size)) {
    163             sn->disk_size = be64_to_cpu(extra.disk_size);
    164         } else {
    165             sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    166         }
    167 
    168         if (sn->extra_data_size >= endof(QCowSnapshotExtraData, icount)) {
    169             sn->icount = be64_to_cpu(extra.icount);
    170         } else {
    171             sn->icount = -1ULL;
    172         }
    173 
    174         if (sn->extra_data_size > sizeof(extra)) {
    175             uint64_t extra_data_end;
    176             size_t unknown_extra_data_size;
    177 
                    /*
                     * Computed from the on-disk size, before any truncation below,
                     * so that offset ends up past all extra data stored in the
                     * image even when only part of it is kept in memory.
                     */
    178             extra_data_end = offset + sn->extra_data_size - sizeof(extra);
    179 
    180             if (truncate_unknown_extra_data) {
    181                 sn->extra_data_size = QCOW_MAX_SNAPSHOT_EXTRA_DATA;
    182             }
    183 
    184             /* Store unknown extra data */
    185             unknown_extra_data_size = sn->extra_data_size - sizeof(extra);
    186             sn->unknown_extra_data = g_malloc(unknown_extra_data_size);
    187             ret = bdrv_pread(bs->file, offset, unknown_extra_data_size,
    188                              sn->unknown_extra_data, 0);
    189             if (ret < 0) {
    190                 error_setg_errno(errp, -ret,
    191                                  "Failed to read snapshot table");
    192                 goto fail;
    193             }
    194             offset = extra_data_end;
    195         }
    196 
    197         /* Read snapshot ID */
    198         sn->id_str = g_malloc(id_str_size + 1);
    199         ret = bdrv_pread(bs->file, offset, id_str_size, sn->id_str, 0);
    200         if (ret < 0) {
    201             error_setg_errno(errp, -ret, "Failed to read snapshot table");
    202             goto fail;
    203         }
    204         offset += id_str_size;
    205         sn->id_str[id_str_size] = '\0';
    206 
    207         /* Read snapshot name */
    208         sn->name = g_malloc(name_size + 1);
    209         ret = bdrv_pread(bs->file, offset, name_size, sn->name, 0);
    210         if (ret < 0) {
    211             error_setg_errno(errp, -ret, "Failed to read snapshot table");
    212             goto fail;
    213         }
    214         offset += name_size;
    215         sn->name[name_size] = '\0';
    216 
    217         /* Note that the extra data may have been truncated */
    218         table_length += sizeof(h) + sn->extra_data_size + id_str_size +
    219                         name_size;
                /*
                 * Only checked without @repair: truncated extra data makes
                 * table_length smaller than the distance covered on disk.
                 */
    220         if (!repair) {
    221             assert(table_length == offset - s->snapshots_offset);
    222         }
    223 
    224         if (table_length > QCOW_MAX_SNAPSHOTS_SIZE ||
    225             offset - s->snapshots_offset > INT_MAX)
    226         {
    227             if (!repair) {
    228                 ret = -EFBIG;
    229                 error_setg(errp, "Snapshot table is too big");
    230                 error_append_hint(errp, "You can force-remove all %u "
    231                                   "overhanging snapshots with qemu-img check "
    232                                   "-r all\n", s->nb_snapshots - i);
    233                 goto fail;
    234             }
    235 
    236             fprintf(stderr, "Discarding %u overhanging snapshots (snapshot "
    237                     "table is too big)\n", s->nb_snapshots - i);
    238 
    239             *nb_clusters_reduced += (s->nb_snapshots - i);
    240 
    241             /* Discard current snapshot also */
    242             qcow2_free_single_snapshot(bs, i);
    243 
    244             /*
    245              * This leaks all the rest of the snapshot table and the
    246              * snapshots' clusters, but we run in check -r all mode,
    247              * so qcow2_check_refcounts() will take care of it.
    248              */
    249             s->nb_snapshots = i;
                    /* Make s->snapshots_size end before the discarded entry */
    250             offset = pre_sn_offset;
    251             break;
    252         }
    253     }
    254 
    255     assert(offset - s->snapshots_offset <= INT_MAX);
    256     s->snapshots_size = offset - s->snapshots_offset;
    257     return 0;
    258 
    259 fail:
    260     qcow2_free_snapshots(bs);
    261     return ret;
    262 }
    263 
    264 int qcow2_read_snapshots(BlockDriverState *bs, Error **errp)
    265 {
    266     return qcow2_do_read_snapshots(bs, false, NULL, NULL, errp);
    267 }
    268 
    269 /*
         * Write the in-memory snapshot list (s->snapshots) to newly allocated
         * clusters, then update nb_snapshots/snapshots_offset in the image header
         * to point to the new table and free the clusters of the old table.
         *
         * Returns 0 on success.  On failure, returns a negative errno value after
         * freeing the newly allocated clusters (if any); the in-memory
         * s->snapshots_offset and s->snapshots_size are left unchanged.
         */
    270 int qcow2_write_snapshots(BlockDriverState *bs)
    271 {
    272     BDRVQcow2State *s = bs->opaque;
    273     QCowSnapshot *sn;
    274     QCowSnapshotHeader h;
    275     QCowSnapshotExtraData extra;
    276     int i, name_size, id_str_size, snapshots_size;
    277     struct {
    278         uint32_t nb_snapshots;
    279         uint64_t snapshots_offset;
    280     } QEMU_PACKED header_data;
    281     int64_t offset, snapshots_offset = 0;
    282     int ret;
    283 
    284     /* compute the size of the snapshots */
    285     offset = 0;
    286     for(i = 0; i < s->nb_snapshots; i++) {
    287         sn = s->snapshots + i;
    288         offset = ROUND_UP(offset, 8);
    289         offset += sizeof(h);
                /* The known extra data is always written in full, see below */
    290         offset += MAX(sizeof(extra), sn->extra_data_size);
    291         offset += strlen(sn->id_str);
    292         offset += strlen(sn->name);
    293 
    294         if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
    295             ret = -EFBIG;
    296             goto fail;
    297         }
    298     }
    299 
    300     assert(offset <= INT_MAX);
    301     snapshots_size = offset;
    302 
    303     /* Allocate space for the new snapshot list */
    304     snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
    305     offset = snapshots_offset;
    306     if (offset < 0) {
    307         ret = offset;
    308         goto fail;
    309     }
    310     ret = bdrv_flush(bs);
    311     if (ret < 0) {
    312         goto fail;
    313     }
    314 
    315     /* The snapshot list position has not yet been updated, so these clusters
    316      * must indeed be completely free */
    317     ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
    318     if (ret < 0) {
    319         goto fail;
    320     }
    321 
    322 
    323     /* Write all snapshots to the new list */
    324     for(i = 0; i < s->nb_snapshots; i++) {
    325         sn = s->snapshots + i;
    326         memset(&h, 0, sizeof(h));
    327         h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
    328         h.l1_size = cpu_to_be32(sn->l1_size);
    329         /* If it doesn't fit in 32 bit, older implementations should treat it
    330          * as a disk-only snapshot rather than truncate the VM state */
                /* (h was zeroed above, so the field stays 0 in that case) */
    331         if (sn->vm_state_size <= 0xffffffff) {
    332             h.vm_state_size = cpu_to_be32(sn->vm_state_size);
    333         }
    334         h.date_sec = cpu_to_be32(sn->date_sec);
    335         h.date_nsec = cpu_to_be32(sn->date_nsec);
    336         h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
    337         h.extra_data_size = cpu_to_be32(MAX(sizeof(extra),
    338                                             sn->extra_data_size));
    339 
    340         memset(&extra, 0, sizeof(extra));
    341         extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
    342         extra.disk_size = cpu_to_be64(sn->disk_size);
    343         extra.icount = cpu_to_be64(sn->icount);
    344 
    345         id_str_size = strlen(sn->id_str);
    346         name_size = strlen(sn->name);
                /* Both lengths are stored in 16-bit header fields */
    347         assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
    348         h.id_str_size = cpu_to_be16(id_str_size);
    349         h.name_size = cpu_to_be16(name_size);
    350         offset = ROUND_UP(offset, 8);
    351 
    352         ret = bdrv_pwrite(bs->file, offset, sizeof(h), &h, 0);
    353         if (ret < 0) {
    354             goto fail;
    355         }
    356         offset += sizeof(h);
    357 
    358         ret = bdrv_pwrite(bs->file, offset, sizeof(extra), &extra, 0);
    359         if (ret < 0) {
    360             goto fail;
    361         }
    362         offset += sizeof(extra);
    363 
    364         if (sn->extra_data_size > sizeof(extra)) {
    365             size_t unknown_extra_data_size =
    366                 sn->extra_data_size - sizeof(extra);
    367 
    368             /* qcow2_read_snapshots() ensures no unbounded allocation */
    369             assert(unknown_extra_data_size <= BDRV_REQUEST_MAX_BYTES);
    370             assert(sn->unknown_extra_data);
    371 
    372             ret = bdrv_pwrite(bs->file, offset, unknown_extra_data_size,
    373                               sn->unknown_extra_data, 0);
    374             if (ret < 0) {
    375                 goto fail;
    376             }
    377             offset += unknown_extra_data_size;
    378         }
    379 
    380         ret = bdrv_pwrite(bs->file, offset, id_str_size, sn->id_str, 0);
    381         if (ret < 0) {
    382             goto fail;
    383         }
    384         offset += id_str_size;
    385 
    386         ret = bdrv_pwrite(bs->file, offset, name_size, sn->name, 0);
    387         if (ret < 0) {
    388             goto fail;
    389         }
    390         offset += name_size;
    391     }
    392 
    393     /*
    394      * Update the header to point to the new snapshot table. This requires the
    395      * new table and its refcounts to be stable on disk.
    396      */
    397     ret = bdrv_flush(bs);
    398     if (ret < 0) {
    399         goto fail;
    400     }
    401 
            /*
             * header_data is written with a single request, which relies on the
             * two header fields being adjacent in QCowHeader.
             */
    402     QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
    403                       endof(QCowHeader, nb_snapshots));
    404 
    405     header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
    406     header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
    407 
    408     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
    409                            sizeof(header_data), &header_data, 0);
    410     if (ret < 0) {
    411         goto fail;
    412     }
    413 
    414     /* free the old snapshot table */
    415     qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
    416                         QCOW2_DISCARD_SNAPSHOT);
    417     s->snapshots_offset = snapshots_offset;
    418     s->snapshots_size = snapshots_size;
    419     return 0;
    420 
    421 fail:
            /*
             * Release the new table's clusters, unless none were allocated
             * (snapshots_offset is then still 0 or holds a negative error code).
             */
    422     if (snapshots_offset > 0) {
    423         qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
    424                             QCOW2_DISCARD_ALWAYS);
    425     }
    426     return ret;
    427 }
    428 
        /*
         * Re-read the snapshot table pointer (nb_snapshots and snapshots_offset)
         * from the image header, validate it and load the snapshot table as part
         * of an image check.
         *
         * With BDRV_FIX_ERRORS set in @fix, an oversized snapshot count or table
         * is cut down and the reduced count is written back to the image header
         * right away; dropped extra data and incomplete v3 entries are only
         * counted in result->corruptions here.
         *
         * Returns 0 on success, or a negative errno value on failure, in which
         * case result->check_errors has been incremented.
         */
    429 int coroutine_fn qcow2_check_read_snapshot_table(BlockDriverState *bs,
    430                                                  BdrvCheckResult *result,
    431                                                  BdrvCheckMode fix)
    432 {
    433     BDRVQcow2State *s = bs->opaque;
    434     Error *local_err = NULL;
    435     int nb_clusters_reduced = 0;
    436     int extra_data_dropped = 0;
    437     int ret;
    438     struct {
    439         uint32_t nb_snapshots;
    440         uint64_t snapshots_offset;
    441     } QEMU_PACKED snapshot_table_pointer;
    442 
    443     /* qcow2_do_open() discards this information in check mode */
    444     ret = bdrv_co_pread(bs->file, offsetof(QCowHeader, nb_snapshots),
    445                         sizeof(snapshot_table_pointer), &snapshot_table_pointer,
    446                         0);
    447     if (ret < 0) {
    448         result->check_errors++;
    449         fprintf(stderr, "ERROR failed to read the snapshot table pointer from "
    450                 "the image header: %s\n", strerror(-ret));
    451         return ret;
    452     }
    453 
    454     s->snapshots_offset = be64_to_cpu(snapshot_table_pointer.snapshots_offset);
    455     s->nb_snapshots = be32_to_cpu(snapshot_table_pointer.nb_snapshots);
    456 
            /* In repair mode, clamp the count so that validation below can pass */
    457     if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS && (fix & BDRV_FIX_ERRORS)) {
    458         fprintf(stderr, "Discarding %u overhanging snapshots\n",
    459                 s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
    460 
    461         nb_clusters_reduced += s->nb_snapshots - QCOW_MAX_SNAPSHOTS;
    462         s->nb_snapshots = QCOW_MAX_SNAPSHOTS;
    463     }
    464 
    465     ret = qcow2_validate_table(bs, s->snapshots_offset, s->nb_snapshots,
    466                                sizeof(QCowSnapshotHeader),
    467                                sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
    468                                "snapshot table", &local_err);
    469     if (ret < 0) {
    470         result->check_errors++;
    471         error_reportf_err(local_err, "ERROR ");
    472 
    473         if (s->nb_snapshots > QCOW_MAX_SNAPSHOTS) {
    474             fprintf(stderr, "You can force-remove all %u overhanging snapshots "
    475                     "with qemu-img check -r all\n",
    476                     s->nb_snapshots - QCOW_MAX_SNAPSHOTS);
    477         }
    478 
    479         /* We did not read the snapshot table, so invalidate this information */
    480         s->snapshots_offset = 0;
    481         s->nb_snapshots = 0;
    482 
    483         return ret;
    484     }
    485 
            /*
             * NOTE(review): s->lock is dropped around the table read, presumably
             * because the callee performs its own blocking I/O -- confirm.
             */
    486     qemu_co_mutex_unlock(&s->lock);
    487     ret = qcow2_do_read_snapshots(bs, fix & BDRV_FIX_ERRORS,
    488                                   &nb_clusters_reduced, &extra_data_dropped,
    489                                   &local_err);
    490     qemu_co_mutex_lock(&s->lock);
    491     if (ret < 0) {
    492         result->check_errors++;
    493         error_reportf_err(local_err,
    494                           "ERROR failed to read the snapshot table: ");
    495 
    496         /* We did not read the snapshot table, so invalidate this information */
    497         s->snapshots_offset = 0;
    498         s->nb_snapshots = 0;
    499 
    500         return ret;
    501     }
    502     result->corruptions += nb_clusters_reduced + extra_data_dropped;
    503 
    504     if (nb_clusters_reduced) {
    505         /*
    506          * Update image header now, because:
    507          * (1) qcow2_check_refcounts() relies on s->nb_snapshots to be
    508          *     the same as what the image header says,
    509          * (2) this leaks clusters, but qcow2_check_refcounts() will
    510          *     fix that.
    511          */
    512         assert(fix & BDRV_FIX_ERRORS);
    513 
    514         snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
    515         ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
    516                                   sizeof(snapshot_table_pointer.nb_snapshots),
    517                                   &snapshot_table_pointer.nb_snapshots, 0);
    518         if (ret < 0) {
    519             result->check_errors++;
    520             fprintf(stderr, "ERROR failed to update the snapshot count in the "
    521                     "image header: %s\n", strerror(-ret));
    522             return ret;
    523         }
    524 
                /* These were counted as corruptions above; they are fixed now */
    525         result->corruptions_fixed += nb_clusters_reduced;
    526         result->corruptions -= nb_clusters_reduced;
    527     }
    528 
    529     /*
    530      * All of v3 images' snapshot table entries need to have at least
    531      * 16 bytes of extra data.
    532      */
            /* Incomplete entries are only counted here; nothing is rewritten yet */
    533     if (s->qcow_version >= 3) {
    534         int i;
    535         for (i = 0; i < s->nb_snapshots; i++) {
    536             if (s->snapshots[i].extra_data_size <
    537                 sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
    538                 sizeof_field(QCowSnapshotExtraData, disk_size))
    539             {
    540                 result->corruptions++;
    541                 fprintf(stderr, "%s snapshot table entry %i is incomplete\n",
    542                         fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
    543             }
    544         }
    545     }
    546 
    547     return 0;
    548 }
    549 
    550 int coroutine_fn qcow2_check_fix_snapshot_table(BlockDriverState *bs,
    551                                                 BdrvCheckResult *result,
    552                                                 BdrvCheckMode fix)
    553 {
    554     BDRVQcow2State *s = bs->opaque;
    555     int ret;
    556 
    557     if (result->corruptions && (fix & BDRV_FIX_ERRORS)) {
    558         qemu_co_mutex_unlock(&s->lock);
    559         ret = qcow2_write_snapshots(bs);
    560         qemu_co_mutex_lock(&s->lock);
    561         if (ret < 0) {
    562             result->check_errors++;
    563             fprintf(stderr, "ERROR failed to update snapshot table: %s\n",
    564                     strerror(-ret));
    565             return ret;
    566         }
    567 
    568         result->corruptions_fixed += result->corruptions;
    569         result->corruptions = 0;
    570     }
    571 
    572     return 0;
    573 }
    574 
    575 static void find_new_snapshot_id(BlockDriverState *bs,
    576                                  char *id_str, int id_str_size)
    577 {
    578     BDRVQcow2State *s = bs->opaque;
    579     QCowSnapshot *sn;
    580     int i;
    581     unsigned long id, id_max = 0;
    582 
    583     for(i = 0; i < s->nb_snapshots; i++) {
    584         sn = s->snapshots + i;
    585         id = strtoul(sn->id_str, NULL, 10);
    586         if (id > id_max)
    587             id_max = id;
    588     }
    589     snprintf(id_str, id_str_size, "%lu", id_max + 1);
    590 }
    591 
    592 static int find_snapshot_by_id_and_name(BlockDriverState *bs,
    593                                         const char *id,
    594                                         const char *name)
    595 {
    596     BDRVQcow2State *s = bs->opaque;
    597     int i;
    598 
    599     if (id && name) {
    600         for (i = 0; i < s->nb_snapshots; i++) {
    601             if (!strcmp(s->snapshots[i].id_str, id) &&
    602                 !strcmp(s->snapshots[i].name, name)) {
    603                 return i;
    604             }
    605         }
    606     } else if (id) {
    607         for (i = 0; i < s->nb_snapshots; i++) {
    608             if (!strcmp(s->snapshots[i].id_str, id)) {
    609                 return i;
    610             }
    611         }
    612     } else if (name) {
    613         for (i = 0; i < s->nb_snapshots; i++) {
    614             if (!strcmp(s->snapshots[i].name, name)) {
    615                 return i;
    616             }
    617         }
    618     }
    619 
    620     return -1;
    621 }
    622 
    623 static int find_snapshot_by_id_or_name(BlockDriverState *bs,
    624                                        const char *id_or_name)
    625 {
    626     int ret;
    627 
    628     ret = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
    629     if (ret >= 0) {
    630         return ret;
    631     }
    632     return find_snapshot_by_id_and_name(bs, NULL, id_or_name);
    633 }
    634 
    635 /*
         * Create a new internal snapshot of the current image state, described by
         * @sn_info.
         *
         * A fresh ID is always generated and stored in sn_info->id_str, replacing
         * whatever the caller put there.
         *
         * Returns 0 on success, -EFBIG if the image already holds
         * QCOW_MAX_SNAPSHOTS snapshots, -ENOTSUP if has_data_file(bs) is true, or
         * another negative errno value if one of the steps fails.
         */
    636 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
    637 {
    638     BDRVQcow2State *s = bs->opaque;
    639     QCowSnapshot *new_snapshot_list = NULL;
    640     QCowSnapshot *old_snapshot_list = NULL;
    641     QCowSnapshot sn1, *sn = &sn1;
    642     int i, ret;
    643     uint64_t *l1_table = NULL;
    644     int64_t l1_table_offset;
    645 
    646     if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
    647         return -EFBIG;
    648     }
    649 
    650     if (has_data_file(bs)) {
    651         return -ENOTSUP;
    652     }
    653 
    654     memset(sn, 0, sizeof(*sn));
    655 
    656     /* Generate an ID */
    657     find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
    658 
    659     /* Populate sn with passed data */
    660     sn->id_str = g_strdup(sn_info->id_str);
    661     sn->name = g_strdup(sn_info->name);
    662 
    663     sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    664     sn->vm_state_size = sn_info->vm_state_size;
    665     sn->date_sec = sn_info->date_sec;
    666     sn->date_nsec = sn_info->date_nsec;
    667     sn->vm_clock_nsec = sn_info->vm_clock_nsec;
    668     sn->icount = sn_info->icount;
    669     sn->extra_data_size = sizeof(QCowSnapshotExtraData);
    670 
    671     /* Allocate the L1 table of the snapshot and copy the current one there. */
    672     l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * L1E_SIZE);
    673     if (l1_table_offset < 0) {
    674         ret = l1_table_offset;
    675         goto fail;
    676     }
    677 
    678     sn->l1_table_offset = l1_table_offset;
    679     sn->l1_size = s->l1_size;
    680 
    681     l1_table = g_try_new(uint64_t, s->l1_size);
    682     if (s->l1_size && l1_table == NULL) {
    683         ret = -ENOMEM;
    684         goto fail;
    685     }
    686 
            /* The on-disk copy of the L1 table is big-endian */
    687     for(i = 0; i < s->l1_size; i++) {
    688         l1_table[i] = cpu_to_be64(s->l1_table[i]);
    689     }
    690 
    691     ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
    692                                         s->l1_size * L1E_SIZE, false);
    693     if (ret < 0) {
    694         goto fail;
    695     }
    696 
    697     ret = bdrv_pwrite(bs->file, sn->l1_table_offset, s->l1_size * L1E_SIZE,
    698                       l1_table, 0);
    699     if (ret < 0) {
    700         goto fail;
    701     }
    702 
    703     g_free(l1_table);
    704     l1_table = NULL;
    705 
    706     /*
    707      * Increase the refcounts of all clusters and make sure everything is
    708      * stable on disk before updating the snapshot table to contain a pointer
    709      * to the new L1 table.
    710      */
    711     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
    712     if (ret < 0) {
    713         goto fail;
    714     }
    715 
    716     /* Append the new snapshot to the snapshot list */
    717     new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
    718     if (s->snapshots) {
    719         memcpy(new_snapshot_list, s->snapshots,
    720                s->nb_snapshots * sizeof(QCowSnapshot));
    721         old_snapshot_list = s->snapshots;
    722     }
    723     s->snapshots = new_snapshot_list;
            /* Shallow copy: the list entry shares sn's id_str and name pointers */
    724     s->snapshots[s->nb_snapshots++] = *sn;
    725 
    726     ret = qcow2_write_snapshots(bs);
    727     if (ret < 0) {
                /*
                 * Roll back to the old in-memory list.  Only the new array is
                 * freed here; sn's strings are released at the fail label.
                 */
    728         g_free(s->snapshots);
    729         s->snapshots = old_snapshot_list;
    730         s->nb_snapshots--;
    731         goto fail;
    732     }
    733 
    734     g_free(old_snapshot_list);
    735 
    736     /* The VM state isn't needed any more in the active L1 table; in fact, it
    737      * hurts by causing expensive COW for the next snapshot. */
    738     qcow2_cluster_discard(bs, qcow2_vm_state_offset(s),
    739                           ROUND_UP(sn->vm_state_size, s->cluster_size),
    740                           QCOW2_DISCARD_NEVER, false);
    741 
    742 #ifdef DEBUG_ALLOC
    743     {
    744       BdrvCheckResult result = {0};
    745       qcow2_check_refcounts(bs, &result, 0);
    746     }
    747 #endif
    748     return 0;
    749 
    750 fail:
            /*
             * NOTE(review): only heap memory is released here; nothing on this
             * path appears to free the clusters allocated for the snapshot's L1
             * table or to undo the refcount increase -- confirm this is intended.
             */
    751     g_free(sn->id_str);
    752     g_free(sn->name);
    753     g_free(l1_table);
    754 
    755     return ret;
    756 }
    757 
    758 /* copy the snapshot 'snapshot_name' into the current disk image */
    759 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
    760 {
    761     BDRVQcow2State *s = bs->opaque;
    762     QCowSnapshot *sn;
    763     Error *local_err = NULL;
    764     int i, snapshot_index;
    765     int cur_l1_bytes, sn_l1_bytes;
    766     int ret;
    767     uint64_t *sn_l1_table = NULL;
    768 
    769     if (has_data_file(bs)) {
    770         return -ENOTSUP;
    771     }
    772 
    773     /* Search the snapshot */
    774     snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
    775     if (snapshot_index < 0) {
    776         return -ENOENT;
    777     }
    778     sn = &s->snapshots[snapshot_index];
    779 
    780     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
    781                                L1E_SIZE, QCOW_MAX_L1_SIZE,
    782                                "Snapshot L1 table", &local_err);
    783     if (ret < 0) {
    784         error_report_err(local_err);
    785         goto fail;
    786     }
    787 
    788     if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
    789         BlockBackend *blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL,
    790                                             &local_err);
    791         if (!blk) {
    792             error_report_err(local_err);
    793             ret = -ENOTSUP;
    794             goto fail;
    795         }
    796 
    797         ret = blk_truncate(blk, sn->disk_size, true, PREALLOC_MODE_OFF, 0,
    798                            &local_err);
    799         blk_unref(blk);
    800         if (ret < 0) {
    801             error_report_err(local_err);
    802             goto fail;
    803         }
    804     }
    805 
    806     /*
    807      * Make sure that the current L1 table is big enough to contain the whole
    808      * L1 table of the snapshot. If the snapshot L1 table is smaller, the
    809      * current one must be padded with zeros.
    810      */
    811     ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
    812     if (ret < 0) {
    813         goto fail;
    814     }
    815 
    816     cur_l1_bytes = s->l1_size * L1E_SIZE;
    817     sn_l1_bytes = sn->l1_size * L1E_SIZE;
    818 
    819     /*
    820      * Copy the snapshot L1 table to the current L1 table.
    821      *
    822      * Before overwriting the old current L1 table on disk, make sure to
    823      * increase all refcounts for the clusters referenced by the new one.
    824      * Decrease the refcount referenced by the old one only when the L1
    825      * table is overwritten.
    826      */
    827     sn_l1_table = g_try_malloc0(cur_l1_bytes);
    828     if (cur_l1_bytes && sn_l1_table == NULL) {
    829         ret = -ENOMEM;
    830         goto fail;
    831     }
    832 
    833     ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_bytes, sn_l1_table,
    834                      0);
    835     if (ret < 0) {
    836         goto fail;
    837     }
    838 
    839     ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
    840                                          sn->l1_size, 1);
    841     if (ret < 0) {
    842         goto fail;
    843     }
    844 
    845     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
    846                                         s->l1_table_offset, cur_l1_bytes,
    847                                         false);
    848     if (ret < 0) {
    849         goto fail;
    850     }
    851 
    852     ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, cur_l1_bytes,
    853                            sn_l1_table, 0);
    854     if (ret < 0) {
    855         goto fail;
    856     }
    857 
    858     /*
    859      * Decrease refcount of clusters of current L1 table.
    860      *
    861      * At this point, the in-memory s->l1_table points to the old L1 table,
    862      * whereas on disk we already have the new one.
    863      *
    864      * qcow2_update_snapshot_refcount special cases the current L1 table to use
    865      * the in-memory data instead of really using the offset to load a new one,
    866      * which is why this works.
    867      */
    868     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
    869                                          s->l1_size, -1);
    870 
    871     /*
    872      * Now update the in-memory L1 table to be in sync with the on-disk one. We
    873      * need to do this even if updating refcounts failed.
    874      */
    875     for(i = 0;i < s->l1_size; i++) {
    876         s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
    877     }
    878 
    879     if (ret < 0) {
    880         goto fail;
    881     }
    882 
    883     g_free(sn_l1_table);
    884     sn_l1_table = NULL;
    885 
    886     /*
    887      * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
    888      * when we decreased the refcount of the old snapshot.
    889      */
    890     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    891     if (ret < 0) {
    892         goto fail;
    893     }
    894 
    895 #ifdef DEBUG_ALLOC
    896     {
    897         BdrvCheckResult result = {0};
    898         qcow2_check_refcounts(bs, &result, 0);
    899     }
    900 #endif
    901     return 0;
    902 
    903 fail:
    904     g_free(sn_l1_table);
    905     return ret;
    906 }
    907 
    908 int qcow2_snapshot_delete(BlockDriverState *bs,
    909                           const char *snapshot_id,
    910                           const char *name,
    911                           Error **errp)
    912 {
    913     BDRVQcow2State *s = bs->opaque;
    914     QCowSnapshot sn;
    915     int snapshot_index, ret;
    916 
    917     if (has_data_file(bs)) {
    918         return -ENOTSUP;
    919     }
    920 
    921     /* Search the snapshot */
    922     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
    923     if (snapshot_index < 0) {
    924         error_setg(errp, "Can't find the snapshot");
    925         return -ENOENT;
    926     }
    927     sn = s->snapshots[snapshot_index];
    928 
    929     ret = qcow2_validate_table(bs, sn.l1_table_offset, sn.l1_size,
    930                                L1E_SIZE, QCOW_MAX_L1_SIZE,
    931                                "Snapshot L1 table", errp);
    932     if (ret < 0) {
    933         return ret;
    934     }
    935 
    936     /* Remove it from the snapshot list */
    937     memmove(s->snapshots + snapshot_index,
    938             s->snapshots + snapshot_index + 1,
    939             (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
    940     s->nb_snapshots--;
    941     ret = qcow2_write_snapshots(bs);
    942     if (ret < 0) {
    943         error_setg_errno(errp, -ret,
    944                          "Failed to remove snapshot from snapshot list");
    945         return ret;
    946     }
    947 
    948     /*
    949      * The snapshot is now unused, clean up. If we fail after this point, we
    950      * won't recover but just leak clusters.
    951      */
    952     g_free(sn.unknown_extra_data);
    953     g_free(sn.id_str);
    954     g_free(sn.name);
    955 
    956     /*
    957      * Now decrease the refcounts of clusters referenced by the snapshot and
    958      * free the L1 table.
    959      */
    960     ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
    961                                          sn.l1_size, -1);
    962     if (ret < 0) {
    963         error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
    964         return ret;
    965     }
    966     qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * L1E_SIZE,
    967                         QCOW2_DISCARD_SNAPSHOT);
    968 
    969     /* must update the copied flag on the current cluster offsets */
    970     ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
    971     if (ret < 0) {
    972         error_setg_errno(errp, -ret,
    973                          "Failed to update snapshot status in disk");
    974         return ret;
    975     }
    976 
    977 #ifdef DEBUG_ALLOC
    978     {
    979         BdrvCheckResult result = {0};
    980         qcow2_check_refcounts(bs, &result, 0);
    981     }
    982 #endif
    983     return 0;
    984 }
    985 
    986 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
    987 {
    988     BDRVQcow2State *s = bs->opaque;
    989     QEMUSnapshotInfo *sn_tab, *sn_info;
    990     QCowSnapshot *sn;
    991     int i;
    992 
    993     if (has_data_file(bs)) {
    994         return -ENOTSUP;
    995     }
    996     if (!s->nb_snapshots) {
    997         *psn_tab = NULL;
    998         return s->nb_snapshots;
    999     }
   1000 
   1001     sn_tab = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
   1002     for(i = 0; i < s->nb_snapshots; i++) {
   1003         sn_info = sn_tab + i;
   1004         sn = s->snapshots + i;
   1005         pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
   1006                 sn->id_str);
   1007         pstrcpy(sn_info->name, sizeof(sn_info->name),
   1008                 sn->name);
   1009         sn_info->vm_state_size = sn->vm_state_size;
   1010         sn_info->date_sec = sn->date_sec;
   1011         sn_info->date_nsec = sn->date_nsec;
   1012         sn_info->vm_clock_nsec = sn->vm_clock_nsec;
   1013         sn_info->icount = sn->icount;
   1014     }
   1015     *psn_tab = sn_tab;
   1016     return s->nb_snapshots;
   1017 }
   1018 
   1019 int qcow2_snapshot_load_tmp(BlockDriverState *bs,
   1020                             const char *snapshot_id,
   1021                             const char *name,
   1022                             Error **errp)
   1023 {
   1024     int i, snapshot_index;
   1025     BDRVQcow2State *s = bs->opaque;
   1026     QCowSnapshot *sn;
   1027     uint64_t *new_l1_table;
   1028     int new_l1_bytes;
   1029     int ret;
   1030 
   1031     assert(bdrv_is_read_only(bs));
   1032 
   1033     /* Search the snapshot */
   1034     snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
   1035     if (snapshot_index < 0) {
   1036         error_setg(errp,
   1037                    "Can't find snapshot");
   1038         return -ENOENT;
   1039     }
   1040     sn = &s->snapshots[snapshot_index];
   1041 
   1042     /* Allocate and read in the snapshot's L1 table */
   1043     ret = qcow2_validate_table(bs, sn->l1_table_offset, sn->l1_size,
   1044                                L1E_SIZE, QCOW_MAX_L1_SIZE,
   1045                                "Snapshot L1 table", errp);
   1046     if (ret < 0) {
   1047         return ret;
   1048     }
   1049     new_l1_bytes = sn->l1_size * L1E_SIZE;
   1050     new_l1_table = qemu_try_blockalign(bs->file->bs, new_l1_bytes);
   1051     if (new_l1_table == NULL) {
   1052         return -ENOMEM;
   1053     }
   1054 
   1055     ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_bytes,
   1056                      new_l1_table, 0);
   1057     if (ret < 0) {
   1058         error_setg(errp, "Failed to read l1 table for snapshot");
   1059         qemu_vfree(new_l1_table);
   1060         return ret;
   1061     }
   1062 
   1063     /* Switch the L1 table */
   1064     qemu_vfree(s->l1_table);
   1065 
   1066     s->l1_size = sn->l1_size;
   1067     s->l1_table_offset = sn->l1_table_offset;
   1068     s->l1_table = new_l1_table;
   1069 
   1070     for(i = 0;i < s->l1_size; i++) {
   1071         be64_to_cpus(&s->l1_table[i]);
   1072     }
   1073 
   1074     return 0;
   1075 }