qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

vmdk.c (100769B)


      1 /*
      2  * Block driver for the VMDK format
      3  *
      4  * Copyright (c) 2004 Fabrice Bellard
      5  * Copyright (c) 2005 Filip Navara
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a copy
      8  * of this software and associated documentation files (the "Software"), to deal
      9  * in the Software without restriction, including without limitation the rights
     10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11  * copies of the Software, and to permit persons to whom the Software is
     12  * furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23  * THE SOFTWARE.
     24  */
     25 
     26 #include "qemu/osdep.h"
     27 #include "qapi/error.h"
     28 #include "block/block_int.h"
     29 #include "sysemu/block-backend.h"
     30 #include "qapi/qmp/qdict.h"
     31 #include "qapi/qmp/qerror.h"
     32 #include "qemu/error-report.h"
     33 #include "qemu/module.h"
     34 #include "qemu/option.h"
     35 #include "qemu/bswap.h"
     36 #include "qemu/memalign.h"
     37 #include "migration/blocker.h"
     38 #include "qemu/cutils.h"
     39 #include <zlib.h>
     40 
     41 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
     42 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
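/*
 * In other words, a VMDK3/COWD sparse extent begins with the ASCII bytes
 * 'C' 'O' 'W' 'D' on disk and a VMDK4 hosted sparse extent with
 * 'K' 'D' 'M' 'V'; vmdk_probe() below reads the first four bytes
 * big-endian and compares them against these constants.
 */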
     43 #define VMDK4_COMPRESSION_DEFLATE 1
     44 #define VMDK4_FLAG_NL_DETECT (1 << 0)
     45 #define VMDK4_FLAG_RGD (1 << 1)
     46 /* Zeroed-grain enable bit */
     47 #define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
     48 #define VMDK4_FLAG_COMPRESS (1 << 16)
     49 #define VMDK4_FLAG_MARKER (1 << 17)
     50 #define VMDK4_GD_AT_END 0xffffffffffffffffULL
     51 
     52 #define VMDK_EXTENT_MAX_SECTORS (1ULL << 32)
     53 
     54 #define VMDK_GTE_ZEROED 0x1
     55 
     56 /* VMDK internal error codes */
     57 #define VMDK_OK      0
     58 #define VMDK_ERROR   (-1)
     59 /* Cluster not allocated */
     60 #define VMDK_UNALLOC (-2)
     61 #define VMDK_ZEROED  (-3)
     62 
     63 #define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
     64 #define BLOCK_OPT_TOOLSVERSION "toolsversion"
     65 
     66 typedef struct {
     67     uint32_t version;
     68     uint32_t flags;
     69     uint32_t disk_sectors;
     70     uint32_t granularity;
     71     uint32_t l1dir_offset;
     72     uint32_t l1dir_size;
     73     uint32_t file_sectors;
     74     uint32_t cylinders;
     75     uint32_t heads;
     76     uint32_t sectors_per_track;
     77 } QEMU_PACKED VMDK3Header;
     78 
     79 typedef struct {
     80     uint32_t version;
     81     uint32_t flags;
     82     uint64_t capacity;
     83     uint64_t granularity;
     84     uint64_t desc_offset;
     85     uint64_t desc_size;
     86     /* Number of GrainTableEntries per GrainTable */
     87     uint32_t num_gtes_per_gt;
     88     uint64_t rgd_offset;
     89     uint64_t gd_offset;
     90     uint64_t grain_offset;
     91     char filler[1];
     92     char check_bytes[4];
     93     uint16_t compressAlgorithm;
     94 } QEMU_PACKED VMDK4Header;
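/*
 * All multi-byte fields of the on-disk headers above are stored
 * little-endian and are converted with le32_to_cpu()/le64_to_cpu()
 * when the header is read.
 */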
     95 
     96 typedef struct VMDKSESparseConstHeader {
     97     uint64_t magic;
     98     uint64_t version;
     99     uint64_t capacity;
    100     uint64_t grain_size;
    101     uint64_t grain_table_size;
    102     uint64_t flags;
    103     uint64_t reserved1;
    104     uint64_t reserved2;
    105     uint64_t reserved3;
    106     uint64_t reserved4;
    107     uint64_t volatile_header_offset;
    108     uint64_t volatile_header_size;
    109     uint64_t journal_header_offset;
    110     uint64_t journal_header_size;
    111     uint64_t journal_offset;
    112     uint64_t journal_size;
    113     uint64_t grain_dir_offset;
    114     uint64_t grain_dir_size;
    115     uint64_t grain_tables_offset;
    116     uint64_t grain_tables_size;
    117     uint64_t free_bitmap_offset;
    118     uint64_t free_bitmap_size;
    119     uint64_t backmap_offset;
    120     uint64_t backmap_size;
    121     uint64_t grains_offset;
    122     uint64_t grains_size;
    123     uint8_t pad[304];
    124 } QEMU_PACKED VMDKSESparseConstHeader;
    125 
    126 typedef struct VMDKSESparseVolatileHeader {
    127     uint64_t magic;
    128     uint64_t free_gt_number;
    129     uint64_t next_txn_seq_number;
    130     uint64_t replay_journal;
    131     uint8_t pad[480];
    132 } QEMU_PACKED VMDKSESparseVolatileHeader;
    133 
    134 #define L2_CACHE_SIZE 16
    135 
    136 typedef struct VmdkExtent {
    137     BdrvChild *file;
    138     bool flat;
    139     bool compressed;
    140     bool has_marker;
    141     bool has_zero_grain;
    142     bool sesparse;
    143     uint64_t sesparse_l2_tables_offset;
    144     uint64_t sesparse_clusters_offset;
    145     int32_t entry_size;
    146     int version;
    147     int64_t sectors;
    148     int64_t end_sector;
    149     int64_t flat_start_offset;
    150     int64_t l1_table_offset;
    151     int64_t l1_backup_table_offset;
    152     void *l1_table;
    153     uint32_t *l1_backup_table;
    154     unsigned int l1_size;
    155     uint32_t l1_entry_sectors;
    156 
    157     unsigned int l2_size;
    158     void *l2_cache;
    159     uint32_t l2_cache_offsets[L2_CACHE_SIZE];
    160     uint32_t l2_cache_counts[L2_CACHE_SIZE];
    161 
    162     int64_t cluster_sectors;
    163     int64_t next_cluster_sector;
    164     char *type;
    165 } VmdkExtent;
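/*
 * Unit note: sectors, end_sector, cluster_sectors and next_cluster_sector
 * are counted in 512-byte sectors; l1_entry_sectors is the number of
 * guest sectors covered by one L1 entry (l2_size * cluster_sectors), as
 * set up in vmdk_add_extent().
 */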
    166 
    167 typedef struct BDRVVmdkState {
    168     CoMutex lock;
    169     uint64_t desc_offset;
    170     bool cid_updated;
    171     bool cid_checked;
    172     uint32_t cid;
    173     uint32_t parent_cid;
    174     int num_extents;
     175     /* Extent array with num_extents entries, in ascending order of address */
    176     VmdkExtent *extents;
    177     Error *migration_blocker;
    178     char *create_type;
    179 } BDRVVmdkState;
    180 
    181 typedef struct BDRVVmdkReopenState {
    182     bool *extents_using_bs_file;
    183 } BDRVVmdkReopenState;
    184 
    185 typedef struct VmdkMetaData {
    186     unsigned int l1_index;
    187     unsigned int l2_index;
    188     unsigned int l2_offset;
    189     bool new_allocation;
    190     uint32_t *l2_cache_entry;
    191 } VmdkMetaData;
    192 
    193 typedef struct VmdkGrainMarker {
    194     uint64_t lba;
    195     uint32_t size;
    196     uint8_t  data[];
    197 } QEMU_PACKED VmdkGrainMarker;
    198 
    199 enum {
    200     MARKER_END_OF_STREAM    = 0,
    201     MARKER_GRAIN_TABLE      = 1,
    202     MARKER_GRAIN_DIRECTORY  = 2,
    203     MARKER_FOOTER           = 3,
    204 };
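/*
 * These marker types appear in stream-optimized extents (see
 * VMDK4_FLAG_MARKER); vmdk_open_vmdk4() checks MARKER_FOOTER and
 * MARKER_END_OF_STREAM when the grain directory offset is
 * VMDK4_GD_AT_END and the real header lives in the footer.
 */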
    205 
    206 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
    207 {
    208     uint32_t magic;
    209 
    210     if (buf_size < 4) {
    211         return 0;
    212     }
    213     magic = be32_to_cpu(*(uint32_t *)buf);
    214     if (magic == VMDK3_MAGIC ||
    215         magic == VMDK4_MAGIC) {
    216         return 100;
    217     } else {
    218         const char *p = (const char *)buf;
    219         const char *end = p + buf_size;
    220         while (p < end) {
    221             if (*p == '#') {
    222                 /* skip comment line */
    223                 while (p < end && *p != '\n') {
    224                     p++;
    225                 }
    226                 p++;
    227                 continue;
    228             }
    229             if (*p == ' ') {
    230                 while (p < end && *p == ' ') {
    231                     p++;
    232                 }
    233                 /* skip '\r' if windows line endings used. */
    234                 if (p < end && *p == '\r') {
    235                     p++;
    236                 }
    237                 /* only accept blank lines before 'version=' line */
    238                 if (p == end || *p != '\n') {
    239                     return 0;
    240                 }
    241                 p++;
    242                 continue;
    243             }
    244             if (end - p >= strlen("version=X\n")) {
    245                 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
    246                     strncmp("version=2\n", p, strlen("version=2\n")) == 0 ||
    247                     strncmp("version=3\n", p, strlen("version=3\n")) == 0) {
    248                     return 100;
    249                 }
    250             }
    251             if (end - p >= strlen("version=X\r\n")) {
    252                 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
    253                     strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0 ||
    254                     strncmp("version=3\r\n", p, strlen("version=3\r\n")) == 0) {
    255                     return 100;
    256                 }
    257             }
    258             return 0;
    259         }
    260         return 0;
    261     }
    262 }
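/*
 * Illustrative only (values made up): a text descriptor accepted by the
 * probe above typically begins with something like
 *
 *     # Disk DescriptorFile
 *     version=1
 *     CID=fffffffe
 *     parentCID=ffffffff
 *     createType="monolithicSparse"
 *
 * Comment lines and blank lines before the "version=" line are skipped.
 */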
    263 
    264 #define SECTOR_SIZE 512
    265 #define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
    266 #define BUF_SIZE 4096
    267 #define HEADER_SIZE 512                 /* first sector of 512 bytes */
    268 
    269 static void vmdk_free_extents(BlockDriverState *bs)
    270 {
    271     int i;
    272     BDRVVmdkState *s = bs->opaque;
    273     VmdkExtent *e;
    274 
    275     for (i = 0; i < s->num_extents; i++) {
    276         e = &s->extents[i];
    277         g_free(e->l1_table);
    278         g_free(e->l2_cache);
    279         g_free(e->l1_backup_table);
    280         g_free(e->type);
    281         if (e->file != bs->file) {
    282             bdrv_unref_child(bs, e->file);
    283         }
    284     }
    285     g_free(s->extents);
    286 }
    287 
    288 static void vmdk_free_last_extent(BlockDriverState *bs)
    289 {
    290     BDRVVmdkState *s = bs->opaque;
    291 
    292     if (s->num_extents == 0) {
    293         return;
    294     }
    295     s->num_extents--;
    296     s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
    297 }
    298 
     299 /* Return a negative errno, or 0 on success and write the CID into *pcid. */
    300 static int vmdk_read_cid(BlockDriverState *bs, int parent, uint32_t *pcid)
    301 {
    302     char *desc;
    303     uint32_t cid;
    304     const char *p_name, *cid_str;
    305     size_t cid_str_size;
    306     BDRVVmdkState *s = bs->opaque;
    307     int ret;
    308 
    309     desc = g_malloc0(DESC_SIZE);
    310     ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
    311     if (ret < 0) {
    312         goto out;
    313     }
    314 
    315     if (parent) {
    316         cid_str = "parentCID";
    317         cid_str_size = sizeof("parentCID");
    318     } else {
    319         cid_str = "CID";
    320         cid_str_size = sizeof("CID");
    321     }
    322 
    323     desc[DESC_SIZE - 1] = '\0';
    324     p_name = strstr(desc, cid_str);
    325     if (p_name == NULL) {
    326         ret = -EINVAL;
    327         goto out;
    328     }
    329     p_name += cid_str_size;
    330     if (sscanf(p_name, "%" SCNx32, &cid) != 1) {
    331         ret = -EINVAL;
    332         goto out;
    333     }
    334     *pcid = cid;
    335     ret = 0;
    336 
    337 out:
    338     g_free(desc);
    339     return ret;
    340 }
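/*
 * The descriptor stores these values as lines of the form "CID=<hex>" and
 * "parentCID=<hex>".  sizeof(cid_str) counts the terminating NUL, so the
 * increment above also steps over the '=' before sscanf() parses the hex
 * value.
 */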
    341 
    342 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
    343 {
    344     char *desc, *tmp_desc;
    345     char *p_name, *tmp_str;
    346     BDRVVmdkState *s = bs->opaque;
    347     int ret = 0;
    348 
    349     desc = g_malloc0(DESC_SIZE);
    350     tmp_desc = g_malloc0(DESC_SIZE);
    351     ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
    352     if (ret < 0) {
    353         goto out;
    354     }
    355 
    356     desc[DESC_SIZE - 1] = '\0';
    357     tmp_str = strstr(desc, "parentCID");
    358     if (tmp_str == NULL) {
    359         ret = -EINVAL;
    360         goto out;
    361     }
    362 
    363     pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
    364     p_name = strstr(desc, "CID");
    365     if (p_name != NULL) {
    366         p_name += sizeof("CID");
    367         snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
    368         pstrcat(desc, DESC_SIZE, tmp_desc);
    369     }
    370 
    371     ret = bdrv_pwrite_sync(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
    372 
    373 out:
    374     g_free(desc);
    375     g_free(tmp_desc);
    376     return ret;
    377 }
    378 
    379 static int vmdk_is_cid_valid(BlockDriverState *bs)
    380 {
    381     BDRVVmdkState *s = bs->opaque;
    382     uint32_t cur_pcid;
    383 
    384     if (!s->cid_checked && bs->backing) {
    385         BlockDriverState *p_bs = bs->backing->bs;
    386 
    387         if (strcmp(p_bs->drv->format_name, "vmdk")) {
    388             /* Backing file is not in vmdk format, so it does not have
    389              * a CID, which makes the overlay's parent CID invalid */
    390             return 0;
    391         }
    392 
    393         if (vmdk_read_cid(p_bs, 0, &cur_pcid) != 0) {
    394             /* read failure: report as not valid */
    395             return 0;
    396         }
    397         if (s->parent_cid != cur_pcid) {
    398             /* CID not valid */
    399             return 0;
    400         }
    401     }
    402     s->cid_checked = true;
    403     /* CID valid */
    404     return 1;
    405 }
    406 
    407 static int vmdk_reopen_prepare(BDRVReopenState *state,
    408                                BlockReopenQueue *queue, Error **errp)
    409 {
    410     BDRVVmdkState *s;
    411     BDRVVmdkReopenState *rs;
    412     int i;
    413 
    414     assert(state != NULL);
    415     assert(state->bs != NULL);
    416     assert(state->opaque == NULL);
    417 
    418     s = state->bs->opaque;
    419 
    420     rs = g_new0(BDRVVmdkReopenState, 1);
    421     state->opaque = rs;
    422 
    423     /*
    424      * Check whether there are any extents stored in bs->file; if bs->file
    425      * changes, we will need to update their .file pointers to follow suit
    426      */
    427     rs->extents_using_bs_file = g_new(bool, s->num_extents);
    428     for (i = 0; i < s->num_extents; i++) {
    429         rs->extents_using_bs_file[i] = s->extents[i].file == state->bs->file;
    430     }
    431 
    432     return 0;
    433 }
    434 
    435 static void vmdk_reopen_clean(BDRVReopenState *state)
    436 {
    437     BDRVVmdkReopenState *rs = state->opaque;
    438 
    439     g_free(rs->extents_using_bs_file);
    440     g_free(rs);
    441     state->opaque = NULL;
    442 }
    443 
    444 static void vmdk_reopen_commit(BDRVReopenState *state)
    445 {
    446     BDRVVmdkState *s = state->bs->opaque;
    447     BDRVVmdkReopenState *rs = state->opaque;
    448     int i;
    449 
    450     for (i = 0; i < s->num_extents; i++) {
    451         if (rs->extents_using_bs_file[i]) {
    452             s->extents[i].file = state->bs->file;
    453         }
    454     }
    455 
    456     vmdk_reopen_clean(state);
    457 }
    458 
    459 static void vmdk_reopen_abort(BDRVReopenState *state)
    460 {
    461     vmdk_reopen_clean(state);
    462 }
    463 
    464 static int vmdk_parent_open(BlockDriverState *bs)
    465 {
    466     char *p_name;
    467     char *desc;
    468     BDRVVmdkState *s = bs->opaque;
    469     int ret;
    470 
    471     desc = g_malloc0(DESC_SIZE + 1);
    472     ret = bdrv_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
    473     if (ret < 0) {
    474         goto out;
    475     }
    476 
    477     p_name = strstr(desc, "parentFileNameHint");
    478     if (p_name != NULL) {
    479         char *end_name;
    480 
    481         p_name += sizeof("parentFileNameHint") + 1;
    482         end_name = strchr(p_name, '\"');
    483         if (end_name == NULL) {
    484             ret = -EINVAL;
    485             goto out;
    486         }
    487         if ((end_name - p_name) > sizeof(bs->auto_backing_file) - 1) {
    488             ret = -EINVAL;
    489             goto out;
    490         }
    491 
    492         pstrcpy(bs->auto_backing_file, end_name - p_name + 1, p_name);
    493         pstrcpy(bs->backing_file, sizeof(bs->backing_file),
    494                 bs->auto_backing_file);
    495         pstrcpy(bs->backing_format, sizeof(bs->backing_format),
    496                 "vmdk");
    497     }
    498 
    499 out:
    500     g_free(desc);
    501     return ret;
    502 }
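/*
 * The hint is stored in the descriptor as, e.g. (illustrative file name):
 *
 *     parentFileNameHint="parent.vmdk"
 *
 * sizeof("parentFileNameHint") + 1 steps over the key name, the '=' and
 * the opening quote, so p_name points at the file name and end_name at
 * the closing quote.
 */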
    503 
     504 /* Create and append an extent to the extent array.  On success, store the new
     505  * extent's address in *new_extent and return 0; return a negative errno on failure. */
    506 static int vmdk_add_extent(BlockDriverState *bs,
    507                            BdrvChild *file, bool flat, int64_t sectors,
    508                            int64_t l1_offset, int64_t l1_backup_offset,
    509                            uint32_t l1_size,
    510                            int l2_size, uint64_t cluster_sectors,
    511                            VmdkExtent **new_extent,
    512                            Error **errp)
    513 {
    514     VmdkExtent *extent;
    515     BDRVVmdkState *s = bs->opaque;
    516     int64_t nb_sectors;
    517 
    518     if (cluster_sectors > 0x200000) {
     519         /* 0x200000 * 512 bytes = 1 GB for one cluster, which is unrealistic */
    520         error_setg(errp, "Invalid granularity, image may be corrupt");
    521         return -EFBIG;
    522     }
    523     if (l1_size > 32 * 1024 * 1024) {
    524         /*
    525          * Although with big capacity and small l1_entry_sectors, we can get a
    526          * big l1_size, we don't want unbounded value to allocate the table.
    527          * Limit it to 32M, which is enough to store:
    528          *     8TB  - for both VMDK3 & VMDK4 with
    529          *            minimal cluster size: 512B
    530          *            minimal L2 table size: 512 entries
    531          *            8 TB is still more than the maximal value supported for
    532          *            VMDK3 & VMDK4 which is 2TB.
    533          *     64TB - for "ESXi seSparse Extent"
    534          *            minimal cluster size: 512B (default is 4KB)
    535          *            L2 table size: 4096 entries (const).
    536          *            64TB is more than the maximal value supported for
    537          *            seSparse VMDKs (which is slightly less than 64TB)
    538          */
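        /*
         * Worked out: with 512-entry L2 tables and 512 B clusters an L1
         * entry covers 512 * 512 B = 256 KiB, so 32M entries cover
         * 2^25 * 2^18 B = 8 TB; with seSparse's 4096-entry L2 tables and
         * 512 B clusters an entry covers 2 MiB, so 32M entries cover
         * 2^25 * 2^21 B = 64 TB, matching the figures above.
         */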
    539         error_setg(errp, "L1 size too big");
    540         return -EFBIG;
    541     }
    542 
    543     nb_sectors = bdrv_nb_sectors(file->bs);
    544     if (nb_sectors < 0) {
    545         return nb_sectors;
    546     }
    547 
    548     s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1);
    549     extent = &s->extents[s->num_extents];
    550     s->num_extents++;
    551 
    552     memset(extent, 0, sizeof(VmdkExtent));
    553     extent->file = file;
    554     extent->flat = flat;
    555     extent->sectors = sectors;
    556     extent->l1_table_offset = l1_offset;
    557     extent->l1_backup_table_offset = l1_backup_offset;
    558     extent->l1_size = l1_size;
    559     extent->l1_entry_sectors = l2_size * cluster_sectors;
    560     extent->l2_size = l2_size;
    561     extent->cluster_sectors = flat ? sectors : cluster_sectors;
    562     extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
    563     extent->entry_size = sizeof(uint32_t);
    564 
    565     if (s->num_extents > 1) {
    566         extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
    567     } else {
    568         extent->end_sector = extent->sectors;
    569     }
    570     bs->total_sectors = extent->end_sector;
    571     if (new_extent) {
    572         *new_extent = extent;
    573     }
    574     return 0;
    575 }
    576 
    577 static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
    578                             Error **errp)
    579 {
    580     int ret;
    581     size_t l1_size;
    582     int i;
    583 
    584     /* read the L1 table */
    585     l1_size = extent->l1_size * extent->entry_size;
    586     extent->l1_table = g_try_malloc(l1_size);
    587     if (l1_size && extent->l1_table == NULL) {
    588         return -ENOMEM;
    589     }
    590 
    591     ret = bdrv_pread(extent->file, extent->l1_table_offset, l1_size,
    592                      extent->l1_table, 0);
    593     if (ret < 0) {
    594         bdrv_refresh_filename(extent->file->bs);
    595         error_setg_errno(errp, -ret,
    596                          "Could not read l1 table from extent '%s'",
    597                          extent->file->bs->filename);
    598         goto fail_l1;
    599     }
    600     for (i = 0; i < extent->l1_size; i++) {
    601         if (extent->entry_size == sizeof(uint64_t)) {
    602             le64_to_cpus((uint64_t *)extent->l1_table + i);
    603         } else {
    604             assert(extent->entry_size == sizeof(uint32_t));
    605             le32_to_cpus((uint32_t *)extent->l1_table + i);
    606         }
    607     }
    608 
    609     if (extent->l1_backup_table_offset) {
    610         assert(!extent->sesparse);
    611         extent->l1_backup_table = g_try_malloc(l1_size);
    612         if (l1_size && extent->l1_backup_table == NULL) {
    613             ret = -ENOMEM;
    614             goto fail_l1;
    615         }
    616         ret = bdrv_pread(extent->file, extent->l1_backup_table_offset,
    617                          l1_size, extent->l1_backup_table, 0);
    618         if (ret < 0) {
    619             bdrv_refresh_filename(extent->file->bs);
    620             error_setg_errno(errp, -ret,
    621                              "Could not read l1 backup table from extent '%s'",
    622                              extent->file->bs->filename);
    623             goto fail_l1b;
    624         }
    625         for (i = 0; i < extent->l1_size; i++) {
    626             le32_to_cpus(&extent->l1_backup_table[i]);
    627         }
    628     }
    629 
    630     extent->l2_cache =
    631         g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
    632     return 0;
    633  fail_l1b:
    634     g_free(extent->l1_backup_table);
    635  fail_l1:
    636     g_free(extent->l1_table);
    637     return ret;
    638 }
    639 
    640 static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
    641                                  BdrvChild *file,
    642                                  int flags, Error **errp)
    643 {
    644     int ret;
    645     uint32_t magic;
    646     VMDK3Header header;
    647     VmdkExtent *extent = NULL;
    648 
    649     ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
    650     if (ret < 0) {
    651         bdrv_refresh_filename(file->bs);
    652         error_setg_errno(errp, -ret,
    653                          "Could not read header from file '%s'",
    654                          file->bs->filename);
    655         return ret;
    656     }
    657     ret = vmdk_add_extent(bs, file, false,
    658                           le32_to_cpu(header.disk_sectors),
    659                           (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
    660                           0,
    661                           le32_to_cpu(header.l1dir_size),
    662                           4096,
    663                           le32_to_cpu(header.granularity),
    664                           &extent,
    665                           errp);
    666     if (ret < 0) {
    667         return ret;
    668     }
    669     ret = vmdk_init_tables(bs, extent, errp);
    670     if (ret) {
    671         /* free extent allocated by vmdk_add_extent */
    672         vmdk_free_last_extent(bs);
    673     }
    674     return ret;
    675 }
    676 
    677 #define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
    678 #define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
    679 
    680 /* Strict checks - format not officially documented */
    681 static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
    682                                         Error **errp)
    683 {
    684     header->magic = le64_to_cpu(header->magic);
    685     header->version = le64_to_cpu(header->version);
    686     header->grain_size = le64_to_cpu(header->grain_size);
    687     header->grain_table_size = le64_to_cpu(header->grain_table_size);
    688     header->flags = le64_to_cpu(header->flags);
    689     header->reserved1 = le64_to_cpu(header->reserved1);
    690     header->reserved2 = le64_to_cpu(header->reserved2);
    691     header->reserved3 = le64_to_cpu(header->reserved3);
    692     header->reserved4 = le64_to_cpu(header->reserved4);
    693 
    694     header->volatile_header_offset =
    695         le64_to_cpu(header->volatile_header_offset);
    696     header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
    697 
    698     header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
    699     header->journal_header_size = le64_to_cpu(header->journal_header_size);
    700 
    701     header->journal_offset = le64_to_cpu(header->journal_offset);
    702     header->journal_size = le64_to_cpu(header->journal_size);
    703 
    704     header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
    705     header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
    706 
    707     header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
    708     header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
    709 
    710     header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
    711     header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
    712 
    713     header->backmap_offset = le64_to_cpu(header->backmap_offset);
    714     header->backmap_size = le64_to_cpu(header->backmap_size);
    715 
    716     header->grains_offset = le64_to_cpu(header->grains_offset);
    717     header->grains_size = le64_to_cpu(header->grains_size);
    718 
    719     if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
    720         error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
    721                    header->magic);
    722         return -EINVAL;
    723     }
    724 
    725     if (header->version != 0x0000000200000001) {
    726         error_setg(errp, "Unsupported version: 0x%016" PRIx64,
    727                    header->version);
    728         return -ENOTSUP;
    729     }
    730 
    731     if (header->grain_size != 8) {
    732         error_setg(errp, "Unsupported grain size: %" PRIu64,
    733                    header->grain_size);
    734         return -ENOTSUP;
    735     }
    736 
    737     if (header->grain_table_size != 64) {
    738         error_setg(errp, "Unsupported grain table size: %" PRIu64,
    739                    header->grain_table_size);
    740         return -ENOTSUP;
    741     }
    742 
    743     if (header->flags != 0) {
    744         error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
    745                    header->flags);
    746         return -ENOTSUP;
    747     }
    748 
    749     if (header->reserved1 != 0 || header->reserved2 != 0 ||
    750         header->reserved3 != 0 || header->reserved4 != 0) {
    751         error_setg(errp, "Unsupported reserved bits:"
    752                    " 0x%016" PRIx64 " 0x%016" PRIx64
    753                    " 0x%016" PRIx64 " 0x%016" PRIx64,
    754                    header->reserved1, header->reserved2,
    755                    header->reserved3, header->reserved4);
    756         return -ENOTSUP;
    757     }
    758 
    759     /* check that padding is 0 */
    760     if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
    761         error_setg(errp, "Unsupported non-zero const header padding");
    762         return -ENOTSUP;
    763     }
    764 
    765     return 0;
    766 }
    767 
    768 static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
    769                                            Error **errp)
    770 {
    771     header->magic = le64_to_cpu(header->magic);
    772     header->free_gt_number = le64_to_cpu(header->free_gt_number);
    773     header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
    774     header->replay_journal = le64_to_cpu(header->replay_journal);
    775 
    776     if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
    777         error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
    778                    header->magic);
    779         return -EINVAL;
    780     }
    781 
    782     if (header->replay_journal) {
    783         error_setg(errp, "Image is dirty, Replaying journal not supported");
    784         return -ENOTSUP;
    785     }
    786 
    787     /* check that padding is 0 */
    788     if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
    789         error_setg(errp, "Unsupported non-zero volatile header padding");
    790         return -ENOTSUP;
    791     }
    792 
    793     return 0;
    794 }
    795 
    796 static int vmdk_open_se_sparse(BlockDriverState *bs,
    797                                BdrvChild *file,
    798                                int flags, Error **errp)
    799 {
    800     int ret;
    801     VMDKSESparseConstHeader const_header;
    802     VMDKSESparseVolatileHeader volatile_header;
    803     VmdkExtent *extent = NULL;
    804 
    805     ret = bdrv_apply_auto_read_only(bs,
    806             "No write support for seSparse images available", errp);
    807     if (ret < 0) {
    808         return ret;
    809     }
    810 
    811     assert(sizeof(const_header) == SECTOR_SIZE);
    812 
    813     ret = bdrv_pread(file, 0, sizeof(const_header), &const_header, 0);
    814     if (ret < 0) {
    815         bdrv_refresh_filename(file->bs);
    816         error_setg_errno(errp, -ret,
    817                          "Could not read const header from file '%s'",
    818                          file->bs->filename);
    819         return ret;
    820     }
    821 
    822     /* check const header */
    823     ret = check_se_sparse_const_header(&const_header, errp);
    824     if (ret < 0) {
    825         return ret;
    826     }
    827 
    828     assert(sizeof(volatile_header) == SECTOR_SIZE);
    829 
    830     ret = bdrv_pread(file, const_header.volatile_header_offset * SECTOR_SIZE,
    831                      sizeof(volatile_header), &volatile_header, 0);
    832     if (ret < 0) {
    833         bdrv_refresh_filename(file->bs);
    834         error_setg_errno(errp, -ret,
    835                          "Could not read volatile header from file '%s'",
    836                          file->bs->filename);
    837         return ret;
    838     }
    839 
    840     /* check volatile header */
    841     ret = check_se_sparse_volatile_header(&volatile_header, errp);
    842     if (ret < 0) {
    843         return ret;
    844     }
    845 
    846     ret = vmdk_add_extent(bs, file, false,
    847                           const_header.capacity,
    848                           const_header.grain_dir_offset * SECTOR_SIZE,
    849                           0,
    850                           const_header.grain_dir_size *
    851                           SECTOR_SIZE / sizeof(uint64_t),
    852                           const_header.grain_table_size *
    853                           SECTOR_SIZE / sizeof(uint64_t),
    854                           const_header.grain_size,
    855                           &extent,
    856                           errp);
    857     if (ret < 0) {
    858         return ret;
    859     }
    860 
    861     extent->sesparse = true;
    862     extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
    863     extent->sesparse_clusters_offset = const_header.grains_offset;
    864     extent->entry_size = sizeof(uint64_t);
    865 
    866     ret = vmdk_init_tables(bs, extent, errp);
    867     if (ret) {
    868         /* free extent allocated by vmdk_add_extent */
    869         vmdk_free_last_extent(bs);
    870     }
    871 
    872     return ret;
    873 }
    874 
    875 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
    876                                QDict *options, Error **errp);
    877 
    878 static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
    879 {
    880     int64_t size;
    881     char *buf;
    882     int ret;
    883 
    884     size = bdrv_getlength(file->bs);
    885     if (size < 0) {
    886         error_setg_errno(errp, -size, "Could not access file");
    887         return NULL;
    888     }
    889 
    890     if (size < 4) {
     891         /* Both a descriptor file and a sparse image must be much larger than 4
     892          * bytes; callers of vmdk_read_desc also want to compare the first 4
     893          * bytes with VMDK4_MAGIC, so error out if less is available. */
    894         error_setg(errp, "File is too small, not a valid image");
    895         return NULL;
    896     }
    897 
    898     size = MIN(size, (1 << 20) - 1);  /* avoid unbounded allocation */
    899     buf = g_malloc(size + 1);
    900 
    901     ret = bdrv_pread(file, desc_offset, size, buf, 0);
    902     if (ret < 0) {
    903         error_setg_errno(errp, -ret, "Could not read from file");
    904         g_free(buf);
    905         return NULL;
    906     }
    907     buf[size] = 0;
    908 
    909     return buf;
    910 }
    911 
    912 static int vmdk_open_vmdk4(BlockDriverState *bs,
    913                            BdrvChild *file,
    914                            int flags, QDict *options, Error **errp)
    915 {
    916     int ret;
    917     uint32_t magic;
    918     uint32_t l1_size, l1_entry_sectors;
    919     VMDK4Header header;
    920     VmdkExtent *extent = NULL;
    921     BDRVVmdkState *s = bs->opaque;
    922     int64_t l1_backup_offset = 0;
    923     bool compressed;
    924 
    925     ret = bdrv_pread(file, sizeof(magic), sizeof(header), &header, 0);
    926     if (ret < 0) {
    927         bdrv_refresh_filename(file->bs);
    928         error_setg_errno(errp, -ret,
    929                          "Could not read header from file '%s'",
    930                          file->bs->filename);
    931         return -EINVAL;
    932     }
    933     if (header.capacity == 0) {
    934         uint64_t desc_offset = le64_to_cpu(header.desc_offset);
    935         if (desc_offset) {
    936             char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
    937             if (!buf) {
    938                 return -EINVAL;
    939             }
    940             ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
    941             g_free(buf);
    942             return ret;
    943         }
    944     }
    945 
    946     if (!s->create_type) {
    947         s->create_type = g_strdup("monolithicSparse");
    948     }
    949 
    950     if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
    951         /*
    952          * The footer takes precedence over the header, so read it in. The
    953          * footer starts at offset -1024 from the end: One sector for the
    954          * footer, and another one for the end-of-stream marker.
    955          */
    956         struct {
    957             struct {
    958                 uint64_t val;
    959                 uint32_t size;
    960                 uint32_t type;
    961                 uint8_t pad[512 - 16];
    962             } QEMU_PACKED footer_marker;
    963 
    964             uint32_t magic;
    965             VMDK4Header header;
    966             uint8_t pad[512 - 4 - sizeof(VMDK4Header)];
    967 
    968             struct {
    969                 uint64_t val;
    970                 uint32_t size;
    971                 uint32_t type;
    972                 uint8_t pad[512 - 16];
    973             } QEMU_PACKED eos_marker;
    974         } QEMU_PACKED footer;
    975 
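        /*
         * The read below therefore starts 1536 bytes (three sectors) before
         * EOF: the footer marker sector, the footer sector itself and the
         * end-of-stream marker sector, matching the struct layout above.
         */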
    976         ret = bdrv_pread(file, bs->file->bs->total_sectors * 512 - 1536,
    977                          sizeof(footer), &footer, 0);
    978         if (ret < 0) {
    979             error_setg_errno(errp, -ret, "Failed to read footer");
    980             return ret;
    981         }
    982 
    983         /* Some sanity checks for the footer */
    984         if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
    985             le32_to_cpu(footer.footer_marker.size) != 0  ||
    986             le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
    987             le64_to_cpu(footer.eos_marker.val) != 0  ||
    988             le32_to_cpu(footer.eos_marker.size) != 0  ||
    989             le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
    990         {
    991             error_setg(errp, "Invalid footer");
    992             return -EINVAL;
    993         }
    994 
    995         header = footer.header;
    996     }
    997 
    998     compressed =
    999         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
   1000     if (le32_to_cpu(header.version) > 3) {
   1001         error_setg(errp, "Unsupported VMDK version %" PRIu32,
   1002                    le32_to_cpu(header.version));
   1003         return -ENOTSUP;
   1004     } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
   1005                !compressed) {
   1006         /* VMware KB 2064959 explains that version 3 added support for
   1007          * persistent changed block tracking (CBT), and backup software can
   1008          * read it as version=1 if it doesn't care about the changed area
    1009          * information. So it is safe to allow read-only access. */
   1010         error_setg(errp, "VMDK version 3 must be read only");
   1011         return -EINVAL;
   1012     }
   1013 
   1014     if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
   1015         error_setg(errp, "L2 table size too big");
   1016         return -EINVAL;
   1017     }
   1018 
   1019     l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
   1020                         * le64_to_cpu(header.granularity);
   1021     if (l1_entry_sectors == 0) {
   1022         error_setg(errp, "L1 entry size is invalid");
   1023         return -EINVAL;
   1024     }
   1025     l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
   1026                 / l1_entry_sectors;
   1027     if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
   1028         l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
   1029     }
   1030     if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
   1031         error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
   1032                    (int64_t)(le64_to_cpu(header.grain_offset)
   1033                              * BDRV_SECTOR_SIZE));
   1034         return -EINVAL;
   1035     }
   1036 
   1037     ret = vmdk_add_extent(bs, file, false,
   1038                           le64_to_cpu(header.capacity),
   1039                           le64_to_cpu(header.gd_offset) << 9,
   1040                           l1_backup_offset,
   1041                           l1_size,
   1042                           le32_to_cpu(header.num_gtes_per_gt),
   1043                           le64_to_cpu(header.granularity),
   1044                           &extent,
   1045                           errp);
   1046     if (ret < 0) {
   1047         return ret;
   1048     }
   1049     extent->compressed =
   1050         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
   1051     if (extent->compressed) {
   1052         g_free(s->create_type);
   1053         s->create_type = g_strdup("streamOptimized");
   1054     }
   1055     extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
   1056     extent->version = le32_to_cpu(header.version);
   1057     extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
   1058     ret = vmdk_init_tables(bs, extent, errp);
   1059     if (ret) {
   1060         /* free extent allocated by vmdk_add_extent */
   1061         vmdk_free_last_extent(bs);
   1062     }
   1063     return ret;
   1064 }
   1065 
   1066 /* find an option value out of descriptor file */
   1067 static int vmdk_parse_description(const char *desc, const char *opt_name,
   1068         char *buf, int buf_size)
   1069 {
   1070     char *opt_pos, *opt_end;
   1071     const char *end = desc + strlen(desc);
   1072 
   1073     opt_pos = strstr(desc, opt_name);
   1074     if (!opt_pos) {
   1075         return VMDK_ERROR;
   1076     }
   1077     /* Skip "=\"" following opt_name */
   1078     opt_pos += strlen(opt_name) + 2;
   1079     if (opt_pos >= end) {
   1080         return VMDK_ERROR;
   1081     }
   1082     opt_end = opt_pos;
   1083     while (opt_end < end && *opt_end != '"') {
   1084         opt_end++;
   1085     }
   1086     if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
   1087         return VMDK_ERROR;
   1088     }
   1089     pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
   1090     return VMDK_OK;
   1091 }
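/*
 * For example (illustrative), given a descriptor containing the line
 *
 *     createType="twoGbMaxExtentSparse"
 *
 * vmdk_parse_description(desc, "createType", buf, sizeof(buf)) copies
 * twoGbMaxExtentSparse (without the quotes) into buf.
 */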
   1092 
    1093 /* Open an extent file and append it to the extent array of bs */
   1094 static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
   1095                             char *buf, QDict *options, Error **errp)
   1096 {
   1097     uint32_t magic;
   1098 
   1099     magic = ldl_be_p(buf);
   1100     switch (magic) {
   1101         case VMDK3_MAGIC:
   1102             return vmdk_open_vmfs_sparse(bs, file, flags, errp);
   1103         case VMDK4_MAGIC:
   1104             return vmdk_open_vmdk4(bs, file, flags, options, errp);
   1105         default:
   1106             error_setg(errp, "Image not in VMDK format");
   1107             return -EINVAL;
   1108     }
   1109 }
   1110 
   1111 static const char *next_line(const char *s)
   1112 {
   1113     while (*s) {
   1114         if (*s == '\n') {
   1115             return s + 1;
   1116         }
   1117         s++;
   1118     }
   1119     return s;
   1120 }
   1121 
   1122 static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
   1123                               QDict *options, Error **errp)
   1124 {
   1125     int ret;
   1126     int matches;
   1127     char access[11];
   1128     char type[11];
   1129     char fname[512];
   1130     const char *p, *np;
   1131     int64_t sectors = 0;
   1132     int64_t flat_offset;
   1133     char *desc_file_dir = NULL;
   1134     char *extent_path;
   1135     BdrvChild *extent_file;
   1136     BdrvChildRole extent_role;
   1137     BDRVVmdkState *s = bs->opaque;
   1138     VmdkExtent *extent = NULL;
   1139     char extent_opt_prefix[32];
   1140     Error *local_err = NULL;
   1141 
   1142     for (p = desc; *p; p = next_line(p)) {
    1143         /* parse an extent line in one of the formats below:
   1144          *
   1145          * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
   1146          * RW [size in sectors] SPARSE "file-name.vmdk"
   1147          * RW [size in sectors] VMFS "file-name.vmdk"
   1148          * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
   1149          * RW [size in sectors] SESPARSE "file-name.vmdk"
   1150          */
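        /*
         * e.g. (illustrative names and sizes):
         *     RW 4192256 SPARSE "disk-s001.vmdk"
         *     RW 2097152 FLAT "disk-flat.vmdk" 0
         */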
   1151         flat_offset = -1;
   1152         matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
   1153                          access, &sectors, type, fname, &flat_offset);
   1154         if (matches < 4 || strcmp(access, "RW")) {
   1155             continue;
   1156         } else if (!strcmp(type, "FLAT")) {
   1157             if (matches != 5 || flat_offset < 0) {
   1158                 goto invalid;
   1159             }
   1160         } else if (!strcmp(type, "VMFS")) {
   1161             if (matches == 4) {
   1162                 flat_offset = 0;
   1163             } else {
   1164                 goto invalid;
   1165             }
   1166         } else if (matches != 4) {
   1167             goto invalid;
   1168         }
   1169 
   1170         if (sectors <= 0 ||
   1171             (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
   1172              strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
   1173              strcmp(type, "SESPARSE")) ||
   1174             (strcmp(access, "RW"))) {
   1175             continue;
   1176         }
   1177 
   1178         if (path_is_absolute(fname)) {
   1179             extent_path = g_strdup(fname);
   1180         } else {
   1181             if (!desc_file_dir) {
   1182                 desc_file_dir = bdrv_dirname(bs->file->bs, errp);
   1183                 if (!desc_file_dir) {
   1184                     bdrv_refresh_filename(bs->file->bs);
   1185                     error_prepend(errp, "Cannot use relative paths with VMDK "
   1186                                   "descriptor file '%s': ",
   1187                                   bs->file->bs->filename);
   1188                     ret = -EINVAL;
   1189                     goto out;
   1190                 }
   1191             }
   1192 
   1193             extent_path = g_strconcat(desc_file_dir, fname, NULL);
   1194         }
   1195 
   1196         ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
   1197         assert(ret < 32);
   1198 
   1199         extent_role = BDRV_CHILD_DATA;
   1200         if (strcmp(type, "FLAT") != 0 && strcmp(type, "VMFS") != 0) {
   1201             /* non-flat extents have metadata */
   1202             extent_role |= BDRV_CHILD_METADATA;
   1203         }
   1204 
   1205         extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
   1206                                       bs, &child_of_bds, extent_role, false,
   1207                                       &local_err);
   1208         g_free(extent_path);
   1209         if (local_err) {
   1210             error_propagate(errp, local_err);
   1211             ret = -EINVAL;
   1212             goto out;
   1213         }
   1214 
   1215         /* save to extents array */
   1216         if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
   1217             /* FLAT extent */
   1218 
   1219             ret = vmdk_add_extent(bs, extent_file, true, sectors,
   1220                             0, 0, 0, 0, 0, &extent, errp);
   1221             if (ret < 0) {
   1222                 bdrv_unref_child(bs, extent_file);
   1223                 goto out;
   1224             }
   1225             extent->flat_start_offset = flat_offset << 9;
   1226         } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
    1227             /* SPARSE and VMFSSPARSE extents are both "COWD" sparse files */
   1228             char *buf = vmdk_read_desc(extent_file, 0, errp);
   1229             if (!buf) {
   1230                 ret = -EINVAL;
   1231             } else {
   1232                 ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf,
   1233                                        options, errp);
   1234             }
   1235             g_free(buf);
   1236             if (ret) {
   1237                 bdrv_unref_child(bs, extent_file);
   1238                 goto out;
   1239             }
   1240             extent = &s->extents[s->num_extents - 1];
   1241         } else if (!strcmp(type, "SESPARSE")) {
   1242             ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
   1243             if (ret) {
   1244                 bdrv_unref_child(bs, extent_file);
   1245                 goto out;
   1246             }
   1247             extent = &s->extents[s->num_extents - 1];
   1248         } else {
   1249             error_setg(errp, "Unsupported extent type '%s'", type);
   1250             bdrv_unref_child(bs, extent_file);
   1251             ret = -ENOTSUP;
   1252             goto out;
   1253         }
   1254         extent->type = g_strdup(type);
   1255     }
   1256 
   1257     ret = 0;
   1258     goto out;
   1259 
   1260 invalid:
   1261     np = next_line(p);
   1262     assert(np != p);
   1263     if (np[-1] == '\n') {
   1264         np--;
   1265     }
   1266     error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
   1267     ret = -EINVAL;
   1268 
   1269 out:
   1270     g_free(desc_file_dir);
   1271     return ret;
   1272 }
   1273 
   1274 static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
   1275                                QDict *options, Error **errp)
   1276 {
   1277     int ret;
   1278     char ct[128];
   1279     BDRVVmdkState *s = bs->opaque;
   1280 
   1281     if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
   1282         error_setg(errp, "invalid VMDK image descriptor");
   1283         ret = -EINVAL;
   1284         goto exit;
   1285     }
   1286     if (strcmp(ct, "monolithicFlat") &&
   1287         strcmp(ct, "vmfs") &&
   1288         strcmp(ct, "vmfsSparse") &&
   1289         strcmp(ct, "seSparse") &&
   1290         strcmp(ct, "twoGbMaxExtentSparse") &&
   1291         strcmp(ct, "twoGbMaxExtentFlat")) {
   1292         error_setg(errp, "Unsupported image type '%s'", ct);
   1293         ret = -ENOTSUP;
   1294         goto exit;
   1295     }
   1296     s->create_type = g_strdup(ct);
   1297     s->desc_offset = 0;
   1298     ret = vmdk_parse_extents(buf, bs, options, errp);
   1299 exit:
   1300     return ret;
   1301 }
   1302 
   1303 static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
   1304                      Error **errp)
   1305 {
   1306     char *buf;
   1307     int ret;
   1308     BDRVVmdkState *s = bs->opaque;
   1309     uint32_t magic;
   1310 
   1311     ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
   1312     if (ret < 0) {
   1313         return ret;
   1314     }
   1315 
   1316     buf = vmdk_read_desc(bs->file, 0, errp);
   1317     if (!buf) {
   1318         return -EINVAL;
   1319     }
   1320 
   1321     magic = ldl_be_p(buf);
   1322     switch (magic) {
   1323         case VMDK3_MAGIC:
   1324         case VMDK4_MAGIC:
   1325             ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
   1326                                    errp);
   1327             s->desc_offset = 0x200;
   1328             break;
   1329         default:
   1330             /* No data in the descriptor file */
   1331             bs->file->role &= ~BDRV_CHILD_DATA;
   1332 
   1333             /* Must succeed because we have given up permissions if anything */
   1334             bdrv_child_refresh_perms(bs, bs->file, &error_abort);
   1335 
   1336             ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
   1337             break;
   1338     }
   1339     if (ret) {
   1340         goto fail;
   1341     }
   1342 
    1343     /* try to open the parent image, if one exists */
   1344     ret = vmdk_parent_open(bs);
   1345     if (ret) {
   1346         goto fail;
   1347     }
   1348     ret = vmdk_read_cid(bs, 0, &s->cid);
   1349     if (ret) {
   1350         goto fail;
   1351     }
   1352     ret = vmdk_read_cid(bs, 1, &s->parent_cid);
   1353     if (ret) {
   1354         goto fail;
   1355     }
   1356     qemu_co_mutex_init(&s->lock);
   1357 
   1358     /* Disable migration when VMDK images are used */
   1359     error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
   1360                "does not support live migration",
   1361                bdrv_get_device_or_node_name(bs));
   1362     ret = migrate_add_blocker(s->migration_blocker, errp);
   1363     if (ret < 0) {
   1364         error_free(s->migration_blocker);
   1365         goto fail;
   1366     }
   1367 
   1368     g_free(buf);
   1369     return 0;
   1370 
   1371 fail:
   1372     g_free(buf);
   1373     g_free(s->create_type);
   1374     s->create_type = NULL;
   1375     vmdk_free_extents(bs);
   1376     return ret;
   1377 }
   1378 
   1379 
   1380 static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
   1381 {
   1382     BDRVVmdkState *s = bs->opaque;
   1383     int i;
   1384 
   1385     for (i = 0; i < s->num_extents; i++) {
   1386         if (!s->extents[i].flat) {
   1387             bs->bl.pwrite_zeroes_alignment =
   1388                 MAX(bs->bl.pwrite_zeroes_alignment,
   1389                     s->extents[i].cluster_sectors << BDRV_SECTOR_BITS);
   1390         }
   1391     }
   1392 }
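/*
 * Rationale (informational): for sparse extents, write-zeroes requests are
 * aligned to the cluster (grain) size because a grain can only be marked
 * zeroed as a whole via its grain table entry.
 */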
   1393 
   1394 /**
   1395  * get_whole_cluster
   1396  *
    1397  * Copy the backing file's cluster that covers @offset, or write zeroes, to
    1398  * the cluster at @cluster_offset in the extent file. If @zeroed is true, we
    1399  * are overwriting a zeroed cluster in the current layer and must not copy
    1400  * data from the backing file.
    1401  *
    1402  * If @skip_start_bytes < @skip_end_bytes, the relative byte range
    1403  * [@skip_start_bytes, @skip_end_bytes) is neither copied nor written; it is
    1404  * left for the caller to fill with the user data of the request.
   1405  */
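/*
 * Informational sketch of one cluster during this copy-on-write:
 *
 *   [0, skip_start_bytes)               copied from backing, or zeroed
 *   [skip_start_bytes, skip_end_bytes)  untouched; caller writes user data
 *   [skip_end_bytes, cluster_bytes)     copied from backing, or zeroed
 */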
   1406 static int coroutine_fn get_whole_cluster(BlockDriverState *bs,
   1407                                           VmdkExtent *extent,
   1408                                           uint64_t cluster_offset,
   1409                                           uint64_t offset,
   1410                                           uint64_t skip_start_bytes,
   1411                                           uint64_t skip_end_bytes,
   1412                                           bool zeroed)
   1413 {
   1414     int ret = VMDK_OK;
   1415     int64_t cluster_bytes;
   1416     uint8_t *whole_grain;
   1417     bool copy_from_backing;
   1418 
   1419     /* For COW, align request sector_num to cluster start */
   1420     cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
   1421     offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
   1422     whole_grain = qemu_blockalign(bs, cluster_bytes);
   1423     copy_from_backing = bs->backing && !zeroed;
   1424 
   1425     if (!copy_from_backing) {
   1426         memset(whole_grain, 0, skip_start_bytes);
   1427         memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
   1428     }
   1429 
   1430     assert(skip_end_bytes <= cluster_bytes);
    1431     /* We get here on the first write to a not-yet-allocated grain (cluster);
    1432      * try to read it from the parent image, if one exists. */
   1433     if (bs->backing && !vmdk_is_cid_valid(bs)) {
   1434         ret = VMDK_ERROR;
   1435         goto exit;
   1436     }
   1437 
   1438     /* Read backing data before skip range */
   1439     if (skip_start_bytes > 0) {
   1440         if (copy_from_backing) {
   1441             /* qcow2 emits this on bs->file instead of bs->backing */
   1442             BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
   1443             ret = bdrv_co_pread(bs->backing, offset, skip_start_bytes,
   1444                                 whole_grain, 0);
   1445             if (ret < 0) {
   1446                 ret = VMDK_ERROR;
   1447                 goto exit;
   1448             }
   1449         }
   1450         BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
   1451         ret = bdrv_co_pwrite(extent->file, cluster_offset, skip_start_bytes,
   1452                              whole_grain, 0);
   1453         if (ret < 0) {
   1454             ret = VMDK_ERROR;
   1455             goto exit;
   1456         }
   1457     }
   1458     /* Read backing data after skip range */
   1459     if (skip_end_bytes < cluster_bytes) {
   1460         if (copy_from_backing) {
   1461             /* qcow2 emits this on bs->file instead of bs->backing */
   1462             BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
   1463             ret = bdrv_co_pread(bs->backing, offset + skip_end_bytes,
   1464                                 cluster_bytes - skip_end_bytes,
   1465                                 whole_grain + skip_end_bytes, 0);
   1466             if (ret < 0) {
   1467                 ret = VMDK_ERROR;
   1468                 goto exit;
   1469             }
   1470         }
   1471         BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
   1472         ret = bdrv_co_pwrite(extent->file, cluster_offset + skip_end_bytes,
   1473                              cluster_bytes - skip_end_bytes,
   1474                              whole_grain + skip_end_bytes, 0);
   1475         if (ret < 0) {
   1476             ret = VMDK_ERROR;
   1477             goto exit;
   1478         }
   1479     }
   1480 
   1481     ret = VMDK_OK;
   1482 exit:
   1483     qemu_vfree(whole_grain);
   1484     return ret;
   1485 }
   1486 
   1487 static int coroutine_fn vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
   1488                                       uint32_t offset)
   1489 {
   1490     offset = cpu_to_le32(offset);
   1491     /* update L2 table */
   1492     BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE);
   1493     if (bdrv_co_pwrite(extent->file,
   1494                        ((int64_t)m_data->l2_offset * 512)
   1495                            + (m_data->l2_index * sizeof(offset)),
   1496                        sizeof(offset), &offset, 0) < 0) {
   1497         return VMDK_ERROR;
   1498     }
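            /*
             * Images created with a redundant grain directory (VMDK4_FLAG_RGD)
             * keep a second copy of the metadata; mirror the update into the
             * backup grain table as well.
             */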
   1499     /* update backup L2 table */
   1500     if (extent->l1_backup_table_offset != 0) {
   1501         m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
   1502         if (bdrv_co_pwrite(extent->file,
   1503                            ((int64_t)m_data->l2_offset * 512)
   1504                                + (m_data->l2_index * sizeof(offset)),
   1505                            sizeof(offset), &offset, 0) < 0) {
   1506             return VMDK_ERROR;
   1507         }
   1508     }
   1509     if (bdrv_co_flush(extent->file->bs) < 0) {
   1510         return VMDK_ERROR;
   1511     }
   1512     if (m_data->l2_cache_entry) {
   1513         *m_data->l2_cache_entry = offset;
   1514     }
   1515 
   1516     return VMDK_OK;
   1517 }
   1518 
   1519 /**
   1520  * get_cluster_offset
   1521  *
   1522  * Look up the cluster offset in the extent file for the given guest @offset,
   1523  * and store it in @cluster_offset.
   1524  *
   1525  * For flat extents, the start offset as parsed from the description file is
   1526  * returned.
   1527  *
   1528  * For sparse extents, look up the L1 and L2 tables. If @allocate is true,
   1529  * return an offset for a new cluster and update the L2 cache. If there is a
   1530  * backing file, COW is done before returning; otherwise, zeroes are written
   1531  * to the allocated cluster. Both COW and zero writing skip the byte range
   1532  * [@skip_start_bytes, @skip_end_bytes) passed in by the caller, because the
   1533  * caller has new data to write there.
   1534  *
   1535  * Returns: VMDK_OK if cluster exists and mapped in the image.
   1536  *          VMDK_UNALLOC if cluster is not mapped and @allocate is false.
   1537  *          VMDK_ERROR if failed.
   1538  */
   1539 static int coroutine_fn get_cluster_offset(BlockDriverState *bs,
   1540                                            VmdkExtent *extent,
   1541                                            VmdkMetaData *m_data,
   1542                                            uint64_t offset,
   1543                                            bool allocate,
   1544                                            uint64_t *cluster_offset,
   1545                                            uint64_t skip_start_bytes,
   1546                                            uint64_t skip_end_bytes)
   1547 {
   1548     unsigned int l1_index, l2_offset, l2_index;
   1549     int min_index, i, j;
   1550     uint32_t min_count;
   1551     void *l2_table;
   1552     bool zeroed = false;
   1553     int64_t ret;
   1554     int64_t cluster_sector;
   1555     unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
   1556 
   1557     if (m_data) {
   1558         m_data->new_allocation = false;
   1559     }
   1560     if (extent->flat) {
   1561         *cluster_offset = extent->flat_start_offset;
   1562         return VMDK_OK;
   1563     }
   1564 
   1565     offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
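            /*
             * Each L1 (grain directory) entry covers l1_entry_sectors of guest
             * data; e.g. with the defaults used by vmdk_init_extent() below
             * (granularity of 128 sectors, 512 entries per grain table), one
             * grain table maps 512 * 128 sectors = 32 MiB in 64 KiB grains.
             */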
   1566     l1_index = (offset >> 9) / extent->l1_entry_sectors;
   1567     if (l1_index >= extent->l1_size) {
   1568         return VMDK_ERROR;
   1569     }
   1570     if (extent->sesparse) {
   1571         uint64_t l2_offset_u64;
   1572 
   1573         assert(extent->entry_size == sizeof(uint64_t));
   1574 
   1575         l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
   1576         if (l2_offset_u64 == 0) {
   1577             l2_offset = 0;
   1578         } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
   1579             /*
   1580              * The topmost nibble is 0x1 if the grain table is allocated.
   1581              * Strict check: the topmost 4 bytes must be 0x10000000, since the
   1582              * maximum supported disk size is 64 TB, so there are no more than
   1583              * 64 TB / 16 MB grain directory entries, which fits in a uint32,
   1584              * where 16 MB is the only supported default grain table coverage.
   1585              */
   1586             return VMDK_ERROR;
   1587         } else {
   1588             l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
   1589             l2_offset_u64 = extent->sesparse_l2_tables_offset +
   1590                 l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
   1591             if (l2_offset_u64 > 0x00000000ffffffff) {
   1592                 return VMDK_ERROR;
   1593             }
   1594             l2_offset = (unsigned int)(l2_offset_u64);
   1595         }
   1596     } else {
   1597         assert(extent->entry_size == sizeof(uint32_t));
   1598         l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
   1599     }
   1600     if (!l2_offset) {
   1601         return VMDK_UNALLOC;
   1602     }
   1603     for (i = 0; i < L2_CACHE_SIZE; i++) {
   1604         if (l2_offset == extent->l2_cache_offsets[i]) {
   1605             /* increment the hit count */
   1606             if (++extent->l2_cache_counts[i] == 0xffffffff) {
   1607                 for (j = 0; j < L2_CACHE_SIZE; j++) {
   1608                     extent->l2_cache_counts[j] >>= 1;
   1609                 }
   1610             }
   1611             l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
   1612             goto found;
   1613         }
   1614     }
   1615     /* not found: load a new entry into the least-used cache slot */
   1616     min_index = 0;
   1617     min_count = 0xffffffff;
   1618     for (i = 0; i < L2_CACHE_SIZE; i++) {
   1619         if (extent->l2_cache_counts[i] < min_count) {
   1620             min_count = extent->l2_cache_counts[i];
   1621             min_index = i;
   1622         }
   1623     }
   1624     l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
   1625     BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
   1626     if (bdrv_co_pread(extent->file,
   1627                 (int64_t)l2_offset * 512,
   1628                 l2_size_bytes,
   1629                 l2_table, 0
   1630             ) < 0) {
   1631         return VMDK_ERROR;
   1632     }
   1633 
   1634     extent->l2_cache_offsets[min_index] = l2_offset;
   1635     extent->l2_cache_counts[min_index] = 1;
   1636  found:
   1637     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
   1638     if (m_data) {
   1639         m_data->l1_index = l1_index;
   1640         m_data->l2_index = l2_index;
   1641         m_data->l2_offset = l2_offset;
   1642         m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
   1643     }
   1644 
   1645     if (extent->sesparse) {
   1646         cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
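                /*
                 * seSparse grain table entries encode the grain type in the top
                 * nibble: 0x0 unallocated, 0x1 SCSI-unmapped, 0x2 zero, 0x3
                 * allocated (with the grain number in the remaining bits).
                 */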
   1647         switch (cluster_sector & 0xf000000000000000) {
   1648         case 0x0000000000000000:
   1649             /* unallocated grain */
   1650             if (cluster_sector != 0) {
   1651                 return VMDK_ERROR;
   1652             }
   1653             break;
   1654         case 0x1000000000000000:
   1655             /* scsi-unmapped grain - fallthrough */
   1656         case 0x2000000000000000:
   1657             /* zero grain */
   1658             zeroed = true;
   1659             break;
   1660         case 0x3000000000000000:
   1661             /* allocated grain */
   1662             cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
   1663                               ((cluster_sector & 0x0000ffffffffffff) << 12));
   1664             cluster_sector = extent->sesparse_clusters_offset +
   1665                 cluster_sector * extent->cluster_sectors;
   1666             break;
   1667         default:
   1668             return VMDK_ERROR;
   1669         }
   1670     } else {
   1671         cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
   1672 
   1673         if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
   1674             zeroed = true;
   1675         }
   1676     }
   1677 
   1678     if (!cluster_sector || zeroed) {
   1679         if (!allocate) {
   1680             return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
   1681         }
   1682         assert(!extent->sesparse);
   1683 
   1684         if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
   1685             return VMDK_ERROR;
   1686         }
   1687 
   1688         cluster_sector = extent->next_cluster_sector;
   1689         extent->next_cluster_sector += extent->cluster_sectors;
   1690 
   1691         /* First of all we write the grain itself, to avoid a race condition
   1692          * that may corrupt the image.
   1693          * This problem may occur because of insufficient space on the host disk
   1694          * or an inappropriate VM shutdown.
   1695          */
   1696         ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
   1697                                 offset, skip_start_bytes, skip_end_bytes,
   1698                                 zeroed);
   1699         if (ret) {
   1700             return ret;
   1701         }
   1702         if (m_data) {
   1703             m_data->new_allocation = true;
   1704         }
   1705     }
   1706     *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
   1707     return VMDK_OK;
   1708 }
   1709 
   1710 static VmdkExtent *find_extent(BDRVVmdkState *s,
   1711                                 int64_t sector_num, VmdkExtent *start_hint)
   1712 {
   1713     VmdkExtent *extent = start_hint;
   1714 
   1715     if (!extent) {
   1716         extent = &s->extents[0];
   1717     }
   1718     while (extent < &s->extents[s->num_extents]) {
   1719         if (sector_num < extent->end_sector) {
   1720             return extent;
   1721         }
   1722         extent++;
   1723     }
   1724     return NULL;
   1725 }
   1726 
   1727 static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
   1728                                                    int64_t offset)
   1729 {
   1730     uint64_t extent_begin_offset, extent_relative_offset;
   1731     uint64_t cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE;
   1732 
   1733     extent_begin_offset =
   1734         (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE;
   1735     extent_relative_offset = offset - extent_begin_offset;
   1736     return extent_relative_offset % cluster_size;
   1737 }
   1738 
   1739 static int coroutine_fn vmdk_co_block_status(BlockDriverState *bs,
   1740                                              bool want_zero,
   1741                                              int64_t offset, int64_t bytes,
   1742                                              int64_t *pnum, int64_t *map,
   1743                                              BlockDriverState **file)
   1744 {
   1745     BDRVVmdkState *s = bs->opaque;
   1746     int64_t index_in_cluster, n, ret;
   1747     uint64_t cluster_offset;
   1748     VmdkExtent *extent;
   1749 
   1750     extent = find_extent(s, offset >> BDRV_SECTOR_BITS, NULL);
   1751     if (!extent) {
   1752         return -EIO;
   1753     }
   1754     qemu_co_mutex_lock(&s->lock);
   1755     ret = get_cluster_offset(bs, extent, NULL, offset, false, &cluster_offset,
   1756                              0, 0);
   1757     qemu_co_mutex_unlock(&s->lock);
   1758 
   1759     index_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
   1760     switch (ret) {
   1761     case VMDK_ERROR:
   1762         ret = -EIO;
   1763         break;
   1764     case VMDK_UNALLOC:
   1765         ret = 0;
   1766         break;
   1767     case VMDK_ZEROED:
   1768         ret = BDRV_BLOCK_ZERO;
   1769         break;
   1770     case VMDK_OK:
   1771         ret = BDRV_BLOCK_DATA;
   1772         if (!extent->compressed) {
   1773             ret |= BDRV_BLOCK_OFFSET_VALID;
   1774             *map = cluster_offset + index_in_cluster;
   1775             if (extent->flat) {
   1776                 ret |= BDRV_BLOCK_RECURSE;
   1777             }
   1778         }
   1779         *file = extent->file->bs;
   1780         break;
   1781     }
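            /*
             * The status computed above is only valid up to the end of the
             * current grain, so clamp *pnum to the remainder of this cluster.
             */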
   1782 
   1783     n = extent->cluster_sectors * BDRV_SECTOR_SIZE - index_in_cluster;
   1784     *pnum = MIN(n, bytes);
   1785     return ret;
   1786 }
   1787 
   1788 static int coroutine_fn
   1789 vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
   1790                   int64_t offset_in_cluster, QEMUIOVector *qiov,
   1791                   uint64_t qiov_offset, uint64_t n_bytes,
   1792                   uint64_t offset)
   1793 {
   1794     int ret;
   1795     VmdkGrainMarker *data = NULL;
   1796     uLongf buf_len;
   1797     QEMUIOVector local_qiov;
   1798     int64_t write_offset;
   1799     int64_t write_end_sector;
   1800 
   1801     if (extent->compressed) {
   1802         void *compressed_data;
   1803 
   1804         /* Only whole clusters */
   1805         if (offset_in_cluster ||
   1806             n_bytes > (extent->cluster_sectors * SECTOR_SIZE) ||
   1807             (n_bytes < (extent->cluster_sectors * SECTOR_SIZE) &&
   1808              offset + n_bytes != extent->end_sector * SECTOR_SIZE))
   1809         {
   1810             ret = -EINVAL;
   1811             goto out;
   1812         }
   1813 
   1814         if (!extent->has_marker) {
   1815             ret = -EINVAL;
   1816             goto out;
   1817         }
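                /*
                 * streamOptimized grains are stored as a VmdkGrainMarker (guest
                 * LBA + compressed size) immediately followed by the deflate
                 * stream; reserve twice the cluster size for the worst case of
                 * incompressible data.
                 */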
   1818         buf_len = (extent->cluster_sectors << 9) * 2;
   1819         data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
   1820 
   1821         compressed_data = g_malloc(n_bytes);
   1822         qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
   1823         ret = compress(data->data, &buf_len, compressed_data, n_bytes);
   1824         g_free(compressed_data);
   1825 
   1826         if (ret != Z_OK || buf_len == 0) {
   1827             ret = -EINVAL;
   1828             goto out;
   1829         }
   1830 
   1831         data->lba = cpu_to_le64(offset >> BDRV_SECTOR_BITS);
   1832         data->size = cpu_to_le32(buf_len);
   1833 
   1834         n_bytes = buf_len + sizeof(VmdkGrainMarker);
   1835         qemu_iovec_init_buf(&local_qiov, data, n_bytes);
   1836 
   1837         BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
   1838     } else {
   1839         qemu_iovec_init(&local_qiov, qiov->niov);
   1840         qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
   1841 
   1842         BLKDBG_EVENT(extent->file, BLKDBG_WRITE_AIO);
   1843     }
   1844 
   1845     write_offset = cluster_offset + offset_in_cluster;
   1846     ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
   1847                           &local_qiov, 0);
   1848 
   1849     write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
   1850 
   1851     if (extent->compressed) {
   1852         extent->next_cluster_sector = write_end_sector;
   1853     } else {
   1854         extent->next_cluster_sector = MAX(extent->next_cluster_sector,
   1855                                           write_end_sector);
   1856     }
   1857 
   1858     if (ret < 0) {
   1859         goto out;
   1860     }
   1861     ret = 0;
   1862  out:
   1863     g_free(data);
   1864     if (!extent->compressed) {
   1865         qemu_iovec_destroy(&local_qiov);
   1866     }
   1867     return ret;
   1868 }
   1869 
   1870 static int coroutine_fn
   1871 vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
   1872                  int64_t offset_in_cluster, QEMUIOVector *qiov,
   1873                  int bytes)
   1874 {
   1875     int ret;
   1876     int cluster_bytes, buf_bytes;
   1877     uint8_t *cluster_buf, *compressed_data;
   1878     uint8_t *uncomp_buf;
   1879     uint32_t data_len;
   1880     VmdkGrainMarker *marker;
   1881     uLongf buf_len;
   1882 
   1883 
   1884     if (!extent->compressed) {
   1885         BLKDBG_EVENT(extent->file, BLKDBG_READ_AIO);
   1886         ret = bdrv_co_preadv(extent->file,
   1887                              cluster_offset + offset_in_cluster, bytes,
   1888                              qiov, 0);
   1889         if (ret < 0) {
   1890             return ret;
   1891         }
   1892         return 0;
   1893     }
   1894     cluster_bytes = extent->cluster_sectors * 512;
   1895     /* Read two clusters in case GrainMarker + compressed data > one cluster */
   1896     buf_bytes = cluster_bytes * 2;
   1897     cluster_buf = g_malloc(buf_bytes);
   1898     uncomp_buf = g_malloc(cluster_bytes);
   1899     BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
   1900     ret = bdrv_co_pread(extent->file, cluster_offset, buf_bytes, cluster_buf,
   1901                         0);
   1902     if (ret < 0) {
   1903         goto out;
   1904     }
   1905     compressed_data = cluster_buf;
   1906     buf_len = cluster_bytes;
   1907     data_len = cluster_bytes;
   1908     if (extent->has_marker) {
   1909         marker = (VmdkGrainMarker *)cluster_buf;
   1910         compressed_data = marker->data;
   1911         data_len = le32_to_cpu(marker->size);
   1912     }
   1913     if (!data_len || data_len > buf_bytes) {
   1914         ret = -EINVAL;
   1915         goto out;
   1916     }
   1917     ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
   1918     if (ret != Z_OK) {
   1919         ret = -EINVAL;
   1920         goto out;
   1921 
   1922     }
   1923     if (offset_in_cluster < 0 ||
   1924             offset_in_cluster + bytes > buf_len) {
   1925         ret = -EINVAL;
   1926         goto out;
   1927     }
   1928     qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes);
   1929     ret = 0;
   1930 
   1931  out:
   1932     g_free(uncomp_buf);
   1933     g_free(cluster_buf);
   1934     return ret;
   1935 }
   1936 
   1937 static int coroutine_fn
   1938 vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
   1939                QEMUIOVector *qiov, BdrvRequestFlags flags)
   1940 {
   1941     BDRVVmdkState *s = bs->opaque;
   1942     int ret;
   1943     uint64_t n_bytes, offset_in_cluster;
   1944     VmdkExtent *extent = NULL;
   1945     QEMUIOVector local_qiov;
   1946     uint64_t cluster_offset;
   1947     uint64_t bytes_done = 0;
   1948 
   1949     qemu_iovec_init(&local_qiov, qiov->niov);
   1950     qemu_co_mutex_lock(&s->lock);
   1951 
   1952     while (bytes > 0) {
   1953         extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
   1954         if (!extent) {
   1955             ret = -EIO;
   1956             goto fail;
   1957         }
   1958         ret = get_cluster_offset(bs, extent, NULL,
   1959                                  offset, false, &cluster_offset, 0, 0);
   1960         offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
   1961 
   1962         n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
   1963                              - offset_in_cluster);
   1964 
   1965         if (ret != VMDK_OK) {
   1966             /* If not allocated, try to read from the parent image, if one exists */
   1967             if (bs->backing && ret != VMDK_ZEROED) {
   1968                 if (!vmdk_is_cid_valid(bs)) {
   1969                     ret = -EINVAL;
   1970                     goto fail;
   1971                 }
   1972 
   1973                 qemu_iovec_reset(&local_qiov);
   1974                 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
   1975 
   1976                 /* qcow2 emits this on bs->file instead of bs->backing */
   1977                 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
   1978                 ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
   1979                                      &local_qiov, 0);
   1980                 if (ret < 0) {
   1981                     goto fail;
   1982                 }
   1983             } else {
   1984                 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
   1985             }
   1986         } else {
   1987             qemu_iovec_reset(&local_qiov);
   1988             qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
   1989 
   1990             ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster,
   1991                                    &local_qiov, n_bytes);
   1992             if (ret) {
   1993                 goto fail;
   1994             }
   1995         }
   1996         bytes -= n_bytes;
   1997         offset += n_bytes;
   1998         bytes_done += n_bytes;
   1999     }
   2000 
   2001     ret = 0;
   2002 fail:
   2003     qemu_co_mutex_unlock(&s->lock);
   2004     qemu_iovec_destroy(&local_qiov);
   2005 
   2006     return ret;
   2007 }
   2008 
   2009 /**
   2010  * vmdk_pwritev:
   2011  * @zeroed:       the data is known to be zero, so @qiov is ignored; use the
   2012  *                zeroed_grain GTE feature if possible, otherwise return -ENOTSUP.
   2013  * @zero_dry_run: used for zeroed == true only; don't update the L2 table, just
   2014  *                check each cluster, so we can find out whether the zero write
   2015  *                is possible without modifying image data.
   2016  *
   2017  * Returns: 0 on success, a negative error code on failure.
   2018  */
   2019 static int coroutine_fn vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
   2020                                      uint64_t bytes, QEMUIOVector *qiov,
   2021                                      bool zeroed, bool zero_dry_run)
   2022 {
   2023     BDRVVmdkState *s = bs->opaque;
   2024     VmdkExtent *extent = NULL;
   2025     int ret;
   2026     int64_t offset_in_cluster, n_bytes;
   2027     uint64_t cluster_offset;
   2028     uint64_t bytes_done = 0;
   2029     VmdkMetaData m_data;
   2030 
   2031     if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
   2032         error_report("Wrong offset: offset=0x%" PRIx64
   2033                      " total_sectors=0x%" PRIx64,
   2034                      offset, bs->total_sectors);
   2035         return -EIO;
   2036     }
   2037 
   2038     while (bytes > 0) {
   2039         extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
   2040         if (!extent) {
   2041             return -EIO;
   2042         }
   2043         if (extent->sesparse) {
   2044             return -ENOTSUP;
   2045         }
   2046         offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
   2047         n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
   2048                              - offset_in_cluster);
   2049 
   2050         ret = get_cluster_offset(bs, extent, &m_data, offset,
   2051                                  !(extent->compressed || zeroed),
   2052                                  &cluster_offset, offset_in_cluster,
   2053                                  offset_in_cluster + n_bytes);
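                /*
                 * streamOptimized (compressed) extents are effectively
                 * append-only: a grain that is already allocated cannot be
                 * rewritten, so only the first write to a cluster can succeed.
                 */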
   2054         if (extent->compressed) {
   2055             if (ret == VMDK_OK) {
   2056                 /* Refuse write to allocated cluster for streamOptimized */
   2057                 error_report("Could not write to allocated cluster"
   2058                               " for streamOptimized");
   2059                 return -EIO;
   2060             } else if (!zeroed) {
   2061                 /* allocate */
   2062                 ret = get_cluster_offset(bs, extent, &m_data, offset,
   2063                                          true, &cluster_offset, 0, 0);
   2064             }
   2065         }
   2066         if (ret == VMDK_ERROR) {
   2067             return -EINVAL;
   2068         }
   2069         if (zeroed) {
   2070             /* Do zeroed write, buf is ignored */
   2071             if (extent->has_zero_grain &&
   2072                     offset_in_cluster == 0 &&
   2073                     n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
   2074                 n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
   2075                 if (!zero_dry_run && ret != VMDK_ZEROED) {
   2076                     /* update L2 tables */
   2077                     if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
   2078                             != VMDK_OK) {
   2079                         return -EIO;
   2080                     }
   2081                 }
   2082             } else {
   2083                 return -ENOTSUP;
   2084             }
   2085         } else {
   2086             ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
   2087                                     qiov, bytes_done, n_bytes, offset);
   2088             if (ret) {
   2089                 return ret;
   2090             }
   2091             if (m_data.new_allocation) {
   2092                 /* update L2 tables */
   2093                 if (vmdk_L2update(extent, &m_data,
   2094                                   cluster_offset >> BDRV_SECTOR_BITS)
   2095                         != VMDK_OK) {
   2096                     return -EIO;
   2097                 }
   2098             }
   2099         }
   2100         bytes -= n_bytes;
   2101         offset += n_bytes;
   2102         bytes_done += n_bytes;
   2103 
   2104         /* Update the CID on the first write after the virtual disk is
   2105          * opened */
   2106         if (!s->cid_updated) {
   2107             ret = vmdk_write_cid(bs, g_random_int());
   2108             if (ret < 0) {
   2109                 return ret;
   2110             }
   2111             s->cid_updated = true;
   2112         }
   2113     }
   2114     return 0;
   2115 }
   2116 
   2117 static int coroutine_fn
   2118 vmdk_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
   2119                 QEMUIOVector *qiov, BdrvRequestFlags flags)
   2120 {
   2121     int ret;
   2122     BDRVVmdkState *s = bs->opaque;
   2123     qemu_co_mutex_lock(&s->lock);
   2124     ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
   2125     qemu_co_mutex_unlock(&s->lock);
   2126     return ret;
   2127 }
   2128 
   2129 static int coroutine_fn
   2130 vmdk_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
   2131                            QEMUIOVector *qiov)
   2132 {
   2133     if (bytes == 0) {
   2134         /* The caller signals EOF with a zero-length write.
   2135          * When we receive it, we align EOF to a sector boundary. */
   2136         BDRVVmdkState *s = bs->opaque;
   2137         int i, ret;
   2138         int64_t length;
   2139 
   2140         for (i = 0; i < s->num_extents; i++) {
   2141             length = bdrv_getlength(s->extents[i].file->bs);
   2142             if (length < 0) {
   2143                 return length;
   2144             }
   2145             length = QEMU_ALIGN_UP(length, BDRV_SECTOR_SIZE);
   2146             ret = bdrv_co_truncate(s->extents[i].file, length, false,
   2147                                    PREALLOC_MODE_OFF, 0, NULL);
   2148             if (ret < 0) {
   2149                 return ret;
   2150             }
   2151         }
   2152         return 0;
   2153     }
   2154     return vmdk_co_pwritev(bs, offset, bytes, qiov, 0);
   2155 }
   2156 
   2157 static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
   2158                                               int64_t offset,
   2159                                               int64_t bytes,
   2160                                               BdrvRequestFlags flags)
   2161 {
   2162     int ret;
   2163     BDRVVmdkState *s = bs->opaque;
   2164 
   2165     qemu_co_mutex_lock(&s->lock);
   2166     /* Writing zeroes can fail if the sectors are not aligned to a cluster;
   2167      * test it with dry_run == true before really updating the image */
   2168     ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
   2169     if (!ret) {
   2170         ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
   2171     }
   2172     qemu_co_mutex_unlock(&s->lock);
   2173     return ret;
   2174 }
   2175 
   2176 static int vmdk_init_extent(BlockBackend *blk,
   2177                             int64_t filesize, bool flat,
   2178                             bool compress, bool zeroed_grain,
   2179                             Error **errp)
   2180 {
   2181     int ret, i;
   2182     VMDK4Header header;
   2183     uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
   2184     uint32_t *gd_buf = NULL;
   2185     int gd_buf_size;
   2186 
   2187     if (flat) {
   2188         ret = blk_truncate(blk, filesize, false, PREALLOC_MODE_OFF, 0, errp);
   2189         goto exit;
   2190     }
   2191     magic = cpu_to_be32(VMDK4_MAGIC);
   2192     memset(&header, 0, sizeof(header));
   2193     if (compress) {
   2194         header.version = 3;
   2195     } else if (zeroed_grain) {
   2196         header.version = 2;
   2197     } else {
   2198         header.version = 1;
   2199     }
   2200     header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
   2201                    | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
   2202                    | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
   2203     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
   2204     header.capacity = filesize / BDRV_SECTOR_SIZE;
   2205     header.granularity = 128;
   2206     header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
   2207 
   2208     grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
   2209     gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
   2210                            BDRV_SECTOR_SIZE);
   2211     gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
   2212     gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
   2213 
   2214     header.desc_offset = 1;
   2215     header.desc_size = 20;
   2216     header.rgd_offset = header.desc_offset + header.desc_size;
   2217     header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
   2218     header.grain_offset =
   2219         ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
   2220                  header.granularity);
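            /*
             * Resulting layout (in sectors): header, embedded descriptor at
             * sector 1 (20 sectors), redundant grain directory plus its grain
             * tables, primary grain directory plus its grain tables, then the
             * grains themselves starting at grain_offset, aligned to the
             * granularity.
             */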
   2221     /* swap endianness for all header fields */
   2222     header.version = cpu_to_le32(header.version);
   2223     header.flags = cpu_to_le32(header.flags);
   2224     header.capacity = cpu_to_le64(header.capacity);
   2225     header.granularity = cpu_to_le64(header.granularity);
   2226     header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
   2227     header.desc_offset = cpu_to_le64(header.desc_offset);
   2228     header.desc_size = cpu_to_le64(header.desc_size);
   2229     header.rgd_offset = cpu_to_le64(header.rgd_offset);
   2230     header.gd_offset = cpu_to_le64(header.gd_offset);
   2231     header.grain_offset = cpu_to_le64(header.grain_offset);
   2232     header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
   2233 
   2234     header.check_bytes[0] = 0xa;
   2235     header.check_bytes[1] = 0x20;
   2236     header.check_bytes[2] = 0xd;
   2237     header.check_bytes[3] = 0xa;
   2238 
   2239     /* write all the data */
   2240     ret = blk_pwrite(blk, 0, sizeof(magic), &magic, 0);
   2241     if (ret < 0) {
   2242         error_setg(errp, QERR_IO_ERROR);
   2243         goto exit;
   2244     }
   2245     ret = blk_pwrite(blk, sizeof(magic), sizeof(header), &header, 0);
   2246     if (ret < 0) {
   2247         error_setg(errp, QERR_IO_ERROR);
   2248         goto exit;
   2249     }
   2250 
   2251     ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9, false,
   2252                        PREALLOC_MODE_OFF, 0, errp);
   2253     if (ret < 0) {
   2254         goto exit;
   2255     }
   2256 
   2257     /* write grain directory */
   2258     gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
   2259     gd_buf = g_malloc0(gd_buf_size);
   2260     for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
   2261          i < gt_count; i++, tmp += gt_size) {
   2262         gd_buf[i] = cpu_to_le32(tmp);
   2263     }
   2264     ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
   2265                      gd_buf_size, gd_buf, 0);
   2266     if (ret < 0) {
   2267         error_setg(errp, QERR_IO_ERROR);
   2268         goto exit;
   2269     }
   2270 
   2271     /* write backup grain directory */
   2272     for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
   2273          i < gt_count; i++, tmp += gt_size) {
   2274         gd_buf[i] = cpu_to_le32(tmp);
   2275     }
   2276     ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
   2277                      gd_buf_size, gd_buf, 0);
   2278     if (ret < 0) {
   2279         error_setg(errp, QERR_IO_ERROR);
   2280     }
   2281 
   2282     ret = 0;
   2283 exit:
   2284     g_free(gd_buf);
   2285     return ret;
   2286 }
   2287 
   2288 static int vmdk_create_extent(const char *filename, int64_t filesize,
   2289                               bool flat, bool compress, bool zeroed_grain,
   2290                               BlockBackend **pbb,
   2291                               QemuOpts *opts, Error **errp)
   2292 {
   2293     int ret;
   2294     BlockBackend *blk = NULL;
   2295 
   2296     ret = bdrv_create_file(filename, opts, errp);
   2297     if (ret < 0) {
   2298         goto exit;
   2299     }
   2300 
   2301     blk = blk_new_open(filename, NULL, NULL,
   2302                        BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
   2303                        errp);
   2304     if (blk == NULL) {
   2305         ret = -EIO;
   2306         goto exit;
   2307     }
   2308 
   2309     blk_set_allow_write_beyond_eof(blk, true);
   2310 
   2311     ret = vmdk_init_extent(blk, filesize, flat, compress, zeroed_grain, errp);
   2312 exit:
   2313     if (blk) {
   2314         if (pbb) {
   2315             *pbb = blk;
   2316         } else {
   2317             blk_unref(blk);
   2318             blk = NULL;
   2319         }
   2320     }
   2321     return ret;
   2322 }
   2323 
   2324 static int filename_decompose(const char *filename, char *path, char *prefix,
   2325                               char *postfix, size_t buf_len, Error **errp)
   2326 {
   2327     const char *p, *q;
   2328 
   2329     if (filename == NULL || !strlen(filename)) {
   2330         error_setg(errp, "No filename provided");
   2331         return VMDK_ERROR;
   2332     }
   2333     p = strrchr(filename, '/');
   2334     if (p == NULL) {
   2335         p = strrchr(filename, '\\');
   2336     }
   2337     if (p == NULL) {
   2338         p = strrchr(filename, ':');
   2339     }
   2340     if (p != NULL) {
   2341         p++;
   2342         if (p - filename >= buf_len) {
   2343             return VMDK_ERROR;
   2344         }
   2345         pstrcpy(path, p - filename + 1, filename);
   2346     } else {
   2347         p = filename;
   2348         path[0] = '\0';
   2349     }
   2350     q = strrchr(p, '.');
   2351     if (q == NULL) {
   2352         pstrcpy(prefix, buf_len, p);
   2353         postfix[0] = '\0';
   2354     } else {
   2355         if (q - p >= buf_len) {
   2356             return VMDK_ERROR;
   2357         }
   2358         pstrcpy(prefix, q - p + 1, p);
   2359         pstrcpy(postfix, buf_len, q);
   2360     }
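            /* e.g. "/vm/disk.vmdk" decomposes into path "/vm/", prefix "disk"
             * and postfix ".vmdk" */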
   2361     return VMDK_OK;
   2362 }
   2363 
   2364 /*
   2365  * idx == 0: get or create the descriptor file (also the image file if in a
   2366  *           non-split format).
   2367  * idx >= 1: get the n-th extent if in a split subformat
   2368  */
   2369 typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size,
   2370                                                int idx,
   2371                                                bool flat,
   2372                                                bool split,
   2373                                                bool compress,
   2374                                                bool zeroed_grain,
   2375                                                void *opaque,
   2376                                                Error **errp);
   2377 
   2378 static void vmdk_desc_add_extent(GString *desc,
   2379                                  const char *extent_line_fmt,
   2380                                  int64_t size, const char *filename)
   2381 {
   2382     char *basename = g_path_get_basename(filename);
   2383 
   2384     g_string_append_printf(desc, extent_line_fmt,
   2385                            DIV_ROUND_UP(size, BDRV_SECTOR_SIZE), basename);
   2386     g_free(basename);
   2387 }
   2388 
   2389 static int coroutine_fn vmdk_co_do_create(int64_t size,
   2390                                           BlockdevVmdkSubformat subformat,
   2391                                           BlockdevVmdkAdapterType adapter_type,
   2392                                           const char *backing_file,
   2393                                           const char *hw_version,
   2394                                           const char *toolsversion,
   2395                                           bool compat6,
   2396                                           bool zeroed_grain,
   2397                                           vmdk_create_extent_fn extent_fn,
   2398                                           void *opaque,
   2399                                           Error **errp)
   2400 {
   2401     int extent_idx;
   2402     BlockBackend *blk = NULL;
   2403     BlockBackend *extent_blk;
   2404     Error *local_err = NULL;
   2405     char *desc = NULL;
   2406     int ret = 0;
   2407     bool flat, split, compress;
   2408     GString *ext_desc_lines;
   2409     const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
   2410     int64_t extent_size;
   2411     int64_t created_size = 0;
   2412     const char *extent_line_fmt;
   2413     char *parent_desc_line = g_malloc0(BUF_SIZE);
   2414     uint32_t parent_cid = 0xffffffff;
   2415     uint32_t number_heads = 16;
   2416     uint32_t desc_offset = 0, desc_len;
   2417     const char desc_template[] =
   2418         "# Disk DescriptorFile\n"
   2419         "version=1\n"
   2420         "CID=%" PRIx32 "\n"
   2421         "parentCID=%" PRIx32 "\n"
   2422         "createType=\"%s\"\n"
   2423         "%s"
   2424         "\n"
   2425         "# Extent description\n"
   2426         "%s"
   2427         "\n"
   2428         "# The Disk Data Base\n"
   2429         "#DDB\n"
   2430         "\n"
   2431         "ddb.virtualHWVersion = \"%s\"\n"
   2432         "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
   2433         "ddb.geometry.heads = \"%" PRIu32 "\"\n"
   2434         "ddb.geometry.sectors = \"63\"\n"
   2435         "ddb.adapterType = \"%s\"\n"
   2436         "ddb.toolsVersion = \"%s\"\n";
   2437 
   2438     ext_desc_lines = g_string_new(NULL);
   2439 
   2440     /* Read out options */
   2441     if (compat6) {
   2442         if (hw_version) {
   2443             error_setg(errp,
   2444                        "compat6 cannot be enabled with hwversion set");
   2445             ret = -EINVAL;
   2446             goto exit;
   2447         }
   2448         hw_version = "6";
   2449     }
   2450     if (!hw_version) {
   2451         hw_version = "4";
   2452     }
   2453     if (!toolsversion) {
   2454         toolsversion = "2147483647";
   2455     }
   2456 
   2457     if (adapter_type != BLOCKDEV_VMDK_ADAPTER_TYPE_IDE) {
   2458         /* that's the number of heads with which VMware operates when
   2459            creating, exporting, etc. vmdk files with a non-IDE adapter type */
   2460         number_heads = 255;
   2461     }
   2462     split = (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT) ||
   2463             (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTSPARSE);
   2464     flat = (subformat == BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICFLAT) ||
   2465            (subformat == BLOCKDEV_VMDK_SUBFORMAT_TWOGBMAXEXTENTFLAT);
   2466     compress = subformat == BLOCKDEV_VMDK_SUBFORMAT_STREAMOPTIMIZED;
   2467 
   2468     if (flat) {
   2469         extent_line_fmt = "RW %" PRId64 " FLAT \"%s\" 0\n";
   2470     } else {
   2471         extent_line_fmt = "RW %" PRId64 " SPARSE \"%s\"\n";
   2472     }
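            /* Produces extent description lines such as
             * 'RW <sectors> SPARSE "<extent file>"' or
             * 'RW <sectors> FLAT "<extent file>" 0' */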
   2473     if (flat && backing_file) {
   2474         error_setg(errp, "Flat image can't have backing file");
   2475         ret = -ENOTSUP;
   2476         goto exit;
   2477     }
   2478     if (flat && zeroed_grain) {
   2479         error_setg(errp, "Flat image can't enable zeroed grain");
   2480         ret = -ENOTSUP;
   2481         goto exit;
   2482     }
   2483 
   2484     /* Create extents */
   2485     if (split) {
   2486         extent_size = split_size;
   2487     } else {
   2488         extent_size = size;
   2489     }
   2490     if (!split && !flat) {
   2491         created_size = extent_size;
   2492     } else {
   2493         created_size = 0;
   2494     }
   2495     /* Get the descriptor file BDS */
   2496     blk = extent_fn(created_size, 0, flat, split, compress, zeroed_grain,
   2497                     opaque, errp);
   2498     if (!blk) {
   2499         ret = -EIO;
   2500         goto exit;
   2501     }
   2502     if (!split && !flat) {
   2503         vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, created_size,
   2504                              blk_bs(blk)->filename);
   2505     }
   2506 
   2507     if (backing_file) {
   2508         BlockBackend *backing;
   2509         char *full_backing =
   2510             bdrv_get_full_backing_filename_from_filename(blk_bs(blk)->filename,
   2511                                                          backing_file,
   2512                                                          &local_err);
   2513         if (local_err) {
   2514             error_propagate(errp, local_err);
   2515             ret = -ENOENT;
   2516             goto exit;
   2517         }
   2518         assert(full_backing);
   2519 
   2520         backing = blk_new_open(full_backing, NULL, NULL,
   2521                                BDRV_O_NO_BACKING, errp);
   2522         g_free(full_backing);
   2523         if (backing == NULL) {
   2524             ret = -EIO;
   2525             goto exit;
   2526         }
   2527         if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) {
   2528             error_setg(errp, "Invalid backing file format: %s. Must be vmdk",
   2529                        blk_bs(backing)->drv->format_name);
   2530             blk_unref(backing);
   2531             ret = -EINVAL;
   2532             goto exit;
   2533         }
   2534         ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid);
   2535         blk_unref(backing);
   2536         if (ret) {
   2537             error_setg(errp, "Failed to read parent CID");
   2538             goto exit;
   2539         }
   2540         snprintf(parent_desc_line, BUF_SIZE,
   2541                 "parentFileNameHint=\"%s\"", backing_file);
   2542     }
   2543     extent_idx = 1;
   2544     while (created_size < size) {
   2545         int64_t cur_size = MIN(size - created_size, extent_size);
   2546         extent_blk = extent_fn(cur_size, extent_idx, flat, split, compress,
   2547                                zeroed_grain, opaque, errp);
   2548         if (!extent_blk) {
   2549             ret = -EINVAL;
   2550             goto exit;
   2551         }
   2552         vmdk_desc_add_extent(ext_desc_lines, extent_line_fmt, cur_size,
   2553                              blk_bs(extent_blk)->filename);
   2554         created_size += cur_size;
   2555         extent_idx++;
   2556         blk_unref(extent_blk);
   2557     }
   2558 
   2559     /* Check whether we got excess extents */
   2560     extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain,
   2561                            opaque, NULL);
   2562     if (extent_blk) {
   2563         blk_unref(extent_blk);
   2564         error_setg(errp, "List of extents contains unused extents");
   2565         ret = -EINVAL;
   2566         goto exit;
   2567     }
   2568 
   2569     /* generate descriptor file */
   2570     desc = g_strdup_printf(desc_template,
   2571                            g_random_int(),
   2572                            parent_cid,
   2573                            BlockdevVmdkSubformat_str(subformat),
   2574                            parent_desc_line,
   2575                            ext_desc_lines->str,
   2576                            hw_version,
   2577                            size /
   2578                                (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
   2579                            number_heads,
   2580                            BlockdevVmdkAdapterType_str(adapter_type),
   2581                            toolsversion);
   2582     desc_len = strlen(desc);
   2583     /* the descriptor offset = 0x200 */
   2584     if (!split && !flat) {
   2585         desc_offset = 0x200;
   2586     }
   2587 
   2588     ret = blk_co_pwrite(blk, desc_offset, desc_len, desc, 0);
   2589     if (ret < 0) {
   2590         error_setg_errno(errp, -ret, "Could not write description");
   2591         goto exit;
   2592     }
   2593     /* bdrv_pwrite writes padding zeros to align to a sector; we don't need
   2594      * that for the description file */
   2595     if (desc_offset == 0) {
   2596         ret = blk_co_truncate(blk, desc_len, false, PREALLOC_MODE_OFF, 0, errp);
   2597         if (ret < 0) {
   2598             goto exit;
   2599         }
   2600     }
   2601     ret = 0;
   2602 exit:
   2603     if (blk) {
   2604         blk_unref(blk);
   2605     }
   2606     g_free(desc);
   2607     g_free(parent_desc_line);
   2608     g_string_free(ext_desc_lines, true);
   2609     return ret;
   2610 }
   2611 
   2612 typedef struct {
   2613     char *path;
   2614     char *prefix;
   2615     char *postfix;
   2616     QemuOpts *opts;
   2617 } VMDKCreateOptsData;
   2618 
   2619 static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx,
   2620                                             bool flat, bool split, bool compress,
   2621                                             bool zeroed_grain, void *opaque,
   2622                                             Error **errp)
   2623 {
   2624     BlockBackend *blk = NULL;
   2625     BlockDriverState *bs = NULL;
   2626     VMDKCreateOptsData *data = opaque;
   2627     char *ext_filename = NULL;
   2628     char *rel_filename = NULL;
   2629 
   2630     /* We're done, don't create excess extents. */
   2631     if (size == -1) {
   2632         assert(errp == NULL);
   2633         return NULL;
   2634     }
   2635 
   2636     if (idx == 0) {
   2637         rel_filename = g_strdup_printf("%s%s", data->prefix, data->postfix);
   2638     } else if (split) {
   2639         rel_filename = g_strdup_printf("%s-%c%03d%s",
   2640                                        data->prefix,
   2641                                        flat ? 'f' : 's', idx, data->postfix);
   2642     } else {
   2643         assert(idx == 1);
   2644         rel_filename = g_strdup_printf("%s-flat%s", data->prefix, data->postfix);
   2645     }
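            /* idx 0 is the descriptor file itself (e.g. "disk.vmdk"); split
             * extents become e.g. "disk-s001.vmdk" or "disk-f001.vmdk"; a
             * single separate flat extent becomes "disk-flat.vmdk". */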
   2646 
   2647     ext_filename = g_strdup_printf("%s%s", data->path, rel_filename);
   2648     g_free(rel_filename);
   2649 
   2650     if (vmdk_create_extent(ext_filename, size,
   2651                            flat, compress, zeroed_grain, &blk, data->opts,
   2652                            errp)) {
   2653         goto exit;
   2654     }
   2655     bdrv_unref(bs);
   2656 exit:
   2657     g_free(ext_filename);
   2658     return blk;
   2659 }
   2660 
   2661 static int coroutine_fn vmdk_co_create_opts(BlockDriver *drv,
   2662                                             const char *filename,
   2663                                             QemuOpts *opts,
   2664                                             Error **errp)
   2665 {
   2666     Error *local_err = NULL;
   2667     char *desc = NULL;
   2668     int64_t total_size = 0;
   2669     char *adapter_type = NULL;
   2670     BlockdevVmdkAdapterType adapter_type_enum;
   2671     char *backing_file = NULL;
   2672     char *hw_version = NULL;
   2673     char *toolsversion = NULL;
   2674     char *fmt = NULL;
   2675     BlockdevVmdkSubformat subformat;
   2676     int ret = 0;
   2677     char *path = g_malloc0(PATH_MAX);
   2678     char *prefix = g_malloc0(PATH_MAX);
   2679     char *postfix = g_malloc0(PATH_MAX);
   2680     char *desc_line = g_malloc0(BUF_SIZE);
   2681     char *ext_filename = g_malloc0(PATH_MAX);
   2682     char *desc_filename = g_malloc0(PATH_MAX);
   2683     char *parent_desc_line = g_malloc0(BUF_SIZE);
   2684     bool zeroed_grain;
   2685     bool compat6;
   2686     VMDKCreateOptsData data;
   2687     char *backing_fmt = NULL;
   2688 
   2689     backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
   2690     if (backing_fmt && strcmp(backing_fmt, "vmdk") != 0) {
   2691         error_setg(errp, "backing_file must be a vmdk image");
   2692         ret = -EINVAL;
   2693         goto exit;
   2694     }
   2695 
   2696     if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) {
   2697         ret = -EINVAL;
   2698         goto exit;
   2699     }
   2700     /* Read out options */
   2701     total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
   2702                           BDRV_SECTOR_SIZE);
   2703     adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
   2704     backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
   2705     hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
   2706     toolsversion = qemu_opt_get_del(opts, BLOCK_OPT_TOOLSVERSION);
   2707     compat6 = qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false);
   2708     if (strcmp(hw_version, "undefined") == 0) {
   2709         g_free(hw_version);
   2710         hw_version = NULL;
   2711     }
   2712     fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
   2713     zeroed_grain = qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false);
   2714 
   2715     if (adapter_type) {
   2716         adapter_type_enum = qapi_enum_parse(&BlockdevVmdkAdapterType_lookup,
   2717                                             adapter_type,
   2718                                             BLOCKDEV_VMDK_ADAPTER_TYPE_IDE,
   2719                                             &local_err);
   2720         if (local_err) {
   2721             error_propagate(errp, local_err);
   2722             ret = -EINVAL;
   2723             goto exit;
   2724         }
   2725     } else {
   2726         adapter_type_enum = BLOCKDEV_VMDK_ADAPTER_TYPE_IDE;
   2727     }
   2728 
   2729     if (!fmt) {
   2730         /* Default format to monolithicSparse */
   2731         subformat = BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE;
   2732     } else {
   2733         subformat = qapi_enum_parse(&BlockdevVmdkSubformat_lookup,
   2734                                     fmt,
   2735                                     BLOCKDEV_VMDK_SUBFORMAT_MONOLITHICSPARSE,
   2736                                     &local_err);
   2737         if (local_err) {
   2738             error_propagate(errp, local_err);
   2739             ret = -EINVAL;
   2740             goto exit;
   2741         }
   2742     }
   2743     data = (VMDKCreateOptsData){
   2744         .prefix = prefix,
   2745         .postfix = postfix,
   2746         .path = path,
   2747         .opts = opts,
   2748     };
   2749     ret = vmdk_co_do_create(total_size, subformat, adapter_type_enum,
   2750                             backing_file, hw_version, toolsversion, compat6,
   2751                             zeroed_grain, vmdk_co_create_opts_cb, &data, errp);
   2752 
   2753 exit:
   2754     g_free(backing_fmt);
   2755     g_free(adapter_type);
   2756     g_free(backing_file);
   2757     g_free(hw_version);
   2758     g_free(toolsversion);
   2759     g_free(fmt);
   2760     g_free(desc);
   2761     g_free(path);
   2762     g_free(prefix);
   2763     g_free(postfix);
   2764     g_free(desc_line);
   2765     g_free(ext_filename);
   2766     g_free(desc_filename);
   2767     g_free(parent_desc_line);
   2768     return ret;
   2769 }
   2770 
   2771 static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
   2772                                        bool flat, bool split, bool compress,
   2773                                        bool zeroed_grain, void *opaque,
   2774                                        Error **errp)
   2775 {
   2776     int ret;
   2777     BlockDriverState *bs;
   2778     BlockBackend *blk;
   2779     BlockdevCreateOptionsVmdk *opts = opaque;
   2780 
   2781     if (idx == 0) {
   2782         bs = bdrv_open_blockdev_ref(opts->file, errp);
   2783     } else {
   2784         int i;
   2785         BlockdevRefList *list = opts->extents;
   2786         for (i = 1; i < idx; i++) {
   2787             if (!list || !list->next) {
   2788                 error_setg(errp, "Extent [%d] not specified", i);
   2789                 return NULL;
   2790             }
   2791             list = list->next;
   2792         }
   2793         if (!list) {
   2794             error_setg(errp, "Extent [%d] not specified", idx - 1);
   2795             return NULL;
   2796         }
   2797         bs = bdrv_open_blockdev_ref(list->value, errp);
   2798     }
   2799     if (!bs) {
   2800         return NULL;
   2801     }
   2802     blk = blk_new_with_bs(bs,
   2803                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
   2804                           BLK_PERM_ALL, errp);
   2805     if (!blk) {
   2806         return NULL;
   2807     }
   2808     blk_set_allow_write_beyond_eof(blk, true);
   2809     bdrv_unref(bs);
   2810 
   2811     if (size != -1) {
   2812         ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp);
   2813         if (ret) {
   2814             blk_unref(blk);
   2815             blk = NULL;
   2816         }
   2817     }
   2818     return blk;
   2819 }
   2820 
   2821 static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options,
   2822                                        Error **errp)
   2823 {
   2824     int ret;
   2825     BlockdevCreateOptionsVmdk *opts;
   2826 
   2827     opts = &create_options->u.vmdk;
   2828 
   2829     /* Validate options */
   2830     if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) {
   2831         error_setg(errp, "Image size must be a multiple of 512 bytes");
   2832         ret = -EINVAL;
   2833         goto out;
   2834     }
   2835 
   2836     ret = vmdk_co_do_create(opts->size,
   2837                             opts->subformat,
   2838                             opts->adapter_type,
   2839                             opts->backing_file,
   2840                             opts->hwversion,
   2841                             opts->toolsversion,
   2842                             false,
   2843                             opts->zeroed_grain,
   2844                             vmdk_co_create_cb,
   2845                             opts, errp);
   2846     return ret;
   2847 
   2848 out:
   2849     return ret;
   2850 }
   2851 
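         /* Free all extents and per-image state, and drop the migration blocker. */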
   2852 static void vmdk_close(BlockDriverState *bs)
   2853 {
   2854     BDRVVmdkState *s = bs->opaque;
   2855 
   2856     vmdk_free_extents(bs);
   2857     g_free(s->create_type);
   2858 
   2859     migrate_del_blocker(s->migration_blocker);
   2860     error_free(s->migration_blocker);
   2861 }
   2862 
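         /*
          * Sum the allocated size of the descriptor file and of every extent
          * that lives in its own file; extents sharing bs->file are skipped so
          * they are not counted twice.
          */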
   2863 static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
   2864 {
   2865     int i;
   2866     int64_t ret = 0;
   2867     int64_t r;
   2868     BDRVVmdkState *s = bs->opaque;
   2869 
   2870     ret = bdrv_get_allocated_file_size(bs->file->bs);
   2871     if (ret < 0) {
   2872         return ret;
   2873     }
   2874     for (i = 0; i < s->num_extents; i++) {
   2875         if (s->extents[i].file == bs->file) {
   2876             continue;
   2877         }
   2878         r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
   2879         if (r < 0) {
   2880             return r;
   2881         }
   2882         ret += r;
   2883     }
   2884     return ret;
   2885 }
   2886 
   2887 static int vmdk_has_zero_init(BlockDriverState *bs)
   2888 {
   2889     int i;
   2890     BDRVVmdkState *s = bs->opaque;
   2891 
    2892     /* If the image has a flat extent whose underlying storage doesn't
    2893      * have zero init, return 0. */
   2894     for (i = 0; i < s->num_extents; i++) {
   2895         if (s->extents[i].flat) {
   2896             if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
   2897                 return 0;
   2898             }
   2899         }
   2900     }
   2901     return 1;
   2902 }
   2903 
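         /* Build the per-extent ImageInfo reported by vmdk_get_specific_info(). */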
   2904 static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
   2905 {
   2906     ImageInfo *info = g_new0(ImageInfo, 1);
   2907 
   2908     bdrv_refresh_filename(extent->file->bs);
   2909     *info = (ImageInfo){
   2910         .filename         = g_strdup(extent->file->bs->filename),
   2911         .format           = g_strdup(extent->type),
   2912         .virtual_size     = extent->sectors * BDRV_SECTOR_SIZE,
   2913         .compressed       = extent->compressed,
   2914         .has_compressed   = extent->compressed,
   2915         .cluster_size     = extent->cluster_sectors * BDRV_SECTOR_SIZE,
   2916         .has_cluster_size = !extent->flat,
   2917     };
   2918 
   2919     return info;
   2920 }
   2921 
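         /*
          * Image consistency check: walk the disk cluster by cluster and verify
          * that every allocated cluster offset lies within its extent file.
          * Repair (fix != 0) is not supported; any failure is counted as a
          * corruption.
          */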
   2922 static int coroutine_fn vmdk_co_check(BlockDriverState *bs,
   2923                                       BdrvCheckResult *result,
   2924                                       BdrvCheckMode fix)
   2925 {
   2926     BDRVVmdkState *s = bs->opaque;
   2927     VmdkExtent *extent = NULL;
   2928     int64_t sector_num = 0;
   2929     int64_t total_sectors = bdrv_nb_sectors(bs);
   2930     int ret;
   2931     uint64_t cluster_offset;
   2932 
   2933     if (fix) {
   2934         return -ENOTSUP;
   2935     }
   2936 
   2937     for (;;) {
   2938         if (sector_num >= total_sectors) {
   2939             return 0;
   2940         }
   2941         extent = find_extent(s, sector_num, extent);
   2942         if (!extent) {
   2943             fprintf(stderr,
   2944                     "ERROR: could not find extent for sector %" PRId64 "\n",
   2945                     sector_num);
   2946             ret = -EINVAL;
   2947             break;
   2948         }
   2949         ret = get_cluster_offset(bs, extent, NULL,
   2950                                  sector_num << BDRV_SECTOR_BITS,
   2951                                  false, &cluster_offset, 0, 0);
   2952         if (ret == VMDK_ERROR) {
   2953             fprintf(stderr,
   2954                     "ERROR: could not get cluster_offset for sector %"
   2955                     PRId64 "\n", sector_num);
   2956             break;
   2957         }
   2958         if (ret == VMDK_OK) {
   2959             int64_t extent_len = bdrv_getlength(extent->file->bs);
   2960             if (extent_len < 0) {
   2961                 fprintf(stderr,
   2962                         "ERROR: could not get extent file length for sector %"
   2963                         PRId64 "\n", sector_num);
   2964                 ret = extent_len;
   2965                 break;
   2966             }
   2967             if (cluster_offset >= extent_len) {
   2968                 fprintf(stderr,
   2969                         "ERROR: cluster offset for sector %"
   2970                         PRId64 " points after EOF\n", sector_num);
   2971                 ret = -EINVAL;
   2972                 break;
   2973             }
   2974         }
   2975         sector_num += extent->cluster_sectors;
   2976     }
   2977 
   2978     result->corruptions++;
   2979     return ret;
   2980 }
   2981 
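         /*
          * Format-specific information for "qemu-img info": create type, CID,
          * parent CID and one ImageInfo entry per extent.
          */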
   2982 static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs,
   2983                                                  Error **errp)
   2984 {
   2985     int i;
   2986     BDRVVmdkState *s = bs->opaque;
   2987     ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
   2988     ImageInfoList **tail;
   2989 
   2990     *spec_info = (ImageInfoSpecific){
   2991         .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
   2992         .u = {
   2993             .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
   2994         },
   2995     };
   2996 
   2997     *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
   2998         .create_type = g_strdup(s->create_type),
   2999         .cid = s->cid,
   3000         .parent_cid = s->parent_cid,
   3001     };
   3002 
   3003     tail = &spec_info->u.vmdk.data->extents;
   3004     for (i = 0; i < s->num_extents; i++) {
   3005         QAPI_LIST_APPEND(tail, vmdk_get_extent_info(&s->extents[i]));
   3006     }
   3007 
   3008     return spec_info;
   3009 }
   3010 
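         /*
          * Extents are considered equivalent for bdrv_get_info() if they agree
          * on flatness, compression and (for sparse extents) cluster size.
          */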
   3011 static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
   3012 {
   3013     return a->flat == b->flat &&
   3014            a->compressed == b->compressed &&
   3015            (a->flat || a->cluster_sectors == b->cluster_sectors);
   3016 }
   3017 
   3018 static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
   3019 {
   3020     int i;
   3021     BDRVVmdkState *s = bs->opaque;
   3022     assert(s->num_extents);
   3023 
    3024     /* See if we have multiple extents with differing types */
   3025     for (i = 1; i < s->num_extents; i++) {
   3026         if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) {
   3027             return -ENOTSUP;
   3028         }
   3029     }
   3030     bdi->needs_compressed_writes = s->extents[0].compressed;
   3031     if (!s->extents[0].flat) {
   3032         bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
   3033     }
   3034     return 0;
   3035 }
   3036 
   3037 static void vmdk_gather_child_options(BlockDriverState *bs, QDict *target,
   3038                                       bool backing_overridden)
   3039 {
    3040     /* Only file and backing children can be explicitly specified (TODO) */
   3041     qdict_put(target, "file",
   3042               qobject_ref(bs->file->bs->full_open_options));
   3043 
   3044     if (backing_overridden) {
   3045         if (bs->backing) {
   3046             qdict_put(target, "backing",
   3047                       qobject_ref(bs->backing->bs->full_open_options));
   3048         } else {
   3049             qdict_put_null(target, "backing");
   3050         }
   3051     }
   3052 }
   3053 
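         /*
          * Options accepted by "qemu-img create -f vmdk -o ...".  An
          * illustrative invocation using a couple of the options below:
          *
          *   qemu-img create -f vmdk \
          *       -o subformat=twoGbMaxExtentSparse,zeroed_grain=on test.vmdk 20G
          */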
   3054 static QemuOptsList vmdk_create_opts = {
   3055     .name = "vmdk-create-opts",
   3056     .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
   3057     .desc = {
   3058         {
   3059             .name = BLOCK_OPT_SIZE,
   3060             .type = QEMU_OPT_SIZE,
   3061             .help = "Virtual disk size"
   3062         },
   3063         {
   3064             .name = BLOCK_OPT_ADAPTER_TYPE,
   3065             .type = QEMU_OPT_STRING,
   3066             .help = "Virtual adapter type, can be one of "
   3067                     "ide (default), lsilogic, buslogic or legacyESX"
   3068         },
   3069         {
   3070             .name = BLOCK_OPT_BACKING_FILE,
   3071             .type = QEMU_OPT_STRING,
   3072             .help = "File name of a base image"
   3073         },
   3074         {
   3075             .name = BLOCK_OPT_BACKING_FMT,
   3076             .type = QEMU_OPT_STRING,
   3077             .help = "Must be 'vmdk' if present",
   3078         },
   3079         {
   3080             .name = BLOCK_OPT_COMPAT6,
   3081             .type = QEMU_OPT_BOOL,
   3082             .help = "VMDK version 6 image",
   3083             .def_value_str = "off"
   3084         },
   3085         {
   3086             .name = BLOCK_OPT_HWVERSION,
   3087             .type = QEMU_OPT_STRING,
   3088             .help = "VMDK hardware version",
   3089             .def_value_str = "undefined"
   3090         },
   3091         {
   3092             .name = BLOCK_OPT_TOOLSVERSION,
   3093             .type = QEMU_OPT_STRING,
   3094             .help = "VMware guest tools version",
   3095         },
   3096         {
   3097             .name = BLOCK_OPT_SUBFMT,
   3098             .type = QEMU_OPT_STRING,
   3099             .help =
   3100                 "VMDK flat extent format, can be one of "
   3101                 "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
   3102         },
   3103         {
   3104             .name = BLOCK_OPT_ZEROED_GRAIN,
   3105             .type = QEMU_OPT_BOOL,
   3106             .help = "Enable efficient zero writes "
   3107                     "using the zeroed-grain GTE feature"
   3108         },
   3109         { /* end of list */ }
   3110     }
   3111 };
   3112 
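         /*
          * Driver table wiring the callbacks above into the generic block
          * layer; vmdk is a format driver that supports backing files.
          */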
   3113 static BlockDriver bdrv_vmdk = {
   3114     .format_name                  = "vmdk",
   3115     .instance_size                = sizeof(BDRVVmdkState),
   3116     .bdrv_probe                   = vmdk_probe,
   3117     .bdrv_open                    = vmdk_open,
   3118     .bdrv_co_check                = vmdk_co_check,
   3119     .bdrv_reopen_prepare          = vmdk_reopen_prepare,
   3120     .bdrv_reopen_commit           = vmdk_reopen_commit,
   3121     .bdrv_reopen_abort            = vmdk_reopen_abort,
   3122     .bdrv_child_perm              = bdrv_default_perms,
   3123     .bdrv_co_preadv               = vmdk_co_preadv,
   3124     .bdrv_co_pwritev              = vmdk_co_pwritev,
   3125     .bdrv_co_pwritev_compressed   = vmdk_co_pwritev_compressed,
   3126     .bdrv_co_pwrite_zeroes        = vmdk_co_pwrite_zeroes,
   3127     .bdrv_close                   = vmdk_close,
   3128     .bdrv_co_create_opts          = vmdk_co_create_opts,
   3129     .bdrv_co_create               = vmdk_co_create,
   3130     .bdrv_co_block_status         = vmdk_co_block_status,
   3131     .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
   3132     .bdrv_has_zero_init           = vmdk_has_zero_init,
   3133     .bdrv_get_specific_info       = vmdk_get_specific_info,
   3134     .bdrv_refresh_limits          = vmdk_refresh_limits,
   3135     .bdrv_get_info                = vmdk_get_info,
   3136     .bdrv_gather_child_options    = vmdk_gather_child_options,
   3137 
   3138     .is_format                    = true,
   3139     .supports_backing             = true,
   3140     .create_opts                  = &vmdk_create_opts,
   3141 };
   3142 
   3143 static void bdrv_vmdk_init(void)
   3144 {
   3145     bdrv_register(&bdrv_vmdk);
   3146 }
   3147 
   3148 block_init(bdrv_vmdk_init);