qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

vpc.c (37941B)


      1 /*
      2  * Block driver for Connectix / Microsoft Virtual PC images
      3  *
      4  * Copyright (c) 2005 Alex Beregszaszi
      5  * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a copy
      8  * of this software and associated documentation files (the "Software"), to deal
      9  * in the Software without restriction, including without limitation the rights
     10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11  * copies of the Software, and to permit persons to whom the Software is
     12  * furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23  * THE SOFTWARE.
     24  */
     25 
     26 #include "qemu/osdep.h"
     27 #include "qapi/error.h"
     28 #include "block/block_int.h"
     29 #include "block/qdict.h"
     30 #include "sysemu/block-backend.h"
     31 #include "qemu/module.h"
     32 #include "qemu/option.h"
     33 #include "migration/blocker.h"
     34 #include "qemu/bswap.h"
     35 #include "qemu/uuid.h"
     36 #include "qemu/memalign.h"
     37 #include "qapi/qmp/qdict.h"
     38 #include "qapi/qobject-input-visitor.h"
     39 #include "qapi/qapi-visit-block-core.h"
     40 
     41 /**************************************************************/
     42 
     43 //#define CACHE
     44 
/*
 * Disk type values as stored (big-endian) in the 'type' field of the VHD
 * footer. The numeric values are part of the on-disk format.
 */
enum vhd_type {
    VHD_FIXED           = 2,    /* footer only at the end of the file */
    VHD_DYNAMIC         = 3,    /* sparse image with a Block Allocation Table */
    VHD_DIFFERENCING    = 4,
};
     50 
/*
 * Unix time of Jan 1, 2000 0:00:00 (UTC), i.e. the epoch that VHD footer
 * timestamps ("seconds since Jan 1, 2000") are counted from.
 */
#define VHD_TIMESTAMP_BASE 946684800

/* Upper bounds for cylinders, heads and sectors used for the CHS geometry */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

/* Largest virtual disk size accepted, in 512-byte sectors */
#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
/* Number of sectors addressable with the maximum CHS geometry */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Option name; presumably an image creation option (users not visible here) */
#define VPC_OPT_FORCE_SIZE "force_size"
     62 
/*
 * On-disk VHD footer. The structure is packed and exactly 512 bytes long
 * (enforced by the build-time check below).
 *
 * All multi-byte fields are always big-endian, so they must be converted
 * with the be*_to_cpu()/cpu_to_be*() helpers before use.
 */
typedef struct vhd_footer {
    char        creator[8]; /* "conectix" */
    uint32_t    features;
    uint32_t    version;

    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
    uint32_t    timestamp;

    /*
     * Application that created the image; vpc_open() uses it to decide
     * whether the disk size comes from the CHS geometry or current_size.
     */
    char        creator_app[4]; /*  e.g., "vpc " */
    uint16_t    major;
    uint16_t    minor;
    char        creator_os[4]; /* "Wi2k" */

    uint64_t    orig_size;
    /* Virtual disk size in bytes */
    uint64_t    current_size;

    /* CHS geometry; cyls * heads * secs_per_cyl gives a sector count */
    uint16_t    cyls;
    uint8_t     heads;
    uint8_t     secs_per_cyl;

    /* One of enum vhd_type */
    uint32_t    type;

    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
       the bytes in the footer without the checksum field") */
    uint32_t    checksum;

    /* UUID used to identify a parent hard disk (backing file) */
    QemuUUID    uuid;

    uint8_t     in_saved_state;
    uint8_t     reserved[427];
} QEMU_PACKED VHDFooter;

/* The footer occupies exactly one 512-byte sector on disk */
QEMU_BUILD_BUG_ON(sizeof(VHDFooter) != 512);
    101 
/*
 * On-disk header of a dynamic (sparse) VHD image. vpc_open() reads it from
 * the file offset given by the footer's data_offset field. The structure is
 * packed and exactly 1024 bytes long (enforced by the build-time check
 * below); multi-byte fields are converted from big-endian when read.
 */
typedef struct vhd_dyndisk_header {
    char        magic[8]; /* "cxsparse" */

    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

    /* Offset of the Block Allocation Table (BAT) */
    uint64_t    table_offset;

    uint32_t    version;
    /* Number of entries in the BAT */
    uint32_t    max_table_entries; /* 32bit/entry */

    /* 2 MB by default, must be a power of two */
    uint32_t    block_size;

    uint32_t    checksum;
    uint8_t     parent_uuid[16];
    uint32_t    parent_timestamp;
    uint32_t    reserved;

    /* Backing file name (in UTF-16) */
    uint8_t     parent_name[512];

    struct {
        uint32_t    platform;
        uint32_t    data_space;
        uint32_t    data_length;
        uint32_t    reserved;
        uint64_t    data_offset;
    } parent_locator[8];
    uint8_t     reserved2[256];
} QEMU_PACKED VHDDynDiskHeader;

/* The dynamic disk header occupies exactly two 512-byte sectors on disk */
QEMU_BUILD_BUG_ON(sizeof(VHDDynDiskHeader) != 1024);
    136 
/* Per-image driver state, stored in bs->opaque */
typedef struct BDRVVPCState {
    /* Serializes request processing for dynamic images */
    CoMutex lock;
    /* Copy of the on-disk footer (fields remain big-endian) */
    VHDFooter footer;
    /*
     * Byte offset in the image file where the next data block will be
     * allocated; the footer is rewritten at this offset when the file grows.
     */
    uint64_t free_data_block_offset;
    /* Number of entries in pagetable */
    int max_table_entries;
    /*
     * In-memory copy of the BAT, converted to host byte order. Each entry
     * is the block's position in 512-byte sectors, or 0xFFFFFFFF if the
     * block is not allocated.
     */
    uint32_t *pagetable;
    /* Byte offset of the BAT in the image file */
    uint64_t bat_offset;
    /* File offset of the block bitmap most recently filled with ones */
    uint64_t last_bitmap_offset;

    /* Size in bytes of one data block (without its bitmap) */
    uint32_t block_size;
    /* Size in bytes of the bitmap that precedes each data block */
    uint32_t bitmap_size;
    /* Size calculation overrides selected through the runtime options */
    bool force_use_chs;
    bool force_use_sz;

#ifdef CACHE
    uint8_t *pageentry_u8;
    uint32_t *pageentry_u32;
    uint16_t *pageentry_u16;

    uint64_t last_bitmap;
#endif

    /* Blocks live migration while the image is open */
    Error *migration_blocker;
} BDRVVPCState;
    161 
/* Name of the runtime option that overrides the disk size calculation */
#define VPC_OPT_SIZE_CALC "force_size_calc"
/*
 * Options accepted when opening an image. vpc_open() absorbs them from the
 * options QDict and vpc_parse_options() interprets them.
 */
static QemuOptsList vpc_runtime_opts = {
    .name = "vpc-runtime-opts",
    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
    .desc = {
        {
            .name = VPC_OPT_SIZE_CALC,
            .type = QEMU_OPT_STRING,
            .help = "Force disk size calculation to use either CHS geometry, "
                    "or use the disk current_size specified in the VHD footer. "
                    "{chs, current_size}"
        },
        { /* end of list */ }
    }
};

/* Forward declaration; the definition is not in the visible part of the file */
static QemuOptsList vpc_create_opts;
    179 
    180 static uint32_t vpc_checksum(void *p, size_t size)
    181 {
    182     uint8_t *buf = p;
    183     uint32_t res = 0;
    184     int i;
    185 
    186     for (i = 0; i < size; i++)
    187         res += buf[i];
    188 
    189     return ~res;
    190 }
    191 
    192 
    193 static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
    194 {
    195     if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
    196         return 100;
    197     return 0;
    198 }
    199 
    200 static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
    201                               Error **errp)
    202 {
    203     BDRVVPCState *s = bs->opaque;
    204     const char *size_calc;
    205 
    206     size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
    207 
    208     if (!size_calc) {
    209        /* no override, use autodetect only */
    210     } else if (!strcmp(size_calc, "current_size")) {
    211         s->force_use_sz = true;
    212     } else if (!strcmp(size_calc, "chs")) {
    213         s->force_use_chs = true;
    214     } else {
    215         error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
    216     }
    217 }
    218 
/*
 * Opens a VHD image.
 *
 * Steps, in order:
 *  - open the "file" child and absorb/parse the runtime options;
 *  - read the footer from offset 0; if it does not carry the "conectix"
 *    signature, read it from the last 512 bytes of the file instead and
 *    require the image to be of type VHD_FIXED;
 *  - verify the footer checksum;
 *  - determine the virtual disk size from the CHS geometry or from the
 *    footer's current_size (see the long comment below);
 *  - for dynamic images, read the dynamic disk header, load the BAT into
 *    s->pagetable and compute s->free_data_block_offset;
 *  - register a migration blocker.
 *
 * Returns 0 on success or a negative errno value on failure.
 */
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
                    Error **errp)
{
    BDRVVPCState *s = bs->opaque;
    int i;
    VHDFooter *footer;
    QemuOpts *opts = NULL;
    Error *local_err = NULL;
    bool use_chs;
    VHDDynDiskHeader dyndisk_header;
    uint32_t checksum;
    uint64_t computed_size;
    uint64_t pagetable_size;
    int disk_type = VHD_DYNAMIC;
    int ret;
    int64_t bs_size;

    ret = bdrv_open_file_child(NULL, options, "file", bs, errp);
    if (ret < 0) {
        return ret;
    }

    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
    if (!qemu_opts_absorb_qdict(opts, options, errp)) {
        ret = -EINVAL;
        goto fail;
    }

    vpc_parse_options(bs, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto fail;
    }

    /* First try the start of the file for the footer */
    ret = bdrv_pread(bs->file, 0, sizeof(s->footer), &s->footer, 0);
    if (ret < 0) {
        error_setg(errp, "Unable to read VHD header");
        goto fail;
    }

    footer = &s->footer;
    if (strncmp(footer->creator, "conectix", 8)) {
        int64_t offset = bdrv_getlength(bs->file->bs);
        if (offset < 0) {
            ret = offset;
            error_setg(errp, "Invalid file size");
            goto fail;
        } else if (offset < sizeof(*footer)) {
            ret = -EINVAL;
            error_setg(errp, "File too small for a VHD header");
            goto fail;
        }

        /* If a fixed disk, the footer is found only at the end of the file */
        ret = bdrv_pread(bs->file, offset - sizeof(*footer), sizeof(*footer),
                         footer, 0);
        if (ret < 0) {
            /* NOTE(review): errp is not set on this failure path */
            goto fail;
        }
        if (strncmp(footer->creator, "conectix", 8) ||
            be32_to_cpu(footer->type) != VHD_FIXED) {
            error_setg(errp, "invalid VPC image");
            ret = -EINVAL;
            goto fail;
        }
        disk_type = VHD_FIXED;
    }

    /* The checksum is computed with the checksum field itself zeroed */
    checksum = be32_to_cpu(footer->checksum);
    footer->checksum = 0;
    if (vpc_checksum(footer, sizeof(*footer)) != checksum) {
        error_setg(errp, "Incorrect header checksum");
        ret = -EINVAL;
        goto fail;
    }

    /* Write 'checksum' back to footer, or else will leave it with zero. */
    footer->checksum = cpu_to_be32(checksum);

    /* The visible size of a image in Virtual PC depends on the geometry
       rather than on the size stored in the footer (the size in the footer
       is too large usually) */
    bs->total_sectors = (int64_t)
        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;

    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
     * VHD image sizes differently.  VPC will rely on CHS geometry,
     * while Hyper-V and disk2vhd use the size specified in the footer.
     *
     * We use a couple of approaches to try and determine the correct method:
     * look at the Creator App field, and look for images that have CHS
     * geometry that is the maximum value.
     *
     * If the CHS geometry is the maximum CHS geometry, then we assume that
     * the size is the footer->current_size to avoid truncation.  Otherwise,
     * we follow the table based on footer->creator_app:
     *
     *  Known creator apps:
     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
     *      'qemu'  :  CHS              QEMU (uses disk geometry)
     *      'qem2'  :  current_size     QEMU (uses current_size)
     *      'win '  :  current_size     Hyper-V
     *      'd2v '  :  current_size     Disk2vhd
     *      'tap\0' :  current_size     XenServer
     *      'CTXS'  :  current_size     XenConverter
     *
     *  The user can override the table values via drive options, however
     *  even with an override we will still use current_size for images
     *  that have CHS geometry of the maximum size.
     */
    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
               !!strncmp(footer->creator_app, "qem2", 4) &&
               !!strncmp(footer->creator_app, "d2v ", 4) &&
               !!strncmp(footer->creator_app, "CTXS", 4) &&
               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;

    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
        bs->total_sectors = be64_to_cpu(footer->current_size) /
                                        BDRV_SECTOR_SIZE;
    }

    /* Allow a maximum disk size of 2040 GiB */
    if (bs->total_sectors > VHD_MAX_SECTORS) {
        /* NOTE(review): errp is not set on this failure path */
        ret = -EFBIG;
        goto fail;
    }

    if (disk_type == VHD_DYNAMIC) {
        ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset),
                         sizeof(dyndisk_header), &dyndisk_header, 0);
        if (ret < 0) {
            error_setg(errp, "Error reading dynamic VHD header");
            goto fail;
        }

        if (strncmp(dyndisk_header.magic, "cxsparse", 8)) {
            error_setg(errp, "Invalid header magic");
            ret = -EINVAL;
            goto fail;
        }

        s->block_size = be32_to_cpu(dyndisk_header.block_size);
        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
            ret = -EINVAL;
            goto fail;
        }
        /* One bit per 512-byte sector, rounded up to a multiple of 512 bytes */
        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;

        s->max_table_entries = be32_to_cpu(dyndisk_header.max_table_entries);

        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
            error_setg(errp, "Too many blocks");
            ret = -EINVAL;
            goto fail;
        }

        /* The BAT must be able to map the whole virtual disk */
        computed_size = (uint64_t) s->max_table_entries * s->block_size;
        if (computed_size < bs->total_sectors * 512) {
            error_setg(errp, "Page table too small");
            ret = -EINVAL;
            goto fail;
        }

        /* Guard the 4-bytes-per-entry table size computed below */
        if (s->max_table_entries > SIZE_MAX / 4 ||
            s->max_table_entries > (int) INT_MAX / 4) {
            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
                        s->max_table_entries);
            ret = -EINVAL;
            goto fail;
        }

        pagetable_size = (uint64_t) s->max_table_entries * 4;

        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
        if (s->pagetable == NULL) {
            error_setg(errp, "Unable to allocate memory for page table");
            ret = -ENOMEM;
            goto fail;
        }

        s->bat_offset = be64_to_cpu(dyndisk_header.table_offset);

        ret = bdrv_pread(bs->file, s->bat_offset, pagetable_size,
                         s->pagetable, 0);
        if (ret < 0) {
            error_setg(errp, "Error reading pagetable");
            goto fail;
        }

        /*
         * Start with the first sector boundary after the BAT, then move
         * past the end of every allocated block found in the table.
         */
        s->free_data_block_offset =
            ROUND_UP(s->bat_offset + pagetable_size, 512);

        for (i = 0; i < s->max_table_entries; i++) {
            /* Convert the entry to host byte order in place */
            be32_to_cpus(&s->pagetable[i]);
            if (s->pagetable[i] != 0xFFFFFFFF) {
                int64_t next = (512 * (int64_t) s->pagetable[i]) +
                    s->bitmap_size + s->block_size;

                if (next > s->free_data_block_offset) {
                    s->free_data_block_offset = next;
                }
            }
        }

        bs_size = bdrv_getlength(bs->file->bs);
        if (bs_size < 0) {
            error_setg_errno(errp, -bs_size, "Unable to learn image size");
            ret = bs_size;
            goto fail;
        }
        if (s->free_data_block_offset > bs_size) {
            error_setg(errp, "block-vpc: free_data_block_offset points after "
                             "the end of file. The image has been truncated.");
            ret = -EINVAL;
            goto fail;
        }

        /* No bitmap has been rewritten yet */
        s->last_bitmap_offset = (int64_t) -1;

#ifdef CACHE
        /*
         * NOTE(review): BDRVVPCState as declared in this file has no
         * 'last_pagetable' member, so this code would not build with CACHE
         * defined.
         */
        s->pageentry_u8 = g_malloc(512);
        s->pageentry_u32 = s->pageentry_u8;
        s->pageentry_u16 = s->pageentry_u8;
        s->last_pagetable = -1;
#endif
    }

    /* Disable migration when VHD images are used */
    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
               "does not support live migration",
               bdrv_get_device_or_node_name(bs));
    ret = migrate_add_blocker(s->migration_blocker, errp);
    if (ret < 0) {
        error_free(s->migration_blocker);
        goto fail;
    }

    qemu_co_mutex_init(&s->lock);
    qemu_opts_del(opts);

    return 0;

fail:
    /* Release whatever was set up before the failure */
    qemu_opts_del(opts);
    qemu_vfree(s->pagetable);
#ifdef CACHE
    g_free(s->pageentry_u8);
#endif
    return ret;
}
    471 
    472 static int vpc_reopen_prepare(BDRVReopenState *state,
    473                               BlockReopenQueue *queue, Error **errp)
    474 {
    475     return 0;
    476 }
    477 
    478 /*
    479  * Returns the absolute byte offset of the given sector in the image file.
    480  * If the sector is not allocated, -1 is returned instead.
    481  * If an error occurred trying to write an updated block bitmap back to
    482  * the file, -2 is returned, and the error value is written to *err.
    483  * This can only happen for a write operation.
    484  *
    485  * The parameter write must be 1 if the offset will be used for a write
    486  * operation (the block bitmaps is updated then), 0 otherwise.
    487  * If write is true then err must not be NULL.
    488  */
    489 static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
    490                                        bool write, int *err)
    491 {
    492     BDRVVPCState *s = bs->opaque;
    493     uint64_t bitmap_offset, block_offset;
    494     uint32_t pagetable_index, offset_in_block;
    495 
    496     assert(!(write && err == NULL));
    497 
    498     pagetable_index = offset / s->block_size;
    499     offset_in_block = offset % s->block_size;
    500 
    501     if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
    502         return -1; /* not allocated */
    503 
    504     bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
    505     block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
    506 
    507     /* We must ensure that we don't write to any sectors which are marked as
    508        unused in the bitmap. We get away with setting all bits in the block
    509        bitmap each time we write to a new block. This might cause Virtual PC to
    510        miss sparse read optimization, but it's not a problem in terms of
    511        correctness. */
    512     if (write && (s->last_bitmap_offset != bitmap_offset)) {
    513         uint8_t bitmap[s->bitmap_size];
    514         int r;
    515 
    516         s->last_bitmap_offset = bitmap_offset;
    517         memset(bitmap, 0xff, s->bitmap_size);
    518         r = bdrv_pwrite_sync(bs->file, bitmap_offset, s->bitmap_size, bitmap,
    519                              0);
    520         if (r < 0) {
    521             *err = r;
    522             return -2;
    523         }
    524     }
    525 
    526     return block_offset;
    527 }
    528 
    529 /*
    530  * Writes the footer to the end of the image file. This is needed when the
    531  * file grows as it overwrites the old footer
    532  *
    533  * Returns 0 on success and < 0 on error
    534  */
    535 static int rewrite_footer(BlockDriverState *bs)
    536 {
    537     int ret;
    538     BDRVVPCState *s = bs->opaque;
    539     int64_t offset = s->free_data_block_offset;
    540 
    541     ret = bdrv_pwrite_sync(bs->file, offset, sizeof(s->footer), &s->footer, 0);
    542     if (ret < 0)
    543         return ret;
    544 
    545     return 0;
    546 }
    547 
    548 /*
    549  * Allocates a new block. This involves writing a new footer and updating
    550  * the Block Allocation Table to use the space at the old end of the image
    551  * file (overwriting the old footer)
    552  *
    553  * Returns the sectors' offset in the image file on success and < 0 on error
    554  */
    555 static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
    556 {
    557     BDRVVPCState *s = bs->opaque;
    558     int64_t bat_offset;
    559     uint32_t index, bat_value;
    560     int ret;
    561     uint8_t bitmap[s->bitmap_size];
    562 
    563     /* Check if sector_num is valid */
    564     if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
    565         return -EINVAL;
    566     }
    567 
    568     /* Write entry into in-memory BAT */
    569     index = offset / s->block_size;
    570     assert(s->pagetable[index] == 0xFFFFFFFF);
    571     s->pagetable[index] = s->free_data_block_offset / 512;
    572 
    573     /* Initialize the block's bitmap */
    574     memset(bitmap, 0xff, s->bitmap_size);
    575     ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset,
    576                            s->bitmap_size, bitmap, 0);
    577     if (ret < 0) {
    578         return ret;
    579     }
    580 
    581     /* Write new footer (the old one will be overwritten) */
    582     s->free_data_block_offset += s->block_size + s->bitmap_size;
    583     ret = rewrite_footer(bs);
    584     if (ret < 0)
    585         goto fail;
    586 
    587     /* Write BAT entry to disk */
    588     bat_offset = s->bat_offset + (4 * index);
    589     bat_value = cpu_to_be32(s->pagetable[index]);
    590     ret = bdrv_pwrite_sync(bs->file, bat_offset, 4, &bat_value, 0);
    591     if (ret < 0)
    592         goto fail;
    593 
    594     return get_image_offset(bs, offset, false, NULL);
    595 
    596 fail:
    597     s->free_data_block_offset -= (s->block_size + s->bitmap_size);
    598     return ret;
    599 }
    600 
    601 static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
    602 {
    603     BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
    604 
    605     if (be32_to_cpu(s->footer.type) != VHD_FIXED) {
    606         bdi->cluster_size = s->block_size;
    607     }
    608 
    609     return 0;
    610 }
    611 
    612 static int coroutine_fn
    613 vpc_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
    614               QEMUIOVector *qiov, BdrvRequestFlags flags)
    615 {
    616     BDRVVPCState *s = bs->opaque;
    617     int ret;
    618     int64_t image_offset;
    619     int64_t n_bytes;
    620     int64_t bytes_done = 0;
    621     QEMUIOVector local_qiov;
    622 
    623     if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
    624         return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
    625     }
    626 
    627     qemu_co_mutex_lock(&s->lock);
    628     qemu_iovec_init(&local_qiov, qiov->niov);
    629 
    630     while (bytes > 0) {
    631         image_offset = get_image_offset(bs, offset, false, NULL);
    632         n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
    633 
    634         if (image_offset == -1) {
    635             qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
    636         } else {
    637             qemu_iovec_reset(&local_qiov);
    638             qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
    639 
    640             qemu_co_mutex_unlock(&s->lock);
    641             ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
    642                                  &local_qiov, 0);
    643             qemu_co_mutex_lock(&s->lock);
    644             if (ret < 0) {
    645                 goto fail;
    646             }
    647         }
    648 
    649         bytes -= n_bytes;
    650         offset += n_bytes;
    651         bytes_done += n_bytes;
    652     }
    653 
    654     ret = 0;
    655 fail:
    656     qemu_iovec_destroy(&local_qiov);
    657     qemu_co_mutex_unlock(&s->lock);
    658 
    659     return ret;
    660 }
    661 
    662 static int coroutine_fn
    663 vpc_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
    664                QEMUIOVector *qiov, BdrvRequestFlags flags)
    665 {
    666     BDRVVPCState *s = bs->opaque;
    667     int64_t image_offset;
    668     int64_t n_bytes;
    669     int64_t bytes_done = 0;
    670     int ret = 0;
    671     QEMUIOVector local_qiov;
    672 
    673     if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
    674         return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
    675     }
    676 
    677     qemu_co_mutex_lock(&s->lock);
    678     qemu_iovec_init(&local_qiov, qiov->niov);
    679 
    680     while (bytes > 0) {
    681         image_offset = get_image_offset(bs, offset, true, &ret);
    682         if (image_offset == -2) {
    683             /* Failed to write block bitmap: can't proceed with write */
    684             goto fail;
    685         }
    686         n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
    687 
    688         if (image_offset == -1) {
    689             image_offset = alloc_block(bs, offset);
    690             if (image_offset < 0) {
    691                 ret = image_offset;
    692                 goto fail;
    693             }
    694         }
    695 
    696         qemu_iovec_reset(&local_qiov);
    697         qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
    698 
    699         qemu_co_mutex_unlock(&s->lock);
    700         ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
    701                               &local_qiov, 0);
    702         qemu_co_mutex_lock(&s->lock);
    703         if (ret < 0) {
    704             goto fail;
    705         }
    706 
    707         bytes -= n_bytes;
    708         offset += n_bytes;
    709         bytes_done += n_bytes;
    710     }
    711 
    712     ret = 0;
    713 fail:
    714     qemu_iovec_destroy(&local_qiov);
    715     qemu_co_mutex_unlock(&s->lock);
    716 
    717     return ret;
    718 }
    719 
    720 static int coroutine_fn vpc_co_block_status(BlockDriverState *bs,
    721                                             bool want_zero,
    722                                             int64_t offset, int64_t bytes,
    723                                             int64_t *pnum, int64_t *map,
    724                                             BlockDriverState **file)
    725 {
    726     BDRVVPCState *s = bs->opaque;
    727     int64_t image_offset;
    728     bool allocated;
    729     int ret;
    730     int64_t n;
    731 
    732     if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
    733         *pnum = bytes;
    734         *map = offset;
    735         *file = bs->file->bs;
    736         return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_RECURSE;
    737     }
    738 
    739     qemu_co_mutex_lock(&s->lock);
    740 
    741     image_offset = get_image_offset(bs, offset, false, NULL);
    742     allocated = (image_offset != -1);
    743     *pnum = 0;
    744     ret = BDRV_BLOCK_ZERO;
    745 
    746     do {
    747         /* All sectors in a block are contiguous (without using the bitmap) */
    748         n = ROUND_UP(offset + 1, s->block_size) - offset;
    749         n = MIN(n, bytes);
    750 
    751         *pnum += n;
    752         offset += n;
    753         bytes -= n;
    754         /* *pnum can't be greater than one block for allocated
    755          * sectors since there is always a bitmap in between. */
    756         if (allocated) {
    757             *file = bs->file->bs;
    758             *map = image_offset;
    759             ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
    760             break;
    761         }
    762         if (bytes == 0) {
    763             break;
    764         }
    765         image_offset = get_image_offset(bs, offset, false, NULL);
    766     } while (image_offset == -1);
    767 
    768     qemu_co_mutex_unlock(&s->lock);
    769     return ret;
    770 }
    771 
    772 /*
    773  * Calculates the number of cylinders, heads and sectors per cylinder
    774  * based on a given number of sectors. This is the algorithm described
    775  * in the VHD specification.
    776  *
    777  * Note that the geometry doesn't always exactly match total_sectors but
    778  * may round it down.
    779  *
    780  * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
    781  * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
    782  * and instead allow up to 255 heads.
    783  */
    784 static int calculate_geometry(int64_t total_sectors, uint16_t *cyls,
    785     uint8_t *heads, uint8_t *secs_per_cyl)
    786 {
    787     uint32_t cyls_times_heads;
    788 
    789     total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
    790 
    791     if (total_sectors >= 65535LL * 16 * 63) {
    792         *secs_per_cyl = 255;
    793         *heads = 16;
    794         cyls_times_heads = total_sectors / *secs_per_cyl;
    795     } else {
    796         *secs_per_cyl = 17;
    797         cyls_times_heads = total_sectors / *secs_per_cyl;
    798         *heads = DIV_ROUND_UP(cyls_times_heads, 1024);
    799 
    800         if (*heads < 4) {
    801             *heads = 4;
    802         }
    803 
    804         if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
    805             *secs_per_cyl = 31;
    806             *heads = 16;
    807             cyls_times_heads = total_sectors / *secs_per_cyl;
    808         }
    809 
    810         if (cyls_times_heads >= (*heads * 1024)) {
    811             *secs_per_cyl = 63;
    812             *heads = 16;
    813             cyls_times_heads = total_sectors / *secs_per_cyl;
    814         }
    815     }
    816 
    817     *cyls = cyls_times_heads / *heads;
    818 
    819     return 0;
    820 }
    821 
    822 static int create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
    823                                int64_t total_sectors)
    824 {
    825     VHDDynDiskHeader dyndisk_header;
    826     uint8_t bat_sector[512];
    827     size_t block_size, num_bat_entries;
    828     int i;
    829     int ret;
    830     int64_t offset = 0;
    831 
    832     /* Write the footer (twice: at the beginning and at the end) */
    833     block_size = 0x200000;
    834     num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512);
    835 
    836     ret = blk_pwrite(blk, offset, sizeof(*footer), footer, 0);
    837     if (ret < 0) {
    838         goto fail;
    839     }
    840 
    841     offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
    842     ret = blk_pwrite(blk, offset, sizeof(*footer), footer, 0);
    843     if (ret < 0) {
    844         goto fail;
    845     }
    846 
    847     /* Write the initial BAT */
    848     offset = 3 * 512;
    849 
    850     memset(bat_sector, 0xFF, 512);
    851     for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) {
    852         ret = blk_pwrite(blk, offset, 512, bat_sector, 0);
    853         if (ret < 0) {
    854             goto fail;
    855         }
    856         offset += 512;
    857     }
    858 
    859     /* Prepare the Dynamic Disk Header */
    860     memset(&dyndisk_header, 0, sizeof(dyndisk_header));
    861 
    862     memcpy(dyndisk_header.magic, "cxsparse", 8);
    863 
    864     /*
    865      * Note: The spec is actually wrong here for data_offset, it says
    866      * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
    867      */
    868     dyndisk_header.data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
    869     dyndisk_header.table_offset = cpu_to_be64(3 * 512);
    870     dyndisk_header.version = cpu_to_be32(0x00010000);
    871     dyndisk_header.block_size = cpu_to_be32(block_size);
    872     dyndisk_header.max_table_entries = cpu_to_be32(num_bat_entries);
    873 
    874     dyndisk_header.checksum = cpu_to_be32(
    875         vpc_checksum(&dyndisk_header, sizeof(dyndisk_header)));
    876 
    877     /* Write the header */
    878     offset = 512;
    879 
    880     ret = blk_pwrite(blk, offset, sizeof(dyndisk_header), &dyndisk_header, 0);
    881     if (ret < 0) {
    882         goto fail;
    883     }
    884 
    885     ret = 0;
    886  fail:
    887     return ret;
    888 }
    889 
    890 static int create_fixed_disk(BlockBackend *blk, VHDFooter *footer,
    891                              int64_t total_size, Error **errp)
    892 {
    893     int ret;
    894 
    895     /* Add footer to total size */
    896     total_size += sizeof(*footer);
    897 
    898     ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
    899     if (ret < 0) {
    900         return ret;
    901     }
    902 
    903     ret = blk_pwrite(blk, total_size - sizeof(*footer), sizeof(*footer),
    904                      footer, 0);
    905     if (ret < 0) {
    906         error_setg_errno(errp, -ret, "Unable to write VHD header");
    907         return ret;
    908     }
    909 
    910     return 0;
    911 }
    912 
    913 static int calculate_rounded_image_size(BlockdevCreateOptionsVpc *vpc_opts,
    914                                         uint16_t *out_cyls,
    915                                         uint8_t *out_heads,
    916                                         uint8_t *out_secs_per_cyl,
    917                                         int64_t *out_total_sectors,
    918                                         Error **errp)
    919 {
    920     int64_t total_size = vpc_opts->size;
    921     uint16_t cyls = 0;
    922     uint8_t heads = 0;
    923     uint8_t secs_per_cyl = 0;
    924     int64_t total_sectors;
    925     int i;
    926 
    927     /*
    928      * Calculate matching total_size and geometry. Increase the number of
    929      * sectors requested until we get enough (or fail). This ensures that
    930      * qemu-img convert doesn't truncate images, but rather rounds up.
    931      *
    932      * If the image size can't be represented by a spec conformant CHS geometry,
    933      * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
    934      * the image size from the VHD footer to calculate total_sectors.
    935      */
    936     if (vpc_opts->force_size) {
    937         /* This will force the use of total_size for sector count, below */
    938         cyls         = VHD_CHS_MAX_C;
    939         heads        = VHD_CHS_MAX_H;
    940         secs_per_cyl = VHD_CHS_MAX_S;
    941     } else {
    942         total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
    943         for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
    944             calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
    945         }
    946     }
    947 
    948     if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
    949         total_sectors = total_size / BDRV_SECTOR_SIZE;
    950         /* Allow a maximum disk size of 2040 GiB */
    951         if (total_sectors > VHD_MAX_SECTORS) {
    952             error_setg(errp, "Disk size is too large, max size is 2040 GiB");
    953             return -EFBIG;
    954         }
    955     } else {
    956         total_sectors = (int64_t) cyls * heads * secs_per_cyl;
    957     }
    958 
    959     *out_total_sectors = total_sectors;
    960     if (out_cyls) {
    961         *out_cyls = cyls;
    962         *out_heads = heads;
    963         *out_secs_per_cyl = secs_per_cyl;
    964     }
    965 
    966     return 0;
    967 }
    968 
    969 static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts,
    970                                       Error **errp)
    971 {
    972     BlockdevCreateOptionsVpc *vpc_opts;
    973     BlockBackend *blk = NULL;
    974     BlockDriverState *bs = NULL;
    975 
    976     VHDFooter footer;
    977     uint16_t cyls = 0;
    978     uint8_t heads = 0;
    979     uint8_t secs_per_cyl = 0;
    980     int64_t total_sectors;
    981     int64_t total_size;
    982     int disk_type;
    983     int ret = -EIO;
    984     QemuUUID uuid;
    985 
    986     assert(opts->driver == BLOCKDEV_DRIVER_VPC);
    987     vpc_opts = &opts->u.vpc;
    988 
    989     /* Validate options and set default values */
    990     total_size = vpc_opts->size;
    991 
    992     if (!vpc_opts->has_subformat) {
    993         vpc_opts->subformat = BLOCKDEV_VPC_SUBFORMAT_DYNAMIC;
    994     }
    995     switch (vpc_opts->subformat) {
    996     case BLOCKDEV_VPC_SUBFORMAT_DYNAMIC:
    997         disk_type = VHD_DYNAMIC;
    998         break;
    999     case BLOCKDEV_VPC_SUBFORMAT_FIXED:
   1000         disk_type = VHD_FIXED;
   1001         break;
   1002     default:
   1003         g_assert_not_reached();
   1004     }
   1005 
   1006     /* Create BlockBackend to write to the image */
   1007     bs = bdrv_open_blockdev_ref(vpc_opts->file, errp);
   1008     if (bs == NULL) {
   1009         return -EIO;
   1010     }
   1011 
   1012     blk = blk_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL,
   1013                           errp);
   1014     if (!blk) {
   1015         ret = -EPERM;
   1016         goto out;
   1017     }
   1018     blk_set_allow_write_beyond_eof(blk, true);
   1019 
   1020     /* Get geometry and check that it matches the image size*/
   1021     ret = calculate_rounded_image_size(vpc_opts, &cyls, &heads, &secs_per_cyl,
   1022                                        &total_sectors, errp);
   1023     if (ret < 0) {
   1024         goto out;
   1025     }
   1026 
   1027     if (total_size != total_sectors * BDRV_SECTOR_SIZE) {
   1028         error_setg(errp, "The requested image size cannot be represented in "
   1029                          "CHS geometry");
   1030         error_append_hint(errp, "Try size=%llu or force-size=on (the "
   1031                                 "latter makes the image incompatible with "
   1032                                 "Virtual PC)",
   1033                           total_sectors * BDRV_SECTOR_SIZE);
   1034         ret = -EINVAL;
   1035         goto out;
   1036     }
   1037 
   1038     /* Prepare the Hard Disk Footer */
   1039     memset(&footer, 0, sizeof(footer));
   1040 
   1041     memcpy(footer.creator, "conectix", 8);
   1042     if (vpc_opts->force_size) {
   1043         memcpy(footer.creator_app, "qem2", 4);
   1044     } else {
   1045         memcpy(footer.creator_app, "qemu", 4);
   1046     }
   1047     memcpy(footer.creator_os, "Wi2k", 4);
   1048 
   1049     footer.features = cpu_to_be32(0x02);
   1050     footer.version = cpu_to_be32(0x00010000);
   1051     if (disk_type == VHD_DYNAMIC) {
   1052         footer.data_offset = cpu_to_be64(sizeof(footer));
   1053     } else {
   1054         footer.data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
   1055     }
   1056     footer.timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
   1057 
   1058     /* Version of Virtual PC 2007 */
   1059     footer.major = cpu_to_be16(0x0005);
   1060     footer.minor = cpu_to_be16(0x0003);
   1061     footer.orig_size = cpu_to_be64(total_size);
   1062     footer.current_size = cpu_to_be64(total_size);
   1063     footer.cyls = cpu_to_be16(cyls);
   1064     footer.heads = heads;
   1065     footer.secs_per_cyl = secs_per_cyl;
   1066 
   1067     footer.type = cpu_to_be32(disk_type);
   1068 
   1069     qemu_uuid_generate(&uuid);
   1070     footer.uuid = uuid;
   1071 
   1072     footer.checksum = cpu_to_be32(vpc_checksum(&footer, sizeof(footer)));
   1073 
   1074     if (disk_type == VHD_DYNAMIC) {
   1075         ret = create_dynamic_disk(blk, &footer, total_sectors);
   1076         if (ret < 0) {
   1077             error_setg(errp, "Unable to create or write VHD header");
   1078         }
   1079     } else {
   1080         ret = create_fixed_disk(blk, &footer, total_size, errp);
   1081     }
   1082 
   1083 out:
   1084     blk_unref(blk);
   1085     bdrv_unref(bs);
   1086     return ret;
   1087 }
   1088 
   1089 static int coroutine_fn vpc_co_create_opts(BlockDriver *drv,
   1090                                            const char *filename,
   1091                                            QemuOpts *opts,
   1092                                            Error **errp)
   1093 {
   1094     BlockdevCreateOptions *create_options = NULL;
   1095     QDict *qdict;
   1096     Visitor *v;
   1097     BlockDriverState *bs = NULL;
   1098     int ret;
   1099 
   1100     static const QDictRenames opt_renames[] = {
   1101         { VPC_OPT_FORCE_SIZE,           "force-size" },
   1102         { NULL, NULL },
   1103     };
   1104 
   1105     /* Parse options and convert legacy syntax */
   1106     qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vpc_create_opts, true);
   1107 
   1108     if (!qdict_rename_keys(qdict, opt_renames, errp)) {
   1109         ret = -EINVAL;
   1110         goto fail;
   1111     }
   1112 
   1113     /* Create and open the file (protocol layer) */
   1114     ret = bdrv_create_file(filename, opts, errp);
   1115     if (ret < 0) {
   1116         goto fail;
   1117     }
   1118 
   1119     bs = bdrv_open(filename, NULL, NULL,
   1120                    BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
   1121     if (bs == NULL) {
   1122         ret = -EIO;
   1123         goto fail;
   1124     }
   1125 
   1126     /* Now get the QAPI type BlockdevCreateOptions */
   1127     qdict_put_str(qdict, "driver", "vpc");
   1128     qdict_put_str(qdict, "file", bs->node_name);
   1129 
   1130     v = qobject_input_visitor_new_flat_confused(qdict, errp);
   1131     if (!v) {
   1132         ret = -EINVAL;
   1133         goto fail;
   1134     }
   1135 
   1136     visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp);
   1137     visit_free(v);
   1138     if (!create_options) {
   1139         ret = -EINVAL;
   1140         goto fail;
   1141     }
   1142 
   1143     /* Silently round up size */
   1144     assert(create_options->driver == BLOCKDEV_DRIVER_VPC);
   1145     create_options->u.vpc.size =
   1146         ROUND_UP(create_options->u.vpc.size, BDRV_SECTOR_SIZE);
   1147 
   1148     if (!create_options->u.vpc.force_size) {
   1149         int64_t total_sectors;
   1150         ret = calculate_rounded_image_size(&create_options->u.vpc, NULL, NULL,
   1151                                            NULL, &total_sectors, errp);
   1152         if (ret < 0) {
   1153             goto fail;
   1154         }
   1155 
   1156         create_options->u.vpc.size = total_sectors * BDRV_SECTOR_SIZE;
   1157     }
   1158 
   1159 
   1160     /* Create the vpc image (format layer) */
   1161     ret = vpc_co_create(create_options, errp);
   1162 
   1163 fail:
   1164     qobject_unref(qdict);
   1165     bdrv_unref(bs);
   1166     qapi_free_BlockdevCreateOptions(create_options);
   1167     return ret;
   1168 }
   1169 
   1170 
   1171 static int vpc_has_zero_init(BlockDriverState *bs)
   1172 {
   1173     BDRVVPCState *s = bs->opaque;
   1174 
   1175     if (be32_to_cpu(s->footer.type) == VHD_FIXED) {
   1176         return bdrv_has_zero_init(bs->file->bs);
   1177     } else {
   1178         return 1;
   1179     }
   1180 }
   1181 
   1182 static void vpc_close(BlockDriverState *bs)
   1183 {
   1184     BDRVVPCState *s = bs->opaque;
   1185     qemu_vfree(s->pagetable);
   1186 #ifdef CACHE
   1187     g_free(s->pageentry_u8);
   1188 #endif
   1189 
   1190     migrate_del_blocker(s->migration_blocker);
   1191     error_free(s->migration_blocker);
   1192 }
   1193 
   1194 static QemuOptsList vpc_create_opts = {
   1195     .name = "vpc-create-opts",
   1196     .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
   1197     .desc = {
   1198         {
   1199             .name = BLOCK_OPT_SIZE,
   1200             .type = QEMU_OPT_SIZE,
   1201             .help = "Virtual disk size"
   1202         },
   1203         {
   1204             .name = BLOCK_OPT_SUBFMT,
   1205             .type = QEMU_OPT_STRING,
   1206             .help =
   1207                 "Type of virtual hard disk format. Supported formats are "
   1208                 "{dynamic (default) | fixed} "
   1209         },
   1210         {
   1211             .name = VPC_OPT_FORCE_SIZE,
   1212             .type = QEMU_OPT_BOOL,
   1213             .help = "Force disk size calculation to use the actual size "
   1214                     "specified, rather than using the nearest CHS-based "
   1215                     "calculation"
   1216         },
   1217         { /* end of list */ }
   1218     }
   1219 };
   1220 
   1221 static const char *const vpc_strong_runtime_opts[] = {
   1222     VPC_OPT_SIZE_CALC,
   1223 
   1224     NULL
   1225 };
   1226 
   1227 static BlockDriver bdrv_vpc = {
   1228     .format_name    = "vpc",
   1229     .instance_size  = sizeof(BDRVVPCState),
   1230 
   1231     .bdrv_probe             = vpc_probe,
   1232     .bdrv_open              = vpc_open,
   1233     .bdrv_close             = vpc_close,
   1234     .bdrv_reopen_prepare    = vpc_reopen_prepare,
   1235     .bdrv_child_perm        = bdrv_default_perms,
   1236     .bdrv_co_create         = vpc_co_create,
   1237     .bdrv_co_create_opts    = vpc_co_create_opts,
   1238 
   1239     .bdrv_co_preadv             = vpc_co_preadv,
   1240     .bdrv_co_pwritev            = vpc_co_pwritev,
   1241     .bdrv_co_block_status       = vpc_co_block_status,
   1242 
   1243     .bdrv_get_info          = vpc_get_info,
   1244 
   1245     .is_format              = true,
   1246     .create_opts            = &vpc_create_opts,
   1247     .bdrv_has_zero_init     = vpc_has_zero_init,
   1248     .strong_runtime_opts    = vpc_strong_runtime_opts,
   1249 };
   1250 
   1251 static void bdrv_vpc_init(void)
   1252 {
   1253     bdrv_register(&bdrv_vpc);
   1254 }
   1255 
   1256 block_init(bdrv_vpc_init);