qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

vhdx-log.c (31006B)


      1 /*
      2  * Block driver for Hyper-V VHDX Images
      3  *
      4  * Copyright (c) 2013 Red Hat, Inc.,
      5  *
      6  * Authors:
      7  *  Jeff Cody <jcody@redhat.com>
      8  *
      9  *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
     10  *  by Microsoft:
     11  *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
     12  *
     13  * This file covers the functionality of the metadata log writing, parsing, and
     14  * replay.
     15  *
     16  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
     17  * See the COPYING.LIB file in the top-level directory.
     18  *
     19  */
     20 
     21 #include "qemu/osdep.h"
     22 #include "qapi/error.h"
     23 #include "block/block_int.h"
     24 #include "qemu/error-report.h"
     25 #include "qemu/bswap.h"
     26 #include "qemu/memalign.h"
     27 #include "vhdx.h"
     28 
     29 
     30 typedef struct VHDXLogSequence { /* a run of valid log entries located by vhdx_log_search() */
     31     bool valid;             /* set once at least one valid entry was found */
     32     uint32_t count;         /* number of valid entries in the run */
     33     VHDXLogEntries log;     /* copy of the log state; read/write bracket the run */
     34     VHDXLogEntryHeader hdr; /* header of the first valid entry in the run */
     35 } VHDXLogSequence;
     36 
     37 typedef struct VHDXLogDescEntries { /* entry header followed by its descriptor array */
     38     VHDXLogEntryHeader hdr;
     39     VHDXLogDescriptor desc[];
     40 } VHDXLogDescEntries;
     41 
     42 static const MSGUID zero_guid = { 0 }; /* all-zero GUID, compared against the header's log_guid */
     43 
     44 /* The log located on the disk is a circular buffer containing
     45  * sectors of 4096 bytes each.
     46  *
     47  * It is assumed for the read/write functions below that the
     48  * circular buffer scheme uses a 'one sector open' convention to indicate
     49  * that the buffer is full.  Given the validation methods used for each
     50  * sector, this method should be compatible with other methods that
     51  * do not waste a sector.
     52  */
     53 
     54 
     55 /* Allow peeking at the hdr entry at the beginning of the current
     56  * read index, without advancing the read index */
     57 static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
     58                              VHDXLogEntryHeader *hdr)
     59 {
     60     int ret = 0;
     61     uint64_t offset;
     62     uint32_t read;
     63 
     64     assert(hdr != NULL);
     65 
     66     /* peek is only supported on sector boundaries */
     67     if (log->read % VHDX_LOG_SECTOR_SIZE) {
     68         ret = -EFAULT;
     69         goto exit;
     70     }
     71 
     72     read = log->read;
     73     /* we are guaranteed that a) log sectors are 4096 bytes,
     74      * and b) the log length is a multiple of 1MB. So, there
     75      * is always a round number of sectors in the buffer */
     76     if ((read + sizeof(VHDXLogEntryHeader)) > log->length) {
     77         read = 0;
     78     }
     79 
     80     if (read == log->write) {
     81         ret = -EINVAL;
     82         goto exit;
     83     }
     84 
     85     offset = log->offset + read;
     86 
     87     ret = bdrv_pread(bs->file, offset, sizeof(VHDXLogEntryHeader), hdr, 0);
     88     if (ret < 0) {
     89         goto exit;
     90     }
     91     vhdx_log_entry_hdr_le_import(hdr);
     92 
     93 exit:
     94     return ret;
     95 }
     96 
     97 /* Index increment for log, based on sector boundaries */
     98 static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
     99 {
    100     idx += VHDX_LOG_SECTOR_SIZE;
    101     /* we are guaranteed that a) log sectors are 4096 bytes,
    102      * and b) the log length is a multiple of 1MB. So, there
    103      * is always a round number of sectors in the buffer */
    104     return idx >= length ? 0 : idx;
    105 }
    106 
    107 
    108 /* Reset the log to empty */
    109 static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
    110 {
    111     MSGUID guid = { 0 };
    112     s->log.read = s->log.write = 0;
    113     /* a log guid of 0 indicates an empty log to any parser of v0
    114      * VHDX logs */
    115     vhdx_update_headers(bs, s, false, &guid);
    116 }
    117 
    118 /* Reads num_sectors from the log (all log sectors are 4096 bytes),
    119  * into buffer 'buffer'.  Upon return, *sectors_read will contain
    120  * the number of sectors successfully read.
    121  *
    122  * It is assumed that 'buffer' is already allocated, and of sufficient
    123  * size (i.e. >= 4096*num_sectors).
    124  *
    125  * If 'peek' is true, then the tail (read) pointer for the circular buffer is
    126  * not modified.
    127  *
    128  * 0 is returned on success, -errno otherwise.  */
    129 static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
    130                                  uint32_t *sectors_read, void *buffer,
    131                                  uint32_t num_sectors, bool peek)
    132 {
    133     int ret = 0;
    134     uint64_t offset;
    135     uint32_t read;
    136 
    137     read = log->read;
    138 
    139     *sectors_read = 0;
    140     while (num_sectors) {
    141         if (read == log->write) {
    142             /* empty */
    143             break;
    144         }
    145         offset = log->offset + read;
    146 
    147         ret = bdrv_pread(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer, 0);
    148         if (ret < 0) {
    149             goto exit;
    150         }
    151         read = vhdx_log_inc_idx(read, log->length);
    152 
    153         *sectors_read = *sectors_read + 1;
    154         num_sectors--;
    155     }
    156 
    157 exit:
    158     if (!peek) {
    159         log->read = read;
    160     }
    161     return ret;
    162 }
    163 
    164 /* Writes num_sectors to the log (all log sectors are 4096 bytes),
    165  * from buffer 'buffer'.  Upon return, *sectors_written will contain
    166  * the number of sectors successfully written.
    167  *
    168  * It is assumed that 'buffer' is at least 4096*num_sectors large.
    169  *
    170  * 0 is returned on success, -errno otherwise */
    171 static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
    172                                   uint32_t *sectors_written, void *buffer,
    173                                   uint32_t num_sectors)
    174 {
    175     int ret = 0;
    176     uint64_t offset;
    177     uint32_t write;
    178     void *buffer_tmp;
    179     BDRVVHDXState *s = bs->opaque;
    180 
    181     ret = vhdx_user_visible_write(bs, s);
    182     if (ret < 0) {
    183         goto exit;
    184     }
    185 
    186     write = log->write;
    187 
    188     buffer_tmp = buffer;
    189     while (num_sectors) {
    190 
    191         offset = log->offset + write;
    192         write = vhdx_log_inc_idx(write, log->length);
    193         if (write == log->read) {
    194             /* full */
    195             break;
    196         }
    197         ret = bdrv_pwrite(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer_tmp,
    198                           0);
    199         if (ret < 0) {
    200             goto exit;
    201         }
    202         buffer_tmp += VHDX_LOG_SECTOR_SIZE;
    203 
    204         log->write = write;
    205         *sectors_written = *sectors_written + 1;
    206         num_sectors--;
    207     }
    208 
    209 exit:
    210     return ret;
    211 }
    212 
    213 
    214 /* Validates a log entry header */
    215 static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
    216                                   BDRVVHDXState *s)
    217 {
    218     int valid = false;
    219 
    220     if (hdr->signature != VHDX_LOG_SIGNATURE) {
    221         goto exit;
    222     }
    223 
    224     /* if the individual entry length is larger than the whole log
    225      * buffer, that is obviously invalid */
    226     if (log->length < hdr->entry_length) {
    227         goto exit;
    228     }
    229 
    230     /* length of entire entry must be in units of 4KB (log sector size) */
    231     if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
    232         goto exit;
    233     }
    234 
    235     /* per spec, sequence # must be > 0 */
    236     if (hdr->sequence_number == 0) {
    237         goto exit;
    238     }
    239 
    240     /* log entries are only valid if they match the file-wide log guid
    241      * found in the active header */
    242     if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
    243         goto exit;
    244     }
    245 
    246     if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) {
    247         goto exit;
    248     }
    249 
    250     valid = true;
    251 
    252 exit:
    253     return valid;
    254 }
    255 
    256 /*
    257  * Given a log header, this will validate that the descriptors and the
    258  * corresponding data sectors (if applicable)
    259  *
    260  * Validation consists of:
    261  *      1. Making sure the sequence numbers matches the entry header
    262  *      2. Verifying a valid signature ('zero' or 'desc' for descriptors)
    263  *      3. File offset field is a multiple of 4KB
    264  *      4. If a data descriptor, the corresponding data sector
    265  *         has its signature ('data') and matching sequence number
    266  *
    267  * @desc: the data buffer containing the descriptor
    268  * @hdr:  the log entry header
    269  *
    270  * Returns true if valid
    271  */
    272 static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
    273                                    VHDXLogEntryHeader *hdr)
    274 {
    275     bool ret = false;
    276 
    277     if (desc->sequence_number != hdr->sequence_number) {
    278         goto exit;
    279     }
    280     if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
    281         goto exit;
    282     }
    283 
    284     if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
    285         if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) {
    286             /* valid */
    287             ret = true;
    288         }
    289     } else if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
    290             /* valid */
    291             ret = true;
    292     }
    293 
    294 exit:
    295     return ret;
    296 }
    297 
    298 
    299 /* Prior to sector data for a log entry, there is the header
    300  * and the descriptors referenced in the header:
    301  *
    302  * [] = 4KB sector
    303  *
    304  * [ hdr, desc ][   desc   ][ ... ][ data ][ ... ]
    305  *
    306  * The first sector in a log entry has a 64 byte header, and
    307  * up to 126 32-byte descriptors.  If more descriptors than
    308  * 126 are required, then subsequent sectors can have up to 128
    309  * descriptors.  Each sector is 4KB.  Data follows the descriptor
    310  * sectors.
    311  *
    312  * This will return the number of sectors needed to encompass
    313  * the passed number of descriptors in desc_cnt.
    314  *
    315  * This will never return 0, even if desc_cnt is 0.
    316  */
    317 static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
    318 {
    319     uint32_t desc_sectors;
    320 
    321     desc_cnt += 2; /* account for header in first sector */
    322     desc_sectors = desc_cnt / 128;
    323     if (desc_cnt % 128) {
    324         desc_sectors++;
    325     }
    326 
    327     return desc_sectors;
    328 }
    329 
    330 
    331 /* Reads the log header, and subsequent descriptors (if any).  This
    332  * will allocate all the space for buffer, which must be NULL when
    333  * passed into this function. Each descriptor will also be validated,
    334  * and error returned if any are invalid. */
    335 static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
    336                               VHDXLogEntries *log, VHDXLogDescEntries **buffer,
    337                               bool convert_endian)
    338 {
    339     int ret = 0;
    340     uint32_t desc_sectors;
    341     uint32_t sectors_read;
    342     VHDXLogEntryHeader hdr;
    343     VHDXLogDescEntries *desc_entries = NULL;
    344     VHDXLogDescriptor desc;
    345     int i;
    346 
    347     assert(*buffer == NULL);
    348 
    349     ret = vhdx_log_peek_hdr(bs, log, &hdr);
    350     if (ret < 0) {
    351         goto exit;
    352     }
    353 
    354     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
    355         ret = -EINVAL;
    356         goto exit;
    357     }
    358 
    359     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
    360     desc_entries = qemu_try_blockalign(bs->file->bs,
    361                                        desc_sectors * VHDX_LOG_SECTOR_SIZE);
    362     if (desc_entries == NULL) {
    363         ret = -ENOMEM;
    364         goto exit;
    365     }
    366 
    367     ret = vhdx_log_read_sectors(bs, log, &sectors_read, desc_entries,
    368                                 desc_sectors, false);
    369     if (ret < 0) {
    370         goto free_and_exit;
    371     }
    372     if (sectors_read != desc_sectors) {
    373         ret = -EINVAL;
    374         goto free_and_exit;
    375     }
    376 
    377     /* put in proper endianness, and validate each desc */
    378     for (i = 0; i < hdr.descriptor_count; i++) {
    379         desc = desc_entries->desc[i];
    380         vhdx_log_desc_le_import(&desc);
    381         if (convert_endian) {
    382             desc_entries->desc[i] = desc;
    383         }
    384         if (vhdx_log_desc_is_valid(&desc, &hdr) == false) {
    385             ret = -EINVAL;
    386             goto free_and_exit;
    387         }
    388     }
    389     if (convert_endian) {
    390         desc_entries->hdr = hdr;
    391     }
    392 
    393     *buffer = desc_entries;
    394     goto exit;
    395 
    396 free_and_exit:
    397     qemu_vfree(desc_entries);
    398 exit:
    399     return ret;
    400 }
    401 
    402 
    403 /* Flushes the descriptor described by desc to the VHDX image file.
    404  * If the descriptor is a data descriptor, than 'data' must be non-NULL,
    405  * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
    406  * written.
    407  *
    408  * Verification is performed to make sure the sequence numbers of a data
    409  * descriptor match the sequence number in the desc.
    410  *
    411  * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
    412  * In this case, it should be noted that zeroes are written to disk, and the
    413  * image file is not extended as a sparse file.  */
    414 static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
    415                                VHDXLogDataSector *data)
    416 {
    417     int ret = 0;
    418     uint64_t seq, file_offset;
    419     uint32_t offset = 0;
    420     void *buffer = NULL;
    421     uint64_t count = 1;
    422     int i;
    423 
    424     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    425 
    426     if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
    427         /* data sector */
    428         if (data == NULL) {
    429             ret = -EFAULT;
    430             goto exit;
    431         }
    432 
    433         /* The sequence number of the data sector must match that
    434          * in the descriptor */
    435         seq = data->sequence_high;
    436         seq <<= 32;
    437         seq |= data->sequence_low & 0xffffffff;
    438 
    439         if (seq != desc->sequence_number) {
    440             ret = -EINVAL;
    441             goto exit;
    442         }
    443 
    444         /* Each data sector is in total 4096 bytes, however the first
    445          * 8 bytes, and last 4 bytes, are located in the descriptor */
    446         memcpy(buffer, &desc->leading_bytes, 8);
    447         offset += 8;
    448 
    449         memcpy(buffer+offset, data->data, 4084);
    450         offset += 4084;
    451 
    452         memcpy(buffer+offset, &desc->trailing_bytes, 4);
    453 
    454     } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
    455         /* write 'count' sectors of sector */
    456         memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
    457         count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
    458     } else {
    459         error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
    460                       desc->signature);
    461         ret = -EINVAL;
    462         goto exit;
    463     }
    464 
    465     file_offset = desc->file_offset;
    466 
    467     /* count is only > 1 if we are writing zeroes */
    468     for (i = 0; i < count; i++) {
    469         ret = bdrv_pwrite_sync(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
    470                                buffer, 0);
    471         if (ret < 0) {
    472             goto exit;
    473         }
    474         file_offset += VHDX_LOG_SECTOR_SIZE;
    475     }
    476 
    477 exit:
    478     qemu_vfree(buffer);
    479     return ret;
    480 }
    481 
    482 /* Flush the entire log (as described by 'logs') to the VHDX image
    483  * file, and then set the log to 'empty' status once complete.
    484  *
    485  * The log entries should be validate prior to flushing */
    486 static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
    487                           VHDXLogSequence *logs)
    488 {
    489     int ret = 0;
    490     int i;
    491     uint32_t cnt, sectors_read;
    492     uint64_t new_file_size;
    493     void *data = NULL;
    494     int64_t file_length;
    495     VHDXLogDescEntries *desc_entries = NULL;
    496     VHDXLogEntryHeader hdr_tmp = { 0 };
    497 
    498     cnt = logs->count;
    499 
    500     data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    501 
    502     ret = vhdx_user_visible_write(bs, s);
    503     if (ret < 0) {
    504         goto exit;
    505     }
    506 
    507     /* each iteration represents one log sequence, which may span multiple
    508      * sectors */
    509     while (cnt--) {
    510         ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
    511         if (ret < 0) {
    512             goto exit;
    513         }
    514         file_length = bdrv_getlength(bs->file->bs);
    515         if (file_length < 0) {
    516             ret = file_length;
    517             goto exit;
    518         }
    519         /* if the log shows a FlushedFileOffset larger than our current file
    520          * size, then that means the file has been truncated / corrupted, and
    521          * we must refused to open it / use it */
    522         if (hdr_tmp.flushed_file_offset > file_length) {
    523             ret = -EINVAL;
    524             goto exit;
    525         }
    526 
    527         ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
    528         if (ret < 0) {
    529             goto exit;
    530         }
    531 
    532         for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
    533             if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
    534                 /* data sector, so read a sector to flush */
    535                 ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
    536                                             data, 1, false);
    537                 if (ret < 0) {
    538                     goto exit;
    539                 }
    540                 if (sectors_read != 1) {
    541                     ret = -EINVAL;
    542                     goto exit;
    543                 }
    544                 vhdx_log_data_le_import(data);
    545             }
    546 
    547             ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
    548             if (ret < 0) {
    549                 goto exit;
    550             }
    551         }
    552         if (file_length < desc_entries->hdr.last_file_offset) {
    553             new_file_size = desc_entries->hdr.last_file_offset;
    554             if (new_file_size % (1 * MiB)) {
    555                 /* round up to nearest 1MB boundary */
    556                 new_file_size = QEMU_ALIGN_UP(new_file_size, MiB);
    557                 if (new_file_size > INT64_MAX) {
    558                     ret = -EINVAL;
    559                     goto exit;
    560                 }
    561                 ret = bdrv_truncate(bs->file, new_file_size, false,
    562                                     PREALLOC_MODE_OFF, 0, NULL);
    563                 if (ret < 0) {
    564                     goto exit;
    565                 }
    566             }
    567         }
    568         qemu_vfree(desc_entries);
    569         desc_entries = NULL;
    570     }
    571 
    572     ret = bdrv_flush(bs);
    573     if (ret < 0) {
    574         goto exit;
    575     }
    576     /* once the log is fully flushed, indicate that we have an empty log
    577      * now.  This also sets the log guid to 0, to indicate an empty log */
    578     vhdx_log_reset(bs, s);
    579 
    580 exit:
    581     qemu_vfree(data);
    582     qemu_vfree(desc_entries);
    583     return ret;
    584 }
    585 
    586 static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
    587                                    VHDXLogEntries *log, uint64_t seq,
    588                                    bool *valid, VHDXLogEntryHeader *entry)
    589 {
    590     int ret = 0;
    591     VHDXLogEntryHeader hdr;
    592     void *buffer = NULL;
    593     uint32_t i, desc_sectors, total_sectors, crc;
    594     uint32_t sectors_read = 0;
    595     VHDXLogDescEntries *desc_buffer = NULL;
    596 
    597     *valid = false;
    598 
    599     ret = vhdx_log_peek_hdr(bs, log, &hdr);
    600     if (ret < 0) {
    601         goto inc_and_exit;
    602     }
    603 
    604     if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
    605         goto inc_and_exit;
    606     }
    607 
    608     if (seq > 0) {
    609         if (hdr.sequence_number != seq + 1) {
    610             goto inc_and_exit;
    611         }
    612     }
    613 
    614     desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
    615 
    616     /* Read all log sectors, and calculate log checksum */
    617 
    618     total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
    619 
    620 
    621     /* read_desc() will increment the read idx */
    622     ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
    623     if (ret < 0) {
    624         goto free_and_exit;
    625     }
    626 
    627     crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
    628                             desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
    629     crc ^= 0xffffffff;
    630 
    631     buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    632     if (total_sectors > desc_sectors) {
    633         for (i = 0; i < total_sectors - desc_sectors; i++) {
    634             sectors_read = 0;
    635             ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer,
    636                                         1, false);
    637             if (ret < 0 || sectors_read != 1) {
    638                 goto free_and_exit;
    639             }
    640             crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
    641             crc ^= 0xffffffff;
    642         }
    643     }
    644     crc ^= 0xffffffff;
    645     if (crc != hdr.checksum) {
    646         goto free_and_exit;
    647     }
    648 
    649     *valid = true;
    650     *entry = hdr;
    651     goto free_and_exit;
    652 
    653 inc_and_exit:
    654     log->read = vhdx_log_inc_idx(log->read, log->length);
    655 
    656 free_and_exit:
    657     qemu_vfree(buffer);
    658     qemu_vfree(desc_buffer);
    659     return ret;
    660 }
    661 
    662 /* Search through the log circular buffer, and find the valid, active
    663  * log sequence, if any exists
    664  * */
    665 static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s,
    666                            VHDXLogSequence *logs)
    667 {
    668     int ret = 0;
    669     uint32_t tail;
    670     bool seq_valid = false;
    671     VHDXLogSequence candidate = { 0 };
    672     VHDXLogEntryHeader hdr = { 0 };
    673     VHDXLogEntries curr_log;
    674 
    675     memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
    676     curr_log.write = curr_log.length;   /* assume log is full */
    677     curr_log.read = 0;
    678 
    679 
    680     /* now we will go through the whole log sector by sector, until
    681      * we find a valid, active log sequence, or reach the end of the
    682      * log buffer */
    683     for (;;) {
    684         uint64_t curr_seq = 0;
    685         VHDXLogSequence current = { 0 };
    686 
    687         tail = curr_log.read;
    688 
    689         ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
    690                                       &seq_valid, &hdr);
    691         if (ret < 0) {
    692             goto exit;
    693         }
    694 
    695         if (seq_valid) {
    696             current.valid     = true;
    697             current.log       = curr_log;
    698             current.log.read  = tail;
    699             current.log.write = curr_log.read;
    700             current.count     = 1;
    701             current.hdr       = hdr;
    702 
    703 
    704             for (;;) {
    705                 ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
    706                                               &seq_valid, &hdr);
    707                 if (ret < 0) {
    708                     goto exit;
    709                 }
    710                 if (seq_valid == false) {
    711                     break;
    712                 }
    713                 current.log.write = curr_log.read;
    714                 current.count++;
    715 
    716                 curr_seq = hdr.sequence_number;
    717             }
    718         }
    719 
    720         if (current.valid) {
    721             if (candidate.valid == false ||
    722                 current.hdr.sequence_number > candidate.hdr.sequence_number) {
    723                 candidate = current;
    724             }
    725         }
    726 
    727         if (curr_log.read < tail) {
    728             break;
    729         }
    730     }
    731 
    732     *logs = candidate;
    733 
    734     if (candidate.valid) {
    735         /* this is the next sequence number, for writes */
    736         s->log.sequence = candidate.hdr.sequence_number + 1;
    737     }
    738 
    739 
    740 exit:
    741     return ret;
    742 }
    743 
    744 /* Parse the replay log.  Per the VHDX spec, if the log is present
    745  * it must be replayed prior to opening the file, even read-only.
    746  *
    747  * If read-only, we must replay the log in RAM (or refuse to open
    748  * a dirty VHDX file read-only) */
    749 int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
    750                    Error **errp)
    751 {
    752     int ret = 0;
    753     VHDXHeader *hdr;
    754     VHDXLogSequence logs = { 0 };
    755 
    756     hdr = s->headers[s->curr_header];
    757 
    758     *flushed = false;
    759 
    760     /* s->log.hdr is freed in vhdx_close() */
    761     if (s->log.hdr == NULL) {
    762         s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
    763     }
    764 
    765     s->log.offset = hdr->log_offset;
    766     s->log.length = hdr->log_length;
    767 
    768     if (s->log.offset < VHDX_LOG_MIN_SIZE ||
    769         s->log.offset % VHDX_LOG_MIN_SIZE) {
    770         ret = -EINVAL;
    771         goto exit;
    772     }
    773 
    774     /* per spec, only log version of 0 is supported */
    775     if (hdr->log_version != 0) {
    776         ret = -EINVAL;
    777         goto exit;
    778     }
    779 
    780     /* If either the log guid, or log length is zero,
    781      * then a replay log is not present */
    782     if (guid_eq(hdr->log_guid, zero_guid)) {
    783         goto exit;
    784     }
    785 
    786     if (hdr->log_length == 0) {
    787         goto exit;
    788     }
    789 
    790     if (hdr->log_length % VHDX_LOG_MIN_SIZE) {
    791         ret = -EINVAL;
    792         goto exit;
    793     }
    794 
    795 
    796     /* The log is present, we need to find if and where there is an active
    797      * sequence of valid entries present in the log.  */
    798 
    799     ret = vhdx_log_search(bs, s, &logs);
    800     if (ret < 0) {
    801         goto exit;
    802     }
    803 
    804     if (logs.valid) {
    805         if (bdrv_is_read_only(bs)) {
    806             bdrv_refresh_filename(bs);
    807             ret = -EPERM;
    808             error_setg(errp,
    809                        "VHDX image file '%s' opened read-only, but "
    810                        "contains a log that needs to be replayed",
    811                        bs->filename);
    812             error_append_hint(errp,  "To replay the log, run:\n"
    813                               "qemu-img check -r all '%s'\n",
    814                               bs->filename);
    815             goto exit;
    816         }
    817         /* now flush the log */
    818         ret = vhdx_log_flush(bs, s, &logs);
    819         if (ret < 0) {
    820             goto exit;
    821         }
    822         *flushed = true;
    823     }
    824 
    825 
    826 exit:
    827     return ret;
    828 }
    829 
    830 
    831 
    832 static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
    833                                       VHDXLogDataSector *sector, void *data,
    834                                       uint64_t seq)
    835 {
    836     /* 8 + 4084 + 4 = 4096, 1 log sector */
    837     memcpy(&desc->leading_bytes, data, 8);
    838     data += 8;
    839     desc->leading_bytes = cpu_to_le64(desc->leading_bytes);
    840     memcpy(sector->data, data, 4084);
    841     data += 4084;
    842     memcpy(&desc->trailing_bytes, data, 4);
    843     desc->trailing_bytes = cpu_to_le32(desc->trailing_bytes);
    844     data += 4;
    845 
    846     sector->sequence_high  = (uint32_t) (seq >> 32);
    847     sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
    848     sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
    849 
    850     vhdx_log_desc_le_export(desc);
    851     vhdx_log_data_le_export(sector);
    852 }
    853 
    854 
    855 static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
    856                           void *data, uint32_t length, uint64_t offset)
    857 {
    858     int ret = 0;
    859     void *buffer = NULL;
    860     void *merged_sector = NULL;
    861     void *data_tmp, *sector_write;
    862     unsigned int i;
    863     int sector_offset;
    864     uint32_t desc_sectors, sectors, total_length;
    865     uint32_t sectors_written = 0;
    866     uint32_t aligned_length;
    867     uint32_t leading_length = 0;
    868     uint32_t trailing_length = 0;
    869     uint32_t partial_sectors = 0;
    870     uint32_t bytes_written = 0;
    871     uint64_t file_offset;
    872     int64_t file_length;
    873     VHDXHeader *header;
    874     VHDXLogEntryHeader new_hdr;
    875     VHDXLogDescriptor *new_desc = NULL;
    876     VHDXLogDataSector *data_sector = NULL;
    877     MSGUID new_guid = { 0 };
    878 
    879     header = s->headers[s->curr_header];
    880 
    881     /* need to have offset read data, and be on 4096 byte boundary */
    882 
    883     if (length > header->log_length) {
    884         /* no log present.  we could create a log here instead of failing */
    885         ret = -EINVAL;
    886         goto exit;
    887     }
    888 
    889     if (guid_eq(header->log_guid, zero_guid)) {
    890         vhdx_guid_generate(&new_guid);
    891         vhdx_update_headers(bs, s, false, &new_guid);
    892     } else {
    893         /* currently, we require that the log be flushed after
    894          * every write. */
    895         ret = -ENOTSUP;
    896         goto exit;
    897     }
    898 
    899     /* 0 is an invalid sequence number, but may also represent the first
    900      * log write (or a wrapped seq) */
    901     if (s->log.sequence == 0) {
    902         s->log.sequence = 1;
    903     }
    904 
    905     sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
    906     file_offset = QEMU_ALIGN_DOWN(offset, VHDX_LOG_SECTOR_SIZE);
    907 
    908     aligned_length = length;
    909 
    910     /* add in the unaligned head and tail bytes */
    911     if (sector_offset) {
    912         leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
    913         leading_length = leading_length > length ? length : leading_length;
    914         aligned_length -= leading_length;
    915         partial_sectors++;
    916     }
    917 
    918     sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
    919     trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
    920     if (trailing_length) {
    921         partial_sectors++;
    922     }
    923 
    924     sectors += partial_sectors;
    925 
    926     file_length = bdrv_getlength(bs->file->bs);
    927     if (file_length < 0) {
    928         ret = file_length;
    929         goto exit;
    930     }
    931 
    932     /* sectors is now how many sectors the data itself takes, not
    933      * including the header and descriptor metadata */
    934 
    935     new_hdr = (VHDXLogEntryHeader) {
    936                 .signature           = VHDX_LOG_SIGNATURE,
    937                 .tail                = s->log.tail,
    938                 .sequence_number     = s->log.sequence,
    939                 .descriptor_count    = sectors,
    940                 .reserved            = 0,
    941                 .flushed_file_offset = file_length,
    942                 .last_file_offset    = file_length,
    943                 .log_guid            = header->log_guid,
    944               };
    945 
    946 
    947     desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
    948 
    949     total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
    950     new_hdr.entry_length = total_length;
    951 
    952     vhdx_log_entry_hdr_le_export(&new_hdr);
    953 
    954     buffer = qemu_blockalign(bs, total_length);
    955     memcpy(buffer, &new_hdr, sizeof(new_hdr));
    956 
    957     new_desc = buffer + sizeof(new_hdr);
    958     data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
    959     data_tmp = data;
    960 
    961     /* All log sectors are 4KB, so for any partial sectors we must
    962      * merge the data with preexisting data from the final file
    963      * destination */
    964     merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
    965 
    966     for (i = 0; i < sectors; i++) {
    967         new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
    968         new_desc->sequence_number = s->log.sequence;
    969         new_desc->file_offset     = file_offset;
    970 
    971         if (i == 0 && leading_length) {
    972             /* partial sector at the front of the buffer */
    973             ret = bdrv_pread(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
    974                              merged_sector, 0);
    975             if (ret < 0) {
    976                 goto exit;
    977             }
    978             memcpy(merged_sector + sector_offset, data_tmp, leading_length);
    979             bytes_written = leading_length;
    980             sector_write = merged_sector;
    981         } else if (i == sectors - 1 && trailing_length) {
    982             /* partial sector at the end of the buffer */
    983             ret = bdrv_pread(bs->file, file_offset,
    984                              VHDX_LOG_SECTOR_SIZE - trailing_length,
    985                              merged_sector + trailing_length, 0);
    986             if (ret < 0) {
    987                 goto exit;
    988             }
    989             memcpy(merged_sector, data_tmp, trailing_length);
    990             bytes_written = trailing_length;
    991             sector_write = merged_sector;
    992         } else {
    993             bytes_written = VHDX_LOG_SECTOR_SIZE;
    994             sector_write = data_tmp;
    995         }
    996 
    997         /* populate the raw sector data into the proper structures,
    998          * as well as update the descriptor, and convert to proper
    999          * endianness */
   1000         vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
   1001                                   s->log.sequence);
   1002 
   1003         data_tmp += bytes_written;
   1004         data_sector++;
   1005         new_desc++;
   1006         file_offset += VHDX_LOG_SECTOR_SIZE;
   1007     }
   1008 
   1009     /* checksum covers entire entry, from the log header through the
   1010      * last data sector */
   1011     vhdx_update_checksum(buffer, total_length,
   1012                          offsetof(VHDXLogEntryHeader, checksum));
   1013 
   1014     /* now write to the log */
   1015     ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
   1016                                  desc_sectors + sectors);
   1017     if (ret < 0) {
   1018         goto exit;
   1019     }
   1020 
   1021     if (sectors_written != desc_sectors + sectors) {
   1022         /* instead of failing, we could flush the log here */
   1023         ret = -EINVAL;
   1024         goto exit;
   1025     }
   1026 
   1027     s->log.sequence++;
   1028     /* write new tail */
   1029     s->log.tail = s->log.write;
   1030 
   1031 exit:
   1032     qemu_vfree(buffer);
   1033     qemu_vfree(merged_sector);
   1034     return ret;
   1035 }
   1036 
   1037 /* Perform a log write, and then immediately flush the entire log */
   1038 int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
   1039                              void *data, uint32_t length, uint64_t offset)
   1040 {
   1041     int ret = 0;
   1042     VHDXLogSequence logs = { .valid = true,
   1043                              .count = 1,
   1044                              .hdr = { 0 } };
   1045 
   1046 
   1047     /* Make sure data written (new and/or changed blocks) is stable
   1048      * on disk, before creating log entry */
   1049     ret = bdrv_flush(bs);
   1050     if (ret < 0) {
   1051         goto exit;
   1052     }
   1053 
   1054     ret = vhdx_log_write(bs, s, data, length, offset);
   1055     if (ret < 0) {
   1056         goto exit;
   1057     }
   1058     logs.log = s->log;
   1059 
   1060     /* Make sure log is stable on disk */
   1061     ret = bdrv_flush(bs);
   1062     if (ret < 0) {
   1063         goto exit;
   1064     }
   1065 
   1066     ret = vhdx_log_flush(bs, s, &logs);
   1067     if (ret < 0) {
   1068         goto exit;
   1069     }
   1070 
   1071     s->log = logs.log;
   1072 
   1073 exit:
   1074     return ret;
   1075 }
   1076