qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

qcow2-cache.c (12265B)


      1 /*
      2  * L2/refcount table cache for the QCOW2 format
      3  *
      4  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22  * THE SOFTWARE.
     23  */
     24 
     25 #include "qemu/osdep.h"
     26 #include "qemu/memalign.h"
     27 #include "qcow2.h"
     28 #include "trace.h"
     29 
     30 typedef struct Qcow2CachedTable {
     31     int64_t  offset;
     32     uint64_t lru_counter;
     33     int      ref;
     34     bool     dirty;
     35 } Qcow2CachedTable;
     36 
     37 struct Qcow2Cache {
     38     Qcow2CachedTable       *entries;
     39     struct Qcow2Cache      *depends;
     40     int                     size;
     41     int                     table_size;
     42     bool                    depends_on_flush;
     43     void                   *table_array;
     44     uint64_t                lru_counter;
     45     uint64_t                cache_clean_lru_counter;
     46 };
     47 
     48 static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
     49 {
     50     return (uint8_t *) c->table_array + (size_t) table * c->table_size;
     51 }
     52 
     53 static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
     54 {
     55     ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
     56     int idx = table_offset / c->table_size;
     57     assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
     58     return idx;
     59 }
     60 
     61 static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
     62 {
     63     if (c == s->refcount_block_cache) {
     64         return "refcount block";
     65     } else if (c == s->l2_table_cache) {
     66         return "L2 table";
     67     } else {
     68         /* Do not abort, because this is not critical */
     69         return "unknown";
     70     }
     71 }
     72 
     73 static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
     74 {
     75 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
     76 #ifdef CONFIG_LINUX
     77     void *t = qcow2_cache_get_table_addr(c, i);
     78     int align = qemu_real_host_page_size();
     79     size_t mem_size = (size_t) c->table_size * num_tables;
     80     size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
     81     size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
     82     if (mem_size > offset && length > 0) {
     83         madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
     84     }
     85 #endif
     86 }
     87 
     88 static inline bool can_clean_entry(Qcow2Cache *c, int i)
     89 {
     90     Qcow2CachedTable *t = &c->entries[i];
     91     return t->ref == 0 && !t->dirty && t->offset != 0 &&
     92         t->lru_counter <= c->cache_clean_lru_counter;
     93 }
     94 
     95 void qcow2_cache_clean_unused(Qcow2Cache *c)
     96 {
     97     int i = 0;
     98     while (i < c->size) {
     99         int to_clean = 0;
    100 
    101         /* Skip the entries that we don't need to clean */
    102         while (i < c->size && !can_clean_entry(c, i)) {
    103             i++;
    104         }
    105 
    106         /* And count how many we can clean in a row */
    107         while (i < c->size && can_clean_entry(c, i)) {
    108             c->entries[i].offset = 0;
    109             c->entries[i].lru_counter = 0;
    110             i++;
    111             to_clean++;
    112         }
    113 
    114         if (to_clean > 0) {
    115             qcow2_cache_table_release(c, i - to_clean, to_clean);
    116         }
    117     }
    118 
    119     c->cache_clean_lru_counter = c->lru_counter;
    120 }
    121 
    122 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
    123                                unsigned table_size)
    124 {
    125     BDRVQcow2State *s = bs->opaque;
    126     Qcow2Cache *c;
    127 
    128     assert(num_tables > 0);
    129     assert(is_power_of_2(table_size));
    130     assert(table_size >= (1 << MIN_CLUSTER_BITS));
    131     assert(table_size <= s->cluster_size);
    132 
    133     c = g_new0(Qcow2Cache, 1);
    134     c->size = num_tables;
    135     c->table_size = table_size;
    136     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
    137     c->table_array = qemu_try_blockalign(bs->file->bs,
    138                                          (size_t) num_tables * c->table_size);
    139 
    140     if (!c->entries || !c->table_array) {
    141         qemu_vfree(c->table_array);
    142         g_free(c->entries);
    143         g_free(c);
    144         c = NULL;
    145     }
    146 
    147     return c;
    148 }
    149 
    150 int qcow2_cache_destroy(Qcow2Cache *c)
    151 {
    152     int i;
    153 
    154     for (i = 0; i < c->size; i++) {
    155         assert(c->entries[i].ref == 0);
    156     }
    157 
    158     qemu_vfree(c->table_array);
    159     g_free(c->entries);
    160     g_free(c);
    161 
    162     return 0;
    163 }
    164 
    165 static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
    166 {
    167     int ret;
    168 
    169     ret = qcow2_cache_flush(bs, c->depends);
    170     if (ret < 0) {
    171         return ret;
    172     }
    173 
    174     c->depends = NULL;
    175     c->depends_on_flush = false;
    176 
    177     return 0;
    178 }
    179 
    180 static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
    181 {
    182     BDRVQcow2State *s = bs->opaque;
    183     int ret = 0;
    184 
    185     if (!c->entries[i].dirty || !c->entries[i].offset) {
    186         return 0;
    187     }
    188 
    189     trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
    190                                   c == s->l2_table_cache, i);
    191 
    192     if (c->depends) {
    193         ret = qcow2_cache_flush_dependency(bs, c);
    194     } else if (c->depends_on_flush) {
    195         ret = bdrv_flush(bs->file->bs);
    196         if (ret >= 0) {
    197             c->depends_on_flush = false;
    198         }
    199     }
    200 
    201     if (ret < 0) {
    202         return ret;
    203     }
    204 
    205     if (c == s->refcount_block_cache) {
    206         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
    207                 c->entries[i].offset, c->table_size, false);
    208     } else if (c == s->l2_table_cache) {
    209         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
    210                 c->entries[i].offset, c->table_size, false);
    211     } else {
    212         ret = qcow2_pre_write_overlap_check(bs, 0,
    213                 c->entries[i].offset, c->table_size, false);
    214     }
    215 
    216     if (ret < 0) {
    217         return ret;
    218     }
    219 
    220     if (c == s->refcount_block_cache) {
    221         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
    222     } else if (c == s->l2_table_cache) {
    223         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
    224     }
    225 
    226     ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->table_size,
    227                       qcow2_cache_get_table_addr(c, i), 0);
    228     if (ret < 0) {
    229         return ret;
    230     }
    231 
    232     c->entries[i].dirty = false;
    233 
    234     return 0;
    235 }
    236 
    237 int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
    238 {
    239     BDRVQcow2State *s = bs->opaque;
    240     int result = 0;
    241     int ret;
    242     int i;
    243 
    244     trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
    245 
    246     for (i = 0; i < c->size; i++) {
    247         ret = qcow2_cache_entry_flush(bs, c, i);
    248         if (ret < 0 && result != -ENOSPC) {
    249             result = ret;
    250         }
    251     }
    252 
    253     return result;
    254 }
    255 
    256 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
    257 {
    258     int result = qcow2_cache_write(bs, c);
    259 
    260     if (result == 0) {
    261         int ret = bdrv_flush(bs->file->bs);
    262         if (ret < 0) {
    263             result = ret;
    264         }
    265     }
    266 
    267     return result;
    268 }
    269 
    270 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
    271     Qcow2Cache *dependency)
    272 {
    273     int ret;
    274 
    275     if (dependency->depends) {
    276         ret = qcow2_cache_flush_dependency(bs, dependency);
    277         if (ret < 0) {
    278             return ret;
    279         }
    280     }
    281 
    282     if (c->depends && (c->depends != dependency)) {
    283         ret = qcow2_cache_flush_dependency(bs, c);
    284         if (ret < 0) {
    285             return ret;
    286         }
    287     }
    288 
    289     c->depends = dependency;
    290     return 0;
    291 }
    292 
    293 void qcow2_cache_depends_on_flush(Qcow2Cache *c)
    294 {
    295     c->depends_on_flush = true;
    296 }
    297 
    298 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
    299 {
    300     int ret, i;
    301 
    302     ret = qcow2_cache_flush(bs, c);
    303     if (ret < 0) {
    304         return ret;
    305     }
    306 
    307     for (i = 0; i < c->size; i++) {
    308         assert(c->entries[i].ref == 0);
    309         c->entries[i].offset = 0;
    310         c->entries[i].lru_counter = 0;
    311     }
    312 
    313     qcow2_cache_table_release(c, 0, c->size);
    314 
    315     c->lru_counter = 0;
    316 
    317     return 0;
    318 }
    319 
    320 static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
    321     uint64_t offset, void **table, bool read_from_disk)
    322 {
    323     BDRVQcow2State *s = bs->opaque;
    324     int i;
    325     int ret;
    326     int lookup_index;
    327     uint64_t min_lru_counter = UINT64_MAX;
    328     int min_lru_index = -1;
    329 
    330     assert(offset != 0);
    331 
    332     trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
    333                           offset, read_from_disk);
    334 
    335     if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
    336         qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
    337                                 "cache: Offset %#" PRIx64 " is unaligned",
    338                                 qcow2_cache_get_name(s, c), offset);
    339         return -EIO;
    340     }
    341 
    342     /* Check if the table is already cached */
    343     i = lookup_index = (offset / c->table_size * 4) % c->size;
    344     do {
    345         const Qcow2CachedTable *t = &c->entries[i];
    346         if (t->offset == offset) {
    347             goto found;
    348         }
    349         if (t->ref == 0 && t->lru_counter < min_lru_counter) {
    350             min_lru_counter = t->lru_counter;
    351             min_lru_index = i;
    352         }
    353         if (++i == c->size) {
    354             i = 0;
    355         }
    356     } while (i != lookup_index);
    357 
    358     if (min_lru_index == -1) {
    359         /* This can't happen in current synchronous code, but leave the check
    360          * here as a reminder for whoever starts using AIO with the cache */
    361         abort();
    362     }
    363 
    364     /* Cache miss: write a table back and replace it */
    365     i = min_lru_index;
    366     trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
    367                                         c == s->l2_table_cache, i);
    368 
    369     ret = qcow2_cache_entry_flush(bs, c, i);
    370     if (ret < 0) {
    371         return ret;
    372     }
    373 
    374     trace_qcow2_cache_get_read(qemu_coroutine_self(),
    375                                c == s->l2_table_cache, i);
    376     c->entries[i].offset = 0;
    377     if (read_from_disk) {
    378         if (c == s->l2_table_cache) {
    379             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
    380         }
    381 
    382         ret = bdrv_pread(bs->file, offset, c->table_size,
    383                          qcow2_cache_get_table_addr(c, i), 0);
    384         if (ret < 0) {
    385             return ret;
    386         }
    387     }
    388 
    389     c->entries[i].offset = offset;
    390 
    391     /* And return the right table */
    392 found:
    393     c->entries[i].ref++;
    394     *table = qcow2_cache_get_table_addr(c, i);
    395 
    396     trace_qcow2_cache_get_done(qemu_coroutine_self(),
    397                                c == s->l2_table_cache, i);
    398 
    399     return 0;
    400 }
    401 
    402 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    403     void **table)
    404 {
    405     return qcow2_cache_do_get(bs, c, offset, table, true);
    406 }
    407 
    408 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
    409     void **table)
    410 {
    411     return qcow2_cache_do_get(bs, c, offset, table, false);
    412 }
    413 
    414 void qcow2_cache_put(Qcow2Cache *c, void **table)
    415 {
    416     int i = qcow2_cache_get_table_idx(c, *table);
    417 
    418     c->entries[i].ref--;
    419     *table = NULL;
    420 
    421     if (c->entries[i].ref == 0) {
    422         c->entries[i].lru_counter = ++c->lru_counter;
    423     }
    424 
    425     assert(c->entries[i].ref >= 0);
    426 }
    427 
    428 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
    429 {
    430     int i = qcow2_cache_get_table_idx(c, table);
    431     assert(c->entries[i].offset != 0);
    432     c->entries[i].dirty = true;
    433 }
    434 
    435 void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
    436 {
    437     int i;
    438 
    439     for (i = 0; i < c->size; i++) {
    440         if (c->entries[i].offset == offset) {
    441             return qcow2_cache_get_table_addr(c, i);
    442         }
    443     }
    444     return NULL;
    445 }
    446 
    447 void qcow2_cache_discard(Qcow2Cache *c, void *table)
    448 {
    449     int i = qcow2_cache_get_table_idx(c, table);
    450 
    451     assert(c->entries[i].ref == 0);
    452 
    453     c->entries[i].offset = 0;
    454     c->entries[i].lru_counter = 0;
    455     c->entries[i].dirty = false;
    456 
    457     qcow2_cache_table_release(c, i, 1);
    458 }