qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

xen-mapcache.c


/*
 * Copyright (C) 2011       Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"

#include <sys/resource.h>

#include "hw/xen/xen-legacy-backend.h"
#include "qemu/bitmap.h"

#include "sysemu/runstate.h"
#include "sysemu/xen-mapcache.h"
#include "trace.h"


//#define MAPCACHE_DEBUG

#ifdef MAPCACHE_DEBUG
#  define DPRINTF(fmt, ...) do { \
    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
} while (0)
#else
#  define DPRINTF(fmt, ...) do { } while (0)
#endif

#if HOST_LONG_BITS == 32
#  define MCACHE_BUCKET_SHIFT 16
#  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
#else
#  define MCACHE_BUCKET_SHIFT 20
#  define MCACHE_MAX_SIZE     (1UL<<35) /* 32GB Cap */
#endif
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
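
/*
 * With these shifts each cache bucket covers 64 KiB of guest-physical
 * address space on 32-bit hosts and 1 MiB on 64-bit hosts, i.e. 16 or
 * 256 XC_PAGE_SIZE (4 KiB) pages respectively.
 */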

/*
 * This is the size of the virtual address space reserved for QEMU that
 * will not be used by the mapcache.
 * Empirical tests showed that QEMU uses roughly 75MB more than
 * max_mcache_size.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)

typedef struct MapCacheEntry {
    hwaddr paddr_index;
    uint8_t *vaddr_base;
    unsigned long *valid_mapping;
    uint32_t lock;
#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
    uint8_t flags;
    hwaddr size;
    struct MapCacheEntry *next;
} MapCacheEntry;

typedef struct MapCacheRev {
    uint8_t *vaddr_req;
    hwaddr paddr_index;
    hwaddr size;
    QTAILQ_ENTRY(MapCacheRev) next;
    bool dma;
} MapCacheRev;

typedef struct MapCache {
    MapCacheEntry *entry;
    unsigned long nr_buckets;
    QTAILQ_HEAD(, MapCacheRev) locked_entries;

    /* For most cases (>99.9%), the page address is the same. */
    MapCacheEntry *last_entry;
    unsigned long max_mcache_size;
    unsigned int mcache_bucket_shift;

    phys_offset_to_gaddr_t phys_offset_to_gaddr;
    QemuMutex lock;
    void *opaque;
} MapCache;
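
/*
 * The cache is a fixed-size hash table: a guest physical address is
 * hashed as (paddr >> MCACHE_BUCKET_SHIFT) % nr_buckets, with colliding
 * buckets chained through MapCacheEntry::next. locked_entries is a
 * reverse map from the host virtual addresses handed out by
 * xen_map_cache() back to their guest physical ranges; it is used by
 * xen_ram_addr_from_mapcache() and by entry invalidation.
 */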

static MapCache *mapcache;

static inline void mapcache_lock(void)
{
    qemu_mutex_lock(&mapcache->lock);
}

static inline void mapcache_unlock(void)
{
    qemu_mutex_unlock(&mapcache->lock);
}

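/*
 * Returns 1 iff all @size bits starting at bit @nr are set in @addr,
 * i.e. iff every page in the queried range has a valid mapping.
 */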
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
    return res >= nr + size;
}

void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    unsigned long size;
    struct rlimit rlimit_as;

    mapcache = g_new0(MapCache, 1);

    mapcache->phys_offset_to_gaddr = f;
    mapcache->opaque = opaque;
    qemu_mutex_init(&mapcache->lock);

    QTAILQ_INIT(&mapcache->locked_entries);

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        getrlimit(RLIMIT_AS, &rlimit_as);
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            warn_report("QEMU's maximum size of virtual"
                        " memory is not unlimited");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            mapcache->max_mcache_size = rlimit_as.rlim_max -
                NON_MCACHE_MEMORY_SIZE;
        } else {
            mapcache->max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    setrlimit(RLIMIT_AS, &rlimit_as);

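    /*
     * Round max_mcache_size up to a whole number of buckets, computed
     * via page counts: nr_buckets = ceil(pages / pages-per-bucket).
     * E.g. on a 64-bit host, 32 GiB / 1 MiB = 32768 buckets.
     */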
    mapcache->nr_buckets =
        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));

    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__,
            mapcache->nr_buckets, size);
    mapcache->entry = g_malloc0(size);
}

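/*
 * (Re)create the host mapping backing one cache entry. With dummy ==
 * false this asks Xen for a foreign mapping of the bucket's guest
 * frames; with dummy == true it installs an anonymous placeholder
 * instead (see the comment in the dummy branch below). If @vaddr is
 * non-NULL the new mapping must land at exactly that address, which is
 * enforced with MAP_FIXED.
 */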
static void xen_remap_bucket(MapCacheEntry *entry,
                             void *vaddr,
                             hwaddr size,
                             hwaddr address_index,
                             bool dummy)
{
    uint8_t *vaddr_base;
    xen_pfn_t *pfns;
    int *err;
    unsigned int i;
    hwaddr nb_pfn = size >> XC_PAGE_SHIFT;

    trace_xen_remap_bucket(address_index);

    pfns = g_new0(xen_pfn_t, nb_pfn);
    err = g_new0(int, nb_pfn);

    if (entry->vaddr_base != NULL) {
        if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
            ram_block_notify_remove(entry->vaddr_base, entry->size,
                                    entry->size);
        }

        /*
         * If an entry is being replaced by another mapping and we're using
         * the MAP_FIXED flag for it, there is a possibility of a race for
         * the vaddr with another thread doing an mmap call itself
         * (see man 2 mmap). To avoid that we skip explicit unmapping here
         * and allow the kernel to destroy the previous mappings by replacing
         * them in the mmap call later.
         *
         * Non-identical replacements are therefore not allowed.
         */
        assert(!vaddr || (entry->vaddr_base == vaddr && entry->size == size));

        if (!vaddr && munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }
    }
    g_free(entry->valid_mapping);
    entry->valid_mapping = NULL;

    for (i = 0; i < nb_pfn; i++) {
        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) + i;
    }

    /*
     * If the caller has requested the mapping at a specific address use
     * MAP_FIXED to make sure it's honored.
     */
    if (!dummy) {
        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
                                           PROT_READ | PROT_WRITE,
                                           vaddr ? MAP_FIXED : 0,
                                           nb_pfn, pfns, err);
        if (vaddr_base == NULL) {
            perror("xenforeignmemory_map2");
            exit(-1);
        }
    } else {
        /*
         * We create dummy mappings where we are unable to create a foreign
         * mapping immediately due to certain circumstances (i.e. right now,
         * during an incoming migration).
         */
        vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED | (vaddr ? MAP_FIXED : 0),
                          -1, 0);
        if (vaddr_base == MAP_FAILED) {
            perror("mmap");
            exit(-1);
        }
    }

    if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
        ram_block_notify_add(vaddr_base, size, size);
    }

    entry->vaddr_base = vaddr_base;
    entry->paddr_index = address_index;
    entry->size = size;
    entry->valid_mapping = g_new0(unsigned long,
                                  BITS_TO_LONGS(size >> XC_PAGE_SHIFT));

    if (dummy) {
        entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
    } else {
        entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
    }

    bitmap_zero(entry->valid_mapping, nb_pfn);
    for (i = 0; i < nb_pfn; i++) {
        if (!err[i]) {
            bitmap_set(entry->valid_mapping, i, 1);
        }
    }

    g_free(pfns);
    g_free(err);
}

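/*
 * Core lookup. Fast path: unsized, unlocked requests that hit the same
 * bucket as the previous call are served straight from last_entry.
 * Otherwise walk the bucket's chain for an entry that covers the
 * request with valid mappings, remapping or appending an entry as
 * needed. Locked requests additionally record a MapCacheRev so the
 * returned pointer can later be translated back and unlocked. If the
 * mapping fails during an incoming migration, the lookup retries with
 * a dummy mapping.
 */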
static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
                                       uint8_t lock, bool dma)
{
    MapCacheEntry *entry, *pentry = NULL,
                  *free_entry = NULL, *free_pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size;
    bool translated G_GNUC_UNUSED = false;
    bool dummy = false;

tryagain:
    address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }
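    /*
     * Worked example with 4 KiB pages: phys_addr = 0x1234, size = 0x2000
     * gives test_bit_size = 0x2234, rounded up to 0x3000, i.e. the
     * three pages the request actually touches.
     */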

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == address_index &&
        !lock && !size &&
        test_bits(address_offset >> XC_PAGE_SHIFT,
                  test_bit_size >> XC_PAGE_SHIFT,
                  mapcache->last_entry->valid_mapping)) {
        trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
        return mapcache->last_entry->vaddr_base + address_offset;
    }

    /* cache_size is always a multiple of MCACHE_BUCKET_SIZE */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % MCACHE_BUCKET_SIZE) {
            cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
        }
    } else {
        cache_size = MCACHE_BUCKET_SIZE;
    }

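    /*
     * Scan the chain for an entry that already covers this request.
     * The first unlocked entry encountered is remembered as a candidate
     * for reuse; a mismatched entry may only be remapped if nobody
     * holds a lock on it.
     */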
    entry = &mapcache->entry[address_index % mapcache->nr_buckets];

    while (entry && (lock || entry->lock) && entry->vaddr_base &&
            (entry->paddr_index != address_index || entry->size != cache_size ||
             !test_bits(address_offset >> XC_PAGE_SHIFT,
                 test_bit_size >> XC_PAGE_SHIFT,
                 entry->valid_mapping))) {
        if (!free_entry && !entry->lock) {
            free_entry = entry;
            free_pentry = pentry;
        }
        pentry = entry;
        entry = entry->next;
    }
    if (!entry && free_entry) {
        entry = free_entry;
        pentry = free_pentry;
    }
    if (!entry) {
        entry = g_new0(MapCacheEntry, 1);
        pentry->next = entry;
        xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
    } else if (!entry->lock) {
        if (!entry->vaddr_base || entry->paddr_index != address_index ||
                entry->size != cache_size ||
                !test_bits(address_offset >> XC_PAGE_SHIFT,
                    test_bit_size >> XC_PAGE_SHIFT,
                    entry->valid_mapping)) {
            xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
        }
    }

    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        mapcache->last_entry = NULL;
#ifdef XEN_COMPAT_PHYSMAP
        if (!translated && mapcache->phys_offset_to_gaddr) {
            phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size);
            translated = true;
            goto tryagain;
        }
#endif
        if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
            dummy = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

    mapcache->last_entry = entry;
    if (lock) {
        MapCacheRev *reventry = g_new0(MapCacheRev, 1);
        entry->lock++;
        if (entry->lock == 0) {
            fprintf(stderr,
                    "mapcache entry lock overflow: "TARGET_FMT_plx" -> %p\n",
                    entry->paddr_index, entry->vaddr_base);
            abort();
        }
        reventry->dma = dma;
        reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset;
        reventry->paddr_index = mapcache->last_entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
    return mapcache->last_entry->vaddr_base + address_offset;
}

uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
                       uint8_t lock, bool dma)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
    mapcache_unlock();
    return p;
}

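/*
 * Translate a pointer previously returned by a locked xen_map_cache()
 * call back to a guest ram_addr_t: find the pointer in the reverse map
 * of locked entries, then locate the matching cache entry and compute
 * the offset within its bucket.
 */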
ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
{
    MapCacheEntry *entry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    ram_addr_t raddr;
    int found = 0;

    mapcache_lock();
    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == ptr) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        fprintf(stderr, "%s, could not find %p\n", __func__, ptr);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n",
                    reventry->paddr_index, reventry->vaddr_req);
        }
        abort();
        return 0;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr);
        raddr = 0;
    } else {
        raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
            ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
    }
    mapcache_unlock();
    return raddr;
}

static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
{
    MapCacheEntry *entry = NULL, *pentry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    int found = 0;

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == buffer) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        DPRINTF("%s, could not find %p\n", __func__, buffer);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n",
                    reventry->paddr_index, reventry->vaddr_req);
        }
        return;
    }
    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
    g_free(reventry);

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == paddr_index) {
        mapcache->last_entry = NULL;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        pentry = entry;
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
        return;
    }
    entry->lock--;
    if (entry->lock > 0 || pentry == NULL) {
        return;
    }
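    /*
     * Only chained entries are torn down here; the head entry lives
     * inside the bucket array itself, so the pentry == NULL check above
     * never lets it reach the munmap/g_free below.
     */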

    pentry->next = entry->next;
    ram_block_notify_remove(entry->vaddr_base, entry->size, entry->size);
    if (munmap(entry->vaddr_base, entry->size) != 0) {
        perror("unmap fails");
        exit(-1);
    }
    g_free(entry->valid_mapping);
    g_free(entry);
}

void xen_invalidate_map_cache_entry(uint8_t *buffer)
{
    mapcache_lock();
    xen_invalidate_map_cache_entry_unlocked(buffer);
    mapcache_unlock();
}

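/*
 * Drop every mapping that is not currently locked. In QEMU's Xen HVM
 * code this is typically driven by an IOREQ_TYPE_INVALIDATE request
 * from the hypervisor; locked DMA mappings cannot be dropped and are
 * reported instead.
 */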
void xen_invalidate_map_cache(void)
{
    unsigned long i;
    MapCacheRev *reventry;

    /* Flush pending AIO before destroying the mapcache */
    bdrv_drain_all();

    mapcache_lock();

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (!reventry->dma) {
            continue;
        }
        fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
                " "TARGET_FMT_plx" -> %p is present\n",
                reventry->paddr_index, reventry->vaddr_req);
    }

    for (i = 0; i < mapcache->nr_buckets; i++) {
        MapCacheEntry *entry = &mapcache->entry[i];

        if (entry->vaddr_base == NULL) {
            continue;
        }
        if (entry->lock > 0) {
            continue;
        }

        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }

        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->size = 0;
        g_free(entry->valid_mapping);
        entry->valid_mapping = NULL;
    }

    mapcache->last_entry = NULL;

    mapcache_unlock();
}

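/*
 * Turn the dummy entry installed for old_phys_addr during an incoming
 * migration into a real foreign mapping of new_phys_addr. The existing
 * vaddr_base is passed back into xen_remap_bucket() so the replacement
 * lands at the same host virtual address (via MAP_FIXED), keeping
 * pointers already handed out valid.
 */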
static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
                                                 hwaddr new_phys_addr,
                                                 hwaddr size)
{
    MapCacheEntry *entry;
    hwaddr address_index, address_offset;
    hwaddr test_bit_size, cache_size = size;

    address_index  = old_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    assert(size);
    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
    if (test_bit_size % XC_PAGE_SIZE) {
        test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
    }
    cache_size = size + address_offset;
    if (cache_size % MCACHE_BUCKET_SIZE) {
        cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
    while (entry && !(entry->paddr_index == address_index &&
                      entry->size == cache_size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to update an entry for "TARGET_FMT_plx \
                " that is not in the mapcache!\n", old_phys_addr);
        return NULL;
    }

    address_index  = new_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    fprintf(stderr, "Replacing a dummy mapcache entry for "TARGET_FMT_plx \
            " with "TARGET_FMT_plx"\n", old_phys_addr, new_phys_addr);

    xen_remap_bucket(entry, entry->vaddr_base,
                     cache_size, address_index, false);
    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        DPRINTF("Unable to update a mapcache entry for "TARGET_FMT_plx"!\n",
                old_phys_addr);
        return NULL;
    }

    return entry->vaddr_base + address_offset;
}

uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
                                 hwaddr new_phys_addr,
                                 hwaddr size)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
    mapcache_unlock();
    return p;
}