qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

hostmem.c (17727B)


      1 /*
      2  * QEMU Host Memory Backend
      3  *
      4  * Copyright (C) 2013-2014 Red Hat Inc
      5  *
      6  * Authors:
      7  *   Igor Mammedov <imammedo@redhat.com>
      8  *
      9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
     10  * See the COPYING file in the top-level directory.
     11  */
     12 
     13 #include "qemu/osdep.h"
     14 #include "sysemu/hostmem.h"
     15 #include "hw/boards.h"
     16 #include "qapi/error.h"
     17 #include "qapi/qapi-builtin-visit.h"
     18 #include "qapi/visitor.h"
     19 #include "qemu/config-file.h"
     20 #include "qom/object_interfaces.h"
     21 #include "qemu/mmap-alloc.h"
     22 #include "qemu/madvise.h"
     23 
     24 #ifdef CONFIG_NUMA
     25 #include <numaif.h>
     26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
     27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
     28 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
     29 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
     30 #endif
     31 
     32 char *
     33 host_memory_backend_get_name(HostMemoryBackend *backend)
     34 {
     35     if (!backend->use_canonical_path) {
     36         return g_strdup(object_get_canonical_path_component(OBJECT(backend)));
     37     }
     38 
     39     return object_get_canonical_path(OBJECT(backend));
     40 }
     41 
     42 static void
     43 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
     44                              void *opaque, Error **errp)
     45 {
     46     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
     47     uint64_t value = backend->size;
     48 
     49     visit_type_size(v, name, &value, errp);
     50 }
     51 
     52 static void
     53 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
     54                              void *opaque, Error **errp)
     55 {
     56     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
     57     uint64_t value;
     58 
     59     if (host_memory_backend_mr_inited(backend)) {
     60         error_setg(errp, "cannot change property %s of %s ", name,
     61                    object_get_typename(obj));
     62         return;
     63     }
     64 
     65     if (!visit_type_size(v, name, &value, errp)) {
     66         return;
     67     }
     68     if (!value) {
     69         error_setg(errp,
     70                    "property '%s' of %s doesn't take value '%" PRIu64 "'",
     71                    name, object_get_typename(obj), value);
     72         return;
     73     }
     74     backend->size = value;
     75 }
     76 
     77 static void
     78 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
     79                                    void *opaque, Error **errp)
     80 {
     81     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
     82     uint16List *host_nodes = NULL;
     83     uint16List **tail = &host_nodes;
     84     unsigned long value;
     85 
     86     value = find_first_bit(backend->host_nodes, MAX_NODES);
     87     if (value == MAX_NODES) {
     88         goto ret;
     89     }
     90 
     91     QAPI_LIST_APPEND(tail, value);
     92 
     93     do {
     94         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
     95         if (value == MAX_NODES) {
     96             break;
     97         }
     98 
     99         QAPI_LIST_APPEND(tail, value);
    100     } while (true);
    101 
    102 ret:
    103     visit_type_uint16List(v, name, &host_nodes, errp);
    104     qapi_free_uint16List(host_nodes);
    105 }
    106 
    107 static void
    108 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
    109                                    void *opaque, Error **errp)
    110 {
    111 #ifdef CONFIG_NUMA
    112     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    113     uint16List *l, *host_nodes = NULL;
    114 
    115     visit_type_uint16List(v, name, &host_nodes, errp);
    116 
    117     for (l = host_nodes; l; l = l->next) {
    118         if (l->value >= MAX_NODES) {
    119             error_setg(errp, "Invalid host-nodes value: %d", l->value);
    120             goto out;
    121         }
    122     }
    123 
    124     for (l = host_nodes; l; l = l->next) {
    125         bitmap_set(backend->host_nodes, l->value, 1);
    126     }
    127 
    128 out:
    129     qapi_free_uint16List(host_nodes);
    130 #else
    131     error_setg(errp, "NUMA node binding are not supported by this QEMU");
    132 #endif
    133 }
    134 
    135 static int
    136 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
    137 {
    138     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    139     return backend->policy;
    140 }
    141 
    142 static void
    143 host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
    144 {
    145     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    146     backend->policy = policy;
    147 
    148 #ifndef CONFIG_NUMA
    149     if (policy != HOST_MEM_POLICY_DEFAULT) {
    150         error_setg(errp, "NUMA policies are not supported by this QEMU");
    151     }
    152 #endif
    153 }
    154 
    155 static bool host_memory_backend_get_merge(Object *obj, Error **errp)
    156 {
    157     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    158 
    159     return backend->merge;
    160 }
    161 
    162 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
    163 {
    164     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    165 
    166     if (!host_memory_backend_mr_inited(backend)) {
    167         backend->merge = value;
    168         return;
    169     }
    170 
    171     if (value != backend->merge) {
    172         void *ptr = memory_region_get_ram_ptr(&backend->mr);
    173         uint64_t sz = memory_region_size(&backend->mr);
    174 
    175         qemu_madvise(ptr, sz,
    176                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
    177         backend->merge = value;
    178     }
    179 }
    180 
    181 static bool host_memory_backend_get_dump(Object *obj, Error **errp)
    182 {
    183     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    184 
    185     return backend->dump;
    186 }
    187 
    188 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
    189 {
    190     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    191 
    192     if (!host_memory_backend_mr_inited(backend)) {
    193         backend->dump = value;
    194         return;
    195     }
    196 
    197     if (value != backend->dump) {
    198         void *ptr = memory_region_get_ram_ptr(&backend->mr);
    199         uint64_t sz = memory_region_size(&backend->mr);
    200 
    201         qemu_madvise(ptr, sz,
    202                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
    203         backend->dump = value;
    204     }
    205 }
    206 
    207 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
    208 {
    209     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    210 
    211     return backend->prealloc;
    212 }
    213 
    214 static void host_memory_backend_set_prealloc(Object *obj, bool value,
    215                                              Error **errp)
    216 {
    217     Error *local_err = NULL;
    218     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    219 
    220     if (!backend->reserve && value) {
    221         error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
    222         return;
    223     }
    224 
    225     if (!host_memory_backend_mr_inited(backend)) {
    226         backend->prealloc = value;
    227         return;
    228     }
    229 
    230     if (value && !backend->prealloc) {
    231         int fd = memory_region_get_fd(&backend->mr);
    232         void *ptr = memory_region_get_ram_ptr(&backend->mr);
    233         uint64_t sz = memory_region_size(&backend->mr);
    234 
    235         qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
    236                           backend->prealloc_context, &local_err);
    237         if (local_err) {
    238             error_propagate(errp, local_err);
    239             return;
    240         }
    241         backend->prealloc = true;
    242     }
    243 }
    244 
    245 static void host_memory_backend_get_prealloc_threads(Object *obj, Visitor *v,
    246     const char *name, void *opaque, Error **errp)
    247 {
    248     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    249     visit_type_uint32(v, name, &backend->prealloc_threads, errp);
    250 }
    251 
    252 static void host_memory_backend_set_prealloc_threads(Object *obj, Visitor *v,
    253     const char *name, void *opaque, Error **errp)
    254 {
    255     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    256     uint32_t value;
    257 
    258     if (!visit_type_uint32(v, name, &value, errp)) {
    259         return;
    260     }
    261     if (value <= 0) {
    262         error_setg(errp, "property '%s' of %s doesn't take value '%d'", name,
    263                    object_get_typename(obj), value);
    264         return;
    265     }
    266     backend->prealloc_threads = value;
    267 }
    268 
    269 static void host_memory_backend_init(Object *obj)
    270 {
    271     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    272     MachineState *machine = MACHINE(qdev_get_machine());
    273 
    274     /* TODO: convert access to globals to compat properties */
    275     backend->merge = machine_mem_merge(machine);
    276     backend->dump = machine_dump_guest_core(machine);
    277     backend->reserve = true;
    278     backend->prealloc_threads = machine->smp.cpus;
    279 }
    280 
/* Instance post-init: apply machine compat properties to this backend. */
static void host_memory_backend_post_init(Object *obj)
{
    object_apply_compat_props(obj);
}
    285 
    286 bool host_memory_backend_mr_inited(HostMemoryBackend *backend)
    287 {
    288     /*
    289      * NOTE: We forbid zero-length memory backend, so here zero means
    290      * "we haven't inited the backend memory region yet".
    291      */
    292     return memory_region_size(&backend->mr) != 0;
    293 }
    294 
    295 MemoryRegion *host_memory_backend_get_memory(HostMemoryBackend *backend)
    296 {
    297     return host_memory_backend_mr_inited(backend) ? &backend->mr : NULL;
    298 }
    299 
/* Record whether the backend is currently mapped (i.e. in use). */
void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
{
    backend->is_mapped = mapped;
}
    304 
/* Return whether the backend is currently mapped (i.e. in use). */
bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
{
    return backend->is_mapped;
}
    309 
    310 size_t host_memory_backend_pagesize(HostMemoryBackend *memdev)
    311 {
    312     size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block);
    313     g_assert(pagesize >= qemu_real_host_page_size());
    314     return pagesize;
    315 }
    316 
/*
 * UserCreatable::complete handler: allocate the backing memory via the
 * subclass hook, then apply the configured properties in the required
 * order — madvise (merge/dump), NUMA binding, and finally preallocation
 * (which must come after mbind() so pages land on the right nodes).
 */
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(uc);
    HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
    Error *local_err = NULL;
    void *ptr;
    uint64_t sz;

    if (bc->alloc) {
        /* Subclass-specific allocation (file, memfd, anonymous RAM, ...). */
        bc->alloc(backend, &local_err);
        if (local_err) {
            goto out;
        }

        ptr = memory_region_get_ram_ptr(&backend->mr);
        sz = memory_region_size(&backend->mr);

        if (backend->merge) {
            qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
        }
        if (!backend->dump) {
            qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
        }
#ifdef CONFIG_NUMA
        unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
        /* lastbit == MAX_NODES means maxnode = 0 */
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
        if (maxnode && backend->policy == MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be empty for policy default,"
                       " or you should explicitly specify a policy other"
                       " than default");
            return;
        } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
            error_setg(errp, "host-nodes must be set for policy %s",
                       HostMemPolicy_str(backend->policy));
            return;
        }

        /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
         * as argument to mbind() due to an old Linux bug (feature?) which
         * cuts off the last specified node. This means backend->host_nodes
         * must have MAX_NODES+1 bits available.
         */
        assert(sizeof(backend->host_nodes) >=
               BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
        assert(maxnode <= MAX_NODES);

        /* ENOSYS with a default policy is tolerated (e.g. no NUMA kernel). */
        if (maxnode &&
            mbind(ptr, sz, backend->policy, backend->host_nodes, maxnode + 1,
                  flags)) {
            if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
                error_setg_errno(errp, errno,
                                 "cannot bind memory to host NUMA nodes");
                return;
            }
        }
#endif
        /* Preallocate memory after the NUMA policy has been instantiated.
         * This is necessary to guarantee memory is allocated with
         * specified NUMA policy in place.
         */
        if (backend->prealloc) {
            qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
                              backend->prealloc_threads,
                              backend->prealloc_context, &local_err);
            if (local_err) {
                goto out;
            }
        }
    }
out:
    error_propagate(errp, local_err);
}
    398 
    399 static bool
    400 host_memory_backend_can_be_deleted(UserCreatable *uc)
    401 {
    402     if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
    403         return false;
    404     } else {
    405         return true;
    406     }
    407 }
    408 
    409 static bool host_memory_backend_get_share(Object *o, Error **errp)
    410 {
    411     HostMemoryBackend *backend = MEMORY_BACKEND(o);
    412 
    413     return backend->share;
    414 }
    415 
    416 static void host_memory_backend_set_share(Object *o, bool value, Error **errp)
    417 {
    418     HostMemoryBackend *backend = MEMORY_BACKEND(o);
    419 
    420     if (host_memory_backend_mr_inited(backend)) {
    421         error_setg(errp, "cannot change property value");
    422         return;
    423     }
    424     backend->share = value;
    425 }
    426 
    427 #ifdef CONFIG_LINUX
    428 static bool host_memory_backend_get_reserve(Object *o, Error **errp)
    429 {
    430     HostMemoryBackend *backend = MEMORY_BACKEND(o);
    431 
    432     return backend->reserve;
    433 }
    434 
    435 static void host_memory_backend_set_reserve(Object *o, bool value, Error **errp)
    436 {
    437     HostMemoryBackend *backend = MEMORY_BACKEND(o);
    438 
    439     if (host_memory_backend_mr_inited(backend)) {
    440         error_setg(errp, "cannot change property value");
    441         return;
    442     }
    443     if (backend->prealloc && !value) {
    444         error_setg(errp, "'prealloc=on' and 'reserve=off' are incompatible");
    445         return;
    446     }
    447     backend->reserve = value;
    448 }
    449 #endif /* CONFIG_LINUX */
    450 
    451 static bool
    452 host_memory_backend_get_use_canonical_path(Object *obj, Error **errp)
    453 {
    454     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
    455 
    456     return backend->use_canonical_path;
    457 }
    458 
/* QOM setter for "x-use-canonical-path-for-ramblock-id"; see the
 * stability note in host_memory_backend_class_init(). */
static void
host_memory_backend_set_use_canonical_path(Object *obj, bool value,
                                           Error **errp)
{
    HostMemoryBackend *backend = MEMORY_BACKEND(obj);

    backend->use_canonical_path = value;
}
    467 
/*
 * Class init: install the UserCreatable complete/can_be_deleted hooks
 * and register every QOM property shared by all host memory backends.
 */
static void
host_memory_backend_class_init(ObjectClass *oc, void *data)
{
    UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);

    ucc->complete = host_memory_backend_memory_complete;
    ucc->can_be_deleted = host_memory_backend_can_be_deleted;

    /* madvise-backed toggles. */
    object_class_property_add_bool(oc, "merge",
        host_memory_backend_get_merge,
        host_memory_backend_set_merge);
    object_class_property_set_description(oc, "merge",
        "Mark memory as mergeable");
    object_class_property_add_bool(oc, "dump",
        host_memory_backend_get_dump,
        host_memory_backend_set_dump);
    object_class_property_set_description(oc, "dump",
        "Set to 'off' to exclude from core dump");
    /* Preallocation controls. */
    object_class_property_add_bool(oc, "prealloc",
        host_memory_backend_get_prealloc,
        host_memory_backend_set_prealloc);
    object_class_property_set_description(oc, "prealloc",
        "Preallocate memory");
    object_class_property_add(oc, "prealloc-threads", "int",
        host_memory_backend_get_prealloc_threads,
        host_memory_backend_set_prealloc_threads,
        NULL, NULL);
    object_class_property_set_description(oc, "prealloc-threads",
        "Number of CPU threads to use for prealloc");
    object_class_property_add_link(oc, "prealloc-context",
        TYPE_THREAD_CONTEXT, offsetof(HostMemoryBackend, prealloc_context),
        object_property_allow_set_link, OBJ_PROP_LINK_STRONG);
    object_class_property_set_description(oc, "prealloc-context",
        "Context to use for creating CPU threads for preallocation");
    object_class_property_add(oc, "size", "int",
        host_memory_backend_get_size,
        host_memory_backend_set_size,
        NULL, NULL);
    object_class_property_set_description(oc, "size",
        "Size of the memory region (ex: 500M)");
    /* NUMA placement. */
    object_class_property_add(oc, "host-nodes", "int",
        host_memory_backend_get_host_nodes,
        host_memory_backend_set_host_nodes,
        NULL, NULL);
    object_class_property_set_description(oc, "host-nodes",
        "Binds memory to the list of NUMA host nodes");
    object_class_property_add_enum(oc, "policy", "HostMemPolicy",
        &HostMemPolicy_lookup,
        host_memory_backend_get_policy,
        host_memory_backend_set_policy);
    object_class_property_set_description(oc, "policy",
        "Set the NUMA policy");
    object_class_property_add_bool(oc, "share",
        host_memory_backend_get_share, host_memory_backend_set_share);
    object_class_property_set_description(oc, "share",
        "Mark the memory as private to QEMU or shared");
#ifdef CONFIG_LINUX
    object_class_property_add_bool(oc, "reserve",
        host_memory_backend_get_reserve, host_memory_backend_set_reserve);
    object_class_property_set_description(oc, "reserve",
        "Reserve swap space (or huge pages) if applicable");
#endif /* CONFIG_LINUX */
    /*
     * Do not delete/rename option. This option must be considered stable
     * (as if it didn't have the 'x-' prefix including deprecation period) as
     * long as 4.0 and older machine types exists.
     * Option will be used by upper layers to override (disable) canonical path
     * for ramblock-id set by compat properties on old machine types ( <= 4.0),
     * to keep migration working when backend is used for main RAM with
     * -machine memory-backend= option (main RAM historically used prefix-less
     * ramblock-id).
     */
    object_class_property_add_bool(oc, "x-use-canonical-path-for-ramblock-id",
        host_memory_backend_get_use_canonical_path,
        host_memory_backend_set_use_canonical_path);
}
    544 
/*
 * Abstract QOM base type for all host memory backends; concrete backends
 * (file, memfd, ram, ...) derive from it and provide the alloc() hook.
 */
static const TypeInfo host_memory_backend_info = {
    .name = TYPE_MEMORY_BACKEND,
    .parent = TYPE_OBJECT,
    .abstract = true,
    .class_size = sizeof(HostMemoryBackendClass),
    .class_init = host_memory_backend_class_init,
    .instance_size = sizeof(HostMemoryBackend),
    .instance_init = host_memory_backend_init,
    .instance_post_init = host_memory_backend_post_init,
    .interfaces = (InterfaceInfo[]) {
        { TYPE_USER_CREATABLE },
        { }
    }
};
    559 
/* Register the abstract memory backend type with the QOM type system. */
static void register_types(void)
{
    type_register_static(&host_memory_backend_info);
}

type_init(register_types);