/*
 * QEMU MSHV support
 *
 * Copyright Microsoft, Corp. 2025
 *
 * Authors:
 *   Magnus Kulke <magnuskulke@microsoft.com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qemu/lockable.h"
#include "qemu/error-report.h"
#include "qemu/rcu.h"
#include "linux/mshv.h"
#include "system/address-spaces.h"
#include "system/mshv.h"
#include "system/mshv_int.h"
#include "exec/memattrs.h"
#include <sys/ioctl.h>
#include "trace.h"
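
/*
 * Memory slots are tracked in a copy-on-write, RCU-protected GList:
 *
 * - Readers (e.g. the unmapped_gpa vm exit fast path) traverse the list
 *   published in manager->slots under the RCU read lock, paired with a
 *   load-acquire of the list head.
 * - Writers hold manager->mutex, copy the list, modify the copy and
 *   publish it with a store-release; the old list head (and any removed
 *   slot) is freed via call_rcu1() once all readers have moved on.
 */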

typedef struct SlotsRCUReclaim {
    struct rcu_head rcu;
    GList *old_head;
    MshvMemorySlot *removed_slot;
} SlotsRCUReclaim;

static void rcu_reclaim_slotlist(struct rcu_head *rcu)
{
    SlotsRCUReclaim *r = container_of(rcu, SlotsRCUReclaim, rcu);
    g_list_free(r->old_head);
    g_free(r->removed_slot);
    g_free(r);
}
static void publish_slots(GList *new_head, GList *old_head,
                          MshvMemorySlot *removed_slot)
{
    MshvMemorySlotManager *manager = &mshv_state->msm;

    assert(manager);
    qatomic_store_release(&manager->slots, new_head);

    SlotsRCUReclaim *r = g_new(SlotsRCUReclaim, 1);
    r->old_head = old_head;
    r->removed_slot = removed_slot;

    call_rcu1(&r->rcu, rcu_reclaim_slotlist);
}

/* Needs to be called with mshv_state->msm.mutex held */
static int remove_slot(MshvMemorySlot *slot)
{
    GList *old_head, *new_head;
    MshvMemorySlotManager *manager = &mshv_state->msm;

    assert(manager);
    old_head = qatomic_load_acquire(&manager->slots);

    if (!g_list_find(old_head, slot)) {
        error_report("slot requested for removal not found");
        return -1;
    }

    new_head = g_list_copy(old_head);
    new_head = g_list_remove(new_head, slot);
    manager->n_slots--;

    publish_slots(new_head, old_head, slot);

    return 0;
}

/* Needs to be called with mshv_state->msm.mutex held */
static MshvMemorySlot *append_slot(uint64_t gpa, uint64_t userspace_addr,
                                   uint64_t size, bool readonly)
{
    GList *old_head, *new_head;
    MshvMemorySlot *slot;
    MshvMemorySlotManager *manager = &mshv_state->msm;

    assert(manager);

    old_head = qatomic_load_acquire(&manager->slots);

    if (manager->n_slots >= MSHV_MAX_MEM_SLOTS) {
        error_report("no free memory slots available");
        return NULL;
    }

    slot = g_new0(MshvMemorySlot, 1);
    slot->guest_phys_addr = gpa;
    slot->userspace_addr = userspace_addr;
    slot->memory_size = size;
    slot->readonly = readonly;

    new_head = g_list_copy(old_head);
    new_head = g_list_append(new_head, slot);
    manager->n_slots++;

    publish_slots(new_head, old_head, NULL);

    return slot;
}
static int slot_overlaps(const MshvMemorySlot *slot1,
                         const MshvMemorySlot *slot2)
{
    uint64_t start_1 = slot1->userspace_addr,
             start_2 = slot2->userspace_addr;
    size_t len_1 = slot1->memory_size,
           len_2 = slot2->memory_size;

    if (slot1 == slot2) {
        return -1;
    }

    return ranges_overlap(start_1, len_1, start_2, len_2) ? 0 : -1;
}

static bool is_mapped(MshvMemorySlot *slot)
{
    /*
     * Pairs with the store-release in set_mapped(): subsequent reads of
     * the slot's fields see a fully initialized slot.
     */
    return qatomic_load_acquire(&slot->mapped);
}

/*
 * Find a slot that is:
 * - overlapping in userspace
 * - currently mapped in the guest
 *
 * Needs to be called with mshv_state->msm.mutex or RCU read lock held.
 */
static MshvMemorySlot *find_overlap_mem_slot(GList *head, MshvMemorySlot *slot)
{
    GList *found;
    MshvMemorySlot *overlap_slot;

    found = g_list_find_custom(head, slot, (GCompareFunc) slot_overlaps);

    if (!found) {
        return NULL;
    }

    overlap_slot = found->data;
    if (!overlap_slot || !is_mapped(overlap_slot)) {
        return NULL;
    }

    return overlap_slot;
}
static int set_guest_memory(int vm_fd,
                            const struct mshv_user_mem_region *region)
{
    int ret;

    ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
    if (ret < 0) {
        error_report("failed to set guest memory: %s", strerror(errno));
        return -1;
    }

    return 0;
}
static int map_or_unmap(int vm_fd, const MshvMemorySlot *slot, bool map)
{
    struct mshv_user_mem_region region = {0};

    region.guest_pfn = slot->guest_phys_addr >> MSHV_PAGE_SHIFT;
    region.size = slot->memory_size;
    region.userspace_addr = slot->userspace_addr;

    if (!map) {
        region.flags |= (1 << MSHV_SET_MEM_BIT_UNMAP);
        trace_mshv_unmap_memory(slot->userspace_addr, slot->guest_phys_addr,
                                slot->memory_size);
        return set_guest_memory(vm_fd, &region);
    }

    region.flags = BIT(MSHV_SET_MEM_BIT_EXECUTABLE);
    if (!slot->readonly) {
        region.flags |= BIT(MSHV_SET_MEM_BIT_WRITABLE);
    }

    trace_mshv_map_memory(slot->userspace_addr, slot->guest_phys_addr,
                          slot->memory_size);
    return set_guest_memory(vm_fd, &region);
}
static int slot_matches_region(const MshvMemorySlot *slot1,
                               const MshvMemorySlot *slot2)
{
    return (slot1->guest_phys_addr == slot2->guest_phys_addr &&
            slot1->userspace_addr == slot2->userspace_addr &&
            slot1->memory_size == slot2->memory_size) ? 0 : -1;
}

/* Needs to be called with mshv_state->msm.mutex held */
static MshvMemorySlot *find_mem_slot_by_region(uint64_t gpa, uint64_t size,
                                               uint64_t userspace_addr)
{
    MshvMemorySlot ref_slot = {
        .guest_phys_addr = gpa,
        .userspace_addr = userspace_addr,
        .memory_size = size,
    };
    GList *found;
    MshvMemorySlotManager *manager = &mshv_state->msm;

    assert(manager);
    found = g_list_find_custom(manager->slots, &ref_slot,
                               (GCompareFunc) slot_matches_region);

    return found ? found->data : NULL;
}
static int slot_covers_gpa(const MshvMemorySlot *slot, uint64_t *gpa_p)
{
    uint64_t gpa_offset, gpa = *gpa_p;

    gpa_offset = gpa - slot->guest_phys_addr;
    return (slot->guest_phys_addr <= gpa && gpa_offset < slot->memory_size)
           ? 0 : -1;
}

/* Needs to be called with mshv_state->msm.mutex or RCU read lock held */
static MshvMemorySlot *find_mem_slot_by_gpa(GList *head, uint64_t gpa)
{
    GList *found;
    MshvMemorySlot *slot;

    trace_mshv_find_slot_by_gpa(gpa);

    found = g_list_find_custom(head, &gpa, (GCompareFunc) slot_covers_gpa);
    if (found) {
        slot = found->data;
        trace_mshv_found_slot(slot->userspace_addr, slot->guest_phys_addr,
                              slot->memory_size);
        return slot;
    }

    return NULL;
}

/* Needs to be called with mshv_state->msm.mutex held */
static void set_mapped(MshvMemorySlot *slot, bool mapped)
{
    /*
     * Pairs with the load-acquire in is_mapped(): prior writes to the slot
     * become visible before readers observe the new mapped value.
     */
    qatomic_store_release(&slot->mapped, mapped);
}
MshvRemapResult mshv_remap_overlap_region(int vm_fd, uint64_t gpa)
{
    MshvMemorySlot *gpa_slot, *overlap_slot;
    GList *head;
    int ret;
    MshvMemorySlotManager *manager = &mshv_state->msm;

    /* fast path, called often by unmapped_gpa vm exit */
    WITH_RCU_READ_LOCK_GUARD() {
        assert(manager);
        head = qatomic_load_acquire(&manager->slots);
        /* return early if no slot is found */
        gpa_slot = find_mem_slot_by_gpa(head, gpa);
        if (gpa_slot == NULL) {
            return MshvRemapNoMapping;
        }

        /* return early if no overlapping slot is found */
        overlap_slot = find_overlap_mem_slot(head, gpa_slot);
        if (overlap_slot == NULL) {
            return MshvRemapNoOverlap;
        }
    }

    /*
     * We'll modify the mapping list, so we need to upgrade to the mutex
     * and recheck.
     */
    assert(manager);
    QEMU_LOCK_GUARD(&manager->mutex);

    /* return early if no slot is found */
    gpa_slot = find_mem_slot_by_gpa(manager->slots, gpa);
    if (gpa_slot == NULL) {
        return MshvRemapNoMapping;
    }

    /* return early if no overlapping slot is found */
    overlap_slot = find_overlap_mem_slot(manager->slots, gpa_slot);
    if (overlap_slot == NULL) {
        return MshvRemapNoOverlap;
    }

    /* unmap overlapping slot */
    ret = map_or_unmap(vm_fd, overlap_slot, false);
    if (ret < 0) {
        error_report("failed to unmap overlap region");
        abort();
    }
    set_mapped(overlap_slot, false);
    warn_report("mapped out userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
                overlap_slot->userspace_addr,
                overlap_slot->guest_phys_addr,
                overlap_slot->memory_size);

    /* map region for gpa */
    ret = map_or_unmap(vm_fd, gpa_slot, true);
    if (ret < 0) {
        error_report("failed to map new region");
        abort();
    }
    set_mapped(gpa_slot, true);
    warn_report("mapped in userspace_addr=0x%016lx gpa=0x%010lx size=0x%lx",
                gpa_slot->userspace_addr, gpa_slot->guest_phys_addr,
                gpa_slot->memory_size);

    return MshvRemapOk;
}

static int handle_unmapped_mmio_region_read(uint64_t gpa, uint64_t size,
                                            uint8_t *data)
{
    warn_report("read from unmapped mmio region gpa=0x%lx size=%lu", gpa, size);

    if (size == 0 || size > 8) {
        error_report("invalid size %lu for reading from unmapped mmio region",
                     size);
        return -1;
    }

    memset(data, 0xFF, size);

    return 0;
}
int mshv_guest_mem_read(uint64_t gpa, uint8_t *data, uintptr_t size,
                        bool is_secure_mode, bool instruction_fetch)
{
    int ret;
    MemTxAttrs memattr = { .secure = is_secure_mode };

    if (instruction_fetch) {
        trace_mshv_insn_fetch(gpa, size);
    } else {
        trace_mshv_mem_read(gpa, size);
    }

    ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data,
                           size, false);
    if (ret == MEMTX_OK) {
        return 0;
    }

    if (ret == MEMTX_DECODE_ERROR) {
        return handle_unmapped_mmio_region_read(gpa, size, data);
    }

    error_report("failed to read guest memory at 0x%lx", gpa);
    return -1;
}

int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
                         bool is_secure_mode)
{
    int ret;
    MemTxAttrs memattr = { .secure = is_secure_mode };

    trace_mshv_mem_write(gpa, size);
    ret = address_space_rw(&address_space_memory, gpa, memattr, (void *)data,
                           size, true);
    if (ret == MEMTX_OK) {
        return 0;
    }

    if (ret == MEMTX_DECODE_ERROR) {
        warn_report("write to unmapped mmio region gpa=0x%lx size=%lu", gpa,
                    size);
        return 0;
    }

    error_report("failed to write guest memory at 0x%lx", gpa);
    return -1;
}
static int tracked_unmap(int vm_fd, uint64_t gpa, uint64_t size,
                         uint64_t userspace_addr)
{
    int ret;
    MshvMemorySlot *slot;
    MshvMemorySlotManager *manager = &mshv_state->msm;

    assert(manager);

    QEMU_LOCK_GUARD(&manager->mutex);

    slot = find_mem_slot_by_region(gpa, size, userspace_addr);
    if (!slot) {
        trace_mshv_skip_unset_mem(userspace_addr, gpa, size);
        /* no work to do */
        return 0;
    }

    if (!is_mapped(slot)) {
        /* remove slot, no need to unmap */
        return remove_slot(slot);
    }

    ret = map_or_unmap(vm_fd, slot, false);
    if (ret < 0) {
        error_report("failed to unmap memory region");
        return ret;
    }
    return remove_slot(slot);
}
static int tracked_map(int vm_fd, uint64_t gpa, uint64_t size, bool readonly,
                       uint64_t userspace_addr)
{
    MshvMemorySlot *slot, *overlap_slot;
    int ret;
    MshvMemorySlotManager *manager = &mshv_state->msm;

    assert(manager);

    QEMU_LOCK_GUARD(&manager->mutex);

    slot = find_mem_slot_by_region(gpa, size, userspace_addr);
    if (slot) {
        error_report("memory region already mapped at gpa=0x%lx, "
                     "userspace_addr=0x%lx, size=0x%lx",
                     slot->guest_phys_addr, slot->userspace_addr,
                     slot->memory_size);
        return -1;
    }

    slot = append_slot(gpa, userspace_addr, size, readonly);
    if (!slot) {
        /* append_slot() has already reported the error */
        return -1;
    }

    overlap_slot = find_overlap_mem_slot(manager->slots, slot);
    if (overlap_slot) {
        trace_mshv_remap_attempt(slot->userspace_addr,
                                 slot->guest_phys_addr,
                                 slot->memory_size);
        warn_report("attempt to map region [0x%lx-0x%lx], while "
                    "[0x%lx-0x%lx] is already mapped in the guest",
                    userspace_addr, userspace_addr + size - 1,
                    overlap_slot->userspace_addr,
                    overlap_slot->userspace_addr +
                    overlap_slot->memory_size - 1);

        /* do not register mem slot in hv, but record for later swap-in */
        set_mapped(slot, false);

        return 0;
    }

    ret = map_or_unmap(vm_fd, slot, true);
    if (ret < 0) {
        error_report("failed to map memory region");
        return -1;
    }
    set_mapped(slot, true);

    return 0;
}

static int set_memory(uint64_t gpa, uint64_t size, bool readonly,
                      uint64_t userspace_addr, bool add)
{
    int vm_fd = mshv_state->vm;

    if (add) {
        return tracked_map(vm_fd, gpa, size, readonly, userspace_addr);
    }

    return tracked_unmap(vm_fd, gpa, size, userspace_addr);
}

/*
 * Calculate and align the start address and the size of the section.
 * Return the size. If the size is 0, the aligned section is empty.
 */
static hwaddr align_section(MemoryRegionSection *section, hwaddr *start)
{
    hwaddr size = int128_get64(section->size);
    hwaddr delta, aligned;

    /*
     * Mapping works in page-size chunks, but this function may be called
     * with a sub-page size and an unaligned start address. Round the
     * start address up to the next page boundary and truncate the size
     * down to the previous one.
     */
    aligned = ROUND_UP(section->offset_within_address_space,
                       qemu_real_host_page_size());
    delta = aligned - section->offset_within_address_space;
    *start = aligned;
    if (delta > size) {
        return 0;
    }

    return (size - delta) & qemu_real_host_page_mask();
}
void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
                       bool add)
{
    int ret = 0;
    MemoryRegion *area = section->mr;
    bool writable = !area->readonly && !area->rom_device;
    hwaddr start_addr, mr_offset, size;
    void *ram;

    size = align_section(section, &start_addr);
    trace_mshv_set_phys_mem(add, section->mr->name, start_addr);

    /*
     * If the memory device is a writable non-ram area, we do not
     * want to map it into the guest memory. If it is not in ROMD mode,
     * we want to remove the mshv memory mapping, so accesses will trap.
     */
    if (!memory_region_is_ram(area)) {
        if (writable) {
            return;
        } else if (!area->romd_mode) {
            add = false;
        }
    }

    if (!size) {
        return;
    }

    mr_offset = section->offset_within_region + start_addr -
                section->offset_within_address_space;

    ram = memory_region_get_ram_ptr(area) + mr_offset;

    ret = set_memory(start_addr, size, !writable, (uint64_t)ram, add);
    if (ret < 0) {
        error_report("failed to set memory region");
        abort();
    }
}

void mshv_init_memory_slot_manager(MshvState *mshv_state)
{
    MshvMemorySlotManager *manager;

    assert(mshv_state);
    manager = &mshv_state->msm;

    manager->n_slots = 0;
    manager->slots = NULL;
    qemu_mutex_init(&manager->mutex);
}