You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
qemu/hw/ppc/spapr_vhyp_mmu.c

612 lines
17 KiB
C

/*
* MMU hypercalls for the sPAPR (pseries) vHyp hypervisor that is used by TCG
*
* Copyright (c) 2004-2007 Fabrice Bellard
* Copyright (c) 2007 Jocelyn Mayer
* Copyright (c) 2010 David Gibson, IBM Corporation.
*
* SPDX-License-Identifier: MIT
*/
#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "qemu/memalign.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "helper_regs.h"
#include "hw/ppc/spapr.h"
#include "mmu-hash64.h"
static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong flags = args[0];
target_ulong ptex = args[1];
target_ulong pteh = args[2];
target_ulong ptel = args[3];
unsigned apshift;
target_ulong raddr;
target_ulong slot;
const ppc_hash_pte64_t *hptes;
apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
if (!apshift) {
/* Bad page size encoding */
return H_PARAMETER;
}
raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);
if (is_ram_address(spapr, raddr)) {
/* Regular RAM - should have WIMG=0010 */
if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
return H_PARAMETER;
}
} else {
target_ulong wimg_flags;
/* Looks like an IO address */
/* FIXME: What WIMG combinations could be sensible for IO?
* For now we allow WIMG=010x, but are there others? */
/* FIXME: Should we check against registered IO addresses? */
wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M));
if (wimg_flags != HPTE64_R_I &&
wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
return H_PARAMETER;
}
}
pteh &= ~0x60ULL;
if (!ppc_hash64_valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
slot = ptex & 7ULL;
ptex = ptex & ~7ULL;
if (likely((flags & H_EXACT) == 0)) {
hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
for (slot = 0; slot < 8; slot++) {
if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
break;
}
}
ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
if (slot == 8) {
return H_PTEG_FULL;
}
} else {
hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
return H_PTEG_FULL;
}
ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
}
spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
args[0] = ptex + slot;
return H_SUCCESS;
}
typedef enum {
REMOVE_SUCCESS = 0,
REMOVE_NOT_FOUND = 1,
REMOVE_PARM = 2,
REMOVE_HW = 3,
} RemoveResult;
static RemoveResult remove_hpte(PowerPCCPU *cpu
, target_ulong ptex,
target_ulong avpn,
target_ulong flags,
target_ulong *vp, target_ulong *rp)
{
const ppc_hash_pte64_t *hptes;
target_ulong v, r;
if (!ppc_hash64_valid_ptex(cpu, ptex)) {
return REMOVE_PARM;
}
hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
v = ppc_hash64_hpte0(cpu, hptes, 0);
r = ppc_hash64_hpte1(cpu, hptes, 0);
ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
((flags & H_ANDCOND) && (v & avpn) != 0)) {
return REMOVE_NOT_FOUND;
}
*vp = v;
*rp = r;
spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
return REMOVE_SUCCESS;
}
static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
target_ulong ptex = args[1];
target_ulong avpn = args[2];
RemoveResult ret;
ret = remove_hpte(cpu, ptex, avpn, flags,
&args[0], &args[1]);
switch (ret) {
case REMOVE_SUCCESS:
check_tlb_flush(env, true);
return H_SUCCESS;
case REMOVE_NOT_FOUND:
return H_NOT_FOUND;
case REMOVE_PARM:
return H_PARAMETER;
case REMOVE_HW:
return H_HARDWARE;
}
g_assert_not_reached();
}
#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL
#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL
#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL
#define H_BULK_REMOVE_END 0xc000000000000000ULL
#define H_BULK_REMOVE_CODE 0x3000000000000000ULL
#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL
#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL
#define H_BULK_REMOVE_PARM 0x2000000000000000ULL
#define H_BULK_REMOVE_HW 0x3000000000000000ULL
#define H_BULK_REMOVE_RC 0x0c00000000000000ULL
#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL
#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL
#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL
#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL
#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL
#define H_BULK_REMOVE_MAX_BATCH 4
static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
int i;
target_ulong rc = H_SUCCESS;
for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
target_ulong *tsh = &args[i*2];
target_ulong tsl = args[i*2 + 1];
target_ulong v, r, ret;
if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
break;
} else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
return H_PARAMETER;
}
*tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
*tsh |= H_BULK_REMOVE_RESPONSE;
if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
*tsh |= H_BULK_REMOVE_PARM;
return H_PARAMETER;
}
ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
(*tsh & H_BULK_REMOVE_FLAGS) >> 26,
&v, &r);
*tsh |= ret << 60;
switch (ret) {
case REMOVE_SUCCESS:
*tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43;
break;
case REMOVE_PARM:
rc = H_PARAMETER;
goto exit;
case REMOVE_HW:
rc = H_HARDWARE;
goto exit;
}
}
exit:
check_tlb_flush(env, true);
return rc;
}
static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
CPUPPCState *env = &cpu->env;
target_ulong flags = args[0];
target_ulong ptex = args[1];
target_ulong avpn = args[2];
const ppc_hash_pte64_t *hptes;
target_ulong v, r;
if (!ppc_hash64_valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
v = ppc_hash64_hpte0(cpu, hptes, 0);
r = ppc_hash64_hpte1(cpu, hptes, 0);
ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
if ((v & HPTE64_V_VALID) == 0 ||
((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
return H_NOT_FOUND;
}
r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
r |= (flags << 55) & HPTE64_R_PP0;
r |= (flags << 48) & HPTE64_R_KEY_HI;
r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
spapr_store_hpte(cpu, ptex,
(v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r);
/* Flush the tlb */
check_tlb_flush(env, true);
/* Don't need a memory barrier, due to qemu's global lock */
spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r);
return H_SUCCESS;
}
static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong flags = args[0];
target_ulong ptex = args[1];
int i, ridx, n_entries = 1;
const ppc_hash_pte64_t *hptes;
if (!ppc_hash64_valid_ptex(cpu, ptex)) {
return H_PARAMETER;
}
if (flags & H_READ_4) {
/* Clear the two low order bits */
ptex &= ~(3ULL);
n_entries = 4;
}
hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries);
for (i = 0, ridx = 0; i < n_entries; i++) {
args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i);
args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i);
}
ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries);
return H_SUCCESS;
}
struct SpaprPendingHpt {
/* These fields are read-only after initialization */
int shift;
QemuThread thread;
/* These fields are protected by the BQL */
bool complete;
/* These fields are private to the preparation thread if
* !complete, otherwise protected by the BQL */
int ret;
void *hpt;
};
static void free_pending_hpt(SpaprPendingHpt *pending)
{
if (pending->hpt) {
qemu_vfree(pending->hpt);
}
g_free(pending);
}
static void *hpt_prepare_thread(void *opaque)
{
SpaprPendingHpt *pending = opaque;
size_t size = 1ULL << pending->shift;
pending->hpt = qemu_try_memalign(size, size);
if (pending->hpt) {
memset(pending->hpt, 0, size);
pending->ret = H_SUCCESS;
} else {
pending->ret = H_NO_MEM;
}
bql_lock();
if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) {
/* Ready to go */
pending->complete = true;
} else {
/* We've been cancelled, clean ourselves up */
free_pending_hpt(pending);
}
bql_unlock();
return NULL;
}
/* Must be called with BQL held */
static void cancel_hpt_prepare(SpaprMachineState *spapr)
{
SpaprPendingHpt *pending = spapr->pending_hpt;
/* Let the thread know it's cancelled */
spapr->pending_hpt = NULL;
if (!pending) {
/* Nothing to do */
return;
}
if (!pending->complete) {
/* thread will clean itself up */
return;
}
free_pending_hpt(pending);
}
target_ulong vhyp_mmu_resize_hpt_prepare(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong shift)
{
SpaprPendingHpt *pending = spapr->pending_hpt;
if (pending) {
/* something already in progress */
if (pending->shift == shift) {
/* and it's suitable */
if (pending->complete) {
return pending->ret;
} else {
return H_LONG_BUSY_ORDER_100_MSEC;
}
}
/* not suitable, cancel and replace */
cancel_hpt_prepare(spapr);
}
if (!shift) {
/* nothing to do */
return H_SUCCESS;
}
/* start new prepare */
pending = g_new0(SpaprPendingHpt, 1);
pending->shift = shift;
pending->ret = H_HARDWARE;
qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
spapr->pending_hpt = pending;
/* In theory we could estimate the time more accurately based on
* the new size, but there's not much point */
return H_LONG_BUSY_ORDER_100_MSEC;
}
static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
{
uint8_t *addr = htab;
addr += pteg * HASH_PTEG_SIZE_64;
addr += slot * HASH_PTE_SIZE_64;
return ldq_p(addr);
}
static void new_hpte_store(void *htab, uint64_t pteg, int slot,
uint64_t pte0, uint64_t pte1)
{
uint8_t *addr = htab;
addr += pteg * HASH_PTEG_SIZE_64;
addr += slot * HASH_PTE_SIZE_64;
stq_p(addr, pte0);
stq_p(addr + HPTE64_DW1, pte1);
}
static int rehash_hpte(PowerPCCPU *cpu,
const ppc_hash_pte64_t *hptes,
void *old_hpt, uint64_t oldsize,
void *new_hpt, uint64_t newsize,
uint64_t pteg, int slot)
{
uint64_t old_hash_mask = (oldsize >> 7) - 1;
uint64_t new_hash_mask = (newsize >> 7) - 1;
target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
target_ulong pte1;
uint64_t avpn;
unsigned base_pg_shift;
uint64_t hash, new_pteg, replace_pte0;
if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
return H_SUCCESS;
}
pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
if (pte0 & HPTE64_V_SECONDARY) {
pteg = ~pteg;
}
if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
uint64_t offset, vsid;
/* We only have 28 - 23 bits of offset in avpn */
offset = (avpn & 0x1f) << 23;
vsid = avpn >> 5;
/* We can find more bits from the pteg value */
if (base_pg_shift < 23) {
offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
}
hash = vsid ^ (offset >> base_pg_shift);
} else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
uint64_t offset, vsid;
/* We only have 40 - 23 bits of seg_off in avpn */
offset = (avpn & 0x1ffff) << 23;
vsid = avpn >> 17;
if (base_pg_shift < 23) {
offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
<< base_pg_shift;
}
hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
} else {
error_report("rehash_pte: Bad segment size in HPTE");
return H_HARDWARE;
}
new_pteg = hash & new_hash_mask;
if (pte0 & HPTE64_V_SECONDARY) {
assert(~pteg == (hash & old_hash_mask));
new_pteg = ~new_pteg;
} else {
assert(pteg == (hash & old_hash_mask));
}
assert((oldsize != newsize) || (pteg == new_pteg));
replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
/*
* Strictly speaking, we don't need all these tests, since we only
* ever rehash bolted HPTEs. We might in future handle non-bolted
* HPTEs, though so make the logic correct for those cases as
* well.
*/
if (replace_pte0 & HPTE64_V_VALID) {
assert(newsize < oldsize);
if (replace_pte0 & HPTE64_V_BOLTED) {
if (pte0 & HPTE64_V_BOLTED) {
/* Bolted collision, nothing we can do */
return H_PTEG_FULL;
} else {
/* Discard this hpte */
return H_SUCCESS;
}
}
}
new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
return H_SUCCESS;
}
static int rehash_hpt(PowerPCCPU *cpu,
void *old_hpt, uint64_t oldsize,
void *new_hpt, uint64_t newsize)
{
uint64_t n_ptegs = oldsize >> 7;
uint64_t pteg;
int slot;
int rc;
for (pteg = 0; pteg < n_ptegs; pteg++) {
hwaddr ptex = pteg * HPTES_PER_GROUP;
const ppc_hash_pte64_t *hptes
= ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
if (!hptes) {
return H_HARDWARE;
}
for (slot = 0; slot < HPTES_PER_GROUP; slot++) {
rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize,
pteg, slot);
if (rc != H_SUCCESS) {
ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
return rc;
}
}
ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
}
return H_SUCCESS;
}
target_ulong vhyp_mmu_resize_hpt_commit(PowerPCCPU *cpu,
SpaprMachineState *spapr,
target_ulong flags,
target_ulong shift)
{
SpaprPendingHpt *pending = spapr->pending_hpt;
int rc;
size_t newsize;
if (flags != 0) {
return H_PARAMETER;
}
if (!pending || (pending->shift != shift)) {
/* no matching prepare */
return H_CLOSED;
}
if (!pending->complete) {
/* prepare has not completed */
return H_BUSY;
}
/* Shouldn't have got past PREPARE without an HPT */
g_assert(spapr->htab_shift);
newsize = 1ULL << pending->shift;
rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
pending->hpt, newsize);
if (rc == H_SUCCESS) {
qemu_vfree(spapr->htab);
spapr->htab = pending->hpt;
spapr->htab_shift = pending->shift;
push_sregs_to_kvm_pr(spapr);
pending->hpt = NULL; /* so it's not free()d */
}
/* Clean up */
spapr->pending_hpt = NULL;
free_pending_hpt(pending);
return rc;
}
static void hypercall_register_types(void)
{
/* hcall-pft */
spapr_register_hypercall(H_ENTER, h_enter);
spapr_register_hypercall(H_REMOVE, h_remove);
spapr_register_hypercall(H_PROTECT, h_protect);
spapr_register_hypercall(H_READ, h_read);
/* hcall-bulk */
spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
}
type_init(hypercall_register_types)