mirror of https://gitlab.com/qemu-project/qemu
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
442 lines
13 KiB
C
442 lines
13 KiB
C
/*
|
|
* vfio protocol over a UNIX socket device handling.
|
|
*
|
|
* Copyright © 2018, 2021 Oracle and/or its affiliates.
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qapi/error.h"
|
|
#include "qemu/error-report.h"
|
|
#include "qemu/lockable.h"
|
|
#include "qemu/thread.h"
|
|
|
|
#include "hw/vfio-user/device.h"
|
|
#include "hw/vfio-user/trace.h"
|
|
|
|
/*
|
|
* These are to defend against a malign server trying
|
|
* to force us to run out of memory.
|
|
*/
|
|
#define VFIO_USER_MAX_REGIONS 100
|
|
#define VFIO_USER_MAX_IRQS 50
|
|
|
|
/*
 * Query the server for the device's top-level description.
 *
 * Sends VFIO_USER_DEVICE_GET_INFO and copies the reply payload (everything
 * following the message header) into @info.
 *
 * Returns true on success.  Returns false with @errp set when the exchange
 * fails, when the server answers with an error, or when the advertised
 * region/IRQ counts exceed VFIO_USER_MAX_REGIONS / VFIO_USER_MAX_IRQS.  In
 * the last case @info has already been overwritten with the rejected reply.
 */
bool vfio_user_get_device_info(VFIOUserProxy *proxy,
                               struct vfio_device_info *info, Error **errp)
{
    VFIOUserDeviceInfo msg;
    uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
    msg.argsz = argsz;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
        return false;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        /*
         * error_reply holds a positive errno value (the other handlers in
         * this file negate it to build their negative return codes), and
         * error_setg_errno() takes a positive errno, exactly like the EINVAL
         * used further down.  Pass it through unnegated so the appended
         * strerror() text is meaningful.
         */
        error_setg_errno(errp, msg.hdr.error_reply,
                         "VFIO_USER_DEVICE_GET_INFO failed");
        return false;
    }

    trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);

    /* the payload starts at argsz, right after the header */
    memcpy(info, &msg.argsz, argsz);

    /* defend against a malicious server */
    if (info->num_regions > VFIO_USER_MAX_REGIONS ||
        info->num_irqs > VFIO_USER_MAX_IRQS) {
        error_setg_errno(errp, EINVAL, "invalid reply");
        return false;
    }

    return true;
}
|
|
|
|
/*
 * Ask the server to reset the device.
 *
 * Nothing is returned to the caller: a failed exchange is reported through
 * error_report_err() and an error reply through error_printf().
 */
void vfio_user_device_reset(VFIOUserProxy *proxy)
{
    VFIOUserHdr reset_msg;
    Error *err = NULL;

    /* the request is a bare header with no payload */
    vfio_user_request_msg(&reset_msg, VFIO_USER_DEVICE_RESET,
                          sizeof(reset_msg), 0);

    if (vfio_user_send_wait(proxy, &reset_msg, NULL, 0, &err)) {
        if (reset_msg.flags & VFIO_USER_ERROR) {
            error_printf("reset reply error %d\n", reset_msg.error_reply);
        }
        return;
    }

    error_prepend(&err, "%s: ", __func__);
    error_report_err(err);
}
|
|
|
|
static int vfio_user_get_region_info(VFIOUserProxy *proxy,
|
|
struct vfio_region_info *info,
|
|
VFIOUserFDs *fds)
|
|
{
|
|
g_autofree VFIOUserRegionInfo *msgp = NULL;
|
|
Error *local_err = NULL;
|
|
uint32_t size;
|
|
|
|
/* data returned can be larger than vfio_region_info */
|
|
if (info->argsz < sizeof(*info)) {
|
|
error_printf("vfio_user_get_region_info argsz too small\n");
|
|
return -E2BIG;
|
|
}
|
|
if (fds != NULL && fds->send_fds != 0) {
|
|
error_printf("vfio_user_get_region_info can't send FDs\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
size = info->argsz + sizeof(VFIOUserHdr);
|
|
msgp = g_malloc0(size);
|
|
|
|
vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
|
|
sizeof(*msgp), 0);
|
|
msgp->argsz = info->argsz;
|
|
msgp->index = info->index;
|
|
|
|
if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
|
|
error_prepend(&local_err, "%s: ", __func__);
|
|
error_report_err(local_err);
|
|
return -EFAULT;
|
|
}
|
|
|
|
if (msgp->hdr.flags & VFIO_USER_ERROR) {
|
|
return -msgp->hdr.error_reply;
|
|
}
|
|
trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);
|
|
|
|
memcpy(info, &msgp->argsz, info->argsz);
|
|
|
|
/*
|
|
* If at least one region is directly mapped into the VM, then we can no
|
|
* longer rely on the sequential nature of vfio-user request handling to
|
|
* ensure that posted writes are completed before a subsequent read. In this
|
|
* case, disable posted write support. This is a per-device property, not
|
|
* per-region.
|
|
*/
|
|
if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
|
|
vfio_user_disable_posted_writes(proxy);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * io_ops callback: look up a region's info over the socket.
 *
 * @info carries the requested index and buffer size and receives the reply;
 * @fd is handed to the transport as the single slot for a received file
 * descriptor.
 *
 * Returns 0 on success, -EINVAL when the index is out of range or when the
 * reply advertises capabilities at an offset outside
 * [sizeof(*info), argsz], or whatever error vfio_user_get_region_info()
 * returned.
 */
static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
                                               struct vfio_region_info *info,
                                               int *fd)
{
    /* send no descriptors, accept at most one into *fd */
    VFIOUserFDs fds = { 0, 1, fd};
    int ret;

    /* valid indexes are 0 .. num_regions - 1, so num_regions itself is out */
    if (info->index >= vbasedev->num_regions) {
        return -EINVAL;
    }

    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
    if (ret) {
        return ret;
    }

    /* cap_offset in valid area */
    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
        return -EINVAL;
    }

    return 0;
}
|
|
|
|
/*
 * io_ops callback: retrieve the description of one IRQ index.
 *
 * The index and argsz from @info are sent to the server; on success @info is
 * overwritten with sizeof(*info) bytes of the reply payload.
 *
 * Returns 0 on success, -EFAULT when the exchange fails (the error is
 * reported here), or the negated error code carried by an error reply.
 */
static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
                                            struct vfio_irq_info *info)
{
    VFIOUserIRQInfo req;
    Error *err = NULL;

    memset(&req, 0, sizeof(req));
    vfio_user_request_msg(&req.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
                          sizeof(req), 0);
    req.index = info->index;
    req.argsz = info->argsz;

    if (!vfio_user_send_wait(vbasedev->proxy, &req.hdr, NULL, 0, &err)) {
        error_prepend(&err, "%s: ", __func__);
        error_report_err(err);
        return -EFAULT;
    }

    if (req.hdr.flags & VFIO_USER_ERROR) {
        return -req.hdr.error_reply;
    }

    trace_vfio_user_get_irq_info(req.index, req.flags, req.count);

    /* payload begins at the argsz field, immediately after the header */
    memcpy(info, &req.argsz, sizeof(*info));

    return 0;
}
|
|
|
|
/*
 * Measure the run of like-kind entries in @fdp beginning at index @cur.
 *
 * An entry is either a descriptor (anything but -1) or a hole (-1).  The run
 * extends while the following entries are of the same kind as fdp[cur] and
 * is capped at @max entries.  The entry at @cur is always counted, so the
 * result is at least 1 even when @max is 0.
 */
static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
{
    const int want_fd = (fdp[cur] != -1);
    int run = 1;

    while ((uint32_t)run < max && (fdp[cur + run] != -1) == want_fd) {
        run++;
    }

    return run;
}
|
|
|
|
/*
 * io_ops callback: forward a VFIO_DEVICE_SET_IRQS style request.
 *
 * Requests without VFIO_IRQ_SET_DATA_EVENTFD are sent as one message sized
 * from irq->argsz.  Eventfd requests are broken into one message per run of
 * entries that are either all descriptors or all -1, each run further capped
 * at proxy->max_send_fds; irq->argsz is reset to sizeof(*irq) in that case.
 *
 * Returns 0 on success, -EINVAL when argsz is smaller than the fixed
 * structure, -EFAULT when an exchange fails (the error is reported here), or
 * the negated error code of the first error reply.
 */
static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
                                        struct vfio_irq_set *irq)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    g_autofree VFIOUserIRQSet *req = NULL;
    uint32_t msg_size, total_fds, chunk, done, limit;
    Error *err = NULL;
    int *fd_table;

    if (irq->argsz < sizeof(*irq)) {
        error_printf("vfio_user_set_irqs argsz too small\n");
        return -EINVAL;
    }

    /*
     * Handle simple case
     */
    if (!(irq->flags & VFIO_IRQ_SET_DATA_EVENTFD)) {
        msg_size = sizeof(VFIOUserHdr) + irq->argsz;
        req = g_malloc0(msg_size);

        vfio_user_request_msg(&req->hdr, VFIO_USER_DEVICE_SET_IRQS,
                              msg_size, 0);
        req->argsz = irq->argsz;
        req->flags = irq->flags;
        req->index = irq->index;
        req->start = irq->start;
        req->count = irq->count;
        trace_vfio_user_set_irqs(req->index, req->start, req->count,
                                 req->flags);

        if (!vfio_user_send_wait(proxy, &req->hdr, NULL, 0, &err)) {
            error_prepend(&err, "%s: ", __func__);
            error_report_err(err);
            return -EFAULT;
        }

        return (req->hdr.flags & VFIO_USER_ERROR) ?
               -req->hdr.error_reply : 0;
    }

    /*
     * Calculate the number of FDs to send
     * and adjust argsz
     */
    total_fds = (irq->argsz - sizeof(*irq)) / sizeof(int);
    irq->argsz = sizeof(*irq);
    fd_table = (int *)irq->data;
    req = g_malloc0(sizeof(*req));

    /*
     * Send in chunks if over max_send_fds
     */
    done = 0;
    while (total_fds > done) {
        VFIOUserFDs chunk_fds;
        VFIOUserFDs *fds_arg;

        /* must send all valid FDs or all invalid FDs in single msg */
        limit = total_fds - done;
        if (limit > proxy->max_send_fds) {
            limit = proxy->max_send_fds;
        }
        chunk = irq_howmany(fd_table, done, limit);

        vfio_user_request_msg(&req->hdr, VFIO_USER_DEVICE_SET_IRQS,
                              sizeof(*req), 0);
        req->argsz = irq->argsz;
        req->flags = irq->flags;
        req->index = irq->index;
        req->start = irq->start + done;
        req->count = chunk;
        trace_vfio_user_set_irqs(req->index, req->start, req->count,
                                 req->flags);

        chunk_fds.send_fds = chunk;
        chunk_fds.recv_fds = 0;
        chunk_fds.fds = fd_table + done;

        /* a run of -1 entries goes out with no descriptors attached */
        fds_arg = (chunk_fds.fds[0] == -1) ? NULL : &chunk_fds;

        if (!vfio_user_send_wait(proxy, &req->hdr, fds_arg, 0, &err)) {
            error_prepend(&err, "%s: ", __func__);
            error_report_err(err);
            return -EFAULT;
        }

        if (req->hdr.flags & VFIO_USER_ERROR) {
            return -req->hdr.error_reply;
        }

        done += chunk;
    }

    return 0;
}
|
|
|
|
/*
 * io_ops callback: read @count bytes at offset @off of region @index.
 *
 * The reply data is copied into @data.
 *
 * Returns the byte count reported by the server on success, -EINVAL when
 * @count exceeds proxy->max_xfer_size, -EFAULT when the exchange fails (the
 * error is reported here), the negated error code of an error reply, or
 * -E2BIG when the reply claims more data than was requested.
 */
static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
                                           off_t off, uint32_t count,
                                           void *data)
{
    g_autofree VFIOUserRegionRW *rd = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    /* reply buffer: fixed part followed by the requested data */
    int reply_size = sizeof(*rd) + count;
    Error *err = NULL;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    rd = g_malloc0(reply_size);

    /* the request carries no data, so only the fixed part is sent */
    vfio_user_request_msg(&rd->hdr, VFIO_USER_REGION_READ, sizeof(*rd), 0);
    rd->offset = off;
    rd->region = index;
    rd->count = count;
    trace_vfio_user_region_rw(rd->region, rd->offset, rd->count);

    if (!vfio_user_send_wait(proxy, &rd->hdr, NULL, reply_size, &err)) {
        error_prepend(&err, "%s: ", __func__);
        error_report_err(err);
        return -EFAULT;
    }

    if (rd->hdr.flags & VFIO_USER_ERROR) {
        return -rd->hdr.error_reply;
    }

    /* never copy more than the caller's buffer was sized for */
    if (rd->count > count) {
        return -E2BIG;
    }

    memcpy(data, &rd->data, rd->count);

    return rd->count;
}
|
|
|
|
/*
|
|
* If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
|
|
* to send the write to the socket without waiting for the server's reply:
|
|
* a subsequent read (of any region) will not pass the posted write, as all
|
|
* messages are handled sequentially.
|
|
*/
|
|
static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
|
|
off_t off, unsigned count,
|
|
void *data, bool post)
|
|
{
|
|
VFIOUserRegionRW *msgp = NULL;
|
|
VFIOUserProxy *proxy = vbasedev->proxy;
|
|
int size = sizeof(*msgp) + count;
|
|
Error *local_err = NULL;
|
|
bool can_multi;
|
|
int flags = 0;
|
|
int ret;
|
|
|
|
if (count > proxy->max_xfer_size) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (proxy->flags & VFIO_PROXY_NO_POST) {
|
|
post = false;
|
|
}
|
|
|
|
if (post) {
|
|
flags |= VFIO_USER_NO_REPLY;
|
|
}
|
|
|
|
/* write eligible to be in a WRITE_MULTI msg ? */
|
|
can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
|
|
count <= VFIO_USER_MULTI_DATA;
|
|
|
|
/*
|
|
* This should be a rare case, so first check without the lock,
|
|
* if we're wrong, vfio_send_queued() will flush any posted writes
|
|
* we missed here
|
|
*/
|
|
if (proxy->wr_multi != NULL ||
|
|
(proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {
|
|
|
|
/*
|
|
* re-check with lock
|
|
*
|
|
* if already building a WRITE_MULTI msg,
|
|
* add this one if possible else flush pending before
|
|
* sending the current one
|
|
*
|
|
* else if outgoing queue is over the highwater,
|
|
* start a new WRITE_MULTI message
|
|
*/
|
|
WITH_QEMU_LOCK_GUARD(&proxy->lock) {
|
|
if (proxy->wr_multi != NULL) {
|
|
if (can_multi) {
|
|
vfio_user_add_multi(proxy, index, off, count, data);
|
|
return count;
|
|
}
|
|
vfio_user_flush_multi(proxy);
|
|
} else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
|
|
vfio_user_create_multi(proxy);
|
|
vfio_user_add_multi(proxy, index, off, count, data);
|
|
return count;
|
|
}
|
|
}
|
|
}
|
|
|
|
msgp = g_malloc0(size);
|
|
vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
|
|
msgp->offset = off;
|
|
msgp->region = index;
|
|
msgp->count = count;
|
|
memcpy(&msgp->data, data, count);
|
|
trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
|
|
|
|
/* async send will free msg after it's sent */
|
|
if (post) {
|
|
if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
|
|
error_prepend(&local_err, "%s: ", __func__);
|
|
error_report_err(local_err);
|
|
return -EFAULT;
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
|
|
error_prepend(&local_err, "%s: ", __func__);
|
|
error_report_err(local_err);
|
|
g_free(msgp);
|
|
return -EFAULT;
|
|
}
|
|
|
|
if (msgp->hdr.flags & VFIO_USER_ERROR) {
|
|
ret = -msgp->hdr.error_reply;
|
|
} else {
|
|
ret = count;
|
|
}
|
|
|
|
g_free(msgp);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Socket-based io_ops
|
|
*/
|
|
VFIODeviceIOOps vfio_user_device_io_ops_sock = {
|
|
.get_region_info = vfio_user_device_io_get_region_info,
|
|
.get_irq_info = vfio_user_device_io_get_irq_info,
|
|
.set_irqs = vfio_user_device_io_set_irqs,
|
|
.region_read = vfio_user_device_io_region_read,
|
|
.region_write = vfio_user_device_io_region_write,
|
|
|
|
};
|