qemu/hw/vfio-user/device.c

/*
 * vfio protocol over a UNIX socket device handling.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/thread.h"

#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"

/*
 * These are to defend against a malign server trying
 * to force us to run out of memory.
 */
#define VFIO_USER_MAX_REGIONS   100
#define VFIO_USER_MAX_IRQS      50

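/*
 * Ask the server for its vfio_device_info via VFIO_USER_DEVICE_GET_INFO and
 * sanity-check the returned region and IRQ counts against the limits above.
 */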
bool vfio_user_get_device_info(VFIOUserProxy *proxy,
                               struct vfio_device_info *info, Error **errp)
{
    VFIOUserDeviceInfo msg;
    uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
    msg.argsz = argsz;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
        return false;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, -msg.hdr.error_reply,
                         "VFIO_USER_DEVICE_GET_INFO failed");
        return false;
    }

    trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);

    memcpy(info, &msg.argsz, argsz);

    /* defend against a malicious server */
    if (info->num_regions > VFIO_USER_MAX_REGIONS ||
        info->num_irqs > VFIO_USER_MAX_IRQS) {
        error_setg_errno(errp, EINVAL, "invalid reply");
        return false;
    }

    return true;
}

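/* Send VFIO_USER_DEVICE_RESET; errors are only reported, not returned. */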
void vfio_user_device_reset(VFIOUserProxy *proxy)
{
    Error *local_err = NULL;
    VFIOUserHdr hdr;

    vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);

    if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return;
    }

    if (hdr.flags & VFIO_USER_ERROR) {
        error_printf("reset reply error %d\n", hdr.error_reply);
    }
}

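/*
 * Query a single region with VFIO_USER_DEVICE_GET_REGION_INFO.  Any file
 * descriptors received with the reply are passed back through @fds.
 */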
static int vfio_user_get_region_info(VFIOUserProxy *proxy,
                                     struct vfio_region_info *info,
                                     VFIOUserFDs *fds)
{
    g_autofree VFIOUserRegionInfo *msgp = NULL;
    Error *local_err = NULL;
    uint32_t size;

    /* data returned can be larger than vfio_region_info */
    if (info->argsz < sizeof(*info)) {
        error_printf("vfio_user_get_region_info argsz too small\n");
        return -E2BIG;
    }
    if (fds != NULL && fds->send_fds != 0) {
        error_printf("vfio_user_get_region_info can't send FDs\n");
        return -EINVAL;
    }

    size = info->argsz + sizeof(VFIOUserHdr);
    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
                          sizeof(*msgp), 0);
    msgp->argsz = info->argsz;
    msgp->index = info->index;

    if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }
    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    }
    trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);

    memcpy(info, &msgp->argsz, info->argsz);

    /*
     * If at least one region is directly mapped into the VM, then we can no
     * longer rely on the sequential nature of vfio-user request handling to
     * ensure that posted writes are completed before a subsequent read. In this
     * case, disable posted write support. This is a per-device property, not
     * per-region.
     */
    if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
        vfio_user_disable_posted_writes(proxy);
    }

    return 0;
}

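/* VFIODeviceIOOps get_region_info handler: at most one FD is expected back. */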
static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
                                               struct vfio_region_info *info,
                                               int *fd)
{
    VFIOUserFDs fds = { 0, 1, fd };
    int ret;

    if (info->index > vbasedev->num_regions) {
        return -EINVAL;
    }

    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
    if (ret) {
        return ret;
    }

    /* cap_offset in valid area */
    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
        return -EINVAL;
    }

    return 0;
}

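/* VFIODeviceIOOps get_irq_info handler using VFIO_USER_DEVICE_GET_IRQ_INFO. */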
static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
                                            struct vfio_irq_info *info)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    Error *local_err = NULL;
    VFIOUserIRQInfo msg;

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
                          sizeof(msg), 0);
    msg.argsz = info->argsz;
    msg.index = info->index;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }
    if (msg.hdr.flags & VFIO_USER_ERROR) {
        return -msg.hdr.error_reply;
    }
    trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count);

    memcpy(info, &msg.argsz, sizeof(*info));

    return 0;
}

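/*
 * Return the length of the run starting at fdp[cur] in which the FDs are
 * either all valid or all -1, capped at max.  For example, with
 * fdp = { 3, 4, -1, -1, 5 }, cur = 0 and max = 5 this returns 2.
 * SET_IRQS messages must not mix valid and invalid FDs, so each chunk
 * sent below covers exactly one such run.
 */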
static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
{
    int n = 0;

    if (fdp[cur] != -1) {
        do {
            n++;
        } while (n < max && fdp[cur + n] != -1);
    } else {
        do {
            n++;
        } while (n < max && fdp[cur + n] == -1);
    }

    return n;
}

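/*
 * VFIODeviceIOOps set_irqs handler.  Eventfd requests may be split into
 * several VFIO_USER_DEVICE_SET_IRQS messages so that no single message
 * carries more than proxy->max_send_fds file descriptors.
 */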
static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
                                        struct vfio_irq_set *irq)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    g_autofree VFIOUserIRQSet *msgp = NULL;
    uint32_t size, nfds, send_fds, sent_fds, max;
    Error *local_err = NULL;

    if (irq->argsz < sizeof(*irq)) {
        error_printf("vfio_user_set_irqs argsz too small\n");
        return -EINVAL;
    }

    /*
     * Handle simple case
     */
    if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
        size = sizeof(VFIOUserHdr) + irq->argsz;
        msgp = g_malloc0(size);
        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start;
        msgp->count = irq->count;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }
        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }

        return 0;
    }

    /*
     * Calculate the number of FDs to send
     * and adjust argsz
     */
    nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
    irq->argsz = sizeof(*irq);
    msgp = g_malloc0(sizeof(*msgp));

    /*
     * Send in chunks if over max_send_fds
     */
    for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
        VFIOUserFDs *arg_fds, loop_fds;

        /* must send all valid FDs or all invalid FDs in single msg */
        max = nfds - sent_fds;
        if (max > proxy->max_send_fds) {
            max = proxy->max_send_fds;
        }
        send_fds = irq_howmany((int *)irq->data, sent_fds, max);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
                              sizeof(*msgp), 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start + sent_fds;
        msgp->count = send_fds;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        loop_fds.send_fds = send_fds;
        loop_fds.recv_fds = 0;
        loop_fds.fds = (int *)irq->data + sent_fds;
        arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;

        if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }
        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }
    }

    return 0;
}

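/*
 * Read @count bytes from a device region with VFIO_USER_REGION_READ; the
 * reply returns the data inline, so count is bounded by max_xfer_size.
 */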
static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
                                           off_t off, uint32_t count,
                                           void *data)
{
    g_autofree VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    } else if (msgp->count > count) {
        return -E2BIG;
    } else {
        memcpy(data, &msgp->data, msgp->count);
    }

    return msgp->count;
}

/*
 * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
 * to send the write to the socket without waiting for the server's reply:
 * a subsequent read (of any region) will not pass the posted write, as all
 * messages are handled sequentially.
 */
static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
                                            off_t off, unsigned count,
                                            void *data, bool post)
{
    VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;
    bool can_multi;
    int flags = 0;
    int ret;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    if (proxy->flags & VFIO_PROXY_NO_POST) {
        post = false;
    }

    if (post) {
        flags |= VFIO_USER_NO_REPLY;
    }

    /* is this write eligible to be in a WRITE_MULTI msg? */
    can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
                count <= VFIO_USER_MULTI_DATA;

    /*
     * This should be a rare case, so first check without the lock;
     * if we're wrong, vfio_send_queued() will flush any posted writes
     * we missed here.
     */
    if (proxy->wr_multi != NULL ||
        (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {

        /*
         * Re-check with the lock held:
         *
         * if already building a WRITE_MULTI msg,
         *  add this one if possible, else flush pending before
         *  sending the current one
         *
         * else if the outgoing queue is over the highwater,
         *  start a new WRITE_MULTI message
         */
        WITH_QEMU_LOCK_GUARD(&proxy->lock) {
            if (proxy->wr_multi != NULL) {
                if (can_multi) {
                    vfio_user_add_multi(proxy, index, off, count, data);
                    return count;
                }
                vfio_user_flush_multi(proxy);
            } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
                vfio_user_create_multi(proxy);
                vfio_user_add_multi(proxy, index, off, count, data);
                return count;
            }
        }
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    memcpy(&msgp->data, data, count);
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    /* async send will free msg after it's sent */
    if (post) {
        if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }
        return count;
    }

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        g_free(msgp);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        ret = -msgp->hdr.error_reply;
    } else {
        ret = count;
    }

    g_free(msgp);
    return ret;
}

/*
 * Socket-based io_ops
 */
VFIODeviceIOOps vfio_user_device_io_ops_sock = {
    .get_region_info = vfio_user_device_io_get_region_info,
    .get_irq_info = vfio_user_device_io_get_irq_info,
    .set_irqs = vfio_user_device_io_set_irqs,
    .region_read = vfio_user_device_io_region_read,
    .region_write = vfio_user_device_io_region_write,
};