You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
qemu/hw/vfio-user/proxy.c

1357 lines
36 KiB
C

/*
* vfio protocol over a UNIX socket.
*
* Copyright © 2018, 2021 Oracle and/or its affiliates.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include "hw/vfio/vfio-device.h"
#include "hw/vfio-user/proxy.h"
#include "hw/vfio-user/trace.h"
#include "qapi/error.h"
#include "qobject/qbool.h"
#include "qobject/qdict.h"
#include "qobject/qjson.h"
#include "qobject/qnum.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qemu/thread.h"
#include "system/iothread.h"
/*
 * IOThread shared by all vfio-user proxies.  It is created by
 * vfio_user_connect_dev() when still NULL and destroyed by
 * vfio_user_disconnect() once the list of proxies becomes empty.
 */
static IOThread *vfio_user_iothread;
/* Forward declarations of helpers defined further down in this file. */
static void vfio_user_shutdown(VFIOUserProxy *proxy);
static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds);
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);
/* fd handlers and bottom halves; each receives the proxy as 'opaque' */
static void vfio_user_recv(void *opaque);
static void vfio_user_send(void *opaque);
static void vfio_user_cb(void *opaque);
static void vfio_user_request(void *opaque);
/*
 * Mark a message header as an error and store the error code in it.
 */
static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
{
    hdr->error_reply = err;
    hdr->flags |= VFIO_USER_ERROR;
}
/*
* Functions called by main, CPU, or iothread threads
*/
/*
 * Stop using the proxy's channel: shut down its read side and clear the
 * read and write fd handlers registered on the proxy's AioContext.
 */
static void vfio_user_shutdown(VFIOUserProxy *proxy)
{
    /* errors from the shutdown are deliberately ignored (NULL errp) */
    qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);

    /* no read handler, no write handler, no opaque */
    qio_channel_set_aio_fd_handler(proxy->ioc,
                                   proxy->ctx, NULL,
                                   proxy->ctx, NULL,
                                   NULL);
}
/*
 * Write one message (its header buffer plus any FDs to send) to the
 * proxy's channel.
 *
 * Return values are those of qio_channel_writev_full():
 *
 *   QIO_CHANNEL_ERR_BLOCK: *errp not set
 *   -1:                    *errp will be populated
 *   otherwise:             bytes written
 *
 * On -1 the message header is additionally flagged with EIO and the
 * proxy is shut down.
 */
static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                                  Error **errp)
{
    VFIOUserFDs *fds = msg->fds;
    bool have_fds = fds != NULL && fds->send_fds != 0;
    struct iovec iov = {
        .iov_base = msg->hdr,
        .iov_len = msg->hdr->size,
    };
    ssize_t nwritten;

    /* only hand an FD array to the channel when there is something in it */
    nwritten = qio_channel_writev_full(proxy->ioc, &iov, 1,
                                       have_fds ? fds->fds : NULL,
                                       have_fds ? fds->send_fds : 0,
                                       0, errp);
    if (nwritten == -1) {
        vfio_user_set_error(msg->hdr, EIO);
        vfio_user_shutdown(proxy);
    }
    trace_vfio_user_send_write(msg->hdr->id, nwritten);

    return nwritten;
}
/*
 * Obtain a message list entry and attach @hdr and @fds to it.
 *
 * An entry is taken from the proxy's free list when one is available;
 * otherwise a zeroed entry is allocated and its condition variable
 * initialised.
 */
static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                                     VFIOUserFDs *fds)
{
    VFIOUserMsg *msg = QTAILQ_FIRST(&proxy->free);

    if (msg == NULL) {
        msg = g_malloc0(sizeof(*msg));
        qemu_cond_init(&msg->cv);
    } else {
        QTAILQ_REMOVE(&proxy->free, msg, next);
    }

    msg->fds = fds;
    msg->hdr = hdr;

    return msg;
}
/*
 * Put a message list entry back on the proxy's free list.
 *
 * For NOWAIT and ASYNC messages the header and FD buffers are freed
 * here, because no one is waiting to consume a reply in them.  For all
 * other types the buffers are left untouched.  An entry whose type is
 * already VFIO_MSG_NONE is reported and otherwise ignored.
 */
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
{
    switch (msg->type) {
    case VFIO_MSG_NONE:
        error_printf("vfio_user_recycle - freeing free msg\n");
        return;
    case VFIO_MSG_NOWAIT:
    case VFIO_MSG_ASYNC:
        /* g_free() accepts NULL, so no guard is needed for msg->fds */
        g_free(msg->hdr);
        g_free(msg->fds);
        break;
    default:
        break;
    }

    /* reset the entry before making it available for reuse */
    msg->pending = false;
    msg->complete = false;
    msg->fds = NULL;
    msg->hdr = NULL;
    msg->type = VFIO_MSG_NONE;

    QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
}
/*
 * Allocate a zeroed VFIOUserFDs with room for @numfds descriptors.
 *
 * The descriptor array is carved out of the same allocation, directly
 * after the structure, so a single g_free() releases both.
 */
VFIOUserFDs *vfio_user_getfds(int numfds)
{
    size_t array_bytes = numfds * sizeof(int);
    VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + array_bytes);

    fds->fds = (int *)(fds + 1);

    return fds;
}
/*
* Functions only called by iothread
*/
/*
 * Process a fully received message.
 *
 * Requests are appended to the proxy's incoming list and the request
 * bottom half is scheduled.
 *
 * Replies are marked complete.  If the message type is VFIO_MSG_WAIT the
 * waiter is signalled; otherwise an error reply is logged, last_nowait
 * is cleared if it pointed at this message, and the message is recycled.
 */
static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                              bool isreply)
{
    if (!isreply) {
        QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
        qemu_bh_schedule(proxy->req_bh);
        return;
    }

    msg->complete = true;

    if (msg->type == VFIO_MSG_WAIT) {
        qemu_cond_signal(&msg->cv);
        return;
    }

    /* nobody is waiting: report an error reply here */
    if (msg->hdr->flags & VFIO_USER_ERROR) {
        error_printf("vfio_user_process: error reply on async ");
        error_printf("request command %x error %s\n",
                     msg->hdr->command,
                     strerror(msg->hdr->error_reply));
    }

    /* the youngest nowait message has been acknowledged */
    if (proxy->last_nowait == msg) {
        proxy->last_nowait = NULL;
    }

    vfio_user_recycle(proxy, msg);
}
/*
 * Finish reading a message whose body was only partially received.
 *
 * The message and the number of outstanding bytes come from
 * proxy->part_recv and proxy->recv_left.
 *
 * Returns 1 once the message has been read completely and passed to
 * vfio_user_process(); otherwise returns the non-positive result of
 * qio_channel_read().  For QIO_CHANNEL_ERR_BLOCK the remaining byte
 * count is saved so the read can resume later.
 */
static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = proxy->part_recv;
    size_t remaining = proxy->recv_left;
    /* resume right after the bytes that have already arrived */
    char *cursor = (char *)msg->hdr + (msg->hdr->size - remaining);
    int nread;

    for (; remaining > 0; remaining -= nread, cursor += nread) {
        nread = qio_channel_read(proxy->ioc, cursor, remaining, errp);

        /* error, EOF, or would block */
        if (nread <= 0) {
            /* try for the rest on the next iteration */
            if (nread == QIO_CHANNEL_ERR_BLOCK) {
                proxy->recv_left = remaining;
            }
            return nread;
        }
        trace_vfio_user_recv_read(msg->hdr->id, nread);
    }

    /* whole message is in: clear the partial state and process it */
    proxy->part_recv = NULL;
    proxy->recv_left = 0;
    vfio_user_process(proxy, msg,
                      (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY);

    /* any positive value tells the caller to carry on */
    return 1;
}
/*
 * Receive and process one incoming message.
 *
 * For replies, find matching outgoing request and wake any waiters.
 * For requests, queue in incoming list and run request BH.
 *
 * Returns:
 *   0                      a complete message was handed to
 *                          vfio_user_process(); the caller may call again
 *   QIO_CHANNEL_ERR_BLOCK  no more data available right now; a partially
 *                          read body is remembered in proxy->part_recv
 *                          and proxy->recv_left
 *   -1                     error; see the fatal/err labels below
 *
 * NOTE(review): on the fatal path taken after a request message has
 * already been allocated (body read failure), the message entry and its
 * buffer are not released here - confirm whether that is intended.
 */
static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *msg = NULL;
    g_autofree int *fdp = NULL;
    VFIOUserFDs *reqfds = NULL;
    VFIOUserHdr hdr;
    struct iovec iov = {
        .iov_base = &hdr,
        .iov_len = sizeof(hdr),
    };
    bool isreply = false;
    int i, ret;
    size_t msgleft, numfds = 0;
    char *data = NULL;
    char *buf = NULL;

    /*
     * Complete any partial reads
     */
    if (proxy->part_recv != NULL) {
        ret = vfio_user_complete(proxy, errp);

        /* still not complete, try later */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            return ret;
        }

        if (ret <= 0) {
            goto fatal;
        }
        /* else fall into reading another msg */
    }

    /*
     * Read header
     */
    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
                                 errp);
    if (ret == QIO_CHANNEL_ERR_BLOCK) {
        return ret;
    }

    /* read error or other side closed connection */
    if (ret <= 0) {
        goto fatal;
    }

    if (ret < sizeof(hdr)) {
        error_setg(errp, "short read of header");
        goto fatal;
    }

    /*
     * Validate header
     */
    if (hdr.size < sizeof(VFIOUserHdr)) {
        error_setg(errp, "bad header size");
        goto fatal;
    }
    switch (hdr.flags & VFIO_USER_TYPE) {
    case VFIO_USER_REQUEST:
        isreply = false;
        break;
    case VFIO_USER_REPLY:
        isreply = true;
        break;
    default:
        error_setg(errp, "unknown message type");
        goto fatal;
    }
    trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
                             hdr.flags);

    /*
     * For replies, find the matching pending request.
     * For requests, reap incoming FDs.
     */
    if (isreply) {
        QTAILQ_FOREACH(msg, &proxy->pending, next) {
            if (hdr.id == msg->id) {
                break;
            }
        }
        if (msg == NULL) {
            error_setg(errp, "unexpected reply");
            goto err;
        }
        QTAILQ_REMOVE(&proxy->pending, msg, next);

        /*
         * Process any received FDs
         */
        if (numfds != 0) {
            if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
                error_setg(errp, "unexpected FDs");
                goto err;
            }
            msg->fds->recv_fds = numfds;
            memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
        }
    } else {
        if (numfds != 0) {
            reqfds = vfio_user_getfds(numfds);
            /*
             * Record how many FDs were received, as the reply path does,
             * so that vfio_user_putfds() can close them later.
             */
            reqfds->recv_fds = numfds;
            memcpy(reqfds->fds, fdp, numfds * sizeof(int));
        }
    }

    /*
     * Put the whole message into a single buffer.
     */
    if (isreply) {
        if (hdr.size > msg->rsize) {
            error_setg(errp, "reply larger than recv buffer");
            goto err;
        }
        *msg->hdr = hdr;
        data = (char *)msg->hdr + sizeof(hdr);
    } else {
        if (hdr.size > proxy->max_xfer_size + sizeof(VFIOUserDMARW)) {
            error_setg(errp, "vfio_user_recv request larger than max");
            /*
             * reqfds is not attached to any message yet, so release it
             * here; the descriptors themselves are closed at 'err'.
             */
            g_free(reqfds);
            goto err;
        }
        buf = g_malloc0(hdr.size);
        memcpy(buf, &hdr, sizeof(hdr));
        data = buf + sizeof(hdr);
        msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
        msg->type = VFIO_MSG_REQ;
    }

    /*
     * Read rest of message.
     */
    msgleft = hdr.size - sizeof(hdr);
    while (msgleft > 0) {
        ret = qio_channel_read(proxy->ioc, data, msgleft, errp);

        /* prepare to complete read on next iteration */
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            proxy->part_recv = msg;
            proxy->recv_left = msgleft;
            return ret;
        }

        if (ret <= 0) {
            goto fatal;
        }
        trace_vfio_user_recv_read(hdr.id, ret);

        msgleft -= ret;
        data += ret;
    }

    vfio_user_process(proxy, msg, isreply);
    return 0;

    /*
     * fatal means the other side closed or we don't trust the stream
     * err means this message is corrupt
     */
fatal:
    vfio_user_shutdown(proxy);
    proxy->state = VFIO_PROXY_ERROR;

    /* set error if server side closed */
    if (ret == 0) {
        error_setg(errp, "server closed socket");
    }

err:
    /* close every descriptor that arrived with the header */
    for (i = 0; i < numfds; i++) {
        close(fdp[i]);
    }
    if (isreply && msg != NULL) {
        /* force an error to keep sending thread from hanging */
        vfio_user_set_error(msg->hdr, EINVAL);
        msg->complete = true;
        qemu_cond_signal(&msg->cv);
    }
    return -1;
}
/*
 * Read handler for the proxy's channel.
 *
 * With the proxy lock held and the proxy connected, keeps calling
 * vfio_user_recv_one() for as long as it returns 0, then reports any
 * error it left behind.
 */
static void vfio_user_recv(void *opaque)
{
    VFIOUserProxy *proxy = opaque;
    Error *local_err = NULL;

    QEMU_LOCK_GUARD(&proxy->lock);

    if (proxy->state != VFIO_PROXY_CONNECTED) {
        return;
    }

    while (vfio_user_recv_one(proxy, &local_err) == 0) {
        /* keep draining complete messages */
    }

    if (local_err != NULL) {
        error_report_err(local_err);
    }
}
/*
 * Send the message at the head of the outgoing list.
 *
 * Return semantics are those of vfio_user_send_qio().  A negative result
 * leaves the message on the outgoing list.  After a successful write the
 * message is removed from it: async messages are recycled, all others
 * move to the pending list.
 */
static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp)
{
    VFIOUserMsg *head = QTAILQ_FIRST(&proxy->outgoing);
    ssize_t nsent = vfio_user_send_qio(proxy, head, errp);

    if (nsent >= 0) {
        QTAILQ_REMOVE(&proxy->outgoing, head, next);
        proxy->num_outgoing--;

        if (head->type != VFIO_MSG_ASYNC) {
            QTAILQ_INSERT_TAIL(&proxy->pending, head, next);
            head->pending = true;
        } else {
            vfio_user_recycle(proxy, head);
        }
    }

    return nsent;
}
/*
 * Write handler: send messages from the outgoing list.
 *
 * Stops early when a send would block or fails (the failure is
 * reported).  Once the outgoing list has been drained, the write handler
 * is dropped by re-registering only vfio_user_recv, and any batched
 * multi-write message is flushed.
 */
static void vfio_user_send(void *opaque)
{
    VFIOUserProxy *proxy = opaque;

    QEMU_LOCK_GUARD(&proxy->lock);

    if (proxy->state != VFIO_PROXY_CONNECTED) {
        return;
    }

    while (!QTAILQ_EMPTY(&proxy->outgoing)) {
        Error *local_err = NULL;
        int ret = vfio_user_send_one(proxy, &local_err);

        if (ret == -1) {
            error_report_err(local_err);
            return;
        }
        if (ret == QIO_CHANNEL_ERR_BLOCK) {
            /* stay registered; we are called again when writable */
            return;
        }
    }

    /* nothing left to send: keep only the read handler */
    qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                   vfio_user_recv, NULL, NULL, proxy);

    /* queue empty - send any pending multi write msgs */
    if (proxy->wr_multi != NULL) {
        vfio_user_flush_multi(proxy);
    }
}
static void vfio_user_cb(void *opaque)
{
VFIOUserProxy *proxy = opaque;
QEMU_LOCK_GUARD(&proxy->lock);
proxy->state = VFIO_PROXY_CLOSED;
qemu_cond_signal(&proxy->close_cv);
}
/*
* Functions called by main or CPU threads
*/
/*
* Process incoming requests.
*
* The bus-specific callback has the form:
* request(opaque, msg)
* where 'opaque' was specified in vfio_user_set_handler
* and 'msg' is the inbound message.
*
* The callback is responsible for disposing of the message buffer,
* usually by re-using it when calling vfio_send_reply or vfio_send_error,
* both of which free their message buffer when the reply is sent.
*
* If the callback uses a new buffer, it needs to free the old one.
*/
static void vfio_user_request(void *opaque)
{
VFIOUserProxy *proxy = opaque;
VFIOUserMsgQ new, free;
VFIOUserMsg *msg, *m1;
/* reap all incoming */
QTAILQ_INIT(&new);
WITH_QEMU_LOCK_GUARD(&proxy->lock) {
QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
QTAILQ_REMOVE(&proxy->incoming, msg, next);
QTAILQ_INSERT_TAIL(&new, msg, next);
}
}
/* process list */
QTAILQ_INIT(&free);
QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
QTAILQ_REMOVE(&new, msg, next);
trace_vfio_user_recv_request(msg->hdr->command);
proxy->request(proxy->req_arg, msg);
QTAILQ_INSERT_HEAD(&free, msg, next);
}
/* free list */
WITH_QEMU_LOCK_GUARD(&proxy->lock) {
QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
vfio_user_recycle(proxy, msg);
}
}
}
/*
 * Queue a message on the proxy's outgoing list, or send it inline.
 *
 * Three kinds of messages pass through here:
 *
 * async messages - replies and posted writes
 *
 * No reply will come from the server, so the message buffers are freed
 * as soon as the message has been sent.
 *
 * nowait messages - map/unmap during address space transactions
 *
 * Sent asynchronously as well, but a reply is expected so that
 * vfio_wait_reqs() can wait for the youngest nowait request.  They move
 * from the outgoing list to the pending list when sent, and are freed
 * when the reply arrives.
 *
 * wait messages - all other requests
 *
 * The caller waits for the reply.  These also move from outgoing to
 * pending when sent, but the message buffer goes back to the caller
 * holding the reply contents, and the caller frees it.
 *
 * As an optimization, when the outgoing list is empty the message is
 * written inline instead of being queued (unless VFIO_PROXY_FORCE_QUEUED
 * is set); if that write would block, the message is queued and the send
 * handler is registered.  The remaining transitions are unchanged.
 *
 * Returns false only when the inline write failed (-1), true otherwise.
 */
static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg,
                                  Error **errp)
{
    bool is_request = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST;
    int rc;

    /* older coalesced writes go first */
    if (proxy->wr_multi != NULL && is_request) {
        vfio_user_flush_multi(proxy);
    }

    /* unsent messages are already queued: keep ordering, add to tail */
    if (!QTAILQ_EMPTY(&proxy->outgoing)) {
        QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
        proxy->num_outgoing++;
        return true;
    }

    /* try inline, unless the proxy is forced to queue everything */
    rc = (proxy->flags & VFIO_PROXY_FORCE_QUEUED)
         ? QIO_CHANNEL_ERR_BLOCK
         : vfio_user_send_qio(proxy, msg, errp);

    if (rc == -1) {
        return false;
    }

    if (rc == QIO_CHANNEL_ERR_BLOCK) {
        /* blocked: queue it and kick the send poller */
        QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
        proxy->num_outgoing = 1;
        qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                       vfio_user_recv, proxy->ctx,
                                       vfio_user_send, proxy);
        return true;
    }

    /* sent - free async, add others to pending */
    if (msg->type != VFIO_MSG_ASYNC) {
        QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
        msg->pending = true;
    } else {
        vfio_user_recycle(proxy, msg);
    }

    return true;
}
/*
 * nowait send - vfio_wait_reqs() can wait for it later
 *
 * Returns false if @hdr is flagged NO_REPLY or the message could not be
 * sent or queued, in which case @errp will be populated.  On success the
 * message is remembered as the proxy's youngest nowait request.
 *
 * In either case, ownership of @hdr and @fds is taken, and the caller must
 * *not* free them itself.
 */
bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                           VFIOUserFDs *fds, int rsize, Error **errp)
{
    VFIOUserMsg *msg;
    bool sent;

    QEMU_LOCK_GUARD(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->type = VFIO_MSG_NOWAIT;
    /* a zero rsize means the reply must fit in the request buffer */
    msg->rsize = rsize ? rsize : hdr->size;
    msg->id = hdr->id;

    if (hdr->flags & VFIO_USER_NO_REPLY) {
        error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
        sent = false;
    } else {
        sent = vfio_user_send_queued(proxy, msg, errp);
    }

    if (!sent) {
        /* recycling a NOWAIT message releases @hdr and @fds */
        vfio_user_recycle(proxy, msg);
        return false;
    }

    proxy->last_nowait = msg;
    return true;
}
/*
 * Send a request and wait for its reply.
 *
 * Returns false if we did not successfully receive a reply message, in which
 * case @errp will be populated.  Each wait on the message's condition
 * variable is bounded by proxy->wait_time; on timeout the message is
 * taken off whichever list it is on.
 *
 * In either case, the caller must free @hdr and @fds if needed.
 */
bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                         VFIOUserFDs *fds, int rsize, Error **errp)
{
    VFIOUserMsg *msg;
    bool ok;

    if (hdr->flags & VFIO_USER_NO_REPLY) {
        error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
        return false;
    }

    qemu_mutex_lock(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->type = VFIO_MSG_WAIT;
    msg->rsize = rsize ? rsize : hdr->size;
    msg->id = hdr->id;

    ok = vfio_user_send_queued(proxy, msg, errp);

    while (ok && !msg->complete) {
        VFIOUserMsgQ *list;

        if (qemu_cond_timedwait(&msg->cv, &proxy->lock, proxy->wait_time)) {
            /* woken before the timeout: re-check completion */
            continue;
        }

        /* timed out: unlink from wherever the message currently sits */
        list = msg->pending ? &proxy->pending : &proxy->outgoing;
        QTAILQ_REMOVE(list, msg, next);
        error_setg_errno(errp, ETIMEDOUT,
                         "timed out waiting for reply");
        ok = false;
    }

    /* a WAIT message keeps its buffers; only the list entry is reused */
    vfio_user_recycle(proxy, msg);
    qemu_mutex_unlock(&proxy->lock);

    return ok;
}
/*
 * async send - msg can be queued, but will be freed when sent
 *
 * Only messages flagged NO_REPLY or REPLY are accepted.
 *
 * Returns false on failure, in which case @errp will be populated.
 *
 * In either case, ownership of @hdr and @fds is taken, and the caller must
 * *not* free them itself.
 */
bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
                          VFIOUserFDs *fds, Error **errp)
{
    VFIOUserMsg *msg;
    bool sent;

    QEMU_LOCK_GUARD(&proxy->lock);

    msg = vfio_user_getmsg(proxy, hdr, fds);
    msg->type = VFIO_MSG_ASYNC;
    msg->rsize = 0;
    msg->id = hdr->id;

    if ((hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY)) == 0) {
        error_setg_errno(errp, EINVAL, "%s on sync message", __func__);
        sent = false;
    } else {
        sent = vfio_user_send_queued(proxy, msg, errp);
    }

    if (!sent) {
        /* recycling an ASYNC message releases @hdr and @fds */
        vfio_user_recycle(proxy, msg);
    }

    return sent;
}
/*
 * Wait for the youngest outstanding nowait request, if there is one.
 *
 * Any DMA map/unmap requests sent in the middle of a memory region
 * transaction were sent nowait; this is where they are waited for.
 * A timeout or an error reply is only logged.
 */
void vfio_user_wait_reqs(VFIOUserProxy *proxy)
{
    VFIOUserMsg *msg;

    qemu_mutex_lock(&proxy->lock);

    msg = proxy->last_nowait;
    if (msg == NULL) {
        qemu_mutex_unlock(&proxy->lock);
        return;
    }

    /* switch to WAIT so that the receive path signals our condvar */
    msg->type = VFIO_MSG_WAIT;
    proxy->last_nowait = NULL;

    while (!msg->complete) {
        VFIOUserMsgQ *list;

        if (qemu_cond_timedwait(&msg->cv, &proxy->lock, proxy->wait_time)) {
            continue;
        }

        /* timed out: unlink from wherever the message currently sits */
        list = msg->pending ? &proxy->pending : &proxy->outgoing;
        QTAILQ_REMOVE(list, msg, next);
        error_printf("vfio_wait_reqs - timed out\n");
        break;
    }

    if (msg->hdr->flags & VFIO_USER_ERROR) {
        error_printf("vfio_user_wait_reqs - error reply on async ");
        error_printf("request: command %x error %s\n", msg->hdr->command,
                     strerror(msg->hdr->error_reply));
    }

    /* back to NOWAIT so that recycling frees the message buffers */
    msg->type = VFIO_MSG_NOWAIT;
    vfio_user_recycle(proxy, msg);

    qemu_mutex_unlock(&proxy->lock);
}
/*
 * Reply to an incoming request.
 *
 * @hdr is the request buffer, converted in place into a reply of @size
 * bytes and sent asynchronously.  Ownership of @hdr is taken in all
 * cases: it is freed here when @size is invalid, and handed to
 * vfio_user_send_async() otherwise.
 *
 * @size must be at least sizeof(VFIOUserHdr).  A negative @size is
 * rejected explicitly; without that it would be converted to a large
 * unsigned value in the comparison and pass the check.
 */
void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size)
{
    Error *local_err = NULL;

    if (size < 0 || (size_t)size < sizeof(VFIOUserHdr)) {
        /* error_printf() does not append a newline itself */
        error_printf("%s: size too small\n", __func__);
        g_free(hdr);
        return;
    }

    /*
     * convert header to associated reply
     */
    hdr->flags = VFIO_USER_REPLY;
    hdr->size = size;

    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
        error_report_err(local_err);
    }
}
/*
 * Send an error reply to an incoming request.
 *
 * @hdr is converted in place into a header-only reply carrying @error
 * and passed to vfio_user_send_async(), which takes ownership of it.
 */
void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error)
{
    Error *local_err = NULL;

    /* an error reply consists of the header alone */
    hdr->size = sizeof(*hdr);
    hdr->error_reply = error;
    hdr->flags = VFIO_USER_REPLY | VFIO_USER_ERROR;

    if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
        error_report_err(local_err);
    }
}
/*
 * Close FDs erroneously received in an incoming request.
 *
 * Closes the first recv_fds descriptors held in msg->fds, frees the
 * container and clears msg->fds.  Does nothing when the message carries
 * no FD container.
 */
void vfio_user_putfds(VFIOUserMsg *msg)
{
    VFIOUserFDs *fds = msg->fds;
    int i;

    /* nothing to release */
    if (fds == NULL) {
        return;
    }

    for (i = 0; i < fds->recv_fds; i++) {
        close(fds->fds[i]);
    }
    g_free(fds);
    msg->fds = NULL;
}
/*
 * Set VFIO_PROXY_NO_POST in the proxy's flags, under the proxy lock.
 */
void vfio_user_disable_posted_writes(VFIOUserProxy *proxy)
{
    QEMU_LOCK_GUARD(&proxy->lock);

    proxy->flags |= VFIO_PROXY_NO_POST;
}
/*
 * List of all proxies created by vfio_user_connect_dev().  When
 * vfio_user_disconnect() removes the last one, the shared iothread is
 * destroyed.
 */
static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
QLIST_HEAD_INITIALIZER(vfio_user_sockets);
/*
 * Connect to a vfio-user server and create a proxy for the connection.
 *
 * @addr must be a UNIX socket address.  The channel is switched to
 * non-blocking mode, the shared iothread is created if it does not exist
 * yet, and the new proxy is initialised with protocol defaults and added
 * to the global proxy list.
 *
 * Returns the new proxy, or NULL with @errp set when the address family
 * is wrong, the connection fails, or the iothread cannot be created.
 */
VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
{
    VFIOUserProxy *proxy;
    QIOChannelSocket *sioc;
    QIOChannel *ioc;
    char *sockname;

    if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) {
        error_setg(errp, "vfio_user_connect - bad address family");
        return NULL;
    }
    sockname = addr->u.q_unix.path;

    sioc = qio_channel_socket_new();
    ioc = QIO_CHANNEL(sioc);
    if (qio_channel_socket_connect_sync(sioc, addr, errp)) {
        object_unref(OBJECT(ioc));
        return NULL;
    }
    qio_channel_set_blocking(ioc, false, NULL);

    /*
     * Make sure the shared iothread exists before building the proxy, so
     * that a creation failure only has the channel to clean up and a NULL
     * iothread is never passed to iothread_get_aio_context().
     */
    if (vfio_user_iothread == NULL) {
        vfio_user_iothread = iothread_create("VFIO user", errp);
        if (vfio_user_iothread == NULL) {
            object_unref(OBJECT(ioc));
            return NULL;
        }
    }

    proxy = g_malloc0(sizeof(VFIOUserProxy));
    proxy->sockname = g_strdup_printf("unix:%s", sockname);
    proxy->ioc = ioc;

    /* init defaults */
    proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER;
    proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS;
    proxy->max_dma = VFIO_USER_DEF_MAP_MAX;
    proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE;
    proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP;
    proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE;

    proxy->flags = VFIO_PROXY_CLIENT;
    proxy->state = VFIO_PROXY_CONNECTED;

    qemu_mutex_init(&proxy->lock);
    qemu_cond_init(&proxy->close_cv);

    proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
    proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);

    QTAILQ_INIT(&proxy->outgoing);
    QTAILQ_INIT(&proxy->incoming);
    QTAILQ_INIT(&proxy->free);
    QTAILQ_INIT(&proxy->pending);
    QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next);

    return proxy;
}
/*
 * Register the callback (and its argument) used for incoming requests on
 * the device's proxy, then install vfio_user_recv as the read handler of
 * the proxy's channel.
 */
void vfio_user_set_handler(VFIODevice *vbasedev,
                           void (*handler)(void *opaque, VFIOUserMsg *msg),
                           void *req_arg)
{
    VFIOUserProxy *proxy = vbasedev->proxy;

    proxy->req_arg = req_arg;
    proxy->request = handler;

    /* read handler only; no write handler until a send blocks */
    qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
                                   vfio_user_recv, NULL, NULL, proxy);
}
/*
 * Tear down a proxy and free it.
 *
 * Shuts the channel down if still connected, drops the channel and the
 * request bottom half, frees every message list entry on the four
 * message lists, then waits for a bottom half on the iothread to confirm
 * that the iothread no longer references the proxy before destroying it.
 * The shared iothread is destroyed when the last proxy goes away.
 *
 * NOTE(review): only the list entries are freed here, not the header/FD
 * buffers they may still point at - confirm this is intended.
 */
void vfio_user_disconnect(VFIOUserProxy *proxy)
{
    VFIOUserMsg *r1, *r2;

    qemu_mutex_lock(&proxy->lock);

    /* our side is quitting */
    if (proxy->state == VFIO_PROXY_CONNECTED) {
        vfio_user_shutdown(proxy);
        if (!QTAILQ_EMPTY(&proxy->pending)) {
            error_printf("vfio_user_disconnect: outstanding requests\n");
        }
    }
    object_unref(OBJECT(proxy->ioc));
    proxy->ioc = NULL;
    qemu_bh_delete(proxy->req_bh);
    proxy->req_bh = NULL;

    proxy->state = VFIO_PROXY_CLOSING;
    QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->outgoing, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->incoming, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->pending, r1, next);
        g_free(r1);
    }
    QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) {
        qemu_cond_destroy(&r1->cv);
        QTAILQ_REMOVE(&proxy->free, r1, next);
        g_free(r1);
    }

    /*
     * Make sure the iothread isn't blocking anywhere
     * with a ref to this proxy by waiting for a BH
     * handler to run after the proxy fd handlers were
     * deleted above.
     *
     * vfio_user_cb() sets the state to VFIO_PROXY_CLOSED under the lock
     * before signalling, so wait on that predicate: a condition variable
     * may wake up spuriously, and returning early would free the proxy
     * while the BH still references it.
     */
    aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy);
    while (proxy->state != VFIO_PROXY_CLOSED) {
        qemu_cond_wait(&proxy->close_cv, &proxy->lock);
    }

    /* we now hold the only ref to proxy */
    qemu_mutex_unlock(&proxy->lock);
    qemu_cond_destroy(&proxy->close_cv);
    qemu_mutex_destroy(&proxy->lock);

    QLIST_REMOVE(proxy, next);
    if (QLIST_EMPTY(&vfio_user_sockets)) {
        iothread_destroy(vfio_user_iothread);
        vfio_user_iothread = NULL;
    }

    g_free(proxy->sockname);
    g_free(proxy);
}
/*
 * Fill in @hdr as a request header.
 *
 * The id comes from a file-wide 16-bit counter that is incremented
 * atomically.  Any message-type bits in @flags are replaced by
 * VFIO_USER_REQUEST, and the error field is cleared.
 */
void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
                           uint32_t size, uint32_t flags)
{
    static uint16_t next_id;
    uint32_t req_flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST;

    hdr->error_reply = 0;
    hdr->flags = req_flags;
    hdr->size = size;
    hdr->command = cmd;
    hdr->id = qatomic_fetch_inc(&next_id);
}
/*
 * One entry of a capability table: the dictionary key to look for and the
 * function that validates its value.  Tables end with an entry whose
 * name is NULL.
 */
struct cap_entry {
/* key looked up in the capabilities dictionary */
const char *name;
/* validates @qobj; returns false with @errp set to reject it */
bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp);
};
/*
 * Check the entries of @qdict against the capability table @caps.
 *
 * Each key present in both the table and the dictionary is validated by
 * the table's check function and then deleted from the dictionary.
 * Parsing stops with false as soon as one check fails.  Keys left in the
 * dictionary afterwards only produce a warning.
 */
static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict,
                       struct cap_entry caps[], Error **errp)
{
    struct cap_entry *entry;

    for (entry = caps; entry->name != NULL; entry++) {
        QObject *value = qdict_get(qdict, entry->name);

        if (value == NULL) {
            /* capability not supplied: nothing to validate */
            continue;
        }
        if (!entry->check(proxy, value, errp)) {
            return false;
        }
        qdict_del(qdict, entry->name);
    }

    /* warning, for now */
    if (qdict_size(qdict) != 0) {
        warn_report("spurious capabilities");
    }

    return true;
}
/*
 * Validate the migration page size capability and store it in
 * proxy->migr_pgsize.
 *
 * The value must be an unsigned number with none of the bits in
 * (VFIO_USER_DEF_PGSIZE - 1) set.
 */
static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    uint64_t pgsize;
    QNum *qn = qobject_to(QNum, qobj);
    bool parsed = qn != NULL && qnum_get_try_uint(qn, &pgsize);

    if (!parsed) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE);
        return false;
    }

    /* must be larger than default */
    if ((pgsize & (VFIO_USER_DEF_PGSIZE - 1)) != 0) {
        error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize);
        return false;
    }

    proxy->migr_pgsize = pgsize;
    return true;
}
/*
 * Validate the maximum bitmap size capability and store it in
 * proxy->max_bitmap.  The value may not exceed VFIO_USER_DEF_MAX_BITMAP.
 */
static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    uint64_t bitmap_size;
    QNum *qn = qobject_to(QNum, qobj);
    bool parsed = qn != NULL && qnum_get_try_uint(qn, &bitmap_size);

    if (!parsed) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP);
        return false;
    }

    /* can only lower it */
    if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) {
        error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP);
        return false;
    }

    proxy->max_bitmap = bitmap_size;
    return true;
}
/*
 * Capabilities accepted inside the migration capability dictionary;
 * passed to caps_parse() by check_migr().
 */
static struct cap_entry caps_migr[] = {
{ VFIO_USER_CAP_PGSIZE, check_migr_pgsize },
{ VFIO_USER_CAP_MAX_BITMAP, check_bitmap },
/* terminator */
{ NULL }
};
/*
 * Validate the maximum-FDs capability and store it in
 * proxy->max_send_fds.  A value that is not an unsigned number or that
 * exceeds VFIO_USER_MAX_MAX_FDS is rejected as malformed.
 */
static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    uint64_t max_send_fds;
    QNum *qn = qobject_to(QNum, qobj);
    bool valid = qn != NULL &&
                 qnum_get_try_uint(qn, &max_send_fds) &&
                 max_send_fds <= VFIO_USER_MAX_MAX_FDS;

    if (!valid) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
        return false;
    }

    proxy->max_send_fds = max_send_fds;
    return true;
}
/*
 * Validate the maximum transfer size capability and store it in
 * proxy->max_xfer_size.  A value that is not an unsigned number or that
 * exceeds VFIO_USER_MAX_MAX_XFER is rejected as malformed.
 */
static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    uint64_t max_xfer_size;
    QNum *qn = qobject_to(QNum, qobj);
    bool valid = qn != NULL &&
                 qnum_get_try_uint(qn, &max_xfer_size) &&
                 max_xfer_size <= VFIO_USER_MAX_MAX_XFER;

    if (!valid) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER);
        return false;
    }

    proxy->max_xfer_size = max_xfer_size;
    return true;
}
/*
 * Validate the DMA page sizes capability and store it in
 * proxy->dma_pgsizes.
 *
 * The value must be an unsigned number with none of the bits in
 * (VFIO_USER_DEF_PGSIZE - 1) set.
 */
static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    uint64_t pgsizes;
    QNum *qn = qobject_to(QNum, qobj);
    bool parsed = qn != NULL && qnum_get_try_uint(qn, &pgsizes);

    if (!parsed) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES);
        return false;
    }

    /* must be larger than default */
    if ((pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) != 0) {
        error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes);
        return false;
    }

    proxy->dma_pgsizes = pgsizes;
    return true;
}
/*
 * Validate the maximum DMA mappings capability and store it in
 * proxy->max_dma.  The value may not exceed VFIO_USER_DEF_MAP_MAX.
 */
static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    uint64_t max_dma;
    QNum *qn = qobject_to(QNum, qobj);
    bool parsed = qn != NULL && qnum_get_try_uint(qn, &max_dma);

    if (!parsed) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX);
        return false;
    }

    /* can only lower it */
    if (max_dma > VFIO_USER_DEF_MAP_MAX) {
        error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX);
        return false;
    }

    proxy->max_dma = max_dma;
    return true;
}
/*
 * Validate the migration capability, which must be a dictionary, by
 * parsing its members against the caps_migr table.
 *
 * Returns false with @errp set when the value is not a dictionary or one
 * of its members fails validation.
 */
static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QDict *qdict = qobject_to(QDict, qobj);

    if (qdict == NULL) {
        /*
         * Report the capability actually being checked, and fail:
         * returning success with @errp set would violate the Error API
         * contract and let a malformed capability through.
         */
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MIGR);
        return false;
    }
    return caps_parse(proxy, qdict, caps_migr, errp);
}
/*
 * Validate the multi-write capability, which must be a boolean.  A true
 * value sets VFIO_PROXY_USE_MULTI in the proxy flags; a false value
 * leaves the flags unchanged.
 */
static bool check_multi(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QBool *enabled = qobject_to(QBool, qobj);

    if (enabled == NULL) {
        error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI);
        return false;
    }

    if (qbool_get_bool(enabled)) {
        proxy->flags |= VFIO_PROXY_USE_MULTI;
    }

    return true;
}
/*
 * Capabilities accepted inside the top-level capabilities dictionary;
 * passed to caps_parse() by check_cap().
 */
static struct cap_entry caps_cap[] = {
{ VFIO_USER_CAP_MAX_FDS, check_max_fds },
{ VFIO_USER_CAP_MAX_XFER, check_max_xfer },
{ VFIO_USER_CAP_PGSIZES, check_pgsizes },
{ VFIO_USER_CAP_MAP_MAX, check_max_dma },
/* nested dictionary, parsed against caps_migr */
{ VFIO_USER_CAP_MIGR, check_migr },
{ VFIO_USER_CAP_MULTI, check_multi },
/* terminator */
{ NULL }
};
/*
 * Validate the capabilities member of the version reply, which must be a
 * dictionary, by parsing it against the caps_cap table.
 */
static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
{
    QDict *capdict = qobject_to(QDict, qobj);

    if (capdict != NULL) {
        return caps_parse(proxy, capdict, caps_cap, errp);
    }

    error_setg(errp, "malformed %s", VFIO_USER_CAP);
    return false;
}
/*
 * Top-level keys accepted in the version reply's JSON object; passed to
 * caps_parse() by caps_check().
 */
static struct cap_entry ver_0_0[] = {
{ VFIO_USER_CAP, check_cap },
/* terminator */
{ NULL }
};
/*
 * Parse the JSON capability string @caps and validate it against the
 * ver_0_0 table, updating @proxy with the accepted values.
 *
 * @minor is currently not used by this function.
 *
 * Returns false with @errp set when the string is not valid JSON, is not
 * a JSON object, or fails validation.
 */
static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps,
                       Error **errp)
{
    QObject *parsed = qobject_from_json(caps, NULL);
    QDict *top;
    bool ok;

    if (parsed == NULL) {
        error_setg(errp, "malformed capabilities %s", caps);
        return false;
    }

    top = qobject_to(QDict, parsed);
    if (top != NULL) {
        ok = caps_parse(proxy, top, ver_0_0, errp);
    } else {
        error_setg(errp, "capabilities %s not an object", caps);
        ok = false;
    }

    qobject_unref(parsed);
    return ok;
}
/*
 * Build the JSON capability string sent with the version request.
 *
 * The result is a top-level object holding the capabilities dictionary,
 * which in turn holds the migration dictionary.  The caller owns the
 * returned GString.
 */
static GString *caps_json(void)
{
    QDict *top = qdict_new();
    QDict *caps = qdict_new();
    QDict *migr = qdict_new();
    GString *json;

    /* migration sub-dictionary; ownership passes to 'caps' below */
    qdict_put_int(migr, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE);
    qdict_put_int(migr, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP);
    qdict_put_obj(caps, VFIO_USER_CAP_MIGR, QOBJECT(migr));

    /* remaining capabilities, inserted in the original order */
    qdict_put_int(caps, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS);
    qdict_put_int(caps, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
    qdict_put_int(caps, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
    qdict_put_int(caps, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);
    qdict_put_bool(caps, VFIO_USER_CAP_MULTI, true);

    qdict_put_obj(top, VFIO_USER_CAP, QOBJECT(caps));

    json = qobject_to_json(QOBJECT(top));

    /* dropping the top-level reference releases the nested dicts too */
    qobject_unref(top);

    return json;
}
/*
 * Exchange protocol versions and capabilities with the server.
 *
 * Sends a VFIO_USER_VERSION request carrying our version and JSON
 * capability string, waits for the reply (which is received into the
 * same buffer), and validates the server's version and capabilities,
 * updating @proxy with the negotiated values.
 *
 * Returns false with @errp set when the exchange fails, the server
 * replies with an error, the versions are incompatible, or the reply is
 * malformed.
 */
bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
{
    g_autofree VFIOUserVersion *msgp = NULL;
    GString *caps;
    char *reply;
    int size, caplen;

    caps = caps_json();
    /* include the terminating NUL in the message */
    caplen = caps->len + 1;
    size = sizeof(*msgp) + caplen;
    msgp = g_malloc0(size);

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0);
    msgp->major = VFIO_USER_MAJOR_VER;
    msgp->minor = VFIO_USER_MINOR_VER;
    memcpy(&msgp->capabilities, caps->str, caplen);
    g_string_free(caps, true);
    trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);

    /* rsize 0: the reply has to fit in the request buffer */
    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
        return false;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, msgp->hdr.error_reply, "version reply");
        return false;
    }

    if (msgp->major != VFIO_USER_MAJOR_VER ||
        msgp->minor > VFIO_USER_MINOR_VER) {
        error_setg(errp, "incompatible server version");
        return false;
    }

    /*
     * The reply size comes from the server.  It must leave room for at
     * least one capability byte, otherwise the terminator index below
     * would wrap around and read outside the buffer.
     */
    reply = msgp->capabilities;
    if (msgp->hdr.size <= sizeof(*msgp) ||
        reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') {
        error_setg(errp, "corrupt version reply");
        return false;
    }

    if (!caps_check(proxy, msgp->minor, reply, errp)) {
        return false;
    }

    trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
    return true;
}
/*
 * Send the proxy's batched multi-write message as an async message.
 *
 * The batch is detached from the proxy first, and its header size is
 * reduced to cover only the writes actually stored.  A send failure is
 * reported and the message recycled.
 */
void vfio_user_flush_multi(VFIOUserProxy *proxy)
{
    VFIOUserWRMulti *wm = proxy->wr_multi;
    Error *local_err = NULL;
    VFIOUserMsg *msg;

    /* detach before sending: vfio_user_send_queued() checks wr_multi */
    proxy->wr_multi = NULL;

    /* adjust size for actual # of writes */
    wm->hdr.size -= (VFIO_USER_MULTI_MAX - wm->wr_cnt) * sizeof(VFIOUserWROne);

    msg = vfio_user_getmsg(proxy, &wm->hdr, NULL);
    msg->type = VFIO_MSG_ASYNC;
    msg->rsize = 0;
    msg->id = wm->hdr.id;
    trace_vfio_user_wrmulti("flush", wm->wr_cnt);

    if (vfio_user_send_queued(proxy, msg, &local_err)) {
        return;
    }

    error_report_err(local_err);
    vfio_user_recycle(proxy, msg);
}
/*
 * Allocate an empty multi-write message, initialise its header as a
 * NO_REPLY VFIO_USER_REGION_WRITE_MULTI request sized for the full
 * structure, and install it as the proxy's current batch.
 */
void vfio_user_create_multi(VFIOUserProxy *proxy)
{
    VFIOUserWRMulti *batch = g_malloc0(sizeof(*batch));

    vfio_user_request_msg(&batch->hdr, VFIO_USER_REGION_WRITE_MULTI,
                          sizeof(*batch), VFIO_USER_NO_REPLY);
    proxy->wr_multi = batch;
}
/*
 * Append one write to the proxy's current multi-write batch.
 *
 * The batch is flushed immediately when it is full or when the number of
 * outgoing messages is below VFIO_USER_OUT_LOW.
 *
 * NOTE(review): @count bytes are copied into the slot's data field with
 * no size check here; presumably callers bound @count - confirm.
 */
void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
                         off_t offset, uint32_t count, void *data)
{
    VFIOUserWRMulti *wm = proxy->wr_multi;
    VFIOUserWROne *slot = &wm->wrs[wm->wr_cnt];

    slot->region = index;
    slot->offset = offset;
    slot->count = count;
    memcpy(&slot->data, data, count);

    wm->wr_cnt++;
    trace_vfio_user_wrmulti("add", wm->wr_cnt);

    /* keep batching only while there is room and the queue is busy */
    if (wm->wr_cnt != VFIO_USER_MULTI_MAX &&
        proxy->num_outgoing >= VFIO_USER_OUT_LOW) {
        return;
    }

    vfio_user_flush_multi(proxy);
}