From: John Johnson <john.g.john...@oracle.com>

Signed-off-by: John G Johnson <john.g.john...@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com>
Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com>
---
 hw/vfio/user-protocol.h |  62 +++++++++
 hw/vfio/user.h          |   8 ++
 hw/vfio/pci.c           |   7 +
 hw/vfio/user.c          | 316 ++++++++++++++++++++++++++++++++++++++++
 MAINTAINERS             |   1 +
 5 files changed, 394 insertions(+)
 create mode 100644 hw/vfio/user-protocol.h

diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
new file mode 100644
index 0000000000..27062cb910
--- /dev/null
+++ b/hw/vfio/user-protocol.h
@@ -0,0 +1,62 @@
+#ifndef VFIO_USER_PROTOCOL_H
+#define VFIO_USER_PROTOCOL_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ */
+
+typedef struct {
+    uint16_t id;
+    uint16_t command;
+    uint32_t size;
+    uint32_t flags;
+    uint32_t error_reply;
+} VFIOUserHdr;
+
+/* VFIOUserHdr commands */
+enum vfio_user_command {
+    VFIO_USER_VERSION                   = 1,
+    VFIO_USER_DMA_MAP                   = 2,
+    VFIO_USER_DMA_UNMAP                 = 3,
+    VFIO_USER_DEVICE_GET_INFO           = 4,
+    VFIO_USER_DEVICE_GET_REGION_INFO    = 5,
+    VFIO_USER_DEVICE_GET_REGION_IO_FDS  = 6,
+    VFIO_USER_DEVICE_GET_IRQ_INFO       = 7,
+    VFIO_USER_DEVICE_SET_IRQS           = 8,
+    VFIO_USER_REGION_READ               = 9,
+    VFIO_USER_REGION_WRITE              = 10,
+    VFIO_USER_DMA_READ                  = 11,
+    VFIO_USER_DMA_WRITE                 = 12,
+    VFIO_USER_DEVICE_RESET              = 13,
+    VFIO_USER_DIRTY_PAGES               = 14,
+    VFIO_USER_MAX,
+};
+
+/* VFIOUserHdr flags */
+#define VFIO_USER_REQUEST       0x0
+#define VFIO_USER_REPLY         0x1
+#define VFIO_USER_TYPE          0xF
+
+#define VFIO_USER_NO_REPLY      0x10
+#define VFIO_USER_ERROR         0x20
+
+
+#define VFIO_USER_DEF_MAX_FDS   8
+#define VFIO_USER_MAX_MAX_FDS   16
+
+#define VFIO_USER_DEF_MAX_XFER  (1024 * 1024)
+#define VFIO_USER_MAX_MAX_XFER  (64 * 1024 * 1024)
+
+
+#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index 62b2d03d56..905e374e12 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -11,6 +11,8 @@
  *
  */
 
+#include "user-protocol.h"
+
 typedef struct {
     int send_fds;
     int recv_fds;
@@ -19,6 +21,7 @@ typedef struct {
 
 typedef struct VFIOUserReply {
     QTAILQ_ENTRY(VFIOUserReply) next;
+    VFIOUserHdr *msg;
     VFIOUserFDs *fds;
     uint32_t rsize;
     uint32_t id;
@@ -62,5 +65,10 @@ typedef struct VFIOProxy {
 
 VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
 void vfio_user_disconnect(VFIOProxy *proxy);
+void vfio_user_set_reqhandler(VFIODevice *vbasedev,
+                              int (*handler)(void *opaque, char *buf,
+                                             VFIOUserFDs *fds),
+                              void *reqarg);
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret);
 
 #endif /* VFIO_USER_H */
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 7c2d245ca5..7005d9f891 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3333,6 +3333,12 @@ type_init(register_vfio_pci_dev_type)
  * vfio-user routines.
  */
 
+/* Stub request handler: handles nothing yet and always returns 0. */
+static int vfio_user_pci_process_req(void *opaque, char *buf, VFIOUserFDs *fds)
+{
+    return 0;
+}
+
 /*
  * Emulated devices don't use host hot reset
  */
@@ -3386,6 +3392,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
         return;
     }
     vbasedev->proxy = proxy;
+    vfio_user_set_reqhandler(vbasedev, vfio_user_pci_process_req, vdev);
 
     if (udev->secure_dma) {
         proxy->flags |= VFIO_PROXY_SECURE;
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 3bd304e036..2fcc77d997 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -25,8 +25,15 @@
 #include "sysemu/iothread.h"
 #include "user.h"
 
+static uint64_t max_xfer_size = VFIO_USER_DEF_MAX_XFER;
 static IOThread *vfio_user_iothread;
+
 static void vfio_user_shutdown(VFIOProxy *proxy);
+static void vfio_user_recv(void *opaque);
+static void vfio_user_send_locked(VFIOProxy *proxy, VFIOUserHdr *msg,
+                                  VFIOUserFDs *fds);
+static void vfio_user_send(VFIOProxy *proxy, VFIOUserHdr *msg,
+                           VFIOUserFDs *fds);
 
 
 /*
@@ -36,6 +43,73 @@ static void vfio_user_shutdown(VFIOProxy *proxy);
 static void vfio_user_shutdown(VFIOProxy *proxy)
 {
     qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+    qio_channel_set_aio_fd_handler(proxy->ioc,
+                                   iothread_get_aio_context(vfio_user_iothread),
+                                   NULL, NULL, NULL);
+}
+
+/*
+ * Write msg->size bytes of msg to the proxy's channel, passing any FDs in
+ * fds with the first write.  Called with proxy->lock held.  A failure is
+ * recorded in msg->flags and msg->error_reply.
+ */
+static void vfio_user_send_locked(VFIOProxy *proxy, VFIOUserHdr *msg,
+                                  VFIOUserFDs *fds)
+{
+    struct iovec iov = {
+        .iov_base = msg,
+        .iov_len = msg->size,
+    };
+    size_t numfds = 0;
+    int msgleft, ret, *fdp = NULL;
+    char *buf;
+    Error *local_err = NULL;
+
+    if (proxy->state != VFIO_PROXY_CONNECTED) {
+        msg->flags |= VFIO_USER_ERROR;
+        msg->error_reply = ECONNRESET;
+        return;
+    }
+
+    if (fds != NULL && fds->send_fds != 0) {
+        numfds = fds->send_fds;
+        fdp = fds->fds;
+    }
+
+    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, &local_err);
+    if (ret < 0) {
+        goto err;
+    }
+    if (ret == msg->size) {
+        return;
+    }
+
+    buf = iov.iov_base + ret;
+    msgleft = iov.iov_len - ret;
+    do {
+        ret = qio_channel_write(proxy->ioc, buf, msgleft, &local_err);
+        if (ret < 0) {
+            goto err;
+        }
+        buf += ret;
+        msgleft -= ret;
+    } while (msgleft != 0);
+    return;
+
+err:
+    msg->flags |= VFIO_USER_ERROR;
+    msg->error_reply = EIO;
+    error_report_err(local_err);
+}
+
+/* Take proxy->lock and send msg, see vfio_user_send_locked(). */
+static void vfio_user_send(VFIOProxy *proxy, VFIOUserHdr *msg,
+                           VFIOUserFDs *fds)
+{
+
+    qemu_mutex_lock(&proxy->lock);
+    vfio_user_send_locked(proxy, msg, fds);
+    qemu_mutex_unlock(&proxy->lock);
 }
 
 
@@ -43,6 +117,230 @@ static void vfio_user_shutdown(VFIOProxy *proxy)
  * Functions only called by iothread
  */
 
+/*
+ * Turn the request header at the start of buf into a reply and send it.
+ *
+ * A negative ret is an errno-style error code; a non-negative ret is the
+ * reply size, which must cover at least the header.
+ */
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret)
+{
+    VFIOUserHdr *hdr = (VFIOUserHdr *)buf;
+
+    hdr->flags = VFIO_USER_REPLY;
+    if (ret < 0) {
+        /*
+         * Test the sign first: comparing a negative int against sizeof()
+         * converts it to a huge unsigned value.
+         */
+        hdr->flags |= VFIO_USER_ERROR;
+        hdr->error_reply = -ret;
+        hdr->size = sizeof(*hdr);
+    } else if (ret >= (int)sizeof(VFIOUserHdr)) {
+        hdr->size = ret;
+    } else {
+        error_printf("vfio_user_send_reply - size too small\n");
+        return;
+    }
+    vfio_user_send(proxy, hdr, NULL);
+}
+
+/*
+ * Channel read handler: read one message from the socket.  A reply is
+ * matched to its pending request and completed; a request is passed to
+ * the handler registered with vfio_user_set_reqhandler().
+ */
+static void vfio_user_recv(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOProxy *proxy = vbasedev->proxy;
+    VFIOUserReply *reply = NULL;
+    g_autofree int *fdp = NULL;
+    VFIOUserFDs reqfds = { 0, 0, fdp };
+    VFIOUserHdr msg;
+    struct iovec iov = {
+        .iov_base = &msg,
+        .iov_len = sizeof(msg),
+    };
+    bool isreply;
+    int i, ret;
+    size_t msgleft, numfds = 0;
+    char *data = NULL;
+    g_autofree char *buf = NULL;
+    Error *local_err = NULL;
+
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->state == VFIO_PROXY_CLOSING) {
+        qemu_mutex_unlock(&proxy->lock);
+        return;
+    }
+
+    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds,
+                                 &local_err);
+    if (ret == 0) {
+        /* EOF sets no error, but the exit path reports local_err */
+        error_setg(&local_err, "vfio_user_recv other side closed connection");
+        goto fatal;
+    }
+    if (ret < 0) {
+        goto fatal;
+    }
+
+    if (ret < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv short read of header");
+        goto err;
+    }
+    if (msg.size < sizeof(VFIOUserHdr)) {
+        error_setg(&local_err, "vfio_user_recv bad header size");
+        goto err;
+    }
+
+    /*
+     * For replies, find the matching pending request
+     */
+    switch (msg.flags & VFIO_USER_TYPE) {
+    case VFIO_USER_REQUEST:
+        isreply = 0;
+        break;
+    case VFIO_USER_REPLY:
+        isreply = 1;
+        break;
+    default:
+        error_setg(&local_err, "vfio_user_recv unknown message type");
+        goto err;
+    }
+
+    if (isreply) {
+        QTAILQ_FOREACH(reply, &proxy->pending, next) {
+            if (msg.id == reply->id) {
+                break;
+            }
+        }
+        if (reply == NULL) {
+            error_setg(&local_err, "vfio_user_recv unexpected reply");
+            goto err;
+        }
+        QTAILQ_REMOVE(&proxy->pending, reply, next);
+
+        /*
+         * Process any received FDs
+         */
+        if (numfds != 0) {
+            if (reply->fds == NULL || reply->fds->recv_fds < numfds) {
+                error_setg(&local_err, "vfio_user_recv unexpected FDs");
+                goto err;
+            }
+            reply->fds->recv_fds = numfds;
+            memcpy(reply->fds->fds, fdp, numfds * sizeof(int));
+        }
+
+    } else {
+        /*
+         * The client doesn't expect any FDs in requests, but
+         * they will be expected on the server
+         */
+        if (numfds != 0 && (proxy->flags & VFIO_PROXY_CLIENT)) {
+            error_setg(&local_err, "vfio_user_recv fd in client reply");
+            goto err;
+        }
+        reqfds.recv_fds = numfds;
+        /* fdp was still NULL when reqfds was initialized above */
+        reqfds.fds = fdp;
+    }
+
+    /*
+     * put the whole message into a single buffer
+     */
+    if (isreply) {
+        if (msg.size > reply->rsize) {
+            error_setg(&local_err,
+                       "vfio_user_recv reply larger than recv buffer");
+            goto fatal;
+        }
+        *reply->msg = msg;
+        data = (char *)reply->msg + sizeof(msg);
+    } else {
+        if (msg.size > max_xfer_size) {
+            error_setg(&local_err, "vfio_user_recv request larger than max");
+            goto fatal;
+        }
+        buf = g_malloc0(msg.size);
+        memcpy(buf, &msg, sizeof(msg));
+        data = buf + sizeof(msg);
+    }
+
+    msgleft = msg.size - sizeof(msg);
+    if (msgleft != 0) {
+        ret = qio_channel_read(proxy->ioc, data, msgleft, &local_err);
+        if (ret < 0) {
+            goto fatal;
+        }
+        if (ret != msgleft) {
+            error_setg(&local_err, "vfio_user_recv short read of msg body");
+            goto err;
+        }
+    }
+
+    /*
+     * Replies signal a waiter, requests get processed by vfio code
+     * that may assume the iothread lock is held.
+     */
+    if (isreply) {
+        reply->complete = 1;
+        if (!reply->nowait) {
+            qemu_cond_signal(&reply->cv);
+        } else {
+            if (msg.flags & VFIO_USER_ERROR) {
+                error_printf("vfio_user_rcv error reply on async request ");
+                error_printf("command %x error %s\n", msg.command,
+                             strerror(msg.error_reply));
+            }
+            /* just free it if no one is waiting */
+            reply->nowait = 0;
+            if (proxy->last_nowait == reply) {
+                proxy->last_nowait = NULL;
+            }
+            g_free(reply->msg);
+            QTAILQ_INSERT_HEAD(&proxy->free, reply, next);
+        }
+        qemu_mutex_unlock(&proxy->lock);
+    } else {
+        qemu_mutex_unlock(&proxy->lock);
+        qemu_mutex_lock_iothread();
+        /*
+         * make sure proxy wasn't closed while we waited
+         * checking state without holding the proxy lock is safe
+         * since it's only set to CLOSING when BQL is held
+         */
+        if (proxy->state != VFIO_PROXY_CLOSING) {
+            ret = proxy->request(proxy->reqarg, buf, &reqfds);
+            if (ret < 0 && !(msg.flags & VFIO_USER_NO_REPLY)) {
+                vfio_user_send_reply(proxy, buf, ret);
+            }
+        }
+        qemu_mutex_unlock_iothread();
+    }
+    return;
+
+fatal:
+    vfio_user_shutdown(proxy);
+    proxy->state = VFIO_PROXY_RECV_ERROR;
+
+err:
+    for (i = 0; i < numfds; i++) {
+        close(fdp[i]);
+    }
+    if (reply != NULL) {
+        /* force an error to keep sending thread from hanging */
+        reply->msg->flags |= VFIO_USER_ERROR;
+        reply->msg->error_reply = EINVAL;
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    }
+    qemu_mutex_unlock(&proxy->lock);
+    error_report_err(local_err);
+}
+
 static void vfio_user_cb(void *opaque)
 {
     VFIOProxy *proxy = opaque;
@@ -101,6 +399,24 @@ VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
     return proxy;
 }
 
+/*
+ * Register the handler for requests received from the other side and
+ * start watching the socket for incoming messages.
+ */
+void vfio_user_set_reqhandler(VFIODevice *vbasedev,
+                              int (*handler)(void *opaque, char *buf,
+                                             VFIOUserFDs *fds),
+                              void *reqarg)
+{
+    VFIOProxy *proxy = vbasedev->proxy;
+
+    proxy->request = handler;
+    proxy->reqarg = reqarg;
+    qio_channel_set_aio_fd_handler(proxy->ioc,
+                                   iothread_get_aio_context(vfio_user_iothread),
+                                   vfio_user_recv, NULL, vbasedev);
+}
+
 void vfio_user_disconnect(VFIOProxy *proxy)
 {
     VFIOUserReply *r1, *r2;
diff --git a/MAINTAINERS b/MAINTAINERS
index f429bab391..52d37dd088 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1888,6 +1888,7 @@ S: Supported
 F: docs/devel/vfio-user.rst
 F: hw/vfio/user.c
 F: hw/vfio/user.h
+F: hw/vfio/user-protocol.h
 
 vhost
 M: Michael S. Tsirkin <m...@redhat.com>
-- 
2.25.1