From: John G Johnson <john.g.john...@oracle.com>

Add user.c and user.h files for vfio-user with the basic send and
receive functions.
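
For illustration only (not part of this patch): a request header for a
command that carries no payload would be built from the definitions in
user.h roughly as below; next_request_id() is a hypothetical ID
allocator, not something this patch provides.

    vfio_user_hdr_t hdr = {
        .id = next_request_id(),           /* pairs a reply with its request */
        .command = VFIO_USER_DEVICE_RESET, /* any value from enum vfio_user_command */
        .size = sizeof(hdr),               /* header plus payload bytes; none here */
        .flags = VFIO_USER_REQUEST,        /* a reply is expected unless VFIO_USER_NO_REPLY */
    };

The receiver uses VFIO_USER_TYPE to tell requests from replies; a reply
echoes the id with VFIO_USER_REPLY set, adding VFIO_USER_ERROR and an
error_reply code on failure.
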
Signed-off-by: John G Johnson <john.g.john...@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com>
Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com>
---
 hw/vfio/user.h                | 120 ++++++++++++++
 include/hw/vfio/vfio-common.h |   2 +
 hw/vfio/user.c                | 286 ++++++++++++++++++++++++++++++++++
 MAINTAINERS                   |   4 +
 hw/vfio/meson.build           |   1 +
 5 files changed, 413 insertions(+)
 create mode 100644 hw/vfio/user.h
 create mode 100644 hw/vfio/user.c

diff --git a/hw/vfio/user.h b/hw/vfio/user.h
new file mode 100644
index 0000000000..cdbc074579
--- /dev/null
+++ b/hw/vfio/user.h
@@ -0,0 +1,120 @@
+#ifndef VFIO_USER_H
+#define VFIO_USER_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ */
+
+/* commands */
+enum vfio_user_command {
+    VFIO_USER_VERSION                   = 1,
+    VFIO_USER_DMA_MAP                   = 2,
+    VFIO_USER_DMA_UNMAP                 = 3,
+    VFIO_USER_DEVICE_GET_INFO           = 4,
+    VFIO_USER_DEVICE_GET_REGION_INFO    = 5,
+    VFIO_USER_DEVICE_GET_REGION_IO_FDS  = 6,
+    VFIO_USER_DEVICE_GET_IRQ_INFO       = 7,
+    VFIO_USER_DEVICE_SET_IRQS           = 8,
+    VFIO_USER_REGION_READ               = 9,
+    VFIO_USER_REGION_WRITE              = 10,
+    VFIO_USER_DMA_READ                  = 11,
+    VFIO_USER_DMA_WRITE                 = 12,
+    VFIO_USER_DEVICE_RESET              = 13,
+    VFIO_USER_DIRTY_PAGES               = 14,
+    VFIO_USER_MAX,
+};
+
+/* flags */
+#define VFIO_USER_REQUEST   0x0
+#define VFIO_USER_REPLY     0x1
+#define VFIO_USER_TYPE      0xF
+
+#define VFIO_USER_NO_REPLY  0x10
+#define VFIO_USER_ERROR     0x20
+
+typedef struct vfio_user_hdr {
+    uint16_t id;
+    uint16_t command;
+    uint32_t size;
+    uint32_t flags;
+    uint32_t error_reply;
+} vfio_user_hdr_t;
+
+/*
+ * VFIO_USER_VERSION
+ */
+#define VFIO_USER_MAJOR_VER     0
+#define VFIO_USER_MINOR_VER     0
+
+struct vfio_user_version {
+    vfio_user_hdr_t hdr;
+    uint16_t major;
+    uint16_t minor;
+    char capabilities[];
+};
+
+#define VFIO_USER_DEF_MAX_FDS   8
+#define VFIO_USER_MAX_MAX_FDS   16
+
+#define VFIO_USER_DEF_MAX_XFER  (1024 * 1024)
+#define VFIO_USER_MAX_MAX_XFER  (64 * 1024 * 1024)
+
+typedef struct VFIOUserFDs {
+    int send_fds;
+    int recv_fds;
+    int *fds;
+} VFIOUserFDs;
+
+typedef struct VFIOUserReply {
+    QTAILQ_ENTRY(VFIOUserReply) next;
+    vfio_user_hdr_t *msg;
+    VFIOUserFDs *fds;
+    int rsize;
+    uint32_t id;
+    QemuCond cv;
+    uint8_t complete;
+} VFIOUserReply;
+
+enum proxy_state {
+    CONNECTED = 1,
+    RECV_ERROR = 2,
+    CLOSING = 3,
+    CLOSED = 4,
+};
+
+typedef struct VFIOProxy {
+    QLIST_ENTRY(VFIOProxy) next;
+    char *sockname;
+    struct QIOChannel *ioc;
+    int (*request)(void *opaque, char *buf, VFIOUserFDs *fds);
+    void *reqarg;
+    int flags;
+    QemuCond close_cv;
+
+    /*
+     * above only changed when iolock is held
+     * below are protected by per-proxy lock
+     */
+    QemuMutex lock;
+    QTAILQ_HEAD(, VFIOUserReply) free;
+    QTAILQ_HEAD(, VFIOUserReply) pending;
+    enum proxy_state state;
+    int close_wait;
+} VFIOProxy;
+
+#define VFIO_PROXY_CLIENT   0x1
+
+void vfio_user_recv(void *opaque);
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret);
+#endif /* VFIO_USER_H */
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 8af11b0a76..f43dc6e5d0 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
 } VFIOAddressSpace;
 
 struct VFIOGroup;
+typedef struct VFIOProxy VFIOProxy;
 
 typedef struct VFIOContainer {
     VFIOAddressSpace *space;
@@ -143,6 +144,7 @@ typedef struct VFIODevice {
     VFIOMigration *migration;
     Error *migration_blocker;
     OnOffAuto pre_copy_dirty_page_tracking;
+    VFIOProxy *proxy;
 } VFIODevice;
 
 struct VFIODeviceOps {
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
new file mode 100644
index 0000000000..021d5540e0
--- /dev/null
+++ b/hw/vfio/user.c
@@ -0,0 +1,286 @@
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/main-loop.h"
+#include "hw/hw.h"
+#include "hw/vfio/vfio-common.h"
+#include "hw/vfio/vfio.h"
+#include "qemu/sockets.h"
+#include "io/channel.h"
+#include "io/channel-util.h"
+#include "sysemu/iothread.h"
+#include "user.h"
+
+static uint64_t max_xfer_size = VFIO_USER_DEF_MAX_XFER;
+static IOThread *vfio_user_iothread;
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds);
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds);
+static void vfio_user_shutdown(VFIOProxy *proxy);
+
+static void vfio_user_shutdown(VFIOProxy *proxy)
+{
+    qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+    qio_channel_set_aio_fd_handler(proxy->ioc,
+                                   iothread_get_aio_context(vfio_user_iothread),
+                                   NULL, NULL, NULL);
+}
+
+void vfio_user_send_reply(VFIOProxy *proxy, char *buf, int ret)
+{
+    vfio_user_hdr_t *hdr = (vfio_user_hdr_t *)buf;
+
+    /*
+     * convert header to associated reply
+     * positive ret is reply size, negative is error code
+     */
+    hdr->flags = VFIO_USER_REPLY;
+    if (ret > 0) {
+        hdr->size = ret;
+    } else if (ret < 0) {
+        hdr->flags |= VFIO_USER_ERROR;
+        hdr->error_reply = -ret;
+        hdr->size = sizeof(*hdr);
+    }
+    vfio_user_send(proxy, hdr, NULL);
+}
+
+void vfio_user_recv(void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOProxy *proxy = vbasedev->proxy;
+    VFIOUserReply *reply = NULL;
+    g_autofree int *fdp = NULL;
+    VFIOUserFDs reqfds = { 0, 0, fdp };
+    vfio_user_hdr_t msg;
+    struct iovec iov = {
+        .iov_base = &msg,
+        .iov_len = sizeof(msg),
+    };
+    int isreply, i, ret;
+    size_t msgleft, numfds = 0;
+    char *data = NULL;
+    g_autofree char *buf = NULL;
+    Error *local_err = NULL;
+
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->state == CLOSING) {
+        qemu_mutex_unlock(&proxy->lock);
+        return;
+    }
+
+    ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds,
+                                 &local_err);
+    if (ret <= 0) {
+        /* read error or other side closed connection */
+        error_setg_errno(&local_err, errno, "vfio_user_recv read error");
+        goto fatal;
+    }
+
+    if (ret < sizeof(msg)) {
+        error_setg(&local_err, "vfio_user_recv short read of header");
+        goto err;
+    }
+
+    /*
+     * For replies, find the matching pending request
+     */
+    switch (msg.flags & VFIO_USER_TYPE) {
+    case VFIO_USER_REQUEST:
+        isreply = 0;
+        break;
+    case VFIO_USER_REPLY:
+        isreply = 1;
+        break;
+    default:
+        error_setg(&local_err, "vfio_user_recv unknown message type");
+        goto err;
+    }
+
+    if (isreply) {
+        QTAILQ_FOREACH(reply, &proxy->pending, next) {
+            if (msg.id == reply->id) {
+                break;
+            }
+        }
+        if (reply == NULL) {
+            error_setg(&local_err, "vfio_user_recv unexpected reply");
+            goto err;
+        }
+        QTAILQ_REMOVE(&proxy->pending, reply, next);
+
+        /*
+         * Process any received FDs
+         */
+        if (numfds != 0) {
+            if (reply->fds == NULL || reply->fds->recv_fds < numfds) {
+                error_setg(&local_err, "vfio_user_recv unexpected FDs");
+                goto err;
+            }
+            reply->fds->recv_fds = numfds;
+            memcpy(reply->fds->fds, fdp, numfds * sizeof(int));
+        }
+
+    } else {
+        /*
+         * The client doesn't expect any FDs in requests, but
+         * they will be expected on the server
+         */
+        if (numfds != 0 && (proxy->flags & VFIO_PROXY_CLIENT)) {
+            error_setg(&local_err, "vfio_user_recv fd in client reply");
+            goto err;
+        }
+        reqfds.recv_fds = numfds;
+    }
+
+    /*
+     * put the whole message into a single buffer
+     */
+    msgleft = msg.size - sizeof(msg);
+    if (isreply) {
+        if (msg.size > reply->rsize) {
+            error_setg(&local_err,
+                       "vfio_user_recv reply larger than recv buffer");
+            goto fatal;
+        }
+        *reply->msg = msg;
+        data = (char *)reply->msg + sizeof(msg);
+    } else {
+        if (msg.size > max_xfer_size) {
+            error_setg(&local_err, "vfio_user_recv request larger than max");
+            goto fatal;
+        }
+        buf = g_malloc0(msg.size);
+        memcpy(buf, &msg, sizeof(msg));
+        data = buf + sizeof(msg);
+    }
+
+    if (msgleft != 0) {
+        ret = qio_channel_read(proxy->ioc, data, msgleft, &local_err);
+        if (ret < 0) {
+            goto fatal;
+        }
+        if (ret != msgleft) {
+            error_setg(&local_err, "vfio_user_recv short read of msg body");
+            goto err;
+        }
+    }
+
+    /*
+     * Replies signal a waiter, requests get processed by vfio code
+     * that may assume the iothread lock is held.
+     */
+    qemu_mutex_unlock(&proxy->lock);
+    if (isreply) {
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    } else {
+        qemu_mutex_lock_iothread();
+        /*
+         * make sure proxy wasn't closed while we waited
+         * checking without holding the proxy lock is safe
+         * since state is only set to CLOSING when iolock is held
+         */
+        if (proxy->state != CLOSING) {
+            ret = proxy->request(proxy->reqarg, buf, &reqfds);
+            if (ret < 0 && !(msg.flags & VFIO_USER_NO_REPLY)) {
+                vfio_user_send_reply(proxy, buf, ret);
+            }
+        }
+        qemu_mutex_unlock_iothread();
+    }
+
+    return;
+ fatal:
+    vfio_user_shutdown(proxy);
+    proxy->state = RECV_ERROR;
+
+ err:
+    qemu_mutex_unlock(&proxy->lock);
+    for (i = 0; i < numfds; i++) {
+        close(fdp[i]);
+    }
+    if (reply != NULL) {
+        /* force an error to keep sending thread from hanging */
+        reply->msg->flags |= VFIO_USER_ERROR;
+        reply->msg->error_reply = EINVAL;
+        reply->complete = 1;
+        qemu_cond_signal(&reply->cv);
+    }
+    error_report_err(local_err);
+}
+
+static void vfio_user_send_locked(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                                  VFIOUserFDs *fds)
+{
+    struct iovec iov = {
+        .iov_base = msg,
+        .iov_len = msg->size,
+    };
+    size_t numfds = 0;
+    int msgleft, ret, *fdp = NULL;
+    char *buf;
+    Error *local_err = NULL;
+
+    if (proxy->state != CONNECTED) {
+        msg->flags |= VFIO_USER_ERROR;
+        msg->error_reply = ECONNRESET;
+        return;
+    }
+
+    if (fds != NULL && fds->send_fds != 0) {
+        numfds = fds->send_fds;
+        fdp = fds->fds;
+    }
+    ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, &local_err);
+    if (ret < 0) {
+        goto err;
+    }
+    if (ret == msg->size) {
+        return;
+    }
+
+    buf = iov.iov_base + ret;
+    msgleft = iov.iov_len - ret;
+    do {
+        ret = qio_channel_write(proxy->ioc, buf, msgleft, &local_err);
+        if (ret < 0) {
+            goto err;
+        }
+        buf += ret, msgleft -= ret;
+    } while (msgleft != 0);
+    return;
+
+ err:
+    error_report_err(local_err);
+}
+
+static void vfio_user_send(VFIOProxy *proxy, vfio_user_hdr_t *msg,
+                           VFIOUserFDs *fds)
+{
+    bool iolock = qemu_mutex_iothread_locked();
+
+    if (iolock) {
+        qemu_mutex_unlock_iothread();
+    }
+    qemu_mutex_lock(&proxy->lock);
+    vfio_user_send_locked(proxy, msg, fds);
+    qemu_mutex_unlock(&proxy->lock);
+    if (iolock) {
+        qemu_mutex_lock_iothread();
+    }
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index 12d69f3a45..aa4df6c418 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1883,8 +1883,12 @@ L: qemu-s3...@nongnu.org
 vfio-user
 M: John G Johnson <john.g.john...@oracle.com>
 M: Thanos Makatos <thanos.maka...@nutanix.com>
+M: Elena Ufimtseva <elena.ufimts...@oracle.com>
+M: Jagannathan Raman <jag.ra...@oracle.com>
 S: Supported
 F: docs/devel/vfio-user.rst
+F: hw/vfio/user.c
+F: hw/vfio/user.h
 
 vhost
 M: Michael S. Tsirkin <m...@redhat.com>
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index da9af297a0..739b30be73 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -8,6 +8,7 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
   'display.c',
   'pci-quirks.c',
   'pci.c',
+  'user.c',
 ))
 vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
 vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
-- 
2.25.1