Add support for posted writes on remote devices Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> --- hw/vfio/common.c | 10 +++- hw/vfio/pci.c | 9 +++- hw/vfio/pci.h | 1 + hw/vfio/user-protocol.h | 12 +++++ hw/vfio/user.c | 109 ++++++++++++++++++++++++++++++++++++++++++ hw/vfio/user.h | 1 + include/hw/vfio/vfio-common.h | 7 +-- 7 files changed, 143 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 87400b3..87cd1d1 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -214,6 +214,7 @@ void vfio_region_write(void *opaque, hwaddr addr, uint32_t dword; uint64_t qword; } buf; + bool post = region->post_wr; int ret; switch (size) { @@ -234,7 +235,11 @@ void vfio_region_write(void *opaque, hwaddr addr, break; } - ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf); + /* read-after-write hazard if guest can directly access region */ + if (region->nr_mmaps) { + post = false; + } + ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, post); if (ret != size) { const char *err = ret < 0 ? strerror(-ret) : "short write"; @@ -1587,6 +1592,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + region->post_wr = false; if (vbasedev->regfds != NULL) { region->fd = vbasedev->regfds[index]; } else { @@ -2721,7 +2727,7 @@ static int vfio_io_region_read(VFIODevice *vbasedev, uint8_t index, off_t off, } static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off, - uint32_t size, void *data) + uint32_t size, void *data, bool post) { struct vfio_region_info *info = vbasedev->regions[index]; int ret; diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 2e0e41d..027f9d5 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -51,7 +51,7 @@ (size), (data)) #define VDEV_CONFIG_WRITE(vbasedev, off, size, data) \ VDEV_REGION_WRITE((vbasedev), VFIO_PCI_CONFIG_REGION_INDEX, (off), \ - (size), (data)) + (size), (data), false) #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" @@ -1704,6 +1704,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); bar->size = bar->region.size; + + /* IO regions are sync, memory can be async */ + bar->region.post_wr = (bar->ioport == 0); } static void vfio_bars_prepare(VFIOPCIDevice *vdev) @@ -3494,6 +3497,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) if (udev->send_queued) { proxy->flags |= VFIO_PROXY_FORCE_QUEUED; } + if (udev->no_post) { + proxy->flags |= VFIO_PROXY_NO_POST; + } vfio_user_validate_version(proxy, &err); if (err != NULL) { @@ -3540,6 +3546,7 @@ static void vfio_user_instance_finalize(Object *obj) static Property vfio_user_pci_dev_properties[] = { DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), + DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index c47d2f8..ec17f2e 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -196,6 +196,7 @@ struct VFIOUserPCIDevice { VFIOPCIDevice device; char *sock_name; bool send_queued; /* all sends are queued */ + bool no_post; /* all regions write are sync */ }; /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index a1b64fe..124340c 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -140,4 +140,16 @@ typedef struct { uint64_t offset; } VFIOUserRegionInfo; +/* + * VFIO_USER_REGION_READ + * VFIO_USER_REGION_WRITE + */ +typedef struct { + VFIOUserHdr hdr; + uint64_t offset; + uint32_t region; + uint32_t count; + char data[]; +} VFIOUserRegionRW; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index 69b0fed..1453bb5 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -57,6 +57,8 @@ static void vfio_user_cb(void *opaque); static void vfio_user_request(void *opaque); static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg); +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds); static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds, int rsize, bool nobql); static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, @@ -618,6 +620,33 @@ static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg) return 0; } +/* + * async send - msg can be queued, but will be freed when sent + */ +static void vfio_user_send_async(VFIOProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds) +{ + VFIOUserMsg *msg; + int ret; + + if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) { + error_printf("vfio_user_send_async on sync message\n"); + return; + } + + QEMU_LOCK_GUARD(&proxy->lock); + + msg = vfio_user_getmsg(proxy, hdr, fds); + msg->id = hdr->id; + msg->rsize = 0; + msg->type = VFIO_MSG_ASYNC; + + ret = vfio_user_send_queued(proxy, msg); + if (ret < 0) { + vfio_user_recycle(proxy, msg); + } +} + static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds, int rsize, bool nobql) { @@ -1135,6 +1164,70 @@ static int vfio_user_get_region_info(VFIOProxy *proxy, return 0; } +static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset, + uint32_t count, void *data) +{ + g_autofree VFIOUserRegionRW *msgp = NULL; + int size = sizeof(*msgp) + count; + + if (count > proxy->max_xfer_size) { + return -EINVAL; + } + + msgp = g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0); + msgp->offset = offset; + msgp->region = index; + msgp->count = count; + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + return -msgp->hdr.error_reply; + } else if (msgp->count > count) { + return -E2BIG; + } else { + memcpy(data, &msgp->data, msgp->count); + } + + return msgp->count; +} + +static int vfio_user_region_write(VFIOProxy *proxy, uint8_t index, off_t offset, + uint32_t count, void *data, bool post) +{ + VFIOUserRegionRW *msgp = NULL; + int flags = post ? VFIO_USER_NO_REPLY : 0; + int size = sizeof(*msgp) + count; + int ret; + + if (count > proxy->max_xfer_size) { + return -EINVAL; + } + + msgp = g_malloc0(size); + vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags); + msgp->offset = offset; + msgp->region = index; + msgp->count = count; + memcpy(&msgp->data, data, count); + + /* async send will free msg after it's sent */ + if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) { + vfio_user_send_async(proxy, &msgp->hdr, NULL); + return count; + } + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + ret = -msgp->hdr.error_reply; + } else { + ret = count; + } + + g_free(msgp); + return ret; +} + /* * Socket-based io_ops @@ -1184,8 +1277,24 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev, return 0; } +static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index, + off_t off, uint32_t size, void *data) +{ + return vfio_user_region_read(vbasedev->proxy, index, off, size, data); +} + +static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index, + off_t off, unsigned size, void *data, + bool post) +{ + return vfio_user_region_write(vbasedev->proxy, index, off, size, data, + post); +} + VFIODevIO vfio_dev_io_sock = { .get_info = vfio_user_io_get_info, .get_region_info = vfio_user_io_get_region_info, + .region_read = vfio_user_io_region_read, + .region_write = vfio_user_io_region_write, }; diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 2547cf6..359a029 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -84,6 +84,7 @@ typedef struct VFIOProxy { /* VFIOProxy flags */ #define VFIO_PROXY_CLIENT 0x1 #define VFIO_PROXY_FORCE_QUEUED 0x4 +#define VFIO_PROXY_NO_POST 0x8 VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp); void vfio_user_disconnect(VFIOProxy *proxy); diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 3406e6a..6324132 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -57,6 +57,7 @@ typedef struct VFIORegion { VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ int fd; /* fd to mmap() region */ + bool post_wr; /* writes can be posted */ } VFIORegion; typedef struct VFIOMigration { @@ -180,7 +181,7 @@ struct VFIODevIO { int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, void *data); int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, - void *data); + void *data, bool post); }; #define VDEV_GET_INFO(vdev, info) \ @@ -193,8 +194,8 @@ struct VFIODevIO { ((vdev)->io_ops->set_irqs((vdev), (irqs))) #define VDEV_REGION_READ(vdev, nr, off, size, data) \ ((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data))) -#define VDEV_REGION_WRITE(vdev, nr, off, size, data) \ - ((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data))) +#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \ + ((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data), (post))) struct VFIOContIO { int (*dma_map)(VFIOContainer *container, -- 1.8.3.1