Add new message to send multiple writes to server Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> --- hw/vfio/user-protocol.h | 21 ++++++++ hw/vfio/user.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++- hw/vfio/user.h | 7 +++ 3 files changed, 154 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 6afd090..ffa222b 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -40,6 +40,7 @@ enum vfio_user_command { VFIO_USER_DMA_WRITE = 12, VFIO_USER_DEVICE_RESET = 13, VFIO_USER_DIRTY_PAGES = 14, + VFIO_USER_REGION_WRITE_MULTI = 15, VFIO_USER_MAX, }; @@ -73,6 +74,7 @@ typedef struct { #define VFIO_USER_CAP_PGSIZES "pgsizes" #define VFIO_USER_CAP_MAP_MAX "max_dma_maps" #define VFIO_USER_CAP_MIGR "migration" +#define VFIO_USER_CAP_MULTI "write_multiple" /* "migration" members */ #define VFIO_USER_CAP_PGSIZE "pgsize" @@ -220,4 +222,23 @@ typedef struct { char data[]; } VFIOUserBitmap; +/* + * VFIO_USER_REGION_WRITE_MULTI + */ +#define VFIO_USER_MULTI_DATA 8 +#define VFIO_USER_MULTI_MAX 200 + +typedef struct { + uint64_t offset; + uint32_t region; + uint32_t count; + char data[VFIO_USER_MULTI_DATA]; +} VFIOUserWROne; + +typedef struct { + VFIOUserHdr hdr; + uint64_t wr_cnt; + VFIOUserWROne wrs[VFIO_USER_MULTI_MAX]; +} VFIOUserWRMulti; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index a9e6cf5..4ed305b 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -66,6 +66,7 @@ static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, static void vfio_user_wait_reqs(VFIOProxy *proxy); static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, uint32_t size, uint32_t flags); +static void vfio_user_flush_multi(VFIOProxy *proxy); static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) { @@ -461,6 +462,11 @@ static void vfio_user_send(void *opaque) } qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, vfio_user_recv, NULL, proxy); + + /* queue empty - send any pending multi write msgs */ + if (proxy->wr_multi != NULL) { + vfio_user_flush_multi(proxy); + } } } @@ -481,6 +487,7 @@ static int vfio_user_send_one(VFIOProxy *proxy) } QTAILQ_REMOVE(&proxy->outgoing, msg, next); + proxy->num_outgoing--; if (msg->type == VFIO_MSG_ASYNC) { vfio_user_recycle(proxy, msg); } else { @@ -587,11 +594,18 @@ static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg) { int ret; + /* older coalesced writes go first */ + if (proxy->wr_multi != NULL && + ((msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST)) { + vfio_user_flush_multi(proxy); + } + /* * Unsent outgoing msgs - add to tail */ if (!QTAILQ_EMPTY(&proxy->outgoing)) { QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); + proxy->num_outgoing++; return 0; } @@ -605,6 +619,7 @@ static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg) } if (ret == QIO_CHANNEL_ERR_BLOCK) { QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); + proxy->num_outgoing = 1; qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, vfio_user_recv, vfio_user_send, proxy); @@ -1145,12 +1160,27 @@ static int check_migr(VFIOProxy *proxy, QObject *qobj, Error **errp) return caps_parse(proxy, qdict, caps_migr, errp); } +static int check_multi(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QBool *qb = qobject_to(QBool, qobj); + + if (qb == NULL) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI); + return -1; + } + if (qbool_get_bool(qb)) { + proxy->flags |= VFIO_PROXY_USE_MULTI; + } + return 0; +} + static struct cap_entry caps_cap[] = { { VFIO_USER_CAP_MAX_FDS, check_max_fds }, { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, { VFIO_USER_CAP_PGSIZES, check_pgsizes }, { VFIO_USER_CAP_MAP_MAX, check_max_dma }, { VFIO_USER_CAP_MIGR, check_migr }, + { VFIO_USER_CAP_MULTI, check_multi }, { NULL } }; @@ -1209,6 +1239,7 @@ static GString *caps_json(void) qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); + qdict_put_bool(capdict, VFIO_USER_CAP_MULTI, true); qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); @@ -1547,18 +1578,111 @@ static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset, return msgp->count; } +static void vfio_user_flush_multi(VFIOProxy *proxy) +{ + VFIOUserMsg *msg; + VFIOUserWRMulti *wm = proxy->wr_multi; + int ret; + + proxy->wr_multi = NULL; + + /* adjust size for actual # of writes */ + wm->hdr.size -= (VFIO_USER_MULTI_MAX - wm->wr_cnt) * sizeof(VFIOUserWROne); + + msg = vfio_user_getmsg(proxy, &wm->hdr, NULL); + msg->id = wm->hdr.id; + msg->rsize = 0; + msg->type = VFIO_MSG_ASYNC; + + ret = vfio_user_send_queued(proxy, msg); + if (ret < 0) { + vfio_user_recycle(proxy, msg); + } +} + +static void vfio_user_create_multi(VFIOProxy *proxy) +{ + VFIOUserWRMulti *wm; + + wm = g_malloc0(sizeof(*wm)); + vfio_user_request_msg(&wm->hdr, VFIO_USER_REGION_WRITE_MULTI, + sizeof(*wm), VFIO_USER_NO_REPLY); + proxy->wr_multi = wm; +} + +static void vfio_user_add_multi(VFIOProxy *proxy, uint8_t index, off_t offset, + uint32_t count, void *data) +{ + VFIOUserWRMulti *wm = proxy->wr_multi; + VFIOUserWROne *w1 = &wm->wrs[wm->wr_cnt]; + + w1->offset = offset; + w1->region = index; + w1->count = count; + memcpy(&w1->data, data, count); + + wm->wr_cnt++; + if (wm->wr_cnt == VFIO_USER_MULTI_MAX || + proxy->num_outgoing < VFIO_USER_OUT_LOW) { + vfio_user_flush_multi(proxy); + } +} + static int vfio_user_region_write(VFIOProxy *proxy, uint8_t index, off_t offset, uint32_t count, void *data, bool post) { VFIOUserRegionRW *msgp = NULL; - int flags = post ? VFIO_USER_NO_REPLY : 0; + int flags; int size = sizeof(*msgp) + count; + bool can_multi; int ret; if (count > proxy->max_xfer_size) { return -EINVAL; } + if (proxy->flags & VFIO_PROXY_NO_POST) { + post = false; + } + + /* write eligible to be in a WRITE_MULTI msg ? */ + can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post && + count <= VFIO_USER_MULTI_DATA; + + /* + * This should be a rare case, so first check without the lock, + * if we're wrong, vfio_send_queued() will flush any posted writes + * we missed here + */ + if (proxy->wr_multi != NULL || + (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) { + + /* + * re-check with lock + * + * if already building a WRITE_MULTI msg, + * add this one if possible else flush pending before + * sending the current one + * + * else if outgoing queue is over the highwater, + * start a new WRITE_MULTI message + */ + WITH_QEMU_LOCK_GUARD(&proxy->lock) { + if (proxy->wr_multi != NULL) { + if (can_multi) { + vfio_user_add_multi(proxy, index, offset, count, data); + return count; + } + vfio_user_flush_multi(proxy); + } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) { + vfio_user_create_multi(proxy); + vfio_user_add_multi(proxy, index, offset, count, data); + return count; + } + } + } + + flags = post ? VFIO_USER_NO_REPLY : 0; msgp = g_malloc0(size); vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags); msgp->offset = offset; @@ -1567,7 +1691,7 @@ static int vfio_user_region_write(VFIOProxy *proxy, uint8_t index, off_t offset, memcpy(&msgp->data, data, count); /* async send will free msg after it's sent */ - if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) { + if (post) { vfio_user_send_async(proxy, &msgp->hdr, NULL); return count; } diff --git a/hw/vfio/user.h b/hw/vfio/user.h index f711861..c96e74a 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -79,6 +79,8 @@ typedef struct VFIOProxy { VFIOUserMsg *last_nowait; VFIOUserMsg *part_recv; size_t recv_left; + VFIOUserWRMulti *wr_multi; + int num_outgoing; enum proxy_state state; } VFIOProxy; @@ -87,6 +89,11 @@ typedef struct VFIOProxy { #define VFIO_PROXY_SECURE 0x2 #define VFIO_PROXY_FORCE_QUEUED 0x4 #define VFIO_PROXY_NO_POST 0x8 +#define VFIO_PROXY_USE_MULTI 0x10 + +/* coalescing high and low water marks for VFIOProxy num_outgoing */ +#define VFIO_USER_OUT_HIGH 1024 +#define VFIO_USER_OUT_LOW 128 VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp); void vfio_user_disconnect(VFIOProxy *proxy); -- 1.8.3.1