This patch introduces vhost_vdpa_net_cvq_add() and refactors vhost_vdpa_net_load*(), so that QEMU can send CVQ state load commands in parallel.

To be more specific, this patch introduces vhost_vdpa_net_cvq_add() to add SVQ control commands to SVQ and kick the device, without polling the device used buffers. QEMU does not poll and check the device used buffers in vhost_vdpa_net_load() until all CVQ state load commands have been sent to the device.

What's more, to avoid overwriting the buffers when `s->cvq_cmd_out_buffer` and `s->status` are shared by all CVQ state load commands sent in parallel, this patch introduces `out_cursor` and `in_cursor` in vhost_vdpa_net_load(), pointing to the next available buffers for the out descriptor and the in descriptor respectively, so that each CVQ state load command can use its own unique buffers.
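For illustration only, the following rough sketch (not code from this patch; the buffer sizes, numeric command values and the helper name queue_cmd are simplified assumptions) shows how such cursors walk a pre-mapped command buffer and ack buffer as commands are queued back to back:

    /* Sketch only: simplified stand-ins for the patch's cursor bookkeeping. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef uint8_t virtio_net_ctrl_ack;

    struct virtio_net_ctrl_hdr {
        uint8_t class;
        uint8_t cmd;
    };

    /* Copy one control command at the out cursor and reserve one ack slot. */
    static void queue_cmd(uint8_t **out_cursor, virtio_net_ctrl_ack **in_cursor,
                          uint8_t class, uint8_t cmd,
                          const void *data, size_t data_size)
    {
        struct virtio_net_ctrl_hdr ctrl = { .class = class, .cmd = cmd };

        memcpy(*out_cursor, &ctrl, sizeof(ctrl));
        memcpy(*out_cursor + sizeof(ctrl), data, data_size);
        *out_cursor += sizeof(ctrl) + data_size;   /* next free out byte */
        *in_cursor += 1;                           /* next free ack slot */
    }

    int main(void)
    {
        uint8_t out_buf[4096];              /* stands in for s->cvq_cmd_out_buffer */
        virtio_net_ctrl_ack in_buf[16];     /* stands in for s->status */
        uint8_t *out_cursor = out_buf;
        virtio_net_ctrl_ack *in_cursor = in_buf;

        uint8_t mac[6] = { 0x52, 0x54, 0x00, 0x12, 0x34, 0x56 };
        uint16_t pairs = 2;

        queue_cmd(&out_cursor, &in_cursor, 1 /* MAC */, 1 /* ADDR_SET */,
                  mac, sizeof(mac));
        queue_cmd(&out_cursor, &in_cursor, 4 /* MQ */, 0 /* VQ_PAIRS_SET */,
                  &pairs, sizeof(pairs));

        printf("out bytes used: %zu, acks pending: %td\n",
               (size_t)(out_cursor - out_buf), in_cursor - in_buf);
        return 0;
    }

In the patch itself the out buffer is s->cvq_cmd_out_buffer, the acks land in s->status, and the cursor updates happen inside vhost_vdpa_net_cvq_add() only after vhost_svq_add() succeeds.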
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578

Signed-off-by: Hawkins Jiawei <yin31...@gmail.com>
---
 net/vhost-vdpa.c | 152 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 120 insertions(+), 32 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 10804c7200..14e31ca5c5 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -590,6 +590,44 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
     vhost_vdpa_net_client_stop(nc);
 }
 
+/**
+ * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ,
+ * kicks the device but does not poll the device used buffers.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_cvq_add(VhostVDPAState *s,
+                                  void **out_cursor, size_t out_len,
+                                  virtio_net_ctrl_ack **in_cursor, size_t in_len)
+{
+    /* Buffers for the device */
+    const struct iovec out = {
+        .iov_base = *out_cursor,
+        .iov_len = out_len,
+    };
+    const struct iovec in = {
+        .iov_base = *in_cursor,
+        .iov_len = sizeof(virtio_net_ctrl_ack),
+    };
+    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
+    int r;
+
+    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
+    if (unlikely(r != 0)) {
+        if (unlikely(r == -ENOSPC)) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+                          __func__);
+        }
+        return r;
+    }
+
+    /* Update the cursors */
+    *out_cursor += out_len;
+    *in_cursor += 1;
+
+    return 1;
+}
+
 /**
  * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ,
  * kicks the device and polls the device used buffers.
@@ -628,69 +666,82 @@ static ssize_t vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s,
     return vhost_svq_poll(svq);
 }
 
-static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
-                                       uint8_t cmd, const void *data,
-                                       size_t data_size)
+
+/**
+ * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_load_cmd(VhostVDPAState *s,
+                                   void **out_cursor, uint8_t class, uint8_t cmd,
+                                   const void *data, size_t data_size,
+                                   virtio_net_ctrl_ack **in_cursor)
 {
     const struct virtio_net_ctrl_hdr ctrl = {
         .class = class,
         .cmd = cmd,
     };
 
-    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
+    assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() -
+                          (*out_cursor - s->cvq_cmd_out_buffer));
+    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) -
+                       (*out_cursor - s->cvq_cmd_out_buffer));
 
-    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
-    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
+    memcpy(*out_cursor, &ctrl, sizeof(ctrl));
+    memcpy(*out_cursor + sizeof(ctrl), data, data_size);
 
-    return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size,
-                                           sizeof(virtio_net_ctrl_ack));
+    return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + data_size,
+                                  in_cursor, sizeof(virtio_net_ctrl_ack));
 }
 
-static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
+/**
+ * vhost_vdpa_net_load_mac() restores the NIC mac through SVQ.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n,
+                                   void **out_cursor, virtio_net_ctrl_ack **in_cursor)
 {
     uint64_t features = n->parent_obj.guest_features;
 
     if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
-        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
-                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
-                                                  n->mac, sizeof(n->mac));
-        if (unlikely(dev_written < 0)) {
-            return dev_written;
-        }
-
-        return *s->status != VIRTIO_NET_OK;
+        return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MAC,
+                                       VIRTIO_NET_CTRL_MAC_ADDR_SET,
+                                       n->mac, sizeof(n->mac), in_cursor);
     }
 
     return 0;
 }
 
-static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
-                                  const VirtIONet *n)
+/**
+ * vhost_vdpa_net_load_mq() restores the NIC mq state through SVQ.
+ *
+ * Return the number of elements added to SVQ on success.
+ */
+static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n,
+                                  void **out_cursor, virtio_net_ctrl_ack **in_cursor)
 {
     struct virtio_net_ctrl_mq mq;
     uint64_t features = n->parent_obj.guest_features;
-    ssize_t dev_written;
 
     if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
         return 0;
     }
 
     mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
-    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
-                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
-                                          sizeof(mq));
-    if (unlikely(dev_written < 0)) {
-        return dev_written;
-    }
-
-    return *s->status != VIRTIO_NET_OK;
+    return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ,
+                                   VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET,
+                                   &mq, sizeof(mq), in_cursor);
 }
 
 static int vhost_vdpa_net_load(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+    VhostShadowVirtqueue *svq;
+    void *out_cursor;
+    virtio_net_ctrl_ack *in_cursor;
     struct vhost_vdpa *v = &s->vhost_vdpa;
     const VirtIONet *n;
-    int r;
+    ssize_t cmds_in_flight = 0, dev_written, r;
 
     assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 
@@ -699,14 +750,51 @@ static int vhost_vdpa_net_load(NetClientState *nc)
     }
 
     n = VIRTIO_NET(v->dev->vdev);
-    r = vhost_vdpa_net_load_mac(s, n);
+    out_cursor = s->cvq_cmd_out_buffer;
+    in_cursor = s->status;
+
+    r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor);
     if (unlikely(r < 0)) {
         return r;
     }
-    r = vhost_vdpa_net_load_mq(s, n);
-    if (unlikely(r)) {
+    cmds_in_flight += r;
+
+    r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor);
+    if (unlikely(r < 0)) {
        return r;
     }
+    cmds_in_flight += r;
+
+    /* Poll for all used buffers from the device */
+    svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
+    while (cmds_in_flight > 0) {
+        /*
+         * We can poll here since we've had the BQL from the time we sent the
+         * descriptor. Also, we need to take the answer before SVQ pulls
+         * by itself, when the BQL is released.
+         */
+        dev_written = vhost_svq_poll(svq);
+
+        if (unlikely(!dev_written)) {
+            /*
+             * vhost_svq_poll() returns 0 when something goes wrong, such as
+             * QEMU waiting too long or no used buffer being available from
+             * the device; there is no need to continue polling
+             * in this case.
+             */
+            return -EINVAL;
+        }
+
+        --cmds_in_flight;
+    }
+
+    /* Check the buffers written by the device */
+    for (virtio_net_ctrl_ack *status = s->status; status < in_cursor;
+         ++status) {
+        if (*status != VIRTIO_NET_OK) {
+            return -EINVAL;
+        }
+    }
 
     return 0;
 }
-- 
2.25.1