Sorry for forgetting cc when replying to the email. On Wed, 17 May 2023 at 16:22, Eugenio Perez Martin <epere...@redhat.com> wrote: > > On Wed, May 17, 2023 at 7:22 AM Jason Wang <jasow...@redhat.com> wrote: > > > > On Sat, May 6, 2023 at 10:07 PM Hawkins Jiawei <yin31...@gmail.com> wrote: > > > > > > This patch introduces the vhost_vdpa_net_cvq_add() and > > > refactors the vhost_vdpa_net_load*(), so that QEMU can > > > send CVQ state load commands in parallel. > > > > > > To be more specific, this patch introduces vhost_vdpa_net_cvq_add() > > > to add SVQ control commands to SVQ and kick the device, > > > but does not poll the device used buffers. QEMU will not > > > poll and check the device used buffers in vhost_vdpa_net_load() > > > until all CVQ state load commands have been sent to the device. > > > > > > What's more, in order to avoid buffer overwriting caused by > > > using `svq->cvq_cmd_out_buffer` and `svq->status` as the > > > buffer for all CVQ state load commands when sending > > > CVQ state load commands in parallel, this patch introduces > > > `out_cursor` and `in_cursor` in vhost_vdpa_net_load(), > > > pointing to the available buffer for in descriptor and > > > out descriptor, so that different CVQ state load commands can > > > use their unique buffer. > > > > > > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1578 > > > Signed-off-by: Hawkins Jiawei <yin31...@gmail.com> > > > --- > > > net/vhost-vdpa.c | 152 +++++++++++++++++++++++++++++++++++++---------- > > > 1 file changed, 120 insertions(+), 32 deletions(-) > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c > > > index 10804c7200..14e31ca5c5 100644 > > > --- a/net/vhost-vdpa.c > > > +++ b/net/vhost-vdpa.c > > > @@ -590,6 +590,44 @@ static void vhost_vdpa_net_cvq_stop(NetClientState > > > *nc) > > > vhost_vdpa_net_client_stop(nc); > > > } > > > > > > +/** > > > + * vhost_vdpa_net_cvq_add() adds SVQ control commands to SVQ, > > > + * kicks the device but does not poll the device used buffers. > > > + * > > > + * Return the number of elements added to SVQ if success. > > > + */ > > > +static int vhost_vdpa_net_cvq_add(VhostVDPAState *s, > > > + void **out_cursor, size_t out_len, > > > > Can we track things like cursors in e.g VhostVDPAState ? > > > > Cursors will only be used at device startup. After that, cursors are > always the full buffer. Do we want to "pollute" VhostVDPAState with > things that will not be used after the startup? > > Maybe we can create a struct for them and then pass just this struct?
Yes, Currently, the new version of vhost_vdpa_net_cvq_add() is only called in vhost_vdpa_net_load() at startup, so these cursors will not be used after startup. If needed, we can create a `VhostVDPACursor` as suggested by Eugenio. > > Thanks! > > > > + virtio_net_ctrl_ack **in_cursor, size_t > > > in_len) > > > +{ > > > + /* Buffers for the device */ > > > + const struct iovec out = { > > > + .iov_base = *out_cursor, > > > + .iov_len = out_len, > > > + }; > > > + const struct iovec in = { > > > + .iov_base = *in_cursor, > > > + .iov_len = sizeof(virtio_net_ctrl_ack), > > > + }; > > > + VhostShadowVirtqueue *svq = > > > g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); > > > + int r; > > > + > > > + r = vhost_svq_add(svq, &out, 1, &in, 1, NULL); > > > + if (unlikely(r != 0)) { > > > + if (unlikely(r == -ENOSPC)) { > > > + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device > > > queue\n", > > > + __func__); > > > + } > > > + return r; > > > + } > > > + > > > + /* Update the cursor */ > > > + *out_cursor += out_len; > > > + *in_cursor += 1; > > > + > > > + return 1; > > > +} > > > + > > > /** > > > * vhost_vdpa_net_cvq_add_and_wait() adds SVQ control commands to SVQ, > > > * kicks the device and polls the device used buffers. > > > @@ -628,69 +666,82 @@ static ssize_t > > > vhost_vdpa_net_cvq_add_and_wait(VhostVDPAState *s, > > > return vhost_svq_poll(svq); > > > } > > > > > > -static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, > > > - uint8_t cmd, const void *data, > > > - size_t data_size) > > > + > > > +/** > > > + * vhost_vdpa_net_load_cmd() restores the NIC state through SVQ. > > > + * > > > + * Return the number of elements added to SVQ if success. > > > + */ > > > +static int vhost_vdpa_net_load_cmd(VhostVDPAState *s, > > > + void **out_cursor, uint8_t class, > > > uint8_t cmd, > > > + const void *data, size_t data_size, > > > + virtio_net_ctrl_ack **in_cursor) > > > { > > > const struct virtio_net_ctrl_hdr ctrl = { > > > .class = class, > > > .cmd = cmd, > > > }; > > > > > > - assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); > > > + assert(sizeof(ctrl) < vhost_vdpa_net_cvq_cmd_page_len() - > > > + (*out_cursor - s->cvq_cmd_out_buffer)); > > > + assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl) - > > > + (*out_cursor - s->cvq_cmd_out_buffer)); > > > > > > - memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl)); > > > - memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size); > > > + memcpy(*out_cursor, &ctrl, sizeof(ctrl)); > > > + memcpy(*out_cursor + sizeof(ctrl), data, data_size); > > > > > > - return vhost_vdpa_net_cvq_add_and_wait(s, sizeof(ctrl) + data_size, > > > - sizeof(virtio_net_ctrl_ack)); > > > + return vhost_vdpa_net_cvq_add(s, out_cursor, sizeof(ctrl) + > > > data_size, > > > + in_cursor, > > > sizeof(virtio_net_ctrl_ack)); > > > } > > > > > > -static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) > > > +/** > > > + * vhost_vdpa_net_load_mac() restores the NIC mac through SVQ. > > > + * > > > + * Return the number of elements added to SVQ if success. > > > + */ > > > +static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n, > > > + void **out_cursor, virtio_net_ctrl_ack > > > **in_cursor) > > > { > > > uint64_t features = n->parent_obj.guest_features; > > > if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) { > > > - ssize_t dev_written = vhost_vdpa_net_load_cmd(s, > > > VIRTIO_NET_CTRL_MAC, > > > - > > > VIRTIO_NET_CTRL_MAC_ADDR_SET, > > > - n->mac, > > > sizeof(n->mac)); > > > - if (unlikely(dev_written < 0)) { > > > - return dev_written; > > > - } > > > - > > > - return *s->status != VIRTIO_NET_OK; > > > + return vhost_vdpa_net_load_cmd(s, out_cursor, > > > VIRTIO_NET_CTRL_MAC, > > > + VIRTIO_NET_CTRL_MAC_ADDR_SET, > > > + n->mac, sizeof(n->mac), > > > in_cursor); > > > } > > > > > > return 0; > > > } > > > > > > -static int vhost_vdpa_net_load_mq(VhostVDPAState *s, > > > - const VirtIONet *n) > > > +/** > > > + * vhost_vdpa_net_load_mac() restores the NIC mq state through SVQ. > > > + * > > > + * Return the number of elements added to SVQ if success. > > > + */ > > > +static int vhost_vdpa_net_load_mq(VhostVDPAState *s, const VirtIONet *n, > > > + void **out_cursor, virtio_net_ctrl_ack > > > **in_cursor) > > > { > > > struct virtio_net_ctrl_mq mq; > > > uint64_t features = n->parent_obj.guest_features; > > > - ssize_t dev_written; > > > > > > if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) { > > > return 0; > > > } > > > > > > mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs); > > > - dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ, > > > - > > > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq, > > > - sizeof(mq)); > > > - if (unlikely(dev_written < 0)) { > > > - return dev_written; > > > - } > > > - > > > - return *s->status != VIRTIO_NET_OK; > > > + return vhost_vdpa_net_load_cmd(s, out_cursor, VIRTIO_NET_CTRL_MQ, > > > + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, > > > + &mq, sizeof(mq), in_cursor); > > > } > > > > > > static int vhost_vdpa_net_load(NetClientState *nc) > > > { > > > VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); > > > + VhostShadowVirtqueue *svq; > > > + void *out_cursor; > > > + virtio_net_ctrl_ack *in_cursor; > > > struct vhost_vdpa *v = &s->vhost_vdpa; > > > const VirtIONet *n; > > > - int r; > > > + ssize_t cmds_in_flight = 0, dev_written, r; > > > > > > assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); > > > > > > @@ -699,14 +750,51 @@ static int vhost_vdpa_net_load(NetClientState *nc) > > > } > > > > > > n = VIRTIO_NET(v->dev->vdev); > > > - r = vhost_vdpa_net_load_mac(s, n); > > > + out_cursor = s->cvq_cmd_out_buffer; > > > + in_cursor = s->status; > > > + > > > + r = vhost_vdpa_net_load_mac(s, n, &out_cursor, &in_cursor); > > > if (unlikely(r < 0)) > > > return r; > > > } > > > - r = vhost_vdpa_net_load_mq(s, n); > > > - if (unlikely(r)) { > > > + cmds_in_flight += r; > > > + > > > + r = vhost_vdpa_net_load_mq(s, n, &out_cursor, &in_cursor); > > > + if (unlikely(r < 0)) { > > > return r; > > > } > > > + cmds_in_flight += r; > > > + > > > + /* Poll for all used buffer from device */ > > > + svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0); > > > + while (cmds_in_flight > 0) { > > > + /* > > > + * We can poll here since we've had BQL from the time we sent the > > > + * descriptor. Also, we need to take the answer before SVQ pulls > > > + * by itself, when BQL is released > > > + */ > > > + dev_written = vhost_svq_poll(svq); > > > > I'd tweak vhost_svq_poll to accept cmds_in_flight. That sounds great! I will refactor the code here and send the v2 patch after your patch. Thanks! > > > > Thanks > > > > > + > > > + if (unlikely(!dev_written)) { > > > + /* > > > + * vhost_svq_poll() return 0 when something wrong, such as > > > + * QEMU waits for too long time or no available used buffer > > > + * from device, and there is no need to continue polling > > > + * in this case. > > > + */ > > > + return -EINVAL; > > > + } > > > + > > > + --cmds_in_flight; > > > + } > > > + > > > + /* Check the buffers written by device */ > > > + for (virtio_net_ctrl_ack *status = s->status; status < in_cursor; > > > + ++status) { > > > + if (*status != VIRTIO_NET_OK) { > > > + return -EINVAL; > > > + } > > > + } > > > > > > return 0; > > > } > > > -- > > > 2.25.1 > > > > > >