Hi
> > > > @@ -227,6 +227,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
> > > >                                 "zero copy is force disabled\n");
> > > >                         dev->dequeue_zero_copy = 0;
> > > >                 }
> > > > +               TAILQ_INIT(&vq->zmbuf_list);
> > > >         }
> > > >
> > > >         vq->shadow_used_ring = rte_malloc(NULL,
> > > > @@ -261,6 +262,9 @@ numa_realloc(struct virtio_net *dev, int index)
> > > >         int oldnode, newnode;
> > > >         struct virtio_net *old_dev;
> > > >         struct vhost_virtqueue *old_vq, *vq;
> > > > +       struct zcopy_mbuf *new_zmbuf;
> > > > +       struct vring_used_elem *new_shadow_used_ring;
> > > > +       struct batch_copy_elem *new_batch_copy_elems;
> > > >         int ret;
> > > >
> > > >         old_dev = dev;
> > > > @@ -285,6 +289,33 @@ numa_realloc(struct virtio_net *dev, int index)
> > > >                         return dev;
> > > >
> > > >                 memcpy(vq, old_vq, sizeof(*vq));
> > > > +               TAILQ_INIT(&vq->zmbuf_list);
> > > > +
> > > > +               new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
> > > > +                       sizeof(struct zcopy_mbuf), 0, newnode);
> > > > +               if (new_zmbuf) {
> > > > +                       rte_free(vq->zmbufs);
> > > > +                       vq->zmbufs = new_zmbuf;
> > > > +               }
> > >
> > > You need to consider how to handle the case where rte_malloc_socket
> > > returns NULL.
> >
> > If it fails to allocate new_zmbuf, it keeps using the old zmbufs, so as
> > to keep vhost alive.
> 
> That sounds reasonable. Another question: of the 3 blocks of memory being
> allocated, if some succeed and others fail, does that mean the code will
> run on different sockets? What's the perf impact if that happens?

The original code doesn't do a deep copy and thus always accesses memory on a
different socket; this patch mitigates that situation. It still accesses remote
memory when one of the above allocations fails.
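
For reference, the remaining two allocations presumably mirror the zmbuf block
quoted above. This is only a sketch, not the actual patch: the element count
vq->size and the zero alignment argument are assumptions based on the
surrounding code.

	new_shadow_used_ring = rte_malloc_socket(NULL,
		vq->size * sizeof(struct vring_used_elem), 0, newnode);
	if (new_shadow_used_ring) {
		/* Switch to the node-local ring; on failure keep the
		 * old (remote) allocation so vhost stays alive. */
		rte_free(vq->shadow_used_ring);
		vq->shadow_used_ring = new_shadow_used_ring;
	}

	new_batch_copy_elems = rte_malloc_socket(NULL,
		vq->size * sizeof(struct batch_copy_elem), 0, newnode);
	if (new_batch_copy_elems) {
		rte_free(vq->batch_copy_elems);
		vq->batch_copy_elems = new_batch_copy_elems;
	}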

I saw some performance improvement (24.8 Gbit/s -> 26.1 Gbit/s) on my dev
machine when reallocating only the zmbufs, while I didn't see a significant
performance difference when also reallocating the vring_used_elem and
batch_copy_elem arrays.
