Tested-by: Ma,lihong <lihongx...@intel.com>

Regards,
Ma,lihong


-----Original Message-----
From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Liu, Yong
Sent: Wednesday, April 1, 2020 9:01 PM
To: Gavin Hu <gavin...@arm.com>; maxime.coque...@redhat.com; Ye, Xiaolong 
<xiaolong...@intel.com>; Wang, Zhihong <zhihong.w...@intel.com>
Cc: dev@dpdk.org; nd <n...@arm.com>
Subject: Re: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa translation



> -----Original Message-----
> From: Gavin Hu <gavin...@arm.com>
> Sent: Wednesday, April 1, 2020 6:07 PM
> To: Liu, Yong <yong....@intel.com>; maxime.coque...@redhat.com; Ye, 
> Xiaolong <xiaolong...@intel.com>; Wang, Zhihong 
> <zhihong.w...@intel.com>
> Cc: dev@dpdk.org; nd <n...@arm.com>
> Subject: RE: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa 
> translation
> 
> Hi Marvin,
> 
> > -----Original Message-----
> > From: dev <dev-boun...@dpdk.org> On Behalf Of Marvin Liu
> > Sent: Wednesday, April 1, 2020 10:50 PM
> > To: maxime.coque...@redhat.com; xiaolong...@intel.com; 
> > zhihong.w...@intel.com
> > Cc: dev@dpdk.org; Marvin Liu <yong....@intel.com>
> > Subject: [dpdk-dev] [PATCH v2 2/2] vhost: cache gpa to hpa 
> > translation
> >
> > If Tx zero copy is enabled, the gpa to hpa mapping table is updated one
> > by one. This harms performance when the guest memory backend uses 2M
> > hugepages. Now add a cached mapping table which is sorted by usage
> > sequence. Address translation will first check the cached mapping table,
> > then check the unsorted mapping table if no match is found.
> >
> > Signed-off-by: Marvin Liu <yong....@intel.com>
> >
> > diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h 
> > index 2087d1400..5cb0e83dd 100644
> > --- a/lib/librte_vhost/vhost.h
> > +++ b/lib/librte_vhost/vhost.h
> > @@ -368,7 +368,9 @@ struct virtio_net {
> >     struct vhost_device_ops const *notify_ops;
> >
> >     uint32_t                nr_guest_pages;
> > +   uint32_t                nr_cached_guest_pages;
> >     uint32_t                max_guest_pages;
> > +   struct guest_page       *cached_guest_pages;
> >     struct guest_page       *guest_pages;
> >
> >     int                     slave_req_fd;
> > @@ -553,12 +555,25 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
> >  {
> >     uint32_t i;
> >     struct guest_page *page;
> > +   uint32_t cached_pages = dev->nr_cached_guest_pages;
> > +
> > +   for (i = 0; i < cached_pages; i++) {
> > +           page = &dev->cached_guest_pages[i];
> > +           if (gpa >= page->guest_phys_addr &&
> > +                   gpa + size < page->guest_phys_addr + page->size) {
> > +                   return gpa - page->guest_phys_addr +
> > +                           page->host_phys_addr;
> > +           }
> > +   }
> Sorry, I did not see any speedup with the cached guest pages in
> comparison to the old code below.
> Is it not just a simple copy?
> Would it be a better idea to use a hash instead to speed up the
> translation?
> /Gavin

Hi Gavin,
This simply reorders the overall mapping table according to usage sequence.
Most likely the virtio driver will reuse recently recycled buffers, so the
search will find a match near the beginning of the cached table.
It is a simple fix for a performance enhancement; using a hash for the index
would cost much more in the normal case.
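
To make this concrete outside of the diff context, below is a minimal
standalone sketch of the lookup path (simplified device state, plain libc
instead of DPDK types; it mirrors the patch logic but is not the exact
patch code):

#include <stdint.h>
#include <string.h>

struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_phys_addr;
	uint64_t size;
};

/* Simplified device state: the unsorted table plus a cache that records
 * pages in the order they were first hit. Both arrays are assumed to be
 * allocated with the same capacity, as in the patch, so the append on
 * the slow path cannot overflow. */
struct dev_state {
	uint32_t nr_guest_pages;
	uint32_t nr_cached_guest_pages;
	struct guest_page *guest_pages;
	struct guest_page *cached_guest_pages;
};

static uint64_t
gpa_to_hpa_cached(struct dev_state *dev, uint64_t gpa, uint64_t size)
{
	uint32_t i;
	struct guest_page *page;

	/* Fast path: recently used pages sit near the front of the
	 * cache, so this scan usually terminates after a few iterations. */
	for (i = 0; i < dev->nr_cached_guest_pages; i++) {
		page = &dev->cached_guest_pages[i];
		if (gpa >= page->guest_phys_addr &&
		    gpa + size < page->guest_phys_addr + page->size)
			return gpa - page->guest_phys_addr +
				page->host_phys_addr;
	}

	/* Slow path: scan the full table and promote the hit into the
	 * cache so the next lookup finds it early. */
	for (i = 0; i < dev->nr_guest_pages; i++) {
		page = &dev->guest_pages[i];
		if (gpa >= page->guest_phys_addr &&
		    gpa + size < page->guest_phys_addr + page->size) {
			memcpy(&dev->cached_guest_pages[dev->nr_cached_guest_pages],
			       page, sizeof(*page));
			dev->nr_cached_guest_pages++;
			return gpa - page->guest_phys_addr +
				page->host_phys_addr;
		}
	}

	return 0; /* no mapping found */
}

Since cached entries are never evicted, the cache converges to the set of
pages the driver actually touches, and a hot working set keeps hits within
the first few compares.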

Regards,
Marvin 


> >
> >     for (i = 0; i < dev->nr_guest_pages; i++) {
> >             page = &dev->guest_pages[i];
> >
> >             if (gpa >= page->guest_phys_addr &&
> >                 gpa + size < page->guest_phys_addr + page->size) {
> > +                   rte_memcpy(&dev->cached_guest_pages[cached_pages],
> > +                              page, sizeof(struct guest_page));
> > +                   dev->nr_cached_guest_pages++;
> >                     return gpa - page->guest_phys_addr +
> >                            page->host_phys_addr;
> >             }
> > diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> > index 79fcb9d19..1bae1fddc 100644
> > --- a/lib/librte_vhost/vhost_user.c
> > +++ b/lib/librte_vhost/vhost_user.c
> > @@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
> >     }
> >
> >     rte_free(dev->guest_pages);
> > +   rte_free(dev->cached_guest_pages);
> >     dev->guest_pages = NULL;
> > +   dev->cached_guest_pages = NULL;
> >
> >     if (dev->log_addr) {
> >             munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
> > @@ -898,7 +900,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> >                uint64_t host_phys_addr, uint64_t size)
> >  {
> >     struct guest_page *page, *last_page;
> > -   struct guest_page *old_pages;
> > +   struct guest_page *old_pages, *old_cached_pages;
> >
> >     if (dev->nr_guest_pages == dev->max_guest_pages) {
> >             dev->max_guest_pages *= 2;
> > @@ -906,9 +908,19 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> >             dev->guest_pages = rte_realloc(dev->guest_pages,
> >                                     dev->max_guest_pages *
> > sizeof(*page),
> >                                     RTE_CACHE_LINE_SIZE);
> > -           if (dev->guest_pages == NULL) {
> > +           old_cached_pages = dev->cached_guest_pages;
> > +           dev->cached_guest_pages = rte_realloc(dev->cached_guest_pages,
> > +                                           dev->max_guest_pages *
> > +                                           sizeof(*page),
> > +                                           RTE_CACHE_LINE_SIZE);
> > +           dev->nr_cached_guest_pages = 0;
> > +           if (dev->guest_pages == NULL ||
> > +                           dev->cached_guest_pages == NULL) {
> >                     VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
> >                     rte_free(old_pages);
> > +                   rte_free(old_cached_pages);
> > +                   dev->guest_pages = NULL;
> > +                   dev->cached_guest_pages = NULL;
> >                     return -1;
> >             }
> >     }
> > @@ -1078,6 +1090,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
> >             }
> >     }
> >
> > +   if (dev->cached_guest_pages == NULL) {
> > +           dev->cached_guest_pages = rte_zmalloc(NULL,
> > +                                           dev->max_guest_pages *
> > +                                           sizeof(struct guest_page),
> > +                                           RTE_CACHE_LINE_SIZE);
> > +           if (dev->cached_guest_pages == NULL) {
> > +                   VHOST_LOG_CONFIG(ERR,
> > +                           "(%d) failed to allocate memory "
> > +                           "for dev->cached_guest_pages\n",
> > +                           dev->vid);
> > +                   return RTE_VHOST_MSG_RESULT_ERR;
> > +           }
> > +   }
> > +
> >     dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct
> > rte_vhost_memory) +
> >             sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
> >     if (dev->mem == NULL) {
> > --
> > 2.17.1
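
For reference, a rough sketch of what a hash-style index as Gavin suggests
could look like is below. It is purely hypothetical (a direct-mapped cache
keyed by the 2MB page number, assuming fixed 2MB-aligned pages on both the
guest and host sides; all names are made up), just to illustrate the
trade-off discussed above:

#include <stdint.h>

#define HPA_CACHE_BITS 8
#define HPA_CACHE_SIZE (1u << HPA_CACHE_BITS)
#define PAGE_SHIFT_2M  21

/* Hypothetical direct-mapped cache entry. gpa_page stores the 2MB page
 * number plus one so that a zero-initialized slot reads as empty. */
struct hpa_cache_entry {
	uint64_t gpa_page;
	uint64_t hpa_base;
};

static struct hpa_cache_entry hpa_cache[HPA_CACHE_SIZE];

static inline uint64_t
gpa_to_hpa_hashed(uint64_t gpa)
{
	uint64_t page = gpa >> PAGE_SHIFT_2M;
	uint32_t slot = (uint32_t)(page & (HPA_CACHE_SIZE - 1));
	struct hpa_cache_entry *e = &hpa_cache[slot];

	if (e->gpa_page == page + 1)
		return e->hpa_base +
			(gpa & (((uint64_t)1 << PAGE_SHIFT_2M) - 1));

	return 0; /* miss: caller falls back to the full table scan
		   * and refills this slot afterwards */
}

static inline void
hpa_cache_fill(uint64_t gpa, uint64_t hpa)
{
	uint64_t page = gpa >> PAGE_SHIFT_2M;
	uint32_t slot = (uint32_t)(page & (HPA_CACHE_SIZE - 1));

	hpa_cache[slot].gpa_page = page + 1;
	hpa_cache[slot].hpa_base = hpa & ~(((uint64_t)1 << PAGE_SHIFT_2M) - 1);
}

Every lookup pays the index computation, and the cache needs invalidation
whenever the mem table changes, while the linear scan in the patch costs
only a few compares when the match sits near the front of the cached table,
which is the common case described above.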
