When Tx zero copy is enabled, the GPA to HPA mapping table is
updated one entry at a time, and every address translation walks
it linearly. This hurts performance when the guest memory backend
uses 2M hugepages, since the table then holds one entry per
hugepage. Add a cached mapping table ordered by use sequence.
Address translation now checks the cached table first, which
restores performance.

Signed-off-by: Marvin Liu <yong....@intel.com>
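
For reviewers, below is a minimal standalone sketch (not part of the
patch) of the two-level lookup this change introduces in gpa_to_hpa().
struct guest_page mirrors the real layout, while translation_ctx is a
hypothetical, simplified stand-in for the relevant virtio_net fields;
rte_memcpy is replaced by plain assignment for brevity.

/*
 * Sketch of the cached GPA -> HPA lookup: scan the small cache of
 * previously hit pages first; on a miss, fall back to the full table
 * and promote the hit into the cache.
 */
#include <stdint.h>

struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_phys_addr;
	uint64_t size;
};

struct translation_ctx {
	uint32_t nr_guest_pages;
	uint32_t nr_cached;
	struct guest_page *guest_pages;        /* full mapping table */
	struct guest_page *cached_guest_pages; /* hits, in first-use order */
};

static uint64_t
gpa_to_hpa_cached(struct translation_ctx *ctx, uint64_t gpa, uint64_t size)
{
	struct guest_page *page;
	uint32_t i;

	/* Fast path: only the pages that already produced a hit. */
	for (i = 0; i < ctx->nr_cached; i++) {
		page = &ctx->cached_guest_pages[i];
		if (gpa >= page->guest_phys_addr &&
		    gpa + size < page->guest_phys_addr + page->size)
			return gpa - page->guest_phys_addr +
				page->host_phys_addr;
	}

	/* Slow path: full table scan; promote the hit so the next
	 * translation for this page takes the fast path.
	 */
	for (i = 0; i < ctx->nr_guest_pages; i++) {
		page = &ctx->guest_pages[i];
		if (gpa >= page->guest_phys_addr &&
		    gpa + size < page->guest_phys_addr + page->size) {
			ctx->cached_guest_pages[ctx->nr_cached++] = *page;
			return gpa - page->guest_phys_addr +
				page->host_phys_addr;
		}
	}

	return 0; /* no mapping found */
}

Each page is promoted at most once, because later lookups for it are
served by the fast path; nr_cached therefore never exceeds
nr_guest_pages, which is why the patch can size cached_guest_pages to
max_guest_pages and insert without a per-hit bounds check.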

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 2087d1400..de2c09e7e 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -368,7 +368,9 @@ struct virtio_net {
        struct vhost_device_ops const *notify_ops;
 
        uint32_t                nr_guest_pages;
+       uint32_t                nr_cached;
        uint32_t                max_guest_pages;
+       struct guest_page       *cached_guest_pages;
        struct guest_page       *guest_pages;
 
        int                     slave_req_fd;
@@ -554,11 +556,23 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
        uint32_t i;
        struct guest_page *page;
 
+       for (i = 0; i < dev->nr_cached; i++) {
+               page = &dev->cached_guest_pages[i];
+               if (gpa >= page->guest_phys_addr &&
+                       gpa + size < page->guest_phys_addr + page->size) {
+                       return gpa - page->guest_phys_addr +
+                               page->host_phys_addr;
+               }
+       }
+
        for (i = 0; i < dev->nr_guest_pages; i++) {
                page = &dev->guest_pages[i];
 
                if (gpa >= page->guest_phys_addr &&
                    gpa + size < page->guest_phys_addr + page->size) {
+                       rte_memcpy(&dev->cached_guest_pages[dev->nr_cached],
+                                  page, sizeof(struct guest_page));
+                       dev->nr_cached++;
                        return gpa - page->guest_phys_addr +
                               page->host_phys_addr;
                }
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index bd1be0104..573e99066 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -192,7 +192,9 @@ vhost_backend_cleanup(struct virtio_net *dev)
        }
 
        free(dev->guest_pages);
+       free(dev->cached_guest_pages);
        dev->guest_pages = NULL;
+       dev->cached_guest_pages = NULL;
 
        if (dev->log_addr) {
                munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
@@ -905,7 +907,17 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
                old_pages = dev->guest_pages;
                dev->guest_pages = realloc(dev->guest_pages,
                                        dev->max_guest_pages * sizeof(*page));
-               if (!dev->guest_pages) {
+               if (!dev->guest_pages) {
+                       VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
+                       free(old_pages);
+                       return -1;
+               }
+
+               old_pages = dev->cached_guest_pages;
+               dev->cached_guest_pages = realloc(dev->cached_guest_pages,
+                                       dev->max_guest_pages * sizeof(*page));
+               dev->nr_cached = 0;
+               if (!dev->cached_guest_pages) {
                        VHOST_LOG_CONFIG(ERR, "cannot realloc guest_pages\n");
                        free(old_pages);
                        return -1;
@@ -1075,6 +1080,18 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
                }
        }
 
+       if (!dev->cached_guest_pages) {
+               dev->cached_guest_pages = malloc(dev->max_guest_pages *
+                                               sizeof(struct guest_page));
+               if (dev->cached_guest_pages == NULL) {
+                       VHOST_LOG_CONFIG(ERR,
+                               "(%d) failed to allocate memory "
+                               "for dev->cached_guest_pages\n",
+                               dev->vid);
+                       return RTE_VHOST_MSG_RESULT_ERR;
+               }
+       }
+
        dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
                sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
        if (dev->mem == NULL) {
-- 
2.17.1
