Add a flag that controls whether the rte_ioat_enqueue_copy
and rte_ioat_completed_copies functions process handle
parameters, improving performance when the handle
parameters are not needed. This is targeting the
20.11 release.

Signed-off-by: Cheng Jiang <cheng1.ji...@intel.com>
---
 drivers/raw/ioat/ioat_rawdev.c     |  1 +
 drivers/raw/ioat/rte_ioat_rawdev.h | 14 +++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/raw/ioat/ioat_rawdev.c b/drivers/raw/ioat/ioat_rawdev.c
index 87fd088aa..5bf030785 100644
--- a/drivers/raw/ioat/ioat_rawdev.c
+++ b/drivers/raw/ioat/ioat_rawdev.c
@@ -57,6 +57,7 @@ ioat_dev_configure(const struct rte_rawdev *dev, 
rte_rawdev_obj_t config)
                return -EINVAL;
 
        ioat->ring_size = params->ring_size;
+       ioat->hdls_enable = params->hdls_enable;
        if (ioat->desc_ring != NULL) {
                rte_memzone_free(ioat->desc_mz);
                ioat->desc_ring = NULL;
diff --git a/drivers/raw/ioat/rte_ioat_rawdev.h 
b/drivers/raw/ioat/rte_ioat_rawdev.h
index f765a6557..daca04dd3 100644
--- a/drivers/raw/ioat/rte_ioat_rawdev.h
+++ b/drivers/raw/ioat/rte_ioat_rawdev.h
@@ -35,6 +35,7 @@
  */
 struct rte_ioat_rawdev_config {
        unsigned short ring_size;
+       bool hdls_enable;
 };
 
 /**
@@ -52,6 +53,8 @@ struct rte_ioat_rawdev {
 
        unsigned short ring_size;
        struct rte_ioat_generic_hw_desc *desc_ring;
+
+       bool hdls_enable; /* control if handles need to be copied */
        __m128i *hdls; /* completion handles for returning to user */
 
 
@@ -121,8 +124,10 @@ rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, 
phys_addr_t dst,
        desc->u.control_raw = (uint32_t)((!!fence << 4) | (!(write & 0xF)) << 
3);
        desc->src_addr = src;
        desc->dest_addr = dst;
+       if (ioat->hdls_enable)
+               ioat->hdls[write] = _mm_set_epi64x((int64_t)dst_hdl,
+                                       (int64_t)src_hdl);
 
-       ioat->hdls[write] = _mm_set_epi64x((int64_t)dst_hdl, (int64_t)src_hdl);
        rte_prefetch0(&ioat->desc_ring[ioat->next_write & mask]);
 
        ioat->enqueued++;
@@ -208,6 +213,11 @@ rte_ioat_completed_copies(int dev_id, uint8_t max_copies,
        if (count > max_copies)
                count = max_copies;
 
+       ioat->next_read = read + count;
+       ioat->completed += count;
+       if (!ioat->hdls_enable)
+               return count;
+
        for (; i < count - 1; i += 2, read += 2) {
                __m128i hdls0 = _mm_load_si128(&ioat->hdls[read & mask]);
                __m128i hdls1 = _mm_load_si128(&ioat->hdls[(read + 1) & mask]);
@@ -223,8 +233,6 @@ rte_ioat_completed_copies(int dev_id, uint8_t max_copies,
                dst_hdls[i] = hdls[1];
        }
 
-       ioat->next_read = read;
-       ioat->completed += count;
        return count;
 }
 
-- 
2.27.0

Reply via email to