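Improve the functions that copy elements to and from the ring: replace
the ENQUEUE_PTRS_GEN and DEQUEUE_PTRS_GEN macros with the always-inline
functions copy_elems(), enqueue_elems() and dequeue_elems().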

Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
Signed-off-by: Konstantin Ananyev <konstantin.anan...@intel.com>
---
 lib/librte_ring/rte_ring_elem.h | 165 ++++++++++++++++----------------
 1 file changed, 84 insertions(+), 81 deletions(-)

diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
index 0ce5f2be7..80ec3c562 100644
--- a/lib/librte_ring/rte_ring_elem.h
+++ b/lib/librte_ring/rte_ring_elem.h
@@ -109,85 +109,88 @@ __rte_experimental
 struct rte_ring *rte_ring_create_elem(const char *name, unsigned int count,
                        unsigned int esize, int socket_id, unsigned int flags);
 
-#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) do { \
-       unsigned int i, j; \
-       const uint32_t size = (r)->size; \
-       uint32_t idx = prod_head & (r)->mask; \
-       uint32_t *ring = (uint32_t *)ring_start; \
-       uint32_t *obj = (uint32_t *)obj_table; \
-       uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
-       uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
-       uint32_t seg0 = size - idx; \
-       if (likely(n < seg0)) { \
-               for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
-                                               i += 8, nr_idx += 8) { \
-                       memcpy(ring + nr_idx, obj + i, 8 * sizeof (uint32_t)); \
-               } \
-               switch (nr_n & 0x7) { \
-               case 7: \
-                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-               case 6: \
-                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-               case 5: \
-                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-               case 4: \
-                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-               case 3: \
-                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-               case 2: \
-                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-               case 1: \
-                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-               } \
-       } else { \
-               uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
-               uint32_t nr_seg1 = nr_n - nr_seg0; \
-               for (i = 0; i < nr_seg0; i++, nr_idx++)\
-                       ring[nr_idx] = obj[i]; \
-               for (j = 0; j < nr_seg1; i++, j++) \
-                       ring[j] = obj[i]; \
-       } \
-} while (0)
-
-#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) do { \
-       unsigned int i, j; \
-       uint32_t idx = cons_head & (r)->mask; \
-       const uint32_t size = (r)->size; \
-       uint32_t *ring = (uint32_t *)ring_start; \
-       uint32_t *obj = (uint32_t *)obj_table; \
-       uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
-       uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
-       uint32_t seg0 = size - idx; \
-       if (likely(n < seg0)) { \
-               for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
-                                               i += 8, nr_idx += 8) { \
-                       memcpy(obj + i, ring + nr_idx, 8 * sizeof (uint32_t)); \
-               } \
-               switch (nr_n & 0x7) { \
-               case 7: \
-                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-               case 6: \
-                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-               case 5: \
-                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-               case 4: \
-                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-               case 3: \
-                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-               case 2: \
-                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-               case 1: \
-                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-               } \
-       } else { \
-               uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
-               uint32_t nr_seg1 = nr_n - nr_seg0; \
-               for (i = 0; i < nr_seg0; i++, nr_idx++)\
-                       obj[i] = ring[nr_idx];\
-               for (j = 0; j < nr_seg1; i++, j++) \
-                       obj[i] = ring[j]; \
-       } \
-} while (0)
+static __rte_always_inline void
+copy_elems(uint32_t du32[], const uint32_t su32[], uint32_t nr_num)
+{ /* Copy nr_num 32-bit words from su32[] to du32[]. */
+       uint32_t i;
+
+       for (i = 0; i < (nr_num & ~7); i += 8) /* whole groups of 8 words */
+               memcpy(du32 + i, su32 + i, 8 * sizeof(uint32_t));
+       /* Tail: the last (nr_num & 7) words, indexed back from nr_num */
+       switch (nr_num & 7) {
+       case 7: du32[nr_num - 7] = su32[nr_num - 7]; /* fallthrough */
+       case 6: du32[nr_num - 6] = su32[nr_num - 6]; /* fallthrough */
+       case 5: du32[nr_num - 5] = su32[nr_num - 5]; /* fallthrough */
+       case 4: du32[nr_num - 4] = su32[nr_num - 4]; /* fallthrough */
+       case 3: du32[nr_num - 3] = su32[nr_num - 3]; /* fallthrough */
+       case 2: du32[nr_num - 2] = su32[nr_num - 2]; /* fallthrough */
+       case 1: du32[nr_num - 1] = su32[nr_num - 1]; /* fallthrough */
+       }
+}
+
+static __rte_always_inline void
+enqueue_elems(struct rte_ring *r, void *ring_start, uint32_t prod_head,
+               void *obj_table, uint32_t num, uint32_t esize)
+{ /* Copy num elements of esize bytes from obj_table into the ring. */
+       uint32_t idx, nr_idx, nr_num;
+       uint32_t *du32;
+       const uint32_t *su32;
+
+       const uint32_t size = r->size;
+       uint32_t s0, nr_s0, nr_s1;
+
+       idx = prod_head & (r)->mask;
+       /* Slot index -> offset in uint32_t words (esize is in bytes) */
+       nr_idx = (idx * esize) / sizeof(uint32_t);
+       /* NOTE(review): idx * esize is a 32-bit product; confirm no wrap */
+       du32 = (uint32_t *)ring_start + nr_idx;
+       su32 = obj_table;
+
+       /* Element count -> number of uint32_t words to copy */
+       nr_num = (num * esize) / sizeof(uint32_t);
+
+       s0 = size - idx; /* slots from idx up to r->size */
+       if (num < s0) /* no wrap-around: one contiguous copy */
+               copy_elems(du32, su32, nr_num);
+       else { /* fill s0 slots at idx, then continue at ring_start */
+               nr_s0 = (s0 * esize) / sizeof(uint32_t);
+               nr_s1 = nr_num - nr_s0; /* 0 when num == s0 */
+               copy_elems(du32, su32, nr_s0);
+               copy_elems(ring_start, su32 + nr_s0, nr_s1);
+       }
+}
+
+static __rte_always_inline void
+dequeue_elems(struct rte_ring *r, void *ring_start, uint32_t cons_head,
+               void *obj_table, uint32_t num, uint32_t esize)
+{ /* Copy num elements of esize bytes from the ring into obj_table. */
+       uint32_t idx, nr_idx, nr_num;
+       uint32_t *du32;
+       const uint32_t *su32;
+
+       const uint32_t size = r->size;
+       uint32_t s0, nr_s0, nr_s1;
+
+       idx = cons_head & (r)->mask;
+       /* Slot index -> offset in uint32_t words (esize is in bytes) */
+       nr_idx = (idx * esize) / sizeof(uint32_t);
+       /* NOTE(review): idx * esize is a 32-bit product; confirm no wrap */
+       su32 = (uint32_t *)ring_start + nr_idx;
+       du32 = obj_table;
+
+       /* Element count -> number of uint32_t words to copy */
+       nr_num = (num * esize) / sizeof(uint32_t);
+
+       s0 = size - idx; /* slots from idx up to r->size */
+       if (num < s0) /* no wrap-around: one contiguous copy */
+               copy_elems(du32, su32, nr_num);
+       else { /* read s0 slots at idx, then the rest from ring_start */
+               nr_s0 = (s0 * esize) / sizeof(uint32_t);
+               nr_s1 = nr_num - nr_s0; /* 0 when num == s0 */
+               copy_elems(du32, su32, nr_s0);
+               copy_elems(du32 + nr_s0, ring_start, nr_s1);
+       }
+}
 
 /* Between load and load. there might be cpu reorder in weak model
  * (powerpc/arm).
@@ -242,7 +245,7 @@ __rte_ring_do_enqueue_elem(struct rte_ring *r, void * const obj_table,
        if (n == 0)
                goto end;
 
-       ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n);
+       enqueue_elems(r, &r[1], prod_head, obj_table, n, esize);
 
        update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
 end:
@@ -289,7 +292,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table,
        if (n == 0)
                goto end;
 
-       DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n);
+       dequeue_elems(r, &r[1], cons_head, obj_table, n, esize);
 
        update_tail(&r->cons, cons_head, cons_next, is_sc, 0);
 
-- 
2.17.1

Reply via email to