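Ring elements are now copied with memcpy in 32-byte chunks; any
remaining 32-bit words are copied using plain assignments.

The size-specific 32/64/128-bit enqueue/dequeue copy macros are replaced
by a single generic pair (ENQUEUE_PTRS_GEN/DEQUEUE_PTRS_GEN), and the
check in rte_ring_get_memsize_elem() that restricted esize to 4, 8 or 16
is removed.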

Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
---
 lib/librte_ring/rte_ring.c      |  10 --
 lib/librte_ring/rte_ring_elem.h | 229 +++++++-------------------------
 2 files changed, 49 insertions(+), 190 deletions(-)

diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c
index e95285259..0f7f4b598 100644
--- a/lib/librte_ring/rte_ring.c
+++ b/lib/librte_ring/rte_ring.c
@@ -51,16 +51,6 @@ rte_ring_get_memsize_elem(unsigned count, unsigned esize)
 {
        ssize_t sz;
 
-       /* Supported esize values are 4/8/16.
-        * Others can be added on need basis.
-        */
-       if (esize != 4 && esize != 8 && esize != 16) {
-               RTE_LOG(ERR, RING,
-                       "Unsupported esize value. Supported values are 4, 8 and 16\n");
-
-               return -EINVAL;
-       }
-
        /* count must be a power of 2 */
        if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK )) {
                RTE_LOG(ERR, RING,
diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
index 7e9914567..0ce5f2be7 100644
--- a/lib/librte_ring/rte_ring_elem.h
+++ b/lib/librte_ring/rte_ring_elem.h
@@ -24,6 +24,7 @@ extern "C" {
 #include <stdint.h>
 #include <sys/queue.h>
 #include <errno.h>
+#include <string.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include <rte_memory.h>
@@ -108,215 +109,83 @@ __rte_experimental
 struct rte_ring *rte_ring_create_elem(const char *name, unsigned int count,
                        unsigned int esize, int socket_id, unsigned int flags);
 
-/* the actual enqueue of pointers on the ring.
- * Placed here since identical code needed in both
- * single and multi producer enqueue functions.
- */
-#define ENQUEUE_PTRS_ELEM(r, ring_start, prod_head, obj_table, esize, n) do { \
-       if (esize == 4) \
-               ENQUEUE_PTRS_32(r, ring_start, prod_head, obj_table, n); \
-       else if (esize == 8) \
-               ENQUEUE_PTRS_64(r, ring_start, prod_head, obj_table, n); \
-       else if (esize == 16) \
-               ENQUEUE_PTRS_128(r, ring_start, prod_head, obj_table, n); \
-} while (0)
-
-#define ENQUEUE_PTRS_32(r, ring_start, prod_head, obj_table, n) do { \
-       unsigned int i; \
+#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) do { \
+       unsigned int i, j; \
        const uint32_t size = (r)->size; \
        uint32_t idx = prod_head & (r)->mask; \
        uint32_t *ring = (uint32_t *)ring_start; \
        uint32_t *obj = (uint32_t *)obj_table; \
-       if (likely(idx + n < size)) { \
-               for (i = 0; i < (n & ((~(uint32_t)0x7))); i += 8, idx += 8) { \
-                       ring[idx] = obj[i]; \
-                       ring[idx + 1] = obj[i + 1]; \
-                       ring[idx + 2] = obj[i + 2]; \
-                       ring[idx + 3] = obj[i + 3]; \
-                       ring[idx + 4] = obj[i + 4]; \
-                       ring[idx + 5] = obj[i + 5]; \
-                       ring[idx + 6] = obj[i + 6]; \
-                       ring[idx + 7] = obj[i + 7]; \
+       uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
+       uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
+       uint32_t seg0 = size - idx; \
+       if (likely(n < seg0)) { \
+               for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
+                                               i += 8, nr_idx += 8) { \
+                       memcpy(ring + nr_idx, obj + i, 8 * sizeof (uint32_t)); \
                } \
-               switch (n & 0x7) { \
+               switch (nr_n & 0x7) { \
                case 7: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
+                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
                case 6: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
+                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
                case 5: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
+                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
                case 4: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
+                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
                case 3: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
+                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
                case 2: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
-               case 1: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
-               } \
-       } else { \
-               for (i = 0; idx < size; i++, idx++)\
-                       ring[idx] = obj[i]; \
-               for (idx = 0; i < n; i++, idx++) \
-                       ring[idx] = obj[i]; \
-       } \
-} while (0)
-
-#define ENQUEUE_PTRS_64(r, ring_start, prod_head, obj_table, n) do { \
-       unsigned int i; \
-       const uint32_t size = (r)->size; \
-       uint32_t idx = prod_head & (r)->mask; \
-       uint64_t *ring = (uint64_t *)ring_start; \
-       uint64_t *obj = (uint64_t *)obj_table; \
-       if (likely(idx + n < size)) { \
-               for (i = 0; i < (n & ((~(uint32_t)0x3))); i += 4, idx += 4) { \
-                       ring[idx] = obj[i]; \
-                       ring[idx + 1] = obj[i + 1]; \
-                       ring[idx + 2] = obj[i + 2]; \
-                       ring[idx + 3] = obj[i + 3]; \
-               } \
-               switch (n & 0x3) { \
-               case 3: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
-               case 2: \
-                       ring[idx++] = obj[i++]; /* fallthrough */ \
-               case 1: \
-                       ring[idx++] = obj[i++]; \
-               } \
-       } else { \
-               for (i = 0; idx < size; i++, idx++)\
-                       ring[idx] = obj[i]; \
-               for (idx = 0; i < n; i++, idx++) \
-                       ring[idx] = obj[i]; \
-       } \
-} while (0)
-
-#define ENQUEUE_PTRS_128(r, ring_start, prod_head, obj_table, n) do { \
-       unsigned int i; \
-       const uint32_t size = (r)->size; \
-       uint32_t idx = prod_head & (r)->mask; \
-       __uint128_t *ring = (__uint128_t *)ring_start; \
-       __uint128_t *obj = (__uint128_t *)obj_table; \
-       if (likely(idx + n < size)) { \
-               for (i = 0; i < (n >> 1); i += 2, idx += 2) { \
-                       ring[idx] = obj[i]; \
-                       ring[idx + 1] = obj[i + 1]; \
-               } \
-               switch (n & 0x1) { \
+                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
                case 1: \
-                       ring[idx++] = obj[i++]; \
+                       ring[nr_idx++] = obj[i++]; /* fallthrough */ \
                } \
        } else { \
-               for (i = 0; idx < size; i++, idx++)\
-                       ring[idx] = obj[i]; \
-               for (idx = 0; i < n; i++, idx++) \
-                       ring[idx] = obj[i]; \
+               uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
+               uint32_t nr_seg1 = nr_n - nr_seg0; \
+               for (i = 0; i < nr_seg0; i++, nr_idx++)\
+                       ring[nr_idx] = obj[i]; \
+               for (j = 0; j < nr_seg1; i++, j++) \
+                       ring[j] = obj[i]; \
        } \
 } while (0)
 
-/* the actual copy of pointers on the ring to obj_table.
- * Placed here since identical code needed in both
- * single and multi consumer dequeue functions.
- */
-#define DEQUEUE_PTRS_ELEM(r, ring_start, cons_head, obj_table, esize, n) do { \
-       if (esize == 4) \
-               DEQUEUE_PTRS_32(r, ring_start, cons_head, obj_table, n); \
-       else if (esize == 8) \
-               DEQUEUE_PTRS_64(r, ring_start, cons_head, obj_table, n); \
-       else if (esize == 16) \
-               DEQUEUE_PTRS_128(r, ring_start, cons_head, obj_table, n); \
-} while (0)
-
-#define DEQUEUE_PTRS_32(r, ring_start, cons_head, obj_table, n) do { \
-       unsigned int i; \
+#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) do { \
+       unsigned int i, j; \
        uint32_t idx = cons_head & (r)->mask; \
        const uint32_t size = (r)->size; \
        uint32_t *ring = (uint32_t *)ring_start; \
        uint32_t *obj = (uint32_t *)obj_table; \
-       if (likely(idx + n < size)) { \
-               for (i = 0; i < (n & (~(uint32_t)0x7)); i += 8, idx += 8) {\
-                       obj[i] = ring[idx]; \
-                       obj[i + 1] = ring[idx + 1]; \
-                       obj[i + 2] = ring[idx + 2]; \
-                       obj[i + 3] = ring[idx + 3]; \
-                       obj[i + 4] = ring[idx + 4]; \
-                       obj[i + 5] = ring[idx + 5]; \
-                       obj[i + 6] = ring[idx + 6]; \
-                       obj[i + 7] = ring[idx + 7]; \
+       uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
+       uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
+       uint32_t seg0 = size - idx; \
+       if (likely(n < seg0)) { \
+               for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
+                                               i += 8, nr_idx += 8) { \
+                       memcpy(obj + i, ring + nr_idx, 8 * sizeof (uint32_t)); \
                } \
-               switch (n & 0x7) { \
+               switch (nr_n & 0x7) { \
                case 7: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
+                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
                case 6: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
+                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
                case 5: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
+                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
                case 4: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
+                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
                case 3: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
+                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
                case 2: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
-               case 1: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
-               } \
-       } else { \
-               for (i = 0; idx < size; i++, idx++) \
-                       obj[i] = ring[idx]; \
-               for (idx = 0; i < n; i++, idx++) \
-                       obj[i] = ring[idx]; \
-       } \
-} while (0)
-
-#define DEQUEUE_PTRS_64(r, ring_start, cons_head, obj_table, n) do { \
-       unsigned int i; \
-       uint32_t idx = cons_head & (r)->mask; \
-       const uint32_t size = (r)->size; \
-       uint64_t *ring = (uint64_t *)ring_start; \
-       uint64_t *obj = (uint64_t *)obj_table; \
-       if (likely(idx + n < size)) { \
-               for (i = 0; i < (n & (~(uint32_t)0x3)); i += 4, idx += 4) {\
-                       obj[i] = ring[idx]; \
-                       obj[i + 1] = ring[idx + 1]; \
-                       obj[i + 2] = ring[idx + 2]; \
-                       obj[i + 3] = ring[idx + 3]; \
-               } \
-               switch (n & 0x3) { \
-               case 3: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
-               case 2: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
-               case 1: \
-                       obj[i++] = ring[idx++]; \
-               } \
-       } else { \
-               for (i = 0; idx < size; i++, idx++) \
-                       obj[i] = ring[idx]; \
-               for (idx = 0; i < n; i++, idx++) \
-                       obj[i] = ring[idx]; \
-       } \
-} while (0)
-
-#define DEQUEUE_PTRS_128(r, ring_start, cons_head, obj_table, n) do { \
-       unsigned int i; \
-       uint32_t idx = cons_head & (r)->mask; \
-       const uint32_t size = (r)->size; \
-       __uint128_t *ring = (__uint128_t *)ring_start; \
-       __uint128_t *obj = (__uint128_t *)obj_table; \
-       if (likely(idx + n < size)) { \
-               for (i = 0; i < (n >> 1); i += 2, idx += 2) { \
-                       obj[i] = ring[idx]; \
-                       obj[i + 1] = ring[idx + 1]; \
-               } \
-               switch (n & 0x1) { \
+                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
                case 1: \
-                       obj[i++] = ring[idx++]; /* fallthrough */ \
+                       obj[i++] = ring[nr_idx++]; /* fallthrough */ \
                } \
        } else { \
-               for (i = 0; idx < size; i++, idx++) \
-                       obj[i] = ring[idx]; \
-               for (idx = 0; i < n; i++, idx++) \
-                       obj[i] = ring[idx]; \
+               uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
+               uint32_t nr_seg1 = nr_n - nr_seg0; \
+               for (i = 0; i < nr_seg0; i++, nr_idx++)\
+                       obj[i] = ring[nr_idx];\
+               for (j = 0; j < nr_seg1; i++, j++) \
+                       obj[i] = ring[j]; \
        } \
 } while (0)
 
@@ -373,7 +242,7 @@ __rte_ring_do_enqueue_elem(struct rte_ring *r, void * const obj_table,
        if (n == 0)
                goto end;
 
-       ENQUEUE_PTRS_ELEM(r, &r[1], prod_head, obj_table, esize, n);
+       ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n);
 
        update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
 end:
@@ -420,7 +289,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table,
        if (n == 0)
                goto end;
 
-       DEQUEUE_PTRS_ELEM(r, &r[1], cons_head, obj_table, esize, n);
+       DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n);
 
        update_tail(&r->cons, cons_head, cons_next, is_sc, 0);
 
-- 
2.17.1

Reply via email to