Improve the copy functions used to move elements to and from the ring by replacing the ENQUEUE_PTRS_GEN/DEQUEUE_PTRS_GEN macros with static inline functions. Signed-off-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> Signed-off-by: Konstantin Ananyev <konstantin.anan...@intel.com> --- lib/librte_ring/rte_ring_elem.h | 165 ++++++++++++++++---------------- 1 file changed, 84 insertions(+), 81 deletions(-)
diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h index 0ce5f2be7..80ec3c562 100644 --- a/lib/librte_ring/rte_ring_elem.h +++ b/lib/librte_ring/rte_ring_elem.h @@ -109,85 +109,88 @@ __rte_experimental struct rte_ring *rte_ring_create_elem(const char *name, unsigned int count, unsigned int esize, int socket_id, unsigned int flags); -#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) do { \ - unsigned int i, j; \ - const uint32_t size = (r)->size; \ - uint32_t idx = prod_head & (r)->mask; \ - uint32_t *ring = (uint32_t *)ring_start; \ - uint32_t *obj = (uint32_t *)obj_table; \ - uint32_t nr_n = n * (esize / sizeof(uint32_t)); \ - uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \ - uint32_t seg0 = size - idx; \ - if (likely(n < seg0)) { \ - for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \ - i += 8, nr_idx += 8) { \ - memcpy(ring + nr_idx, obj + i, 8 * sizeof (uint32_t)); \ - } \ - switch (nr_n & 0x7) { \ - case 7: \ - ring[nr_idx++] = obj[i++]; /* fallthrough */ \ - case 6: \ - ring[nr_idx++] = obj[i++]; /* fallthrough */ \ - case 5: \ - ring[nr_idx++] = obj[i++]; /* fallthrough */ \ - case 4: \ - ring[nr_idx++] = obj[i++]; /* fallthrough */ \ - case 3: \ - ring[nr_idx++] = obj[i++]; /* fallthrough */ \ - case 2: \ - ring[nr_idx++] = obj[i++]; /* fallthrough */ \ - case 1: \ - ring[nr_idx++] = obj[i++]; /* fallthrough */ \ - } \ - } else { \ - uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \ - uint32_t nr_seg1 = nr_n - nr_seg0; \ - for (i = 0; i < nr_seg0; i++, nr_idx++)\ - ring[nr_idx] = obj[i]; \ - for (j = 0; j < nr_seg1; i++, j++) \ - ring[j] = obj[i]; \ - } \ -} while (0) - -#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) do { \ - unsigned int i, j; \ - uint32_t idx = cons_head & (r)->mask; \ - const uint32_t size = (r)->size; \ - uint32_t *ring = (uint32_t *)ring_start; \ - uint32_t *obj = (uint32_t *)obj_table; \ - uint32_t nr_n = n * (esize / sizeof(uint32_t)); \ - uint32_t 
nr_idx = idx * (esize / sizeof(uint32_t)); \ - uint32_t seg0 = size - idx; \ - if (likely(n < seg0)) { \ - for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \ - i += 8, nr_idx += 8) { \ - memcpy(obj + i, ring + nr_idx, 8 * sizeof (uint32_t)); \ - } \ - switch (nr_n & 0x7) { \ - case 7: \ - obj[i++] = ring[nr_idx++]; /* fallthrough */ \ - case 6: \ - obj[i++] = ring[nr_idx++]; /* fallthrough */ \ - case 5: \ - obj[i++] = ring[nr_idx++]; /* fallthrough */ \ - case 4: \ - obj[i++] = ring[nr_idx++]; /* fallthrough */ \ - case 3: \ - obj[i++] = ring[nr_idx++]; /* fallthrough */ \ - case 2: \ - obj[i++] = ring[nr_idx++]; /* fallthrough */ \ - case 1: \ - obj[i++] = ring[nr_idx++]; /* fallthrough */ \ - } \ - } else { \ - uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \ - uint32_t nr_seg1 = nr_n - nr_seg0; \ - for (i = 0; i < nr_seg0; i++, nr_idx++)\ - obj[i] = ring[nr_idx];\ - for (j = 0; j < nr_seg1; i++, j++) \ - obj[i] = ring[j]; \ - } \ -} while (0) +/* Copy nr_num 32-bit words from su32 to du32: whole groups of eight words go through memcpy, and the remaining 0-7 words are copied one at a time by the fall-through switch. */ static __rte_always_inline void +copy_elems(uint32_t du32[], const uint32_t su32[], uint32_t nr_num) +{ + uint32_t i; + + for (i = 0; i < (nr_num & ~7); i += 8) + memcpy(du32 + i, su32 + i, 8 * sizeof(uint32_t)); + + /* tail: each case copies one word indexed back from nr_num, then falls through to the next case */ switch (nr_num & 7) { + case 7: du32[nr_num - 7] = su32[nr_num - 7]; /* fallthrough */ + case 6: du32[nr_num - 6] = su32[nr_num - 6]; /* fallthrough */ + case 5: du32[nr_num - 5] = su32[nr_num - 5]; /* fallthrough */ + case 4: du32[nr_num - 4] = su32[nr_num - 4]; /* fallthrough */ + case 3: du32[nr_num - 3] = su32[nr_num - 3]; /* fallthrough */ + case 2: du32[nr_num - 2] = su32[nr_num - 2]; /* fallthrough */ + case 1: du32[nr_num - 1] = su32[nr_num - 1]; /* fallthrough */ + } +} + +/* Copy num elements of esize bytes each from obj_table into the ring storage at ring_start, beginning at slot (prod_head & r->mask). Offsets and lengths are converted to uint32_t units before copying. NOTE(review): this presumes esize is a multiple of sizeof(uint32_t) - confirm against callers. */ static __rte_always_inline void +enqueue_elems(struct rte_ring *r, void *ring_start, uint32_t prod_head, + void *obj_table, uint32_t num, uint32_t esize) +{ + uint32_t idx, nr_idx, nr_num; + uint32_t *du32; + const uint32_t *su32; + + const uint32_t size = r->size; + uint32_t s0, nr_s0, nr_s1; + + idx = prod_head & (r)->mask; 
/* Normalize the idx to uint32_t */ + nr_idx = (idx * esize) / sizeof(uint32_t); + + du32 = (uint32_t *)ring_start + nr_idx; + su32 = obj_table; + + /* Normalize the number of elements to uint32_t */ + nr_num = (num * esize) / sizeof(uint32_t); + + /* s0 is the number of slots from idx to the end of the ring storage */ s0 = size - idx; + if (num < s0) + copy_elems(du32, su32, nr_num); + else { + /* otherwise copy the s0 slots up to the end of the storage, then the remaining nr_s1 words to the start of ring_start */ nr_s0 = (s0 * esize) / sizeof(uint32_t); + nr_s1 = nr_num - nr_s0; + copy_elems(du32, su32, nr_s0); + copy_elems(ring_start, su32 + nr_s0, nr_s1); + } +} + +/* Copy num elements of esize bytes each out of the ring storage at ring_start, beginning at slot (cons_head & r->mask), into obj_table. Offsets and lengths are converted to uint32_t units before copying. NOTE(review): this presumes esize is a multiple of sizeof(uint32_t) - confirm against callers. */ static __rte_always_inline void +dequeue_elems(struct rte_ring *r, void *ring_start, uint32_t cons_head, + void *obj_table, uint32_t num, uint32_t esize) +{ + uint32_t idx, nr_idx, nr_num; + uint32_t *du32; + const uint32_t *su32; + + const uint32_t size = r->size; + uint32_t s0, nr_s0, nr_s1; + + idx = cons_head & (r)->mask; + /* Normalize the idx to uint32_t */ + nr_idx = (idx * esize) / sizeof(uint32_t); + + su32 = (uint32_t *)ring_start + nr_idx; + du32 = obj_table; + + /* Normalize the number of elements to uint32_t */ + nr_num = (num * esize) / sizeof(uint32_t); + + /* s0 is the number of slots from idx to the end of the ring storage */ s0 = size - idx; + if (num < s0) + copy_elems(du32, su32, nr_num); + else { + /* otherwise read the s0 slots up to the end of the storage, then the remaining nr_s1 words from the start of ring_start */ nr_s0 = (s0 * esize) / sizeof(uint32_t); + nr_s1 = nr_num - nr_s0; + copy_elems(du32, su32, nr_s0); + copy_elems(du32 + nr_s0, ring_start, nr_s1); + } +} /* Between load and load. there might be cpu reorder in weak model * (powerpc/arm). @@ -242,7 +245,7 @@ __rte_ring_do_enqueue_elem(struct rte_ring *r, void * const obj_table, if (n == 0) goto end; - ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n); + enqueue_elems(r, &r[1], prod_head, obj_table, n, esize); update_tail(&r->prod, prod_head, prod_next, is_sp, 1); end: @@ -289,7 +292,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table, if (n == 0) goto end; - DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n); + dequeue_elems(r, &r[1], cons_head, obj_table, n, esize); update_tail(&r->cons, cons_head, cons_next, is_sc, 0); -- 2.17.1