> > > > fix patch > > ======= > > > > From a2be5a9b136333a56d466ef042c655e522ca7012 Mon Sep 17 00:00:00 > > 2001 > > From: Konstantin Ananyev <konstantin.anan...@intel.com> > > Date: Fri, 18 Oct 2019 15:50:43 +0100 > > Subject: [PATCH] fix1 > > > > Signed-off-by: Konstantin Ananyev <konstantin.anan...@intel.com> > > --- > > lib/librte_ring/rte_ring_elem.h | 4 ++-- > > 1 file changed, 2 insertions(+), 2 deletions(-) > > > > diff --git a/lib/librte_ring/rte_ring_elem.h > > b/lib/librte_ring/rte_ring_elem.h > > index 92e92f150..5e1819069 100644 > > --- a/lib/librte_ring/rte_ring_elem.h > > +++ b/lib/librte_ring/rte_ring_elem.h > > @@ -118,7 +118,7 @@ struct rte_ring *rte_ring_create_elem(const char > > *name, unsigned count, > > uint32_t sz = n * (esize / sizeof(uint32_t)); \ > > if (likely(idx + n < size)) { \ > > for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += > > 8) { \ > > - memcpy (ring + i, obj + i, 8 * sizeof (uint32_t)); \ > > + memcpy (ring + idx, obj + i, 8 * sizeof > > + (uint32_t)); \ > > } \ > > switch (n & 0x7) { \ > > case 7: \ > > @@ -153,7 +153,7 @@ struct rte_ring *rte_ring_create_elem(const char > > *name, unsigned count, > > uint32_t sz = n * (esize / sizeof(uint32_t)); \ > > if (likely(idx + n < size)) { \ > > for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += > > 8) { \ > > - memcpy (obj + i, ring + i, 8 * sizeof (uint32_t)); \ > > + memcpy (obj + i, ring + idx, 8 * sizeof > Actually, this fix alone is not enough. 'idx' needs to be normalized to > elements of type 'uint32_t'. 
> > > + (uint32_t)); \ > > } \ > > switch (n & 0x7) { \ > > case 7: \ > > -- > > 2.17.1 > > > > update patch (remove macros) > > ========================= > > > > From 18b388e877b97e243f807f27a323e876b30869dd Mon Sep 17 00:00:00 > > 2001 > > From: Konstantin Ananyev <konstantin.anan...@intel.com> > > Date: Fri, 18 Oct 2019 17:35:43 +0100 > > Subject: [PATCH] update1 > > > > Signed-off-by: Konstantin Ananyev <konstantin.anan...@intel.com> > > --- > > lib/librte_ring/rte_ring_elem.h | 141 ++++++++++++++++---------------- > > 1 file changed, 70 insertions(+), 71 deletions(-) > > > > diff --git a/lib/librte_ring/rte_ring_elem.h > > b/lib/librte_ring/rte_ring_elem.h > > index 5e1819069..eb706b12f 100644 > > --- a/lib/librte_ring/rte_ring_elem.h > > +++ b/lib/librte_ring/rte_ring_elem.h > > @@ -109,75 +109,74 @@ __rte_experimental struct rte_ring > > *rte_ring_create_elem(const char *name, unsigned count, > > unsigned esize, int socket_id, unsigned > > flags); > > > > -#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) > > do { \ > > - unsigned int i; \ > > - const uint32_t size = (r)->size; \ > > - uint32_t idx = prod_head & (r)->mask; \ > > - uint32_t *ring = (uint32_t *)ring_start; \ > > - uint32_t *obj = (uint32_t *)obj_table; \ > > - uint32_t sz = n * (esize / sizeof(uint32_t)); \ > > - if (likely(idx + n < size)) { \ > > - for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += > > 8) { \ > > - memcpy (ring + idx, obj + i, 8 * sizeof > > (uint32_t)); \ > > - } \ > > - switch (n & 0x7) { \ > > - case 7: \ > > - ring[idx++] = obj[i++]; /* fallthrough */ \ > > - case 6: \ > > - ring[idx++] = obj[i++]; /* fallthrough */ \ > > - case 5: \ > > - ring[idx++] = obj[i++]; /* fallthrough */ \ > > - case 4: \ > > - ring[idx++] = obj[i++]; /* fallthrough */ \ > > - case 3: \ > > - ring[idx++] = obj[i++]; /* fallthrough */ \ > > - case 2: \ > > - ring[idx++] = obj[i++]; /* fallthrough */ \ > > - case 1: \ > > - ring[idx++] = obj[i++]; /* fallthrough */ \ 
> > - } \ > > - } else { \ > > - for (i = 0; idx < size; i++, idx++)\ > > - ring[idx] = obj[i]; \ > > - for (idx = 0; i < n; i++, idx++) \ > > - ring[idx] = obj[i]; \ > > - } \ > > -} while (0) > > - > > -#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) > > do { \ > > - unsigned int i; \ > > - uint32_t idx = cons_head & (r)->mask; \ > > - const uint32_t size = (r)->size; \ > > - uint32_t *ring = (uint32_t *)ring_start; \ > > - uint32_t *obj = (uint32_t *)obj_table; \ > > - uint32_t sz = n * (esize / sizeof(uint32_t)); \ > > - if (likely(idx + n < size)) { \ > > - for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += > > 8) { \ > > - memcpy (obj + i, ring + idx, 8 * sizeof > > (uint32_t)); \ > > - } \ > > - switch (n & 0x7) { \ > > - case 7: \ > > - obj[i++] = ring[idx++]; /* fallthrough */ \ > > - case 6: \ > > - obj[i++] = ring[idx++]; /* fallthrough */ \ > > - case 5: \ > > - obj[i++] = ring[idx++]; /* fallthrough */ \ > > - case 4: \ > > - obj[i++] = ring[idx++]; /* fallthrough */ \ > > - case 3: \ > > - obj[i++] = ring[idx++]; /* fallthrough */ \ > > - case 2: \ > > - obj[i++] = ring[idx++]; /* fallthrough */ \ > > - case 1: \ > > - obj[i++] = ring[idx++]; /* fallthrough */ \ > > - } \ > > - } else { \ > > - for (i = 0; idx < size; i++, idx++) \ > > - obj[i] = ring[idx]; \ > > - for (idx = 0; i < n; i++, idx++) \ > > - obj[i] = ring[idx]; \ > > - } \ > > -} while (0) > > +static __rte_always_inline void > > +copy_elems(uint32_t du32[], const uint32_t su32[], uint32_t num, > > +uint32_t esize) { > > + uint32_t i, sz; > > + > > + sz = (num * esize) / sizeof(uint32_t); > > + > > + for (i = 0; i < (sz & ~7); i += 8) > > + memcpy(du32 + i, su32 + i, 8 * sizeof(uint32_t)); > > + > > + switch (sz & 7) { > > + case 7: du32[sz - 7] = su32[sz - 7]; /* fallthrough */ > > + case 6: du32[sz - 6] = su32[sz - 6]; /* fallthrough */ > > + case 5: du32[sz - 5] = su32[sz - 5]; /* fallthrough */ > > + case 4: du32[sz - 4] = su32[sz - 4]; /* fallthrough 
*/ > > + case 3: du32[sz - 3] = su32[sz - 3]; /* fallthrough */ > > + case 2: du32[sz - 2] = su32[sz - 2]; /* fallthrough */ > > + case 1: du32[sz - 1] = su32[sz - 1]; /* fallthrough */ > > + } > > +} > > + > > +static __rte_always_inline void > > +enqueue_elems(struct rte_ring *r, void *ring_start, uint32_t prod_head, > > + void *obj_table, uint32_t num, uint32_t esize) { > > + uint32_t idx, n; > > + uint32_t *du32; > > + const uint32_t *su32; > > + > > + const uint32_t size = r->size; > > + > > + idx = prod_head & (r)->mask; > Same here, 'idx' needs to be normalized to elements of type 'uint32_t' and > similar fixes on other variables.
Oops, true, my bad. > I have applied your > suggestion in 6/6 in v6 along with my corrections. The rte_ring_elem test > cases are added in 3/6. I have verified that they are running > fine (they are done for 64b alone, will add more). Hopefully, there are no > more errors. Cool, we'll re-run perf test on my box. Thanks Konstantin > > > + > > + du32 = (uint32_t *)ring_start + idx; > > + su32 = obj_table; > > + > > + if (idx + num < size) > > + copy_elems(du32, su32, num, esize); > > + else { > > + n = size - idx; > > + copy_elems(du32, su32, n, esize); > > + copy_elems(ring_start, su32 + n, num - n, esize); > > + } > > +} > > + > > +static __rte_always_inline void > > +dequeue_elems(struct rte_ring *r, void *ring_start, uint32_t cons_head, > > + void *obj_table, uint32_t num, uint32_t esize) { > > + uint32_t idx, n; > > + uint32_t *du32; > > + const uint32_t *su32; > > + > > + const uint32_t size = r->size; > > + > > + idx = cons_head & (r)->mask; > > + > > + su32 = (uint32_t *)ring_start + idx; > > + du32 = obj_table; > > + > > + if (idx + num < size) > > + copy_elems(du32, su32, num, esize); > > + else { > > + n = size - idx; > > + copy_elems(du32, su32, n, esize); > > + copy_elems(du32 + n, ring_start, num - n, esize); > > + } > > +} > > > > /* Between load and load. there might be cpu reorder in weak model > > * (powerpc/arm). 
> > @@ -232,7 +231,7 @@ __rte_ring_do_enqueue_elem(struct rte_ring *r, void > > * const obj_table, > > if (n == 0) > > goto end; > > > > - ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n); > > + enqueue_elems(r, &r[1], prod_head, obj_table, n, esize); > > > > update_tail(&r->prod, prod_head, prod_next, is_sp, 1); > > end: > > @@ -279,7 +278,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void > > *obj_table, > > if (n == 0) > > goto end; > > > > - DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n); > > + dequeue_elems(r, &r[1], cons_head, obj_table, n, esize); > > > > update_tail(&r->cons, cons_head, cons_next, is_sc, 0); > > > > -- > > 2.17.1 > >