Yeah, that's better. As below, right? @@ -290,6 +294,7 @@ rte_distributor_process(struct rte_distributor *d, match |= (!(d->in_flight_tags[i] ^ new_tag) << i);
+ match &= d->in_flight_bitmask; if (match) { next_mb = NULL; unsigned worker = __builtin_ctz(match); On Fri, Nov 7, 2014 at 4:44 PM, Bruce Richardson <bruce.richardson at intel.com > wrote: > On Fri, Nov 07, 2014 at 04:31:18PM +0200, jigsaw wrote: > > Hi Bruce, > > > > Pls have a quick look at the diff to see if this is exactly what you mean > > about the bitmask. > > I just wrote it without even compiling, just to express the idea. So it > may > > leave some places unpatched. > > If this is agreed, I will make a decent test to verify it before sending > > the patch for RFC. > > > > diff --git a/lib/librte_distributor/rte_distributor.c > > b/lib/librte_distributor/rte_di > > index 585ff88..d606bcf 100644 > > --- a/lib/librte_distributor/rte_distributor.c > > +++ b/lib/librte_distributor/rte_distributor.c > > @@ -92,6 +92,8 @@ struct rte_distributor { > > unsigned num_workers; /**< Number of workers > > polling */ > > > > uint32_t in_flight_tags[RTE_MAX_LCORE]; > > + uint32_t in_flight_bitmask; > > + > > struct rte_distributor_backlog backlog[RTE_MAX_LCORE]; > > > > union rte_distributor_buffer bufs[RTE_MAX_LCORE]; > > @@ -188,6 +190,7 @@ static inline void > > handle_worker_shutdown(struct rte_distributor *d, unsigned wkr) > > { > > d->in_flight_tags[wkr] = 0; > > + d->in_flight_mask &= ~(1 << wkr); > > d->bufs[wkr].bufptr64 = 0; > > if (unlikely(d->backlog[wkr].count != 0)) { > > /* On return of a packet, we need to move the > > @@ -241,6 +244,7 @@ process_returns(struct rte_distributor *d) > > else { > > d->bufs[wkr].bufptr64 = > RTE_DISTRIB_GET_BUF; > > d->in_flight_tags[wkr] = 0; > > + d->in_flight_mask &= ~(1 << wkr); > > } > > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > } else if (data & RTE_DISTRIB_RETURN_BUF) { > > @@ -282,12 +286,13 @@ rte_distributor_process(struct rte_distributor *d, > > next_mb = mbufs[next_idx++]; > > next_value = (((int64_t)(uintptr_t)next_mb) > > << RTE_DISTRIB_FLAG_BITS); > > - new_tag = (next_mb->hash.rss | 1); > > + new_tag = 
next_mb->hash.rss; > > > > uint32_t match = 0; > > unsigned i; > > for (i = 0; i < d->num_workers; i++) > > - match |= (!(d->in_flight_tags[i] ^ > new_tag) > > + match |= (((!(d->in_flight_tags[i] ^ > > new_tag)) & > > + (d->in_flight_bitmask >> > i)) > > I would not do the bitmask comparison here, as that's an extra instruction in > the > loop. Instead, because it's a bitmask, build up the match variable as it was > before, and then just do a single AND operation afterwards, outside the > loop > body. > > /Bruce > > > << i); > > > > if (match) { > > @@ -309,6 +314,7 @@ rte_distributor_process(struct rte_distributor *d, > > else { > > d->bufs[wkr].bufptr64 = next_value; > > d->in_flight_tags[wkr] = new_tag; > > + d->in_flight_bitmask |= 1 << wkr; > > next_mb = NULL; > > } > > oldbuf = data >> RTE_DISTRIB_FLAG_BITS; > > > > > > >