On Tue, Feb 21, 2017 at 03:17:40AM +0000, David Hunt wrote: > Signed-off-by: David Hunt <david.h...@intel.com> > --- > lib/librte_distributor/rte_distributor_private.h | 61 > ++++++++++++++++++++++++ > 1 file changed, 61 insertions(+) > > diff --git a/lib/librte_distributor/rte_distributor_private.h > b/lib/librte_distributor/rte_distributor_private.h > index 2d85b9b..c8e0f98 100644 > --- a/lib/librte_distributor/rte_distributor_private.h > +++ b/lib/librte_distributor/rte_distributor_private.h > @@ -129,6 +129,67 @@ struct rte_distributor_v20 { > struct rte_distributor_returned_pkts returns; > }; > > +/* All different signature compare functions */ > +enum rte_distributor_match_function { > + RTE_DIST_MATCH_SCALAR = 0, > + RTE_DIST_MATCH_VECTOR, > + RTE_DIST_NUM_MATCH_FNS > +}; > + > +/** > + * Buffer structure used to pass the pointer data between cores. This is > cache > + * line aligned, but to improve performance and prevent adjacent cache-line > + * prefetches of buffers for other workers, e.g. when worker 1's buffer is on > + * the next cache line to worker 0, we pad this out to two cache lines. > + * We can pass up to 8 mbufs at a time in one cacheline. > + * There is a separate cacheline for returns in the burst API. > + */ > +struct rte_distributor_buffer { > + volatile int64_t bufptr64[RTE_DIST_BURST_SIZE] > + __rte_cache_aligned; /* <= outgoing to worker */ > + > + int64_t pad1 __rte_cache_aligned; /* <= one cache line */ > + > + volatile int64_t retptr64[RTE_DIST_BURST_SIZE] > + __rte_cache_aligned; /* <= incoming from worker */ > + > + int64_t pad2 __rte_cache_aligned; /* <= one cache line */ > + > + int count __rte_cache_aligned; /* <= number of current mbufs */ > +};
Rather than adding padding elements here, would it be better and clearer to simply align each field to 128 bytes (or, more strictly, to CACHE_LINE_SZ * 2)? /Bruce