> -----Original Message----- > From: Konstantin Ananyev <konstantin.v.anan...@yandex.ru> > Sent: Tuesday, June 21, 2022 4:43 AM > To: Rahul Bhansali <rbhans...@marvell.com>; dev@dpdk.org; Ruifeng Wang > <ruifeng.w...@arm.com> > Cc: Jerin Jacob Kollanukkaran <jer...@marvell.com> > Subject: [EXT] Re: [PATCH v2 1/2] examples/l3fwd: common packet group > functionality > > External Email > > ---------------------------------------------------------------------- > 17/06/2022 08:50, Rahul Bhansali пишет: > > CC: Konstantin Ananyev > > > >> -----Original Message----- > >> From: Rahul Bhansali <rbhans...@marvell.com> > >> Sent: Friday, June 17, 2022 1:13 PM > >> To: dev@dpdk.org; Ruifeng Wang <ruifeng.w...@arm.com> > >> Cc: Jerin Jacob Kollanukkaran <jer...@marvell.com>; Rahul Bhansali > >> <rbhans...@marvell.com> > >> Subject: [PATCH v2 1/2] examples/l3fwd: common packet group > >> functionality > >> > >> This will make the packet grouping function common, so that other > >> examples can utilize as per need. > >> > >> Signed-off-by: Rahul Bhansali <rbhans...@marvell.com> > >> --- > >> Changes in v2: New patch to address review comment. > >> > >> examples/common/neon_common.h | 50 ++++++++++++ > >> examples/common/pkt_group.h | 139 > >> ++++++++++++++++++++++++++++++++++ > >> examples/l3fwd/Makefile | 5 +- > >> examples/l3fwd/l3fwd.h | 2 - > >> examples/l3fwd/l3fwd_common.h | 129 +------------------------------ > >> examples/l3fwd/l3fwd_neon.h | 43 +---------- > >> examples/meson.build | 2 +- > >> 7 files changed, 198 insertions(+), 172 deletions(-) create mode > >> 100644 examples/common/neon_common.h create mode 100644 > >> examples/common/pkt_group.h > >> > >> diff --git a/examples/common/neon_common.h > >> b/examples/common/neon_common.h new file mode 100644 index > >> 0000000000..f01b5ab6bc > >> --- /dev/null > >> +++ b/examples/common/neon_common.h > >> @@ -0,0 +1,50 @@ > >> +/* SPDX-License-Identifier: BSD-3-Clause > >> + * Copyright(c) 2016-2018 Intel Corporation. > >> + * Copyright(c) 2017-2018 Linaro Limited. > >> + * Copyright(C) 2022 Marvell. > >> + */ > >> + > >> +#ifndef _NEON_COMMON_H_ > >> +#define _NEON_COMMON_H_ > >> + > >> +#include "pkt_group.h" > >> + > >> +/* > >> + * Group consecutive packets with the same destination port in bursts of > >> 4. > >> + * Suppose we have array of destination ports: > >> + * dst_port[] = {a, b, c, d,, e, ... } > >> + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. > >> + * We doing 4 comparisons at once and the result is 4 bit mask. > >> + * This mask is used as an index into prebuild array of pnum values. > >> + */ > >> +static inline uint16_t * > >> +neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t > dp1, > >> + uint16x8_t dp2) > >> +{ > >> + union { > >> + uint16_t u16[FWDSTEP + 1]; > >> + uint64_t u64; > >> + } *pnum = (void *)pn; > >> + > >> + uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; > >> + int32_t v; > >> + > >> + dp1 = vceqq_u16(dp1, dp2); > >> + dp1 = vandq_u16(dp1, mask); > >> + v = vaddvq_u16(dp1); > >> + > >> + /* update last port counter. */hh > >> + lp[0] += gptbl[v].lpv; > >> + rte_compiler_barrier(); > >> + > >> + /* if dest port value has changed. */ > >> + if (v != GRPMSK) { > >> + pnum->u64 = gptbl[v].pnum; > >> + pnum->u16[FWDSTEP] = 1; > >> + lp = pnum->u16 + gptbl[v].idx; > >> + } > >> + > >> + return lp; > >> +} > > Thanks for the effort. > As I can see this function: port_groupx4() is nearly identical for all 3 > platforms: sse/nenon/altivec (except of course built-in arch-specific > instincts). > In fact, even comemnts are identical. > I wonder can we have something like: > examples/common/<arch>/port_group.h > and for each arch will have defined port_groupx4(...) ? > Yes, It’s a good point. I was thinking to have arch in file name itself. But we can have arch specific directory and have different header files. Do you want me to make changes for all 3 sse/neon/altivec or just neon ? I can check compilation for all but functionality/perf validate for Neon only.
> >> + > >> +#endif /* _NEON_COMMON_H_ */ > >> diff --git a/examples/common/pkt_group.h > >> b/examples/common/pkt_group.h new file mode 100644 index > >> 0000000000..8b26d9380f > >> --- /dev/null > >> +++ b/examples/common/pkt_group.h > >> @@ -0,0 +1,139 @@ > >> +/* SPDX-License-Identifier: BSD-3-Clause > >> + * Copyright(c) 2016-2018 Intel Corporation. > >> + * Copyright(c) 2017-2018 Linaro Limited. > >> + * Copyright(C) 2022 Marvell. > >> + */ > >> + > >> +#ifndef _PKT_GROUP_H_ > >> +#define _PKT_GROUP_H_ > >> + > >> +#define FWDSTEP 4 > >> + > >> +/* > >> + * Group consecutive packets with the same destination port into one > >> burst. > >> + * To avoid extra latency this is done together with some other > >> +packet > >> + * processing, but after we made a final decision about packet's > >> destination. > >> + * To do this we maintain: > >> + * pnum - array of number of consecutive packets with the same dest > >> +port for > >> + * each packet in the input burst. > >> + * lp - pointer to the last updated element in the pnum. > >> + * dlp - dest port value lp corresponds to. > >> + */ > >> + > >> +#define GRPSZ (1 << FWDSTEP) > >> +#define GRPMSK (GRPSZ - 1) > >> + > >> +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ > >> + if (likely((dlp) == (dcp)[(idx)])) { \ > >> + (lp)[0]++; \ > >> + } else { \ > >> + (dlp) = (dcp)[idx]; \ > >> + (lp) = (pn) + (idx); \ > >> + (lp)[0] = 1; \ > >> + } \ > >> +} while (0) > >> + > >> +static const struct { > >> + uint64_t pnum; /* prebuild 4 values for pnum[]. */ > >> + int32_t idx; /* index for new last updated elemnet. */ > >> + uint16_t lpv; /* add value to the last updated element. */ } > >> +gptbl[GRPSZ] = { > >> + { > >> + /* 0: a != b, b != c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100010001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 1: a == b, b != c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100010002), > >> + .idx = 4, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 2: a != b, b == c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100020001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 3: a == b, b == c, c != d, d != e */ > >> + .pnum = UINT64_C(0x0001000100020003), > >> + .idx = 4, > >> + .lpv = 2, > >> + }, > >> + { > >> + /* 4: a != b, b != c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200010001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 5: a == b, b != c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200010002), > >> + .idx = 4, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 6: a != b, b == c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200030001), > >> + .idx = 4, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 7: a == b, b == c, c == d, d != e */ > >> + .pnum = UINT64_C(0x0001000200030004), > >> + .idx = 4, > >> + .lpv = 3, > >> + }, > >> + { > >> + /* 8: a != b, b != c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100010001), > >> + .idx = 3, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 9: a == b, b != c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100010002), > >> + .idx = 3, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 0xa: a != b, b == c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100020001), > >> + .idx = 3, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 0xb: a == b, b == c, c != d, d == e */ > >> + .pnum = UINT64_C(0x0002000100020003), > >> + .idx = 3, > >> + .lpv = 2, > >> + }, > >> + { > >> + /* 0xc: a != b, b != c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300010001), > >> + .idx = 2, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 0xd: a == b, b != c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300010002), > >> + .idx = 2, > >> + .lpv = 1, > >> + }, > >> + { > >> + /* 0xe: a != b, b == c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300040001), > >> + .idx = 1, > >> + .lpv = 0, > >> + }, > >> + { > >> + /* 0xf: a == b, b == c, c == d, d == e */ > >> + .pnum = UINT64_C(0x0002000300040005), > >> + .idx = 0, > >> + .lpv = 4, > >> + }, > >> +}; > >> + > >> +#endif /* _PKT_GROUP_H_ */ > >> diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index > >> 8efe6378e2..8dbe85c2e6 100644 > >> --- a/examples/l3fwd/Makefile > >> +++ b/examples/l3fwd/Makefile > >> @@ -22,6 +22,7 @@ shared: build/$(APP)-shared > >> static: build/$(APP)-static > >> ln -sf $(APP)-static build/$(APP) > >> > >> +INCLUDES =-I../common > >> PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null) CFLAGS > >> += -O3 $(shell $(PKGCONF) --cflags libdpdk) # Added for > 'rte_eth_link_to_str()' > >> @@ -38,10 +39,10 @@ endif > >> endif > >> > >> build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build > >> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED) > >> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) > >> +$(LDFLAGS_SHARED) > >> > >> build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build > >> - $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC) > >> + $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) > >> +$(LDFLAGS_STATIC) > >> > >> build: > >> @mkdir -p $@ > >> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index > >> 8a52c90755..40b5f32a9e 100644 > >> --- a/examples/l3fwd/l3fwd.h > >> +++ b/examples/l3fwd/l3fwd.h > >> @@ -44,8 +44,6 @@ > >> /* Used to mark destination port as 'invalid'. */ > >> #define BAD_PORT ((uint16_t)-1) > >> > >> -#define FWDSTEP 4 > >> - > >> /* replace first 12B of the ethernet header. */ > >> #define MASK_ETH 0x3f > >> > >> diff --git a/examples/l3fwd/l3fwd_common.h > >> b/examples/l3fwd/l3fwd_common.h index 8e4c27218f..224b1c08e8 100644 > >> --- a/examples/l3fwd/l3fwd_common.h > >> +++ b/examples/l3fwd/l3fwd_common.h > >> @@ -7,6 +7,8 @@ > >> #ifndef _L3FWD_COMMON_H_ > >> #define _L3FWD_COMMON_H_ > >> > >> +#include "pkt_group.h" > >> + > >> #ifdef DO_RFC_1812_CHECKS > >> > >> #define IPV4_MIN_VER_IHL 0x45 > >> @@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, > >> uint16_t *dp, uint32_t ptype) > >> #define rfc1812_process(mb, dp, ptype) do { } while (0) > >> #endif /* DO_RFC_1812_CHECKS */ > >> > >> -/* > >> - * We group consecutive packets with the same destination port into one > burst. > >> - * To avoid extra latency this is done together with some other > >> packet > >> - * processing, but after we made a final decision about packet's > >> destination. > >> - * To do this we maintain: > >> - * pnum - array of number of consecutive packets with the same dest > >> port for > >> - * each packet in the input burst. > >> - * lp - pointer to the last updated element in the pnum. > >> - * dlp - dest port value lp corresponds to. > >> - */ > >> - > >> -#define GRPSZ (1 << FWDSTEP) > >> -#define GRPMSK (GRPSZ - 1) > >> - > >> -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \ > >> - if (likely((dlp) == (dcp)[(idx)])) { \ > >> - (lp)[0]++; \ > >> - } else { \ > >> - (dlp) = (dcp)[idx]; \ > >> - (lp) = (pn) + (idx); \ > >> - (lp)[0] = 1; \ > >> - } \ > >> -} while (0) > >> - > >> -static const struct { > >> - uint64_t pnum; /* prebuild 4 values for pnum[]. */ > >> - int32_t idx; /* index for new last updated element. */ > >> - uint16_t lpv; /* add value to the last updated element. */ > >> -} gptbl[GRPSZ] = { > >> - { > >> - /* 0: a != b, b != c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100010001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 1: a == b, b != c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100010002), > >> - .idx = 4, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 2: a != b, b == c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100020001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 3: a == b, b == c, c != d, d != e */ > >> - .pnum = UINT64_C(0x0001000100020003), > >> - .idx = 4, > >> - .lpv = 2, > >> - }, > >> - { > >> - /* 4: a != b, b != c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200010001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 5: a == b, b != c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200010002), > >> - .idx = 4, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 6: a != b, b == c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200030001), > >> - .idx = 4, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 7: a == b, b == c, c == d, d != e */ > >> - .pnum = UINT64_C(0x0001000200030004), > >> - .idx = 4, > >> - .lpv = 3, > >> - }, > >> - { > >> - /* 8: a != b, b != c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100010001), > >> - .idx = 3, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 9: a == b, b != c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100010002), > >> - .idx = 3, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 0xa: a != b, b == c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100020001), > >> - .idx = 3, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 0xb: a == b, b == c, c != d, d == e */ > >> - .pnum = UINT64_C(0x0002000100020003), > >> - .idx = 3, > >> - .lpv = 2, > >> - }, > >> - { > >> - /* 0xc: a != b, b != c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300010001), > >> - .idx = 2, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 0xd: a == b, b != c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300010002), > >> - .idx = 2, > >> - .lpv = 1, > >> - }, > >> - { > >> - /* 0xe: a != b, b == c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300040001), > >> - .idx = 1, > >> - .lpv = 0, > >> - }, > >> - { > >> - /* 0xf: a == b, b == c, c == d, d == e */ > >> - .pnum = UINT64_C(0x0002000300040005), > >> - .idx = 0, > >> - .lpv = 4, > >> - }, > >> -}; > >> - > >> static __rte_always_inline void > >> send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf > *m[], > >> uint32_t num) > >> diff --git a/examples/l3fwd/l3fwd_neon.h > >> b/examples/l3fwd/l3fwd_neon.h index e3d33a5229..5fa765b640 100644 > >> --- a/examples/l3fwd/l3fwd_neon.h > >> +++ b/examples/l3fwd/l3fwd_neon.h > >> @@ -7,6 +7,7 @@ > >> #define _L3FWD_NEON_H_ > >> > >> #include "l3fwd.h" > >> +#include "neon_common.h" > >> #include "l3fwd_common.h" > >> > >> /* > >> @@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], > >> uint16_t dst_port[FWDSTEP]) > >> &dst_port[3], pkt[3]->packet_type); > >> } > >> > >> -/* > >> - * Group consecutive packets with the same destination port in bursts of > >> 4. > >> - * Suppose we have array of destination ports: > >> - * dst_port[] = {a, b, c, d,, e, ... } > >> - * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>. > >> - * We doing 4 comparisons at once and the result is 4 bit mask. > >> - * This mask is used as an index into prebuild array of pnum values. > >> - */ > >> -static inline uint16_t * > >> -port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1, > >> - uint16x8_t dp2) > >> -{ > >> - union { > >> - uint16_t u16[FWDSTEP + 1]; > >> - uint64_t u64; > >> - } *pnum = (void *)pn; > >> - > >> - int32_t v; > >> - uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0}; > >> - > >> - dp1 = vceqq_u16(dp1, dp2); > >> - dp1 = vandq_u16(dp1, mask); > >> - v = vaddvq_u16(dp1); > >> - > >> - /* update last port counter. */ > >> - lp[0] += gptbl[v].lpv; > >> - rte_compiler_barrier(); > >> - > >> - /* if dest port value has changed. */ > >> - if (v != GRPMSK) { > >> - pnum->u64 = gptbl[v].pnum; > >> - pnum->u16[FWDSTEP] = 1; > >> - lp = pnum->u16 + gptbl[v].idx; > >> - } > >> - > >> - return lp; > >> -} > >> - > >> /** > >> * Process one packet: > >> * Update source and destination MAC addresses in the ethernet header. > >> @@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, > >> struct rte_mbuf **pkts_burst, > >> * <d[j-3], d[j-2], d[j-1], d[j], ... > > >> */ > >> dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]); > >> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > >> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, > >> dp2); > >> > >> /* > >> * dp1: > >> @@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, > >> struct rte_mbuf **pkts_burst, > >> */ > >> dp2 = vextq_u16(dp1, dp1, 1); > >> dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3); > >> - lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > >> + lp = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2); > >> > >> /* > >> * remove values added by the last repeated diff --git > >> a/examples/meson.build b/examples/meson.build index > >> 78de0e1f37..81e93799f2 100644 > >> --- a/examples/meson.build > >> +++ b/examples/meson.build > >> @@ -97,7 +97,7 @@ foreach example: examples > >> ldflags = default_ldflags > >> > >> ext_deps = [] > >> - includes = [include_directories(example)] > >> + includes = [include_directories(example, 'common')] > >> deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline'] > >> subdir(example) > >> > >> -- > >> 2.25.1 > >