CC: Konstantin Ananyev

> -----Original Message-----
> From: Rahul Bhansali <rbhans...@marvell.com>
> Sent: Friday, June 17, 2022 1:13 PM
> To: dev@dpdk.org; Ruifeng Wang <ruifeng.w...@arm.com>
> Cc: Jerin Jacob Kollanukkaran <jer...@marvell.com>; Rahul Bhansali
> <rbhans...@marvell.com>
> Subject: [PATCH v2 1/2] examples/l3fwd: common packet group functionality
> 
> This makes the packet grouping function common, so that other examples
> can use it as needed.
> 
> Signed-off-by: Rahul Bhansali <rbhans...@marvell.com>
> ---
> Changes in v2: New patch to address review comment.
> 
>  examples/common/neon_common.h |  50 ++++++++++++
>  examples/common/pkt_group.h   | 139 ++++++++++++++++++++++++++++++++++
>  examples/l3fwd/Makefile       |   5 +-
>  examples/l3fwd/l3fwd.h        |   2 -
>  examples/l3fwd/l3fwd_common.h | 129 +------------------------------
>  examples/l3fwd/l3fwd_neon.h   |  43 +----------
>  examples/meson.build          |   2 +-
>  7 files changed, 198 insertions(+), 172 deletions(-)
>  create mode 100644 examples/common/neon_common.h
>  create mode 100644 examples/common/pkt_group.h
> 
> diff --git a/examples/common/neon_common.h b/examples/common/neon_common.h
> new file mode 100644
> index 0000000000..f01b5ab6bc
> --- /dev/null
> +++ b/examples/common/neon_common.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2016-2018 Intel Corporation.
> + * Copyright(c) 2017-2018 Linaro Limited.
> + * Copyright(C) 2022 Marvell.
> + */
> +
> +#ifndef _NEON_COMMON_H_
> +#define _NEON_COMMON_H_
> +
> +#include "pkt_group.h"
> +
> +/*
> + * Group consecutive packets with the same destination port in bursts of 4.
> + * Suppose we have array of destination ports:
> + * dst_port[] = {a, b, c, d, e, ... }
> + * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
> + * We do 4 comparisons at once and the result is a 4-bit mask.
> + * This mask is used as an index into a prebuilt array of pnum values.
> + */
> +static inline uint16_t *
> +neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1,
> +               uint16x8_t dp2)
> +{
> +     union {
> +             uint16_t u16[FWDSTEP + 1];
> +             uint64_t u64;
> +     } *pnum = (void *)pn;
> +
> +     uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0};
> +     int32_t v;
> +
> +     dp1 = vceqq_u16(dp1, dp2);
> +     dp1 = vandq_u16(dp1, mask);
> +     v = vaddvq_u16(dp1);
> +
> +     /* update last port counter. */
> +     lp[0] += gptbl[v].lpv;
> +     rte_compiler_barrier();
> +
> +     /* if dest port value has changed. */
> +     if (v != GRPMSK) {
> +             pnum->u64 = gptbl[v].pnum;
> +             pnum->u16[FWDSTEP] = 1;
> +             lp = pnum->u16 + gptbl[v].idx;
> +     }
> +
> +     return lp;
> +}
> +
> +#endif /* _NEON_COMMON_H_ */
> diff --git a/examples/common/pkt_group.h b/examples/common/pkt_group.h
> new file mode 100644
> index 0000000000..8b26d9380f
> --- /dev/null
> +++ b/examples/common/pkt_group.h
> @@ -0,0 +1,139 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2016-2018 Intel Corporation.
> + * Copyright(c) 2017-2018 Linaro Limited.
> + * Copyright(C) 2022 Marvell.
> + */
> +
> +#ifndef _PKT_GROUP_H_
> +#define _PKT_GROUP_H_
> +
> +#define FWDSTEP      4
> +
> +/*
> + * Group consecutive packets with the same destination port into one burst.
> + * To avoid extra latency this is done together with some other packet
> + * processing, but after we made a final decision about packet's destination.
> + * To do this we maintain:
> + * pnum - array of number of consecutive packets with the same dest port for
> + * each packet in the input burst.
> + * lp - pointer to the last updated element in the pnum.
> + * dlp - dest port value lp corresponds to.
> + */
> +
> +#define      GRPSZ   (1 << FWDSTEP)
> +#define      GRPMSK  (GRPSZ - 1)
> +
> +#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx)       do { \
> +     if (likely((dlp) == (dcp)[(idx)])) {         \
> +             (lp)[0]++;                           \
> +     } else {                                     \
> +             (dlp) = (dcp)[idx];                  \
> +             (lp) = (pn) + (idx);                 \
> +             (lp)[0] = 1;                         \
> +     }                                            \
> +} while (0)
> +
> +static const struct {
> +     uint64_t pnum; /* prebuild 4 values for pnum[]. */
> +     int32_t  idx;  /* index for new last updated element. */
> +     uint16_t lpv;  /* add value to the last updated element. */
> +} gptbl[GRPSZ] = {
> +     {
> +             /* 0: a != b, b != c, c != d, d != e */
> +             .pnum = UINT64_C(0x0001000100010001),
> +             .idx = 4,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 1: a == b, b != c, c != d, d != e */
> +             .pnum = UINT64_C(0x0001000100010002),
> +             .idx = 4,
> +             .lpv = 1,
> +     },
> +     {
> +             /* 2: a != b, b == c, c != d, d != e */
> +             .pnum = UINT64_C(0x0001000100020001),
> +             .idx = 4,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 3: a == b, b == c, c != d, d != e */
> +             .pnum = UINT64_C(0x0001000100020003),
> +             .idx = 4,
> +             .lpv = 2,
> +     },
> +     {
> +             /* 4: a != b, b != c, c == d, d != e */
> +             .pnum = UINT64_C(0x0001000200010001),
> +             .idx = 4,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 5: a == b, b != c, c == d, d != e */
> +             .pnum = UINT64_C(0x0001000200010002),
> +             .idx = 4,
> +             .lpv = 1,
> +     },
> +     {
> +             /* 6: a != b, b == c, c == d, d != e */
> +             .pnum = UINT64_C(0x0001000200030001),
> +             .idx = 4,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 7: a == b, b == c, c == d, d != e */
> +             .pnum = UINT64_C(0x0001000200030004),
> +             .idx = 4,
> +             .lpv = 3,
> +     },
> +     {
> +             /* 8: a != b, b != c, c != d, d == e */
> +             .pnum = UINT64_C(0x0002000100010001),
> +             .idx = 3,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 9: a == b, b != c, c != d, d == e */
> +             .pnum = UINT64_C(0x0002000100010002),
> +             .idx = 3,
> +             .lpv = 1,
> +     },
> +     {
> +             /* 0xa: a != b, b == c, c != d, d == e */
> +             .pnum = UINT64_C(0x0002000100020001),
> +             .idx = 3,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 0xb: a == b, b == c, c != d, d == e */
> +             .pnum = UINT64_C(0x0002000100020003),
> +             .idx = 3,
> +             .lpv = 2,
> +     },
> +     {
> +             /* 0xc: a != b, b != c, c == d, d == e */
> +             .pnum = UINT64_C(0x0002000300010001),
> +             .idx = 2,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 0xd: a == b, b != c, c == d, d == e */
> +             .pnum = UINT64_C(0x0002000300010002),
> +             .idx = 2,
> +             .lpv = 1,
> +     },
> +     {
> +             /* 0xe: a != b, b == c, c == d, d == e */
> +             .pnum = UINT64_C(0x0002000300040001),
> +             .idx = 1,
> +             .lpv = 0,
> +     },
> +     {
> +             /* 0xf: a == b, b == c, c == d, d == e */
> +             .pnum = UINT64_C(0x0002000300040005),
> +             .idx = 0,
> +             .lpv = 4,
> +     },
> +};
> +
> +#endif /* _PKT_GROUP_H_ */
> diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile
> index 8efe6378e2..8dbe85c2e6 100644
> --- a/examples/l3fwd/Makefile
> +++ b/examples/l3fwd/Makefile
> @@ -22,6 +22,7 @@ shared: build/$(APP)-shared
>  static: build/$(APP)-static
>       ln -sf $(APP)-static build/$(APP)
> 
> +INCLUDES =-I../common
>  PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null)
>  CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk)
>  # Added for 'rte_eth_link_to_str()'
> @@ -38,10 +39,10 @@ endif
>  endif
> 
>  build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
> -     $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
> +     $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
> 
>  build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
> -     $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
> +     $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
> 
>  build:
>       @mkdir -p $@
> diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
> index 8a52c90755..40b5f32a9e 100644
> --- a/examples/l3fwd/l3fwd.h
> +++ b/examples/l3fwd/l3fwd.h
> @@ -44,8 +44,6 @@
>  /* Used to mark destination port as 'invalid'. */
>  #define      BAD_PORT ((uint16_t)-1)
> 
> -#define FWDSTEP      4
> -
>  /* replace first 12B of the ethernet header. */
>  #define      MASK_ETH 0x3f
> 
> diff --git a/examples/l3fwd/l3fwd_common.h b/examples/l3fwd/l3fwd_common.h
> index 8e4c27218f..224b1c08e8 100644
> --- a/examples/l3fwd/l3fwd_common.h
> +++ b/examples/l3fwd/l3fwd_common.h
> @@ -7,6 +7,8 @@
>  #ifndef _L3FWD_COMMON_H_
>  #define _L3FWD_COMMON_H_
> 
> +#include "pkt_group.h"
> +
>  #ifdef DO_RFC_1812_CHECKS
> 
>  #define      IPV4_MIN_VER_IHL        0x45
> @@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
>  #define      rfc1812_process(mb, dp, ptype)  do { } while (0)
>  #endif /* DO_RFC_1812_CHECKS */
> 
> -/*
> - * We group consecutive packets with the same destination port into one burst.
> - * To avoid extra latency this is done together with some other packet
> - * processing, but after we made a final decision about packet's destination.
> - * To do this we maintain:
> - * pnum - array of number of consecutive packets with the same dest port for
> - * each packet in the input burst.
> - * lp - pointer to the last updated element in the pnum.
> - * dlp - dest port value lp corresponds to.
> - */
> -
> -#define      GRPSZ   (1 << FWDSTEP)
> -#define      GRPMSK  (GRPSZ - 1)
> -
> -#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx)       do { \
> -     if (likely((dlp) == (dcp)[(idx)])) {             \
> -             (lp)[0]++;                                   \
> -     } else {                                         \
> -             (dlp) = (dcp)[idx];                          \
> -             (lp) = (pn) + (idx);                         \
> -             (lp)[0] = 1;                                 \
> -     }                                                \
> -} while (0)
> -
> -static const struct {
> -     uint64_t pnum; /* prebuild 4 values for pnum[]. */
> -     int32_t  idx;  /* index for new last updated element. */
> -     uint16_t lpv;  /* add value to the last updated element. */
> -} gptbl[GRPSZ] = {
> -     {
> -             /* 0: a != b, b != c, c != d, d != e */
> -             .pnum = UINT64_C(0x0001000100010001),
> -             .idx = 4,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 1: a == b, b != c, c != d, d != e */
> -             .pnum = UINT64_C(0x0001000100010002),
> -             .idx = 4,
> -             .lpv = 1,
> -     },
> -     {
> -             /* 2: a != b, b == c, c != d, d != e */
> -             .pnum = UINT64_C(0x0001000100020001),
> -             .idx = 4,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 3: a == b, b == c, c != d, d != e */
> -             .pnum = UINT64_C(0x0001000100020003),
> -             .idx = 4,
> -             .lpv = 2,
> -     },
> -     {
> -             /* 4: a != b, b != c, c == d, d != e */
> -             .pnum = UINT64_C(0x0001000200010001),
> -             .idx = 4,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 5: a == b, b != c, c == d, d != e */
> -             .pnum = UINT64_C(0x0001000200010002),
> -             .idx = 4,
> -             .lpv = 1,
> -     },
> -     {
> -             /* 6: a != b, b == c, c == d, d != e */
> -             .pnum = UINT64_C(0x0001000200030001),
> -             .idx = 4,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 7: a == b, b == c, c == d, d != e */
> -             .pnum = UINT64_C(0x0001000200030004),
> -             .idx = 4,
> -             .lpv = 3,
> -     },
> -     {
> -             /* 8: a != b, b != c, c != d, d == e */
> -             .pnum = UINT64_C(0x0002000100010001),
> -             .idx = 3,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 9: a == b, b != c, c != d, d == e */
> -             .pnum = UINT64_C(0x0002000100010002),
> -             .idx = 3,
> -             .lpv = 1,
> -     },
> -     {
> -             /* 0xa: a != b, b == c, c != d, d == e */
> -             .pnum = UINT64_C(0x0002000100020001),
> -             .idx = 3,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 0xb: a == b, b == c, c != d, d == e */
> -             .pnum = UINT64_C(0x0002000100020003),
> -             .idx = 3,
> -             .lpv = 2,
> -     },
> -     {
> -             /* 0xc: a != b, b != c, c == d, d == e */
> -             .pnum = UINT64_C(0x0002000300010001),
> -             .idx = 2,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 0xd: a == b, b != c, c == d, d == e */
> -             .pnum = UINT64_C(0x0002000300010002),
> -             .idx = 2,
> -             .lpv = 1,
> -     },
> -     {
> -             /* 0xe: a != b, b == c, c == d, d == e */
> -             .pnum = UINT64_C(0x0002000300040001),
> -             .idx = 1,
> -             .lpv = 0,
> -     },
> -     {
> -             /* 0xf: a == b, b == c, c == d, d == e */
> -             .pnum = UINT64_C(0x0002000300040005),
> -             .idx = 0,
> -             .lpv = 4,
> -     },
> -};
> -
>  static __rte_always_inline void
>  send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
>               uint32_t num)
> diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
> index e3d33a5229..5fa765b640 100644
> --- a/examples/l3fwd/l3fwd_neon.h
> +++ b/examples/l3fwd/l3fwd_neon.h
> @@ -7,6 +7,7 @@
>  #define _L3FWD_NEON_H_
> 
>  #include "l3fwd.h"
> +#include "neon_common.h"
>  #include "l3fwd_common.h"
> 
>  /*
> @@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
>                       &dst_port[3], pkt[3]->packet_type);
>  }
> 
> -/*
> - * Group consecutive packets with the same destination port in bursts of 4.
> - * Suppose we have array of destination ports:
> - * dst_port[] = {a, b, c, d,, e, ... }
> - * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
> - * We doing 4 comparisons at once and the result is 4 bit mask.
> - * This mask is used as an index into prebuild array of pnum values.
> - */
> -static inline uint16_t *
> -port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1,
> -          uint16x8_t dp2)
> -{
> -     union {
> -             uint16_t u16[FWDSTEP + 1];
> -             uint64_t u64;
> -     } *pnum = (void *)pn;
> -
> -     int32_t v;
> -     uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0};
> -
> -     dp1 = vceqq_u16(dp1, dp2);
> -     dp1 = vandq_u16(dp1, mask);
> -     v = vaddvq_u16(dp1);
> -
> -     /* update last port counter. */
> -     lp[0] += gptbl[v].lpv;
> -     rte_compiler_barrier();
> -
> -     /* if dest port value has changed. */
> -     if (v != GRPMSK) {
> -             pnum->u64 = gptbl[v].pnum;
> -             pnum->u16[FWDSTEP] = 1;
> -             lp = pnum->u16 + gptbl[v].idx;
> -     }
> -
> -     return lp;
> -}
> -
>  /**
>   * Process one packet:
>   * Update source and destination MAC addresses in the ethernet header.
> @@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
>                        * <d[j-3], d[j-2], d[j-1], d[j], ... >
>                        */
>                       dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]);
> -                     lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
> +                     lp  = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
> 
>                       /*
>                        * dp1:
> @@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, struct
> rte_mbuf **pkts_burst,
>                */
>               dp2 = vextq_u16(dp1, dp1, 1);
>               dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3);
> -             lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
> +             lp  = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
> 
>               /*
>                * remove values added by the last repeated
> diff --git a/examples/meson.build b/examples/meson.build
> index 78de0e1f37..81e93799f2 100644
> --- a/examples/meson.build
> +++ b/examples/meson.build
> @@ -97,7 +97,7 @@ foreach example: examples
>      ldflags = default_ldflags
> 
>      ext_deps = []
> -    includes = [include_directories(example)]
> +    includes = [include_directories(example, 'common')]
>      deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline']
>      subdir(example)
> 
> --
> 2.25.1

Reply via email to