17/06/2022 08:50, Rahul Bhansali пишет:
CC: Konstantin Ananyev

-----Original Message-----
From: Rahul Bhansali <rbhans...@marvell.com>
Sent: Friday, June 17, 2022 1:13 PM
To: dev@dpdk.org; Ruifeng Wang <ruifeng.w...@arm.com>
Cc: Jerin Jacob Kollanukkaran <jer...@marvell.com>; Rahul Bhansali
<rbhans...@marvell.com>
Subject: [PATCH v2 1/2] examples/l3fwd: common packet group functionality

This will make the packet grouping function common, so that other examples
can utilize as per need.

Signed-off-by: Rahul Bhansali <rbhans...@marvell.com>
---
Changes in v2: New patch to address review comment.

  examples/common/neon_common.h |  50 ++++++++++++
  examples/common/pkt_group.h   | 139
++++++++++++++++++++++++++++++++++
  examples/l3fwd/Makefile       |   5 +-
  examples/l3fwd/l3fwd.h        |   2 -
  examples/l3fwd/l3fwd_common.h | 129 +------------------------------
  examples/l3fwd/l3fwd_neon.h   |  43 +----------
  examples/meson.build          |   2 +-
  7 files changed, 198 insertions(+), 172 deletions(-)  create mode 100644
examples/common/neon_common.h  create mode 100644
examples/common/pkt_group.h

diff --git a/examples/common/neon_common.h
b/examples/common/neon_common.h new file mode 100644 index
0000000000..f01b5ab6bc
--- /dev/null
+++ b/examples/common/neon_common.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Intel Corporation.
+ * Copyright(c) 2017-2018 Linaro Limited.
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef _NEON_COMMON_H_
+#define _NEON_COMMON_H_
+
+#include "pkt_group.h"
+
+/*
+ * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have array of destination ports:
+ * dst_port[] = {a, b, c, d,, e, ... }
+ * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
+ * We doing 4 comparisons at once and the result is 4 bit mask.
+ * This mask is used as an index into prebuild array of pnum values.
+ */
+static inline uint16_t *
+neon_port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1,
+                 uint16x8_t dp2)
+{
+       union {
+               uint16_t u16[FWDSTEP + 1];
+               uint64_t u64;
+       } *pnum = (void *)pn;
+
+       uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0};
+       int32_t v;
+
+       dp1 = vceqq_u16(dp1, dp2);
+       dp1 = vandq_u16(dp1, mask);
+       v = vaddvq_u16(dp1);
+
+       /* update last port counter. */hh
+       lp[0] += gptbl[v].lpv;
+       rte_compiler_barrier();
+
+       /* if dest port value has changed. */
+       if (v != GRPMSK) {
+               pnum->u64 = gptbl[v].pnum;
+               pnum->u16[FWDSTEP] = 1;
+               lp = pnum->u16 + gptbl[v].idx;
+       }
+
+       return lp;
+}

Thanks for the effort.
As I can see this function: port_groupx4() is nearly identical for all 3 platforms: sse/nenon/altivec (except of course built-in arch-specific instincts).
In fact, even comemnts are identical.
I wonder can we have something like:
examples/common/<arch>/port_group.h
and for each arch will have defined port_groupx4(...)
?

+
+#endif /* _NEON_COMMON_H_ */
diff --git a/examples/common/pkt_group.h b/examples/common/pkt_group.h
new file mode 100644 index 0000000000..8b26d9380f
--- /dev/null
+++ b/examples/common/pkt_group.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Intel Corporation.
+ * Copyright(c) 2017-2018 Linaro Limited.
+ * Copyright(C) 2022 Marvell.
+ */
+
+#ifndef _PKT_GROUP_H_
+#define _PKT_GROUP_H_
+
+#define FWDSTEP        4
+
+/*
+ * Group consecutive packets with the same destination port into one burst.
+ * To avoid extra latency this is done together with some other packet
+ * processing, but after we made a final decision about packet's destination.
+ * To do this we maintain:
+ * pnum - array of number of consecutive packets with the same dest
+port for
+ * each packet in the input burst.
+ * lp - pointer to the last updated element in the pnum.
+ * dlp - dest port value lp corresponds to.
+ */
+
+#define        GRPSZ   (1 << FWDSTEP)
+#define        GRPMSK  (GRPSZ - 1)
+
+#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
+       if (likely((dlp) == (dcp)[(idx)])) {         \
+               (lp)[0]++;                           \
+       } else {                                     \
+               (dlp) = (dcp)[idx];                  \
+               (lp) = (pn) + (idx);                 \
+               (lp)[0] = 1;                         \
+       }                                            \
+} while (0)
+
+static const struct {
+       uint64_t pnum; /* prebuild 4 values for pnum[]. */
+       int32_t  idx;  /* index for new last updated elemnet. */
+       uint16_t lpv;  /* add value to the last updated element. */ }
+gptbl[GRPSZ] = {
+       {
+               /* 0: a != b, b != c, c != d, d != e */
+               .pnum = UINT64_C(0x0001000100010001),
+               .idx = 4,
+               .lpv = 0,
+       },
+       {
+               /* 1: a == b, b != c, c != d, d != e */
+               .pnum = UINT64_C(0x0001000100010002),
+               .idx = 4,
+               .lpv = 1,
+       },
+       {
+               /* 2: a != b, b == c, c != d, d != e */
+               .pnum = UINT64_C(0x0001000100020001),
+               .idx = 4,
+               .lpv = 0,
+       },
+       {
+               /* 3: a == b, b == c, c != d, d != e */
+               .pnum = UINT64_C(0x0001000100020003),
+               .idx = 4,
+               .lpv = 2,
+       },
+       {
+               /* 4: a != b, b != c, c == d, d != e */
+               .pnum = UINT64_C(0x0001000200010001),
+               .idx = 4,
+               .lpv = 0,
+       },
+       {
+               /* 5: a == b, b != c, c == d, d != e */
+               .pnum = UINT64_C(0x0001000200010002),
+               .idx = 4,
+               .lpv = 1,
+       },
+       {
+               /* 6: a != b, b == c, c == d, d != e */
+               .pnum = UINT64_C(0x0001000200030001),
+               .idx = 4,
+               .lpv = 0,
+       },
+       {
+               /* 7: a == b, b == c, c == d, d != e */
+               .pnum = UINT64_C(0x0001000200030004),
+               .idx = 4,
+               .lpv = 3,
+       },
+       {
+               /* 8: a != b, b != c, c != d, d == e */
+               .pnum = UINT64_C(0x0002000100010001),
+               .idx = 3,
+               .lpv = 0,
+       },
+       {
+               /* 9: a == b, b != c, c != d, d == e */
+               .pnum = UINT64_C(0x0002000100010002),
+               .idx = 3,
+               .lpv = 1,
+       },
+       {
+               /* 0xa: a != b, b == c, c != d, d == e */
+               .pnum = UINT64_C(0x0002000100020001),
+               .idx = 3,
+               .lpv = 0,
+       },
+       {
+               /* 0xb: a == b, b == c, c != d, d == e */
+               .pnum = UINT64_C(0x0002000100020003),
+               .idx = 3,
+               .lpv = 2,
+       },
+       {
+               /* 0xc: a != b, b != c, c == d, d == e */
+               .pnum = UINT64_C(0x0002000300010001),
+               .idx = 2,
+               .lpv = 0,
+       },
+       {
+               /* 0xd: a == b, b != c, c == d, d == e */
+               .pnum = UINT64_C(0x0002000300010002),
+               .idx = 2,
+               .lpv = 1,
+       },
+       {
+               /* 0xe: a != b, b == c, c == d, d == e */
+               .pnum = UINT64_C(0x0002000300040001),
+               .idx = 1,
+               .lpv = 0,
+       },
+       {
+               /* 0xf: a == b, b == c, c == d, d == e */
+               .pnum = UINT64_C(0x0002000300040005),
+               .idx = 0,
+               .lpv = 4,
+       },
+};
+
+#endif /* _PKT_GROUP_H_ */
diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile index
8efe6378e2..8dbe85c2e6 100644
--- a/examples/l3fwd/Makefile
+++ b/examples/l3fwd/Makefile
@@ -22,6 +22,7 @@ shared: build/$(APP)-shared
  static: build/$(APP)-static
        ln -sf $(APP)-static build/$(APP)

+INCLUDES =-I../common
  PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null)  CFLAGS += -O3
$(shell $(PKGCONF) --cflags libdpdk)  # Added for 'rte_eth_link_to_str()'
@@ -38,10 +39,10 @@ endif
  endif

  build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
-       $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
+       $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS)
+$(LDFLAGS_SHARED)

  build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
-       $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
+       $(CC) $(CFLAGS) $(SRCS-y) $(INCLUDES) -o $@ $(LDFLAGS)
+$(LDFLAGS_STATIC)

  build:
        @mkdir -p $@
diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index
8a52c90755..40b5f32a9e 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -44,8 +44,6 @@
  /* Used to mark destination port as 'invalid'. */
  #define       BAD_PORT ((uint16_t)-1)

-#define FWDSTEP        4
-
  /* replace first 12B of the ethernet header. */
  #define       MASK_ETH 0x3f

diff --git a/examples/l3fwd/l3fwd_common.h
b/examples/l3fwd/l3fwd_common.h index 8e4c27218f..224b1c08e8 100644
--- a/examples/l3fwd/l3fwd_common.h
+++ b/examples/l3fwd/l3fwd_common.h
@@ -7,6 +7,8 @@
  #ifndef _L3FWD_COMMON_H_
  #define _L3FWD_COMMON_H_

+#include "pkt_group.h"
+
  #ifdef DO_RFC_1812_CHECKS

  #define       IPV4_MIN_VER_IHL        0x45
@@ -50,133 +52,6 @@ rfc1812_process(struct rte_ipv4_hdr *ipv4_hdr, uint16_t
*dp, uint32_t ptype)
  #define       rfc1812_process(mb, dp, ptype)  do { } while (0)
  #endif /* DO_RFC_1812_CHECKS */

-/*
- * We group consecutive packets with the same destination port into one burst.
- * To avoid extra latency this is done together with some other packet
- * processing, but after we made a final decision about packet's destination.
- * To do this we maintain:
- * pnum - array of number of consecutive packets with the same dest port for
- * each packet in the input burst.
- * lp - pointer to the last updated element in the pnum.
- * dlp - dest port value lp corresponds to.
- */
-
-#define        GRPSZ   (1 << FWDSTEP)
-#define        GRPMSK  (GRPSZ - 1)
-
-#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
-       if (likely((dlp) == (dcp)[(idx)])) {             \
-               (lp)[0]++;                                   \
-       } else {                                         \
-               (dlp) = (dcp)[idx];                          \
-               (lp) = (pn) + (idx);                         \
-               (lp)[0] = 1;                                 \
-       }                                                \
-} while (0)
-
-static const struct {
-       uint64_t pnum; /* prebuild 4 values for pnum[]. */
-       int32_t  idx;  /* index for new last updated element. */
-       uint16_t lpv;  /* add value to the last updated element. */
-} gptbl[GRPSZ] = {
-       {
-               /* 0: a != b, b != c, c != d, d != e */
-               .pnum = UINT64_C(0x0001000100010001),
-               .idx = 4,
-               .lpv = 0,
-       },
-       {
-               /* 1: a == b, b != c, c != d, d != e */
-               .pnum = UINT64_C(0x0001000100010002),
-               .idx = 4,
-               .lpv = 1,
-       },
-       {
-               /* 2: a != b, b == c, c != d, d != e */
-               .pnum = UINT64_C(0x0001000100020001),
-               .idx = 4,
-               .lpv = 0,
-       },
-       {
-               /* 3: a == b, b == c, c != d, d != e */
-               .pnum = UINT64_C(0x0001000100020003),
-               .idx = 4,
-               .lpv = 2,
-       },
-       {
-               /* 4: a != b, b != c, c == d, d != e */
-               .pnum = UINT64_C(0x0001000200010001),
-               .idx = 4,
-               .lpv = 0,
-       },
-       {
-               /* 5: a == b, b != c, c == d, d != e */
-               .pnum = UINT64_C(0x0001000200010002),
-               .idx = 4,
-               .lpv = 1,
-       },
-       {
-               /* 6: a != b, b == c, c == d, d != e */
-               .pnum = UINT64_C(0x0001000200030001),
-               .idx = 4,
-               .lpv = 0,
-       },
-       {
-               /* 7: a == b, b == c, c == d, d != e */
-               .pnum = UINT64_C(0x0001000200030004),
-               .idx = 4,
-               .lpv = 3,
-       },
-       {
-               /* 8: a != b, b != c, c != d, d == e */
-               .pnum = UINT64_C(0x0002000100010001),
-               .idx = 3,
-               .lpv = 0,
-       },
-       {
-               /* 9: a == b, b != c, c != d, d == e */
-               .pnum = UINT64_C(0x0002000100010002),
-               .idx = 3,
-               .lpv = 1,
-       },
-       {
-               /* 0xa: a != b, b == c, c != d, d == e */
-               .pnum = UINT64_C(0x0002000100020001),
-               .idx = 3,
-               .lpv = 0,
-       },
-       {
-               /* 0xb: a == b, b == c, c != d, d == e */
-               .pnum = UINT64_C(0x0002000100020003),
-               .idx = 3,
-               .lpv = 2,
-       },
-       {
-               /* 0xc: a != b, b != c, c == d, d == e */
-               .pnum = UINT64_C(0x0002000300010001),
-               .idx = 2,
-               .lpv = 0,
-       },
-       {
-               /* 0xd: a == b, b != c, c == d, d == e */
-               .pnum = UINT64_C(0x0002000300010002),
-               .idx = 2,
-               .lpv = 1,
-       },
-       {
-               /* 0xe: a != b, b == c, c == d, d == e */
-               .pnum = UINT64_C(0x0002000300040001),
-               .idx = 1,
-               .lpv = 0,
-       },
-       {
-               /* 0xf: a == b, b == c, c == d, d == e */
-               .pnum = UINT64_C(0x0002000300040005),
-               .idx = 0,
-               .lpv = 4,
-       },
-};
-
  static __rte_always_inline void
  send_packetsx4(struct lcore_conf *qconf, uint16_t port, struct rte_mbuf *m[],
                uint32_t num)
diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
index e3d33a5229..5fa765b640 100644
--- a/examples/l3fwd/l3fwd_neon.h
+++ b/examples/l3fwd/l3fwd_neon.h
@@ -7,6 +7,7 @@
  #define _L3FWD_NEON_H_

  #include "l3fwd.h"
+#include "neon_common.h"
  #include "l3fwd_common.h"

  /*
@@ -62,44 +63,6 @@ processx4_step3(struct rte_mbuf *pkt[FWDSTEP],
uint16_t dst_port[FWDSTEP])
                        &dst_port[3], pkt[3]->packet_type);
  }

-/*
- * Group consecutive packets with the same destination port in bursts of 4.
- * Suppose we have array of destination ports:
- * dst_port[] = {a, b, c, d,, e, ... }
- * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
- * We doing 4 comparisons at once and the result is 4 bit mask.
- * This mask is used as an index into prebuild array of pnum values.
- */
-static inline uint16_t *
-port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, uint16x8_t dp1,
-            uint16x8_t dp2)
-{
-       union {
-               uint16_t u16[FWDSTEP + 1];
-               uint64_t u64;
-       } *pnum = (void *)pn;
-
-       int32_t v;
-       uint16x8_t mask = {1, 2, 4, 8, 0, 0, 0, 0};
-
-       dp1 = vceqq_u16(dp1, dp2);
-       dp1 = vandq_u16(dp1, mask);
-       v = vaddvq_u16(dp1);
-
-       /* update last port counter. */
-       lp[0] += gptbl[v].lpv;
-       rte_compiler_barrier();
-
-       /* if dest port value has changed. */
-       if (v != GRPMSK) {
-               pnum->u64 = gptbl[v].pnum;
-               pnum->u16[FWDSTEP] = 1;
-               lp = pnum->u16 + gptbl[v].idx;
-       }
-
-       return lp;
-}
-
  /**
   * Process one packet:
   * Update source and destination MAC addresses in the ethernet header.
@@ -161,7 +124,7 @@ send_packets_multi(struct lcore_conf *qconf, struct
rte_mbuf **pkts_burst,
                         * <d[j-3], d[j-2], d[j-1], d[j], ... >
                         */
                        dp2 = vld1q_u16(&dst_port[j - FWDSTEP + 1]);
-                       lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+                       lp  = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1,
dp2);

                        /*
                         * dp1:
@@ -175,7 +138,7 @@ send_packets_multi(struct lcore_conf *qconf, struct
rte_mbuf **pkts_burst,
                 */
                dp2 = vextq_u16(dp1, dp1, 1);
                dp2 = vsetq_lane_u16(vgetq_lane_u16(dp2, 2), dp2, 3);
-               lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+               lp  = neon_port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);

                /*
                 * remove values added by the last repeated diff --git
a/examples/meson.build b/examples/meson.build index
78de0e1f37..81e93799f2 100644
--- a/examples/meson.build
+++ b/examples/meson.build
@@ -97,7 +97,7 @@ foreach example: examples
      ldflags = default_ldflags

      ext_deps = []
-    includes = [include_directories(example)]
+    includes = [include_directories(example, 'common')]
      deps = ['eal', 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline']
      subdir(example)

--
2.25.1


Reply via email to