Packets are captured at each graph node, and the node and mbuf metadata
are stored with every captured packet in the pcap file.

This is inspired by VPP.

Signed-off-by: Amit Prakash Shukla <>
 doc/guides/sample_app_ug/l3_forward_graph.rst |   3 +
 examples/l3fwd-graph/main.c                   |  12 +
 lib/graph/graph_pcap_trace.c                  | 337 ++++++++++++++++++
 lib/graph/graph_populate.c                    |   6 +-
 lib/graph/                         |   5 +-
 lib/graph/rte_graph_pcap_trace.h              | 149 ++++++++
 lib/graph/rte_graph_worker.h                  |   6 +
 7 files changed, 515 insertions(+), 3 deletions(-)
 create mode 100644 lib/graph/graph_pcap_trace.c
 create mode 100644 lib/graph/rte_graph_pcap_trace.h

diff --git a/doc/guides/sample_app_ug/l3_forward_graph.rst 
index 0a3e0d44ec..cf199bcf81 100644
--- a/doc/guides/sample_app_ug/l3_forward_graph.rst
+++ b/doc/guides/sample_app_ug/l3_forward_graph.rst
@@ -51,6 +51,7 @@ The application has a number of command line options similar 
to l3fwd::
                                    [--max-pkt-len PKTLEN]
+                                   [--pcap-enable]
@@ -69,6 +70,8 @@ Where,
 * ``--per-port-pool:`` Optional, set to use independent buffer pools per port. 
Without this option, single buffer pool is used for all ports.
+* ``--pcap-enable:`` Optional, enables packet capture in pcap format at each 
node, including mbuf and node metadata.
 For example, consider a dual processor socket platform with 8 physical cores, 
where cores 0-7 and 16-23 appear on socket 0,
 while cores 8-15 and 24-31 appear on socket 1.
diff --git a/examples/l3fwd-graph/main.c b/examples/l3fwd-graph/main.c
index 6dcb6ee92b..b6408310aa 100644
--- a/examples/l3fwd-graph/main.c
+++ b/examples/l3fwd-graph/main.c
@@ -404,6 +404,7 @@ static const char short_options[] = "p:" /* portmask */
 #define CMD_LINE_OPT_NO_NUMA      "no-numa"
 #define CMD_LINE_OPT_MAX_PKT_LEN   "max-pkt-len"
 #define CMD_LINE_OPT_PER_PORT_POOL "per-port-pool"
+#define CMD_LINE_OPT_PCAP_ENABLE   "pcap-enable"
 enum {
        /* Long options mapped to a short option */
@@ -416,6 +417,7 @@ enum {
 static const struct option lgopts[] = {
@@ -424,6 +426,7 @@ static const struct option lgopts[] = {
        {NULL, 0, 0, 0},
@@ -498,6 +501,11 @@ parse_args(int argc, char **argv)
                        per_port_pool = 1;
+               case CMD_LINE_OPT_PARSE_PCAP_ENABLE:
+                       printf("Packet capture enabled\n");
+                       set_pcap_trace(1);
+                       break;
                        return -1;
@@ -831,6 +839,7 @@ main(int argc, char **argv)
                        local_port_conf.txmode.offloads |=
+               local_port_conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_RSS_HASH;
                local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
                if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
@@ -1116,6 +1125,9 @@ main(int argc, char **argv)
        /* >8 End of adding route to ip4 graph infa. */
+       if (is_pcap_trace_enable())
+               rte_graph_pcap_trace_init();
        /* Launch per-lcore init on every worker lcore */
        rte_eal_mp_remote_launch(graph_main_loop, NULL, SKIP_MAIN);
diff --git a/lib/graph/graph_pcap_trace.c b/lib/graph/graph_pcap_trace.c
new file mode 100644
index 0000000000..c5be07de6d
--- /dev/null
+++ b/lib/graph/graph_pcap_trace.c
@@ -0,0 +1,337 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell International Ltd.
+ */
+#include <unistd.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_net.h>
+#include <rte_flow.h>
+#include "rte_graph_worker.h"
+#define MAX_PKT_TO_CAPTURE 200
+#define MAX_PCAP_BUF_SZ 2048
+/*
+ * Append printf-style formatted text to buf at offset cur_len and advance
+ * cur_len by the number of characters actually stored.
+ *
+ * snprintf() returns the length the output would have had, so on truncation
+ * the raw return value would push cur_len past buf_size. cur_len is therefore
+ * clamped to buf_size - 1 (the position of the terminating NUL), which keeps
+ * later indexing with cur_len inside the buffer. Nothing is done when the
+ * buffer is already full or when snprintf() reports an encoding error.
+ */
+#define PCAP_DUMP_STR(buf, buf_size, cur_len, ...)                           \
+do {                                                                         \
+       int dump_ret_;                                                       \
+       if ((cur_len) >= (buf_size))                                         \
+               break;                                                       \
+       dump_ret_ = snprintf((buf) + (cur_len), (buf_size) - (cur_len),      \
+                            __VA_ARGS__);                                   \
+       if (dump_ret_ < 0)                                                   \
+               break;                                                       \
+       if ((size_t)dump_ret_ >= (size_t)((buf_size) - (cur_len)))           \
+               (cur_len) = (buf_size) - 1;                                  \
+       else                                                                 \
+               (cur_len) += dump_ret_;                                      \
+} while (0)
+/*
+ * Append len raw bytes from sbuf to dbuf at offset cur_len and advance
+ * cur_len by len. The copy is skipped entirely when cur_len + len would
+ * reach or exceed buf_size, so the last byte of dbuf is never filled here.
+ *
+ * len is evaluated exactly once, which matters for callers that pass an
+ * expression such as strlen(...) + 1.
+ */
+#define PCAP_DUMP_DATA(dbuf, buf_size, cur_len, sbuf, len)                   \
+do {                                                                         \
+       const size_t dump_len_ = (len);                                      \
+       if (((cur_len) + dump_len_) >= (buf_size))                           \
+               break;                                                       \
+       rte_memcpy((dbuf) + (cur_len), (sbuf), dump_len_);                   \
+       (cur_len) += dump_len_;                                              \
+} while (0)
+static int pcap_trace_enable;
+static pcap_trace_t pcap_trace;
+static uint16_t pkt_metadata_dump(struct rte_mbuf *mbuf, char *buffer,
+                                 size_t buf_size, uint16_t cur_len);
+static int pcap_trace_close(void);
+static int pcap_trace_write(void);
+       memset(&pcap_trace, 0, sizeof(pcap_trace_t));
+       rte_spinlock_init(&pcap_trace.lock);
+       pcap_trace.packet_type = PCAP_PACKET_TYPE_USER13;
+       pcap_trace.n_packets_to_capture = MAX_PKT_TO_CAPTURE;
+       pcap_trace.file_name = "/tmp/dpdk.pcap";
+       pcap_trace.pcap_data = NULL;
+set_pcap_trace(int val)
+       pcap_trace_enable = val;
+       return pcap_trace_enable;
+static int
+       close(pcap_trace.file_descriptor);
+       pcap_trace.file_descriptor = -1;
+       return 0;
+static int
+       pcap_file_header_t file_hdr;
+       int ret = 0;
+       int n;
+       if (!pcap_trace.file_name)
+               pcap_trace.file_name = "/tmp/dpdk.pcap";
+       pcap_trace.file_descriptor = open(pcap_trace.file_name,
+                                         O_CREAT | O_TRUNC | O_WRONLY, 0664);
+       if (pcap_trace.file_descriptor < 0) {
+               ret = 1;
+               goto done;
+       }
+       pcap_trace.n_pcap_data_written = 0;
+       /* Write file header. */
+       memset(&file_hdr, 0, sizeof(file_hdr));
+       file_hdr.magic = 0xa1b2c3d4;
+       file_hdr.major_version = 2;
+       file_hdr.minor_version = 4;
+       file_hdr.time_zone = 0;
+       file_hdr.max_packet_size_in_bytes = ((1 << 16) - 1);
+       file_hdr.packet_type = pcap_trace.packet_type;
+       n = write(pcap_trace.file_descriptor, &file_hdr, sizeof(file_hdr));
+       if (n != sizeof(file_hdr)) {
+               ret = 1;
+               goto done;
+       }
+       while (pcap_trace.n_bytes > pcap_trace.n_pcap_data_written) {
+               int n = pcap_trace.n_bytes - pcap_trace.n_pcap_data_written;
+               n = write(pcap_trace.file_descriptor,
+                         (pcap_trace.pcap_data +
+                          pcap_trace.n_pcap_data_written), n);
+               if (n < 0 && errno != 0) {
+                       ret = 1;
+                       goto done;
+               }
+               pcap_trace.n_pcap_data_written += n;
+       }
+       if (pcap_trace.n_pcap_data_written >= pcap_trace.n_bytes)       {
+               rte_free(pcap_trace.pcap_data);
+               pcap_trace.pcap_data = NULL;
+               pcap_trace.n_pcap_data_written = 0;
+       }
+       if (pcap_trace.n_packets_captured >=
+           pcap_trace.n_packets_to_capture)
+               pcap_trace_close();
+       if (ret) {
+               if (pcap_trace.file_descriptor >= 0)
+                       pcap_trace_close();
+       }
+       return ret;
+static uint16_t
+pkt_metadata_dump(struct rte_mbuf *mbuf, char *buffer, size_t buf_size,
+                 uint16_t cur_len)
+       struct rte_flow_restore_info info = { 0, };
+       struct rte_net_hdr_lens hdr_lens;
+       struct rte_flow_error error;
+       uint32_t sw_packet_type;
+       uint64_t ol_flags;
+       char buf[256];
+       int ret;
+       ret = rte_flow_get_restore_info(mbuf->port, mbuf, &info, &error);
+       if (!ret) {
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "restore info:");
+               if (info.flags & RTE_FLOW_RESTORE_INFO_ENCAPSULATED)
+                       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                                     "outer header present. ");
+               else
+                       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                                     "no outer header. ");
+               if (info.flags & RTE_FLOW_RESTORE_INFO_GROUP_ID)
+                       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                                     "miss group %u", info.group_id);
+               else
+                       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                                     "no miss group");
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "\n");
+       }
+       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                     "pool=%s  length=%u  nb_segs=%d\n", mbuf->pool->name,
+                     (unsigned int) mbuf->pkt_len, (int)mbuf->nb_segs);
+       ol_flags = mbuf->ol_flags;
+       if (ol_flags & RTE_MBUF_F_RX_RSS_HASH) {
+               PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                             "RSS hash=0x%x\n",
+                             (unsigned int) mbuf->hash.rss);
+       }
+       if (ol_flags & RTE_MBUF_F_RX_FDIR) {
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "FDIR matched ");
+               if (ol_flags & RTE_MBUF_F_RX_FDIR_ID)
+                       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                                     "ID=0x%x\n", mbuf->hash.fdir.hi);
+               else if (ol_flags & RTE_MBUF_F_RX_FDIR_FLX)
+                       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                                     "flex bytes=0x%08x %08x\n",
+                                     mbuf->hash.fdir.hi, mbuf->hash.fdir.lo);
+               else
+                       PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                                     "hash=0x%x ID=0x%x\n",
+                                     mbuf->hash.fdir.hash, mbuf->;
+       }
+       if (ol_flags & RTE_MBUF_F_RX_QINQ)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                             "QinQ VLAN tci=0x%x, VLAN tci outer=0x%x\n",
+                             mbuf->vlan_tci, mbuf->vlan_tci_outer);
+       else if (ol_flags & RTE_MBUF_F_RX_VLAN)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "VLAN tci=0x%x\n",
+                             mbuf->vlan_tci);
+       if (mbuf->packet_type) {
+               rte_get_ptype_name(mbuf->packet_type, buf, sizeof(buf));
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "hw ptype: %s\n",
+                             buf);
+       }
+       sw_packet_type = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK);
+       rte_get_ptype_name(sw_packet_type, buf, sizeof(buf));
+       PCAP_DUMP_STR(buffer, buf_size, cur_len, "sw ptype: %s\n", buf);
+       if (sw_packet_type & RTE_PTYPE_L2_MASK)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "l2_len=%d  ",
+                             hdr_lens.l2_len);
+       if (sw_packet_type & RTE_PTYPE_L3_MASK)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "l3_len=%d  ",
+                             hdr_lens.l3_len);
+       if (sw_packet_type & RTE_PTYPE_L4_MASK)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len, "l4_len=%d  ",
+                             hdr_lens.l4_len);
+       if (sw_packet_type & RTE_PTYPE_TUNNEL_MASK)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                             "tunnel_len=%d  ", hdr_lens.tunnel_len);
+       if (sw_packet_type & RTE_PTYPE_INNER_L2_MASK)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                             "inner_l2_len=%d  ", hdr_lens.inner_l2_len);
+       if (sw_packet_type & RTE_PTYPE_INNER_L3_MASK)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                             "inner_l3_len=%d  ", hdr_lens.inner_l3_len);
+       if (sw_packet_type & RTE_PTYPE_INNER_L4_MASK)
+               PCAP_DUMP_STR(buffer, buf_size, cur_len,
+                             "inner_l4_len=%d  ", hdr_lens.inner_l4_len);
+       PCAP_DUMP_STR(buffer, buf_size, cur_len, "\n");
+       rte_get_rx_ol_flag_list(mbuf->ol_flags, buf, sizeof(buf));
+       PCAP_DUMP_STR(buffer, buf_size, cur_len, "Rx ol_flags: %s\n", buf);
+       rte_get_tx_ol_flag_list(mbuf->ol_flags, buf, sizeof(buf));
+       PCAP_DUMP_STR(buffer, buf_size, cur_len, "Tx ol_flags: %s\n", buf);
+       /* Keep it last. */
+       buffer[cur_len++] = '\0';
+       return cur_len;
+rte_graph_pcap_trace_dispatch(struct rte_graph *graph __rte_unused,
+                             struct rte_node *node, void **objs,
+                             uint16_t nb_objs)
+       struct rte_mbuf *mbuf;
+       pcap_packet_header_t *phdr;
+       pcap_version_hdr_t vhdr = {0};
+       struct timeval ts = {0};
+       char buffer[MAX_PCAP_BUF_SZ] = {0};
+       uint16_t total_len, n_bytes, cb_len;
+       uint8_t *pkt_data;
+       uint8_t *data;
+       int i;
+       gettimeofday(&ts, NULL);
+       for (i = 0; i < nb_objs; i++) {
+               if (pcap_trace.n_packets_captured >=
+                   pcap_trace.n_packets_to_capture)
+                       break;
+               mbuf = (struct rte_mbuf *)objs[i];
+               memset(buffer, 0, sizeof(buffer));
+               cb_len = 0;
+               total_len = 0;
+               n_bytes = 0;
+               vhdr.pcap_major_version = (uint8_t)RTE_PCAP_MAJOR_VERSION;
+               vhdr.pcap_minor_version = (uint8_t)RTE_PCAP_MINOR_VERSION;
+               PCAP_DUMP_DATA(buffer, MAX_PCAP_BUF_SZ, cb_len, &vhdr,
+                              sizeof(pcap_version_hdr_t));
+               PCAP_DUMP_DATA(buffer, MAX_PCAP_BUF_SZ, cb_len, node->name,
+                              (strlen(node->name) + 1));
+               PCAP_DUMP_DATA(buffer, MAX_PCAP_BUF_SZ, cb_len, node->ctx,
+                              RTE_NODE_CTX_SZ);
+               cb_len = pkt_metadata_dump(mbuf, buffer, sizeof(buffer),
+                                          cb_len);
+               total_len = cb_len + mbuf->pkt_len +
+                               sizeof(pcap_packet_header_t);
+               n_bytes = RTE_MIN((int)total_len, 16384);
+               rte_spinlock_lock(&pcap_trace.lock);
+               if (pcap_trace.pcap_data == NULL) {
+                       pcap_trace.pcap_data = rte_malloc(NULL, n_bytes, 0);
+                       phdr = (pcap_packet_header_t *)pcap_trace.pcap_data;
+               } else {
+                       pcap_trace.pcap_data = rte_realloc(
+                                       pcap_trace.pcap_data,
+                                       (pcap_trace.n_bytes + n_bytes), 0);
+                       phdr = (pcap_packet_header_t *)(pcap_trace.pcap_data +
+                                       pcap_trace.n_bytes);
+               }
+               phdr->time_in_sec = (uint32_t)ts.tv_sec;
+               phdr->time_in_usec = (uint32_t)ts.tv_usec;
+               phdr->n_packet_bytes_stored_in_file =
+                               (n_bytes - sizeof(pcap_packet_header_t));
+               phdr->n_bytes_in_packet =
+                               (total_len - sizeof(pcap_packet_header_t));
+               data = phdr->data;
+               rte_memcpy(data, buffer, cb_len);
+               data += cb_len;
+               pkt_data = rte_pktmbuf_mtod(mbuf, uint8_t *);
+               rte_memcpy(data, pkt_data, (RTE_MIN((n_bytes - cb_len),
+                          mbuf->data_len)));
+               pcap_trace.n_bytes += n_bytes;
+               pcap_trace.n_packets_captured++;
+               if (pcap_trace.n_packets_captured == MAX_PKT_TO_CAPTURE)
+                       pcap_trace_write();
+               rte_spinlock_unlock(&pcap_trace.lock);
+       }
+       return node->p_process(graph, node, objs, nb_objs);
diff --git a/lib/graph/graph_populate.c b/lib/graph/graph_populate.c
index 102fd6c29b..36f81505df 100644
--- a/lib/graph/graph_populate.c
+++ b/lib/graph/graph_populate.c
@@ -75,7 +75,11 @@ graph_nodes_populate(struct graph *_graph)
                memset(node, 0, sizeof(*node));
                node->fence = RTE_GRAPH_FENCE;
                node->off = off;
-               node->process = graph_node->node->process;
+               if (is_pcap_trace_enable()) {
+                       node->process = rte_graph_pcap_trace_dispatch;
+                       node->p_process = graph_node->node->process;
+               } else
+                       node->process = graph_node->node->process;
                memcpy(node->name, graph_node->node->name, RTE_GRAPH_NAMESIZE);
                pid = graph_node->node->parent_id;
                if (pid != RTE_NODE_ID_INVALID) { /* Cloned node */
diff --git a/lib/graph/ b/lib/graph/
index c7327549e8..6cd9d836d9 100644
--- a/lib/graph/
+++ b/lib/graph/
@@ -14,7 +14,8 @@ sources = files(
+        'graph_pcap_trace.c',
-headers = files('rte_graph.h', 'rte_graph_worker.h')
+headers = files('rte_graph.h', 'rte_graph_worker.h', 'rte_graph_pcap_trace.h')
-deps += ['eal']
+deps += ['eal', 'mbuf', 'mempool', 'net', 'ethdev']
diff --git a/lib/graph/rte_graph_pcap_trace.h b/lib/graph/rte_graph_pcap_trace.h
new file mode 100644
index 0000000000..e2faf6205f
--- /dev/null
+++ b/lib/graph/rte_graph_pcap_trace.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell International Ltd.
+ */
+ * @file rte_graph_pcap_trace.h
+ *
+ * @warning
+ * All functions in this file may be changed or removed without prior notice.
+ *
+ * This API enables to capture packet at each node with mbuf and node metadata.
+ *
+ */
+#include <rte_spinlock.h>
+#ifdef __cplusplus
+extern "C" {
+ * User packet type used in pcap file is used by wireshark to lookup into the
+ * DLT table to know which protocol(s) to use for each DLT.
+ */
+typedef enum {
+       PCAP_PACKET_TYPE_USER0 = 147,
+} pcap_packet_type_t;
+/**
+ * Pcap file shall contain a header at the start of the file. Parameters are
+ * dissected by wireshark to display the data.
+ *
+ * The graph pcap writer zeroes this structure and then fills it in before
+ * writing it once at the beginning of the output file.
+ */
+typedef struct pcap_file_header {
+       /** Pcap magic number; the writer stores 0xa1b2c3d4. */
+       uint32_t magic;
+       /** Pcap format major version; the writer stores 2. */
+       uint16_t major_version;
+       /** Pcap format minor version; the writer stores 4. */
+       uint16_t minor_version;
+       /** Time zone field; the writer stores 0. */
+       uint32_t time_zone;
+       /** Not assigned by the writer, so it stays 0 from the memset. */
+       uint32_t sigfigs;
+       /** Maximum stored packet size; the writer stores (1 << 16) - 1. */
+       uint32_t max_packet_size_in_bytes;
+       /** Packet (link) type, copied from pcap_trace_t::packet_type. */
+       uint32_t packet_type;
+} pcap_file_header_t;
+/**
+ * Each packet shall be prepended by the packet header.
+ *
+ * The trace dispatch function fills one of these per captured mbuf and
+ * places the node/mbuf metadata followed by the packet bytes in @c data.
+ */
+typedef struct pcap_packet_header_t {
+       /** Capture timestamp, seconds part (tv_sec of gettimeofday()). */
+       uint32_t time_in_sec;
+       /** Capture timestamp, microseconds part (tv_usec of gettimeofday()). */
+       uint32_t time_in_usec;
+       /** Bytes stored after this header (record size minus header size). */
+       uint32_t n_packet_bytes_stored_in_file;
+       /** Untruncated length: metadata length plus mbuf pkt_len. */
+       uint32_t n_bytes_in_packet;
+       /** Record payload follows: metadata buffer, then packet data. */
+       uint8_t data[0];
+} pcap_packet_header_t;
+/**
+ * Pcap version header.
+ *
+ * Copied at the very start of the per-packet metadata buffer, ahead of the
+ * node name and node context.
+ */
+typedef struct pcap_version_hdr {
+       /** Set from RTE_PCAP_MAJOR_VERSION by the trace dispatch function. */
+       uint8_t pcap_major_version;
+       /** Set from RTE_PCAP_MINOR_VERSION by the trace dispatch function. */
+       uint8_t pcap_minor_version;
+} pcap_version_hdr_t;
+/**
+ * Book-keeping of the packets captured.
+ *
+ * A single instance of this structure lives in graph_pcap_trace.c and is
+ * shared by every node that runs the trace dispatch function.
+ */
+typedef struct pcap_trace {
+       /** Taken while a record is appended to pcap_data and while writing. */
+       rte_spinlock_t lock;
+       /** Output file path; initialised to "/tmp/dpdk.pcap". */
+       const char *file_name;
+       /** Capture limit; initialised to MAX_PKT_TO_CAPTURE. */
+       uint32_t n_packets_to_capture;
+       /** Total number of bytes accumulated in pcap_data. */
+       uint32_t n_bytes;
+       /** Packet (link) type written into the pcap file header. */
+       pcap_packet_type_t packet_type;
+       /** Number of packets captured so far. */
+       uint32_t n_packets_captured;
+       /** Descriptor of the output file; set to -1 when it is closed. */
+       int file_descriptor;
+       /** Number of bytes of pcap_data already written to the file. */
+       uint32_t n_pcap_data_written;
+       /** Heap buffer holding the captured records; freed after writing. */
+       uint8_t *pcap_data;
+} pcap_trace_t;
+ * Pcap trace enable/disable function.
+ *
+ * The function is called to enable/disable graph pcap trace functionality.
+ *
+ * @param val
+ *   Value to be set to enable/disable graph pcap trace.
+ */
+void set_pcap_trace(int val);
+ * Check graph pcap trace is enable/disable.
+ *
+ * The function is called to check if the graph pcap trace is enabled/disabled.
+ *
+ * @return
+ *   - 1: Enable
+ *   - 0: Disable
+ */
+int is_pcap_trace_enable(void);
+ * Initialise graph pcap trace functionality.
+ *
+ * The function invoked when the graph pcap trace is enabled from the
+ * application.
+ *
+ */
+void rte_graph_pcap_trace_init(void);
+ * Capture mbuf metadata and node metadata to a pcap file.
+ *
+ * When graph pcap trace enabled, this function is invoked prior to each node
+ * and mbuf, node metadata is parsed and captured in a pcap file.
+ *
+ * @param graph
+ *   Pointer to the graph object.
+ * @param node
+ *   Pointer to the node object.
+ * @param objs
+ *   Pointer to an array of objects to be processed.
+ * @param nb_objs
+ *   Number of objects in the array.
+ */
+uint16_t rte_graph_pcap_trace_dispatch(struct rte_graph *graph __rte_unused,
+                                      struct rte_node *node, void **objs,
+                                      uint16_t nb_objs);
+#ifdef __cplusplus
+#endif /* _DPDK_GRAPH_PCAP_TRACE_H_ */
diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
index fc6fee48c8..64c18421d5 100644
--- a/lib/graph/rte_graph_worker.h
+++ b/lib/graph/rte_graph_worker.h
@@ -24,6 +24,7 @@
 #include <rte_memory.h>
 #include "rte_graph.h"
+#include "rte_graph_pcap_trace.h"
 #ifdef __cplusplus
 extern "C" {
@@ -64,6 +65,11 @@ struct rte_node {
        char parent[RTE_NODE_NAMESIZE]; /**< Parent node name. */
        char name[RTE_NODE_NAMESIZE];   /**< Name of the node. */
+       union {
+               rte_node_process_t p_process; /**< Process function. */
+               uint64_t p_process_u64;
+       };
        /* Fast path area  */
 #define RTE_NODE_CTX_SZ 16
        uint8_t ctx[RTE_NODE_CTX_SZ] __rte_cache_aligned; /**< Node Context. */

Reply via email to