> Adding per core packet handling stats to analyze traffic distribution
> when multiple cores are engaged.
> 
> Since aggregating the packet stats across cores would affect
> performance, keeping the feature disabled using compile time flags.
> 
> Signed-off-by: Anoob Joseph <ano...@marvell.com>
> ---
> 
> v2:
> * Added lookup failure cases to drop count
> 
>  examples/ipsec-secgw/ipsec-secgw.c   | 118 
> +++++++++++++++++++++++++++++++++--
>  examples/ipsec-secgw/ipsec-secgw.h   |   2 +
>  examples/ipsec-secgw/ipsec.c         |  13 +++-
>  examples/ipsec-secgw/ipsec.h         |  22 +++++++
>  examples/ipsec-secgw/ipsec_process.c |   5 ++
>  5 files changed, 154 insertions(+), 6 deletions(-)
> 
> diff --git a/examples/ipsec-secgw/ipsec-secgw.c 
> b/examples/ipsec-secgw/ipsec-secgw.c
> index 6d02341..db92ddc 100644
> --- a/examples/ipsec-secgw/ipsec-secgw.c
> +++ b/examples/ipsec-secgw/ipsec-secgw.c
> @@ -288,6 +288,61 @@ adjust_ipv6_pktlen(struct rte_mbuf *m, const struct 
> rte_ipv6_hdr *iph,
>       }
>  }
> 
> +#ifdef ENABLE_STATS
> +static uint64_t timer_period = 10; /* default period is 10 seconds */

I think it would be better to give the user the ability to control the stats period.
Either as a run-time option, or just at compile time:
replace ENABLE_STATS with STATS_PERIOD (0 would mean stats are disabled).

> +
> +/* Print out statistics on packet distribution */
> +static void
> +print_stats(void)
> +{
> +     uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
> +     unsigned int coreid;
> +     float burst_percent;
> +
> +     total_packets_dropped = 0;
> +     total_packets_tx = 0;
> +     total_packets_rx = 0;
> +
> +     const char clr[] = { 27, '[', '2', 'J', '\0' };
> +     const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
> +
> +     /* Clear screen and move to top left */
> +     printf("%s%s", clr, topLeft);

Is that really needed?

> +
> +     printf("\nCore statistics ====================================");
> +
> +     for (coreid = 0; coreid < RTE_MAX_LCORE; coreid++) {
> +             /* skip disabled cores */
> +             if (rte_lcore_is_enabled(coreid) == 0)
> +                     continue;
> +             burst_percent = (float)(core_statistics[coreid].burst_rx * 100)/
> +                                     core_statistics[coreid].rx;

Would float always be enough here? Might long double be better?

> +             printf("\nStatistics for core %u ------------------------------"
> +                        "\nPackets received: %20"PRIu64
> +                        "\nPackets sent: %24"PRIu64
> +                        "\nPackets dropped: %21"PRIu64
> +                        "\nBurst percent: %23.2f",
> +                        coreid,
> +                        core_statistics[coreid].rx,
> +                        core_statistics[coreid].tx,
> +                        core_statistics[coreid].dropped,
> +                        burst_percent);
> +
> +             total_packets_dropped += core_statistics[coreid].dropped;
> +             total_packets_tx += core_statistics[coreid].tx;
> +             total_packets_rx += core_statistics[coreid].rx;
> +     }
> +     printf("\nAggregate statistics ==============================="
> +                "\nTotal packets received: %14"PRIu64
> +                "\nTotal packets sent: %18"PRIu64
> +                "\nTotal packets dropped: %15"PRIu64,
> +                total_packets_rx,
> +                total_packets_tx,
> +                total_packets_dropped);
> +     printf("\n====================================================\n");
> +}
> +#endif /* ENABLE_STATS */
> +
>  static inline void
>  prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t)
>  {
> @@ -333,6 +388,7 @@ prepare_one_packet(struct rte_mbuf *pkt, struct 
> ipsec_traffic *t)
> 
>               /* drop packet when IPv6 header exceeds first segment length */
>               if (unlikely(l3len > pkt->data_len)) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(pkt);
>                       return;
>               }
> @@ -350,6 +406,7 @@ prepare_one_packet(struct rte_mbuf *pkt, struct 
> ipsec_traffic *t)
>               /* Unknown/Unsupported type, drop the packet */
>               RTE_LOG(ERR, IPSEC, "Unsupported packet type 0x%x\n",
>                       rte_be_to_cpu_16(eth->ether_type));
> +             core_stats_update_drop(1);
>               rte_pktmbuf_free(pkt);
>               return;
>       }
> @@ -471,6 +528,11 @@ send_burst(struct lcore_conf *qconf, uint16_t n, 
> uint16_t port)
>       int32_t ret;
>       uint16_t queueid;
> 
> +#ifdef ENABLE_STATS
> +     int lcore_id = rte_lcore_id();
> +     core_statistics[lcore_id].tx += n;
> +#endif /* ENABLE_STATS */

Instead of polluting generic code with ifdefs, why not
introduce 2 new functions: core_stats_update_rx(), core_stats_update_tx(),
as you did for core_stats_update_drop()?

> +
>       queueid = qconf->tx_queue_id[port];
>       m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
> 
> @@ -478,6 +540,9 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t 
> port)
> 
>       ret = rte_eth_tx_burst(port, queueid, m_table, n);
>       if (unlikely(ret < n)) {
> +#ifdef ENABLE_STATS
> +             core_statistics[lcore_id].dropped += n-ret;
> +#endif /* ENABLE_STATS */

You have core_stats_update_drop() for that - use it.

>               do {
>                       rte_pktmbuf_free(m_table[ret]);
>               } while (++ret < n);
> @@ -525,6 +590,7 @@ send_fragment_packet(struct lcore_conf *qconf, struct 
> rte_mbuf *m,
>                       "error code: %d\n",
>                       __func__, m->pkt_len, rte_errno);
> 
> +     core_stats_update_drop(1);
>       rte_pktmbuf_free(m);
>       return len;
>  }
> @@ -549,8 +615,10 @@ send_single_packet(struct rte_mbuf *m, uint16_t port, 
> uint8_t proto)
>       /* need to fragment the packet */
>       } else if (frag_tbl_sz > 0)
>               len = send_fragment_packet(qconf, m, port, proto);
> -     else
> +     else {
> +             core_stats_update_drop(1);
>               rte_pktmbuf_free(m);

It looks like there are a lot of such places...
Would it be worth uniting core_stats_update_drop() and rte_pktmbuf_free(m)
into one inline function:
ipsec_secgw_packet_drop(struct rte_mbuf *m[], uint32_t n)
and using it in all such places?

> +     }
> 
>       /* enough pkts to be sent */
>       if (unlikely(len == MAX_PKT_BURST)) {
> @@ -584,18 +652,21 @@ inbound_sp_sa(struct sp_ctx *sp, struct sa_ctx *sa, 
> struct traffic_type *ip,
>                       continue;
>               }
>               if (res == DISCARD) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(m);
>                       continue;
>               }
> 
>               /* Only check SPI match for processed IPSec packets */
>               if (i < lim && ((m->ol_flags & PKT_RX_SEC_OFFLOAD) == 0)) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(m);
>                       continue;
>               }
> 
>               sa_idx = res - 1;
>               if (!inbound_sa_check(sa, m, sa_idx)) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(m);
>                       continue;
>               }
> @@ -630,8 +701,10 @@ split46_traffic(struct ipsec_traffic *trf, struct 
> rte_mbuf *mb[], uint32_t num)
>                                       uint8_t *,
>                                       offsetof(struct ip6_hdr, ip6_nxt));
>                       n6++;
> -             } else
> +             } else {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(m);
> +             }
>       }
> 
>       trf->ip4.num = n4;
> @@ -682,11 +755,12 @@ outbound_sp(struct sp_ctx *sp, struct traffic_type *ip,
>       for (i = 0; i < ip->num; i++) {
>               m = ip->pkts[i];
>               sa_idx = ip->res[i] - 1;
> -             if (ip->res[i] == DISCARD)
> +             if (ip->res[i] == DISCARD) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(m);
> -             else if (ip->res[i] == BYPASS)
> +             } else if (ip->res[i] == BYPASS) {

Looks unnecessary.

>                       ip->pkts[j++] = m;
> -             else {
> +             } else {
>                       ipsec->res[ipsec->num] = sa_idx;
>                       ipsec->pkts[ipsec->num++] = m;
>               }
> @@ -705,6 +779,8 @@ process_pkts_outbound(struct ipsec_ctx *ipsec_ctx,
>       for (i = 0; i < traffic->ipsec.num; i++)
>               rte_pktmbuf_free(traffic->ipsec.pkts[i]);
> 
> +     core_stats_update_drop(traffic->ipsec.num);
> +
>       traffic->ipsec.num = 0;
> 
>       outbound_sp(ipsec_ctx->sp4_ctx, &traffic->ip4, &traffic->ipsec);
> @@ -745,12 +821,14 @@ process_pkts_inbound_nosp(struct ipsec_ctx *ipsec_ctx,
>       /* Drop any IPv4 traffic from unprotected ports */
>       for (i = 0; i < traffic->ip4.num; i++)
>               rte_pktmbuf_free(traffic->ip4.pkts[i]);
> +     core_stats_update_drop(traffic->ip4.num);
> 
>       traffic->ip4.num = 0;
> 
>       /* Drop any IPv6 traffic from unprotected ports */
>       for (i = 0; i < traffic->ip6.num; i++)
>               rte_pktmbuf_free(traffic->ip6.pkts[i]);
> +     core_stats_update_drop(traffic->ip6.num);
> 
>       traffic->ip6.num = 0;
> 
> @@ -788,6 +866,7 @@ process_pkts_outbound_nosp(struct ipsec_ctx *ipsec_ctx,
>       /* Drop any IPsec traffic from protected ports */
>       for (i = 0; i < traffic->ipsec.num; i++)
>               rte_pktmbuf_free(traffic->ipsec.pkts[i]);
> +     core_stats_update_drop(traffic->ipsec.num);
> 
>       n = 0;
> 
> @@ -901,6 +980,7 @@ route4_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf 
> *pkts[], uint8_t nb_pkts)
>               }
> 
>               if ((pkt_hop & RTE_LPM_LOOKUP_SUCCESS) == 0) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(pkts[i]);
>                       continue;
>               }
> @@ -953,6 +1033,7 @@ route6_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf 
> *pkts[], uint8_t nb_pkts)
>               }
> 
>               if (pkt_hop == -1) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(pkts[i]);
>                       continue;
>               }
> @@ -1099,6 +1180,9 @@ ipsec_poll_mode_worker(void)
>       const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
>                       / US_PER_S * BURST_TX_DRAIN_US;
>       struct lcore_rx_queue *rxql;
> +#ifdef ENABLE_STATS
> +     uint64_t timer_tsc = 0;
> +#endif /* ENABLE_STATS */

It is probably better to just use RTE_SET_USED(timer_tsc);

> 
>       prev_tsc = 0;
>       lcore_id = rte_lcore_id();
> @@ -1159,6 +1243,19 @@ ipsec_poll_mode_worker(void)
>                       drain_tx_buffers(qconf);
>                       drain_crypto_buffers(qconf);
>                       prev_tsc = cur_tsc;
> +#ifdef ENABLE_STATS
> +                     if (lcore_id == rte_get_master_lcore()) {
> +                             /* advance the timer */
> +                             timer_tsc += diff_tsc;
> +
> +                             /* if timer has reached its timeout */
> +                             if (unlikely(timer_tsc >= timer_period)) {
> +                                     print_stats();
> +                                     /* reset the timer */
> +                                     timer_tsc = 0;
> +                             }
> +                     }
> +#endif /* ENABLE_STATS */


Why do stats collection/display inside the data-path?
Why not use rte_timer/rte_alarm and make it happen in a control thread?
Another option - print the stats when some signal is sent to the process.
In that case we don't need to bother with a time period at all - it will be
up to the user to decide.
Again, if we remove print_stats() from the data-path, it might become
cheap enough to always collect the stats, and we will not need the ENABLE_STATS
macro at all.

>               }
> 
>               for (i = 0; i < qconf->nb_rx_queue; ++i) {
> @@ -1169,6 +1266,12 @@ ipsec_poll_mode_worker(void)
>                       nb_rx = rte_eth_rx_burst(portid, queueid,
>                                       pkts, MAX_PKT_BURST);
> 
> +#ifdef ENABLE_STATS
> +                     core_statistics[lcore_id].rx += nb_rx;
> +                     if (nb_rx == MAX_PKT_BURST)
> +                             core_statistics[lcore_id].burst_rx += nb_rx;
> +#endif /* ENABLE_STATS */
> +

Same as above for TX: no need to pollute the code with ifdefs.
Better to introduce a new function: stats_update_rx() or so.


>                       if (nb_rx > 0)
>                               process_pkts(qconf, pkts, nb_rx, portid);
> 
> @@ -2747,6 +2850,11 @@ main(int32_t argc, char **argv)
>       signal(SIGINT, signal_handler);
>       signal(SIGTERM, signal_handler);
> 
> +#ifdef ENABLE_STATS
> +     /* convert to number of cycles */
> +     timer_period *= rte_get_timer_hz();
> +#endif /* ENABLE_STATS */
> +
>       /* initialize event helper configuration */
>       eh_conf = eh_conf_init();
>       if (eh_conf == NULL)
> diff --git a/examples/ipsec-secgw/ipsec-secgw.h 
> b/examples/ipsec-secgw/ipsec-secgw.h
> index 4b53cb5..d886a35 100644
> --- a/examples/ipsec-secgw/ipsec-secgw.h
> +++ b/examples/ipsec-secgw/ipsec-secgw.h
> @@ -6,6 +6,8 @@
> 
>  #include <stdbool.h>
> 
> +//#define ENABLE_STATS
> +

Should be removed I think.

>  #define NB_SOCKETS 4
> 
>  #define MAX_PKT_BURST 32
> diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
> index bf88d80..dcb9312 100644
> --- a/examples/ipsec-secgw/ipsec.c
> +++ b/examples/ipsec-secgw/ipsec.c
> @@ -499,8 +499,10 @@ enqueue_cop_burst(struct cdev_qp *cqp)
>                       " enqueued %u crypto ops out of %u\n",
>                       cqp->id, cqp->qp, ret, len);
>                       /* drop packets that we fail to enqueue */
> -                     for (i = ret; i < len; i++)
> +                     for (i = ret; i < len; i++) {
> +                             core_stats_update_drop(1);
>                               rte_pktmbuf_free(cqp->buf[i]->sym->m_src);
> +                     }
>       }
>       cqp->in_flight += ret;
>       cqp->len = 0;
> @@ -528,6 +530,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx 
> *ipsec_ctx,
> 
>       for (i = 0; i < nb_pkts; i++) {
>               if (unlikely(sas[i] == NULL)) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(pkts[i]);
>                       continue;
>               }
> @@ -549,6 +552,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx 
> *ipsec_ctx,
> 
>                       if ((unlikely(ips->security.ses == NULL)) &&
>                               create_lookaside_session(ipsec_ctx, sa, ips)) {
> +                             core_stats_update_drop(1);
>                               rte_pktmbuf_free(pkts[i]);
>                               continue;
>                       }
> @@ -563,6 +567,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx 
> *ipsec_ctx,
>               case RTE_SECURITY_ACTION_TYPE_CPU_CRYPTO:
>                       RTE_LOG(ERR, IPSEC, "CPU crypto is not supported by the"
>                                       " legacy mode.");
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(pkts[i]);
>                       continue;
> 
> @@ -575,6 +580,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx 
> *ipsec_ctx,
> 
>                       if ((unlikely(ips->crypto.ses == NULL)) &&
>                               create_lookaside_session(ipsec_ctx, sa, ips)) {
> +                             core_stats_update_drop(1);
>                               rte_pktmbuf_free(pkts[i]);
>                               continue;
>                       }
> @@ -584,6 +590,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx 
> *ipsec_ctx,
> 
>                       ret = xform_func(pkts[i], sa, &priv->cop);
>                       if (unlikely(ret)) {
> +                             core_stats_update_drop(1);
>                               rte_pktmbuf_free(pkts[i]);
>                               continue;
>                       }
> @@ -608,6 +615,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx 
> *ipsec_ctx,
> 
>                       ret = xform_func(pkts[i], sa, &priv->cop);
>                       if (unlikely(ret)) {
> +                             core_stats_update_drop(1);
>                               rte_pktmbuf_free(pkts[i]);
>                               continue;
>                       }
> @@ -643,6 +651,7 @@ ipsec_inline_dequeue(ipsec_xform_fn xform_func, struct 
> ipsec_ctx *ipsec_ctx,
>               sa = priv->sa;
>               ret = xform_func(pkt, sa, &priv->cop);
>               if (unlikely(ret)) {
> +                     core_stats_update_drop(1);
>                       rte_pktmbuf_free(pkt);
>                       continue;
>               }
> @@ -690,12 +699,14 @@ ipsec_dequeue(ipsec_xform_fn xform_func, struct 
> ipsec_ctx *ipsec_ctx,
>                               RTE_SECURITY_ACTION_TYPE_NONE) {
>                               ret = xform_func(pkt, sa, cops[j]);
>                               if (unlikely(ret)) {
> +                                     core_stats_update_drop(1);
>                                       rte_pktmbuf_free(pkt);
>                                       continue;
>                               }
>                       } else if (ipsec_get_action_type(sa) ==
>                               RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL) {
>                               if (cops[j]->status) {
> +                                     core_stats_update_drop(1);
>                                       rte_pktmbuf_free(pkt);
>                                       continue;
>                               }
> diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h
> index 1e642d1..8519eab 100644
> --- a/examples/ipsec-secgw/ipsec.h
> +++ b/examples/ipsec-secgw/ipsec.h
> @@ -46,6 +46,17 @@
> 
>  #define IP6_VERSION (6)
> 
> +#ifdef ENABLE_STATS
> +struct ipsec_core_statistics {
> +     uint64_t tx;
> +     uint64_t rx;
> +     uint64_t dropped;
> +     uint64_t burst_rx;

It is a bit strange to have burst_rx and no similar counterpart for tx.
BTW, do you need burst_rx?
It might be better to have:
nb_calls_rx, nb_calls_tx;
and then rx/nb_calls_rx will give you the average burst size.

> +} __rte_cache_aligned;
> +
> +struct ipsec_core_statistics core_statistics[RTE_MAX_ETHPORTS];

Should be RTE_MAX_LCORE, I think.

> +#endif /* ENABLE_STATS */
> +
>  struct rte_crypto_xform;
>  struct ipsec_xform;
>  struct rte_mbuf;
> @@ -416,4 +427,15 @@ check_flow_params(uint16_t fdir_portid, uint8_t 
> fdir_qid);
>  int
>  create_ipsec_esp_flow(struct ipsec_sa *sa);
> 
> +static inline void
> +core_stats_update_drop(int n)
> +{
> +#ifdef ENABLE_STATS
> +     int lcore_id = rte_lcore_id();
> +     core_statistics[lcore_id].dropped += n;
> +#else
> +     RTE_SET_USED(n);
> +#endif /* ENABLE_STATS */
> +}
> +
>  #endif /* __IPSEC_H__ */
> diff --git a/examples/ipsec-secgw/ipsec_process.c 
> b/examples/ipsec-secgw/ipsec_process.c
> index bb2f2b8..05cb3ad 100644
> --- a/examples/ipsec-secgw/ipsec_process.c
> +++ b/examples/ipsec-secgw/ipsec_process.c
> @@ -24,6 +24,11 @@ free_pkts(struct rte_mbuf *mb[], uint32_t n)
>  {
>       uint32_t i;
> 
> +#ifdef ENABLE_STATS
> +     int lcore_id = rte_lcore_id();
> +     core_statistics[lcore_id].dropped += n;
> +#endif /* ENABLE_STATS */
> +

Same as above - why not use core_stats_update_drop() here?

>       for (i = 0; i != n; i++)
>               rte_pktmbuf_free(mb[i]);
>  }
> --
> 2.7.4

Reply via email to