> Adding per core packet handling stats to analyze traffic distribution > when multiple cores are engaged. > > Since aggregating the packet stats across cores would affect > performance, keeping the feature disabled using compile time flags. > > Signed-off-by: Anoob Joseph <ano...@marvell.com> > --- > > v2: > * Added lookup failure cases to drop count > > examples/ipsec-secgw/ipsec-secgw.c | 118 > +++++++++++++++++++++++++++++++++-- > examples/ipsec-secgw/ipsec-secgw.h | 2 + > examples/ipsec-secgw/ipsec.c | 13 +++- > examples/ipsec-secgw/ipsec.h | 22 +++++++ > examples/ipsec-secgw/ipsec_process.c | 5 ++ > 5 files changed, 154 insertions(+), 6 deletions(-) > > diff --git a/examples/ipsec-secgw/ipsec-secgw.c > b/examples/ipsec-secgw/ipsec-secgw.c > index 6d02341..db92ddc 100644 > --- a/examples/ipsec-secgw/ipsec-secgw.c > +++ b/examples/ipsec-secgw/ipsec-secgw.c > @@ -288,6 +288,61 @@ adjust_ipv6_pktlen(struct rte_mbuf *m, const struct > rte_ipv6_hdr *iph, > } > } > > +#ifdef ENABLE_STATS > +static uint64_t timer_period = 10; /* default period is 10 seconds */
I think it is better to give the user the ability to control the stats period: either a runtime option, or just compile time: replace ENABLE_STATS with STATS_PERIOD (0 would mean stats disabled). > + > +/* Print out statistics on packet distribution */ > +static void > +print_stats(void) > +{ > + uint64_t total_packets_dropped, total_packets_tx, total_packets_rx; > + unsigned int coreid; > + float burst_percent; > + > + total_packets_dropped = 0; > + total_packets_tx = 0; > + total_packets_rx = 0; > + > + const char clr[] = { 27, '[', '2', 'J', '\0' }; > + const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' }; > + > + /* Clear screen and move to top left */ > + printf("%s%s", clr, topLeft); Is that really needed? > + > + printf("\nCore statistics ===================================="); > + > + for (coreid = 0; coreid < RTE_MAX_LCORE; coreid++) { > + /* skip disabled cores */ > + if (rte_lcore_is_enabled(coreid) == 0) > + continue; > + burst_percent = (float)(core_statistics[coreid].burst_rx * 100)/ > + core_statistics[coreid].rx; Would float always be enough here? Might long double be better? 
> + printf("\nStatistics for core %u ------------------------------" > + "\nPackets received: %20"PRIu64 > + "\nPackets sent: %24"PRIu64 > + "\nPackets dropped: %21"PRIu64 > + "\nBurst percent: %23.2f", > + coreid, > + core_statistics[coreid].rx, > + core_statistics[coreid].tx, > + core_statistics[coreid].dropped, > + burst_percent); > + > + total_packets_dropped += core_statistics[coreid].dropped; > + total_packets_tx += core_statistics[coreid].tx; > + total_packets_rx += core_statistics[coreid].rx; > + } > + printf("\nAggregate statistics ===============================" > + "\nTotal packets received: %14"PRIu64 > + "\nTotal packets sent: %18"PRIu64 > + "\nTotal packets dropped: %15"PRIu64, > + total_packets_rx, > + total_packets_tx, > + total_packets_dropped); > + printf("\n====================================================\n"); > +} > +#endif /* ENABLE_STATS */ > + > static inline void > prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t) > { > @@ -333,6 +388,7 @@ prepare_one_packet(struct rte_mbuf *pkt, struct > ipsec_traffic *t) > > /* drop packet when IPv6 header exceeds first segment length */ > if (unlikely(l3len > pkt->data_len)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkt); > return; > } > @@ -350,6 +406,7 @@ prepare_one_packet(struct rte_mbuf *pkt, struct > ipsec_traffic *t) > /* Unknown/Unsupported type, drop the packet */ > RTE_LOG(ERR, IPSEC, "Unsupported packet type 0x%x\n", > rte_be_to_cpu_16(eth->ether_type)); > + core_stats_update_drop(1); > rte_pktmbuf_free(pkt); > return; > } > @@ -471,6 +528,11 @@ send_burst(struct lcore_conf *qconf, uint16_t n, > uint16_t port) > int32_t ret; > uint16_t queueid; > > +#ifdef ENABLE_STATS > + int lcore_id = rte_lcore_id(); > + core_statistics[lcore_id].tx += n; > +#endif /* ENABLE_STATS */ Instead of polluting generic code with ifdefs, why not introduce 2 new functions: core_stats_update_rx(), core_stats_update_tx(), as you did for core_stats_update_drop()? 
> + > queueid = qconf->tx_queue_id[port]; > m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; > > @@ -478,6 +540,9 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t > port) > > ret = rte_eth_tx_burst(port, queueid, m_table, n); > if (unlikely(ret < n)) { > +#ifdef ENABLE_STATS > + core_statistics[lcore_id].dropped += n-ret; > +#endif /* ENABLE_STATS */ You have core_stats_update_drop() for that - use it. > do { > rte_pktmbuf_free(m_table[ret]); > } while (++ret < n); > @@ -525,6 +590,7 @@ send_fragment_packet(struct lcore_conf *qconf, struct > rte_mbuf *m, > "error code: %d\n", > __func__, m->pkt_len, rte_errno); > > + core_stats_update_drop(1); > rte_pktmbuf_free(m); > return len; > } > @@ -549,8 +615,10 @@ send_single_packet(struct rte_mbuf *m, uint16_t port, > uint8_t proto) > /* need to fragment the packet */ > } else if (frag_tbl_sz > 0) > len = send_fragment_packet(qconf, m, port, proto); > - else > + else { > + core_stats_update_drop(1); > rte_pktmbuf_free(m); There are a lot of such places... Would it be worth uniting core_stats_update_drop() and rte_pktmbuf_free(m) into some inline function: ipsec_secgw_packet_drop(struct rte_mbuf *m[], uint32_t n) and using it in all such places? 
> + } > > /* enough pkts to be sent */ > if (unlikely(len == MAX_PKT_BURST)) { > @@ -584,18 +652,21 @@ inbound_sp_sa(struct sp_ctx *sp, struct sa_ctx *sa, > struct traffic_type *ip, > continue; > } > if (res == DISCARD) { > + core_stats_update_drop(1); > rte_pktmbuf_free(m); > continue; > } > > /* Only check SPI match for processed IPSec packets */ > if (i < lim && ((m->ol_flags & PKT_RX_SEC_OFFLOAD) == 0)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(m); > continue; > } > > sa_idx = res - 1; > if (!inbound_sa_check(sa, m, sa_idx)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(m); > continue; > } > @@ -630,8 +701,10 @@ split46_traffic(struct ipsec_traffic *trf, struct > rte_mbuf *mb[], uint32_t num) > uint8_t *, > offsetof(struct ip6_hdr, ip6_nxt)); > n6++; > - } else > + } else { > + core_stats_update_drop(1); > rte_pktmbuf_free(m); > + } > } > > trf->ip4.num = n4; > @@ -682,11 +755,12 @@ outbound_sp(struct sp_ctx *sp, struct traffic_type *ip, > for (i = 0; i < ip->num; i++) { > m = ip->pkts[i]; > sa_idx = ip->res[i] - 1; > - if (ip->res[i] == DISCARD) > + if (ip->res[i] == DISCARD) { > + core_stats_update_drop(1); > rte_pktmbuf_free(m); > - else if (ip->res[i] == BYPASS) > + } else if (ip->res[i] == BYPASS) { Looks unnecessary. 
> ip->pkts[j++] = m; > - else { > + } else { > ipsec->res[ipsec->num] = sa_idx; > ipsec->pkts[ipsec->num++] = m; > } > @@ -705,6 +779,8 @@ process_pkts_outbound(struct ipsec_ctx *ipsec_ctx, > for (i = 0; i < traffic->ipsec.num; i++) > rte_pktmbuf_free(traffic->ipsec.pkts[i]); > > + core_stats_update_drop(traffic->ipsec.num); > + > traffic->ipsec.num = 0; > > outbound_sp(ipsec_ctx->sp4_ctx, &traffic->ip4, &traffic->ipsec); > @@ -745,12 +821,14 @@ process_pkts_inbound_nosp(struct ipsec_ctx *ipsec_ctx, > /* Drop any IPv4 traffic from unprotected ports */ > for (i = 0; i < traffic->ip4.num; i++) > rte_pktmbuf_free(traffic->ip4.pkts[i]); > + core_stats_update_drop(traffic->ip4.num); > > traffic->ip4.num = 0; > > /* Drop any IPv6 traffic from unprotected ports */ > for (i = 0; i < traffic->ip6.num; i++) > rte_pktmbuf_free(traffic->ip6.pkts[i]); > + core_stats_update_drop(traffic->ip6.num); > > traffic->ip6.num = 0; > > @@ -788,6 +866,7 @@ process_pkts_outbound_nosp(struct ipsec_ctx *ipsec_ctx, > /* Drop any IPsec traffic from protected ports */ > for (i = 0; i < traffic->ipsec.num; i++) > rte_pktmbuf_free(traffic->ipsec.pkts[i]); > + core_stats_update_drop(traffic->ipsec.num); > > n = 0; > > @@ -901,6 +980,7 @@ route4_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf > *pkts[], uint8_t nb_pkts) > } > > if ((pkt_hop & RTE_LPM_LOOKUP_SUCCESS) == 0) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > } > @@ -953,6 +1033,7 @@ route6_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf > *pkts[], uint8_t nb_pkts) > } > > if (pkt_hop == -1) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > } > @@ -1099,6 +1180,9 @@ ipsec_poll_mode_worker(void) > const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) > / US_PER_S * BURST_TX_DRAIN_US; > struct lcore_rx_queue *rxql; > +#ifdef ENABLE_STATS > + uint64_t timer_tsc = 0; > +#endif /* ENABLE_STATS */ Probably better just RTE_SET_USED(timer_tsc); > > prev_tsc = 0; > lcore_id = rte_lcore_id(); > 
@@ -1159,6 +1243,19 @@ ipsec_poll_mode_worker(void) > drain_tx_buffers(qconf); > drain_crypto_buffers(qconf); > prev_tsc = cur_tsc; > +#ifdef ENABLE_STATS > + if (lcore_id == rte_get_master_lcore()) { > + /* advance the timer */ > + timer_tsc += diff_tsc; > + > + /* if timer has reached its timeout */ > + if (unlikely(timer_tsc >= timer_period)) { > + print_stats(); > + /* reset the timer */ > + timer_tsc = 0; > + } > + } > +#endif /* ENABLE_STATS */ Why do stats collection/display inside the data-path? Why not use rte_timer/rte_alarm and make it happen in a control thread? Another option - print the stats on some signal to the process. In that case we don't need to bother with the time period at all - it will be up to the user to decide. Again, if we remove that print_stats() from the data-path it might become cheap enough to always collect it, and we will not need the ENABLE_STATS macro at all. > } > > for (i = 0; i < qconf->nb_rx_queue; ++i) { > @@ -1169,6 +1266,12 @@ ipsec_poll_mode_worker(void) > nb_rx = rte_eth_rx_burst(portid, queueid, > pkts, MAX_PKT_BURST); > > +#ifdef ENABLE_STATS > + core_statistics[lcore_id].rx += nb_rx; > + if (nb_rx == MAX_PKT_BURST) > + core_statistics[lcore_id].burst_rx += nb_rx; > +#endif /* ENABLE_STATS */ > + Same as above for TX: no need to pollute the code with ifdefs. Better to introduce a new function: stats_update_rx() or so. 
> if (nb_rx > 0) > process_pkts(qconf, pkts, nb_rx, portid); > > @@ -2747,6 +2850,11 @@ main(int32_t argc, char **argv) > signal(SIGINT, signal_handler); > signal(SIGTERM, signal_handler); > > +#ifdef ENABLE_STATS > + /* convert to number of cycles */ > + timer_period *= rte_get_timer_hz(); > +#endif /* ENABLE_STATS */ > + > /* initialize event helper configuration */ > eh_conf = eh_conf_init(); > if (eh_conf == NULL) > diff --git a/examples/ipsec-secgw/ipsec-secgw.h > b/examples/ipsec-secgw/ipsec-secgw.h > index 4b53cb5..d886a35 100644 > --- a/examples/ipsec-secgw/ipsec-secgw.h > +++ b/examples/ipsec-secgw/ipsec-secgw.h > @@ -6,6 +6,8 @@ > > #include <stdbool.h> > > +//#define ENABLE_STATS > + Should be removed I think. > #define NB_SOCKETS 4 > > #define MAX_PKT_BURST 32 > diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c > index bf88d80..dcb9312 100644 > --- a/examples/ipsec-secgw/ipsec.c > +++ b/examples/ipsec-secgw/ipsec.c > @@ -499,8 +499,10 @@ enqueue_cop_burst(struct cdev_qp *cqp) > " enqueued %u crypto ops out of %u\n", > cqp->id, cqp->qp, ret, len); > /* drop packets that we fail to enqueue */ > - for (i = ret; i < len; i++) > + for (i = ret; i < len; i++) { > + core_stats_update_drop(1); > rte_pktmbuf_free(cqp->buf[i]->sym->m_src); > + } > } > cqp->in_flight += ret; > cqp->len = 0; > @@ -528,6 +530,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx > *ipsec_ctx, > > for (i = 0; i < nb_pkts; i++) { > if (unlikely(sas[i] == NULL)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > } > @@ -549,6 +552,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx > *ipsec_ctx, > > if ((unlikely(ips->security.ses == NULL)) && > create_lookaside_session(ipsec_ctx, sa, ips)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > } > @@ -563,6 +567,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx > *ipsec_ctx, > case RTE_SECURITY_ACTION_TYPE_CPU_CRYPTO: > RTE_LOG(ERR, 
IPSEC, "CPU crypto is not supported by the" > " legacy mode."); > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > > @@ -575,6 +580,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx > *ipsec_ctx, > > if ((unlikely(ips->crypto.ses == NULL)) && > create_lookaside_session(ipsec_ctx, sa, ips)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > } > @@ -584,6 +590,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx > *ipsec_ctx, > > ret = xform_func(pkts[i], sa, &priv->cop); > if (unlikely(ret)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > } > @@ -608,6 +615,7 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx > *ipsec_ctx, > > ret = xform_func(pkts[i], sa, &priv->cop); > if (unlikely(ret)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkts[i]); > continue; > } > @@ -643,6 +651,7 @@ ipsec_inline_dequeue(ipsec_xform_fn xform_func, struct > ipsec_ctx *ipsec_ctx, > sa = priv->sa; > ret = xform_func(pkt, sa, &priv->cop); > if (unlikely(ret)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkt); > continue; > } > @@ -690,12 +699,14 @@ ipsec_dequeue(ipsec_xform_fn xform_func, struct > ipsec_ctx *ipsec_ctx, > RTE_SECURITY_ACTION_TYPE_NONE) { > ret = xform_func(pkt, sa, cops[j]); > if (unlikely(ret)) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkt); > continue; > } > } else if (ipsec_get_action_type(sa) == > RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL) { > if (cops[j]->status) { > + core_stats_update_drop(1); > rte_pktmbuf_free(pkt); > continue; > } > diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h > index 1e642d1..8519eab 100644 > --- a/examples/ipsec-secgw/ipsec.h > +++ b/examples/ipsec-secgw/ipsec.h > @@ -46,6 +46,17 @@ > > #define IP6_VERSION (6) > > +#ifdef ENABLE_STATS > +struct ipsec_core_statistics { > + uint64_t tx; > + uint64_t rx; > + uint64_t dropped; > + uint64_t burst_rx; A bit strange to have burst_rx and no 
similar counterpart for tx. BTW, do you need burst_rx? Might be better: nb_calls_rx, nb_calls_tx; and then: rx/nb_calls_rx will give you average burst size. > +} __rte_cache_aligned; > + > +struct ipsec_core_statistics core_statistics[RTE_MAX_ETHPORTS]; Should be RTE_MAX_LCORE, I think. > +#endif /* ENABLE_STATS */ > + > struct rte_crypto_xform; > struct ipsec_xform; > struct rte_mbuf; > @@ -416,4 +427,15 @@ check_flow_params(uint16_t fdir_portid, uint8_t > fdir_qid); > int > create_ipsec_esp_flow(struct ipsec_sa *sa); > > +static inline void > +core_stats_update_drop(int n) > +{ > +#ifdef ENABLE_STATS > + int lcore_id = rte_lcore_id(); > + core_statistics[lcore_id].dropped += n; > +#else > + RTE_SET_USED(n); > +#endif /* ENABLE_STATS */ > +} > + > #endif /* __IPSEC_H__ */ > diff --git a/examples/ipsec-secgw/ipsec_process.c > b/examples/ipsec-secgw/ipsec_process.c > index bb2f2b8..05cb3ad 100644 > --- a/examples/ipsec-secgw/ipsec_process.c > +++ b/examples/ipsec-secgw/ipsec_process.c > @@ -24,6 +24,11 @@ free_pkts(struct rte_mbuf *mb[], uint32_t n) > { > uint32_t i; > > +#ifdef ENABLE_STATS > + int lcore_id = rte_lcore_id(); > + core_statistics[lcore_id].dropped += n; > +#endif /* ENABLE_STATS */ > + Same as above - why not use core_stats_update_drop() here? > for (i = 0; i != n; i++) > rte_pktmbuf_free(mb[i]); > } > -- > 2.7.4