> -----Original Message-----
> From: Su, Simei <simei...@intel.com>
> Sent: Thursday, October 28, 2021 10:42 PM
> To: Zhang, Qi Z <qi.z.zh...@intel.com>
> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haa...@intel.com>; Wu,
> Wenjun1 <wenjun1...@intel.com>; Su, Simei <simei...@intel.com>
> Subject: [PATCH v2] net/ice: fix performance issue for Rx timestamp
> 
> In Rx data path, it reads hardware registers per packet, resulting in big
> performance drop. This patch improves performance from two aspects:
> (1) replace per packet hardware register read by per burst.
> (2) reduce hardware register read time from 3 to 2 when the low value of
>     time is not close to overflow.
> 
> Meanwhile, this patch refines "ice_timesync_read_rx_timestamp" and
> "ice_timesync_read_tx_timestamp" API in which
> "ice_tstamp_convert_32b_64b"
> is also used.
> 
> Fixes: 953e74e6b73a ("net/ice: enable Rx timestamp on flex descriptor")
> Fixes: 646dcbe6c701 ("net/ice: support IEEE 1588 PTP")
> 
> Suggested-by: Harry van Haaren <harry.van.haa...@intel.com>
> Signed-off-by: Simei Su <simei...@intel.com>
> ---
> v2:
> * Add "guard band" of checking wrap-around.
> 
>  drivers/net/ice/ice_ethdev.c |  4 +--
>  drivers/net/ice/ice_ethdev.h |  1 +
>  drivers/net/ice/ice_rxtx.c   | 59 
> ++++++++++++++++++++++++++------------------
>  drivers/net/ice/ice_rxtx.h   | 34 +++++++++++++++----------
>  4 files changed, 59 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index
> ef6ee1c..13a7a97 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -5560,7 +5560,7 @@ ice_timesync_read_rx_timestamp(struct
> rte_eth_dev *dev,
>       rxq = dev->data->rx_queues[flags];
> 
>       ts_high = rxq->time_high;
> -     ts_ns = ice_tstamp_convert_32b_64b(hw, ts_high);
> +     ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, ts_high);
>       ns = rte_timecounter_update(&ad->rx_tstamp_tc, ts_ns);
>       *timestamp = rte_ns_to_timespec(ns);
> 
> @@ -5587,7 +5587,7 @@ ice_timesync_read_tx_timestamp(struct
> rte_eth_dev *dev,
>               return -1;
>       }
> 
> -     ts_ns = ice_tstamp_convert_32b_64b(hw, (tstamp >> 8) & mask);
> +     ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1, (tstamp >> 8) & mask);
>       ns = rte_timecounter_update(&ad->tx_tstamp_tc, ts_ns);
>       *timestamp = rte_ns_to_timespec(ns);
> 
> diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h index
> 599e002..0e42c4c 100644
> --- a/drivers/net/ice/ice_ethdev.h
> +++ b/drivers/net/ice/ice_ethdev.h
> @@ -509,6 +509,7 @@ struct ice_adapter {
>       struct rte_timecounter rx_tstamp_tc;
>       struct rte_timecounter tx_tstamp_tc;
>       bool ptp_ena;
> +     uint64_t time_hw;
>  #ifdef RTE_ARCH_X86
>       bool rx_use_avx2;
>       bool rx_use_avx512;
> diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index
> c3cad2f..2d771ea 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -1581,6 +1581,9 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
>       if (!(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
>               return 0;
> 
> +     if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +             rxq->hw_register_set = 1;
> +
>       /**
>        * Scan LOOK_AHEAD descriptors at a time to determine which
>        * descriptors reference packets that are ready to be received.
> @@ -1614,15 +1617,15 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq)
>                       ice_rxd_to_vlan_tci(mb, &rxdp[j]);
>                       rxd_to_pkt_fields_ops[rxq->rxdid](rxq, mb, &rxdp[j]);  
> #ifndef
> RTE_LIBRTE_ICE_16BYTE_RX_DESC
> -                     if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> -                             ts_ns = ice_tstamp_convert_32b_64b(hw,
> +                     if (ice_timestamp_dynflag > 0) {
> +                             ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> +                                     rxq->hw_register_set,
>                                       
> rte_le_to_cpu_32(rxdp[j].wb.flex_ts.ts_high));
> -                             if (ice_timestamp_dynflag > 0) {
> -                                     *RTE_MBUF_DYNFIELD(mb,
> -                                             ice_timestamp_dynfield_offset,
> -                                             rte_mbuf_timestamp_t *) = ts_ns;
> -                                     mb->ol_flags |= ice_timestamp_dynflag;
> -                             }
> +                             rxq->hw_register_set = 0;
> +                             *RTE_MBUF_DYNFIELD(mb,
> +                                     ice_timestamp_dynfield_offset,
> +                                     rte_mbuf_timestamp_t *) = ts_ns;
> +                             mb->ol_flags |= ice_timestamp_dynflag;
>                       }
> 
>                       if (ad->ptp_ena && ((mb->packet_type & @@ -1822,6 
> +1825,10
> @@ ice_recv_scattered_pkts(void *rx_queue,
>       uint64_t ts_ns;
>       struct ice_adapter *ad = rxq->vsi->adapter;  #endif
> +
> +     if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +             rxq->hw_register_set = 1;
> +
>       while (nb_rx < nb_pkts) {
>               rxdp = &rx_ring[rx_id];
>               rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
> @@ -1932,15 +1939,15 @@ ice_recv_scattered_pkts(void *rx_queue,
>               rxd_to_pkt_fields_ops[rxq->rxdid](rxq, first_seg, &rxd);
>               pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
>  #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> -             if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> -                     ts_ns = ice_tstamp_convert_32b_64b(hw,
> +             if (ice_timestamp_dynflag > 0) {
> +                     ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> +                             rxq->hw_register_set,
>                               rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
> -                     if (ice_timestamp_dynflag > 0) {
> -                             *RTE_MBUF_DYNFIELD(first_seg,
> -                                     ice_timestamp_dynfield_offset,
> -                                     rte_mbuf_timestamp_t *) = ts_ns;
> -                             first_seg->ol_flags |= ice_timestamp_dynflag;
> -                     }
> +                     rxq->hw_register_set = 0;
> +                     *RTE_MBUF_DYNFIELD(first_seg,
> +                             ice_timestamp_dynfield_offset,
> +                             rte_mbuf_timestamp_t *) = ts_ns;
> +                     first_seg->ol_flags |= ice_timestamp_dynflag;
>               }
> 
>               if (ad->ptp_ena && ((first_seg->packet_type & RTE_PTYPE_L2_MASK)
> @@ -2312,6 +2319,10 @@ ice_recv_pkts(void *rx_queue,
>       uint64_t ts_ns;
>       struct ice_adapter *ad = rxq->vsi->adapter;  #endif
> +
> +     if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
> +             rxq->hw_register_set = 1;
> +
>       while (nb_rx < nb_pkts) {
>               rxdp = &rx_ring[rx_id];
>               rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);
> @@ -2363,15 +2374,15 @@ ice_recv_pkts(void *rx_queue,
>               rxd_to_pkt_fields_ops[rxq->rxdid](rxq, rxm, &rxd);
>               pkt_flags = ice_rxd_error_to_pkt_flags(rx_stat_err0);
>  #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
> -             if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
> -                     ts_ns = ice_tstamp_convert_32b_64b(hw,
> +             if (ice_timestamp_dynflag > 0) {
> +                     ts_ns = ice_tstamp_convert_32b_64b(hw, ad,
> +                             rxq->hw_register_set,
>                               rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high));
> -                     if (ice_timestamp_dynflag > 0) {
> -                             *RTE_MBUF_DYNFIELD(rxm,
> -                                     ice_timestamp_dynfield_offset,
> -                                     rte_mbuf_timestamp_t *) = ts_ns;
> -                             rxm->ol_flags |= ice_timestamp_dynflag;
> -                     }
> +                     rxq->hw_register_set = 0;
> +                     *RTE_MBUF_DYNFIELD(rxm,
> +                             ice_timestamp_dynfield_offset,
> +                             rte_mbuf_timestamp_t *) = ts_ns;
> +                     rxm->ol_flags |= ice_timestamp_dynflag;
>               }
> 
>               if (ad->ptp_ena && ((rxm->packet_type & RTE_PTYPE_L2_MASK) ==
> diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h index
> 146dc1f..02223c5 100644
> --- a/drivers/net/ice/ice_rxtx.h
> +++ b/drivers/net/ice/ice_rxtx.h
> @@ -93,6 +93,7 @@ struct ice_rx_queue {
>       ice_rx_release_mbufs_t rx_rel_mbufs;
>       uint64_t offloads;
>       uint32_t time_high;
> +     uint32_t hw_register_set;
>       const struct rte_memzone *mz;
>  };
> 
> @@ -321,29 +322,36 @@ void ice_fdir_rx_parsing_enable(struct ice_adapter
> *ad, bool on)
> 
>  /* Helper function to convert a 32b nanoseconds timestamp to 64b. */
> static inline -uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw,
> uint32_t in_timestamp)
> +uint64_t ice_tstamp_convert_32b_64b(struct ice_hw *hw, struct ice_adapter
> *ad,
> +                                 uint32_t flag, uint32_t in_timestamp)
>  {
>       const uint64_t mask = 0xFFFFFFFF;
>       uint32_t hi, lo, lo2, delta;
> -     uint64_t time, ns;
> +     uint64_t ns;
> 
> -     lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
> -     hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
> -     lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
> -
> -     if (lo2 < lo) {
> +     if (flag) {
>               lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0));
>               hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0));
> -     }
> 
> -     time = ((uint64_t)hi << 32) | lo;
> +             if (lo > (UINT32_MAX - 10000))

better use a meaningful macro to replace magic number here.


Reply via email to