Enable Tdm_desc_chk, then handle the TDM descriptor error interrupt. If it is
a non-fatal error, clear the error to reset the queue. If it is a fatal error,
require users to manually restart the port. This flow prevents the hardware
from stalling PCIe due to a Tx hang, which would otherwise require a reboot to
recover.

Note, however, that when the packet layer lengths do not match the packet type
in the TX descriptor, a non-fatal error is raised if Tdm_desc_chk is enabled,
even though the packet would be transmitted normally with Tdm_desc_chk
disabled. So, to prevent the hardware from over-checking, fix up the layer
lengths according to the packet type.

Signed-off-by: Jiawen Wu <jiawe...@trustnetic.com>
---
 drivers/net/txgbe/base/txgbe_regs.h |   6 ++
 drivers/net/txgbe/txgbe_ethdev.c    |  64 ++++++++++++++++++
 drivers/net/txgbe/txgbe_ethdev.h    |   3 +
 drivers/net/txgbe/txgbe_rxtx.c      | 100 ++++++++++++++++++++++++++++
 drivers/net/txgbe/txgbe_rxtx.h      |   1 +
 5 files changed, 174 insertions(+)

diff --git a/drivers/net/txgbe/base/txgbe_regs.h b/drivers/net/txgbe/base/txgbe_regs.h
index b46d65331e..7a9ba6976f 100644
--- a/drivers/net/txgbe/base/txgbe_regs.h
+++ b/drivers/net/txgbe/base/txgbe_regs.h
@@ -1197,6 +1197,7 @@ enum txgbe_5tuple_protocol {
 #define   TXGBE_ICRMISC_ANDONE MS(19, 0x1) /* link auto-nego done */
 #define   TXGBE_ICRMISC_ERRIG  MS(20, 0x1) /* integrity error */
 #define   TXGBE_ICRMISC_SPI    MS(21, 0x1) /* SPI interface */
+#define   TXGBE_ICRMISC_TXDESC MS(22, 0x1) /* TDM desc error */
 #define   TXGBE_ICRMISC_VFMBX  MS(23, 0x1) /* VF-PF message box */
 #define   TXGBE_ICRMISC_GPIO   MS(26, 0x1) /* GPIO interrupt */
 #define   TXGBE_ICRMISC_ERRPCI MS(27, 0x1) /* pcie request error */
@@ -1382,6 +1383,11 @@ enum txgbe_5tuple_protocol {
 #define   TXGBE_TXCFG_WTHRESH(v)        LS(v, 16, 0x7F)
 #define   TXGBE_TXCFG_FLUSH             MS(26, 0x1)
 
+#define TXGBE_TDM_DESC_CHK(i)          (0x0180B0 + (i) * 4) /*0-3*/
+#define TXGBE_TDM_DESC_NONFATAL(i)     (0x0180C0 + (i) * 4) /*0-3*/
+#define TXGBE_TDM_DESC_FATAL(i)                (0x0180D0 + (i) * 4) /*0-3*/
+#define TXGBE_TDM_DESC_MASK(v)         MS(v, 0x1)
+
 /* interrupt registers */
 #define TXGBE_ITRI                      0x000180
 #define TXGBE_ITR(i)                    (0x000200 + 4 * (i))
diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index 4aa3bfd0bc..bafa9cf829 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -1936,6 +1936,7 @@ txgbe_dev_stop(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        rte_eal_alarm_cancel(txgbe_dev_detect_sfp, dev);
+       rte_eal_alarm_cancel(txgbe_tx_queue_clear_error, dev);
        txgbe_dev_wait_setup_link_complete(dev, 0);
 
        /* disable interrupts */
@@ -2838,6 +2839,60 @@ txgbe_dev_setup_link_alarm_handler(void *param)
        intr->flags &= ~TXGBE_FLAG_NEED_LINK_CONFIG;
 }
 
+static void
+txgbe_do_reset(struct rte_eth_dev *dev)
+{
+       struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+       struct txgbe_tx_queue *txq;
+       u32 i;
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               txq = dev->data->tx_queues[i];
+               txq->resetting = true;
+       }
+
+       rte_delay_ms(1);
+       wr32(hw, TXGBE_RST, TXGBE_RST_LAN(hw->bus.lan_id));
+       txgbe_flush(hw);
+
+       PMD_DRV_LOG(ERR, "Please manually restart the port %d",
+               dev->data->port_id);
+}
+
+static void
+txgbe_tx_ring_recovery(struct rte_eth_dev *dev)
+{
+       struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+       u32 desc_error[4] = {0, 0, 0, 0};
+       struct txgbe_tx_queue *txq;
+       u32 i;
+
+       /* check tdm fatal error */
+       for (i = 0; i < 4; i++) {
+               desc_error[i] = rd32(hw, TXGBE_TDM_DESC_FATAL(i));
+               if (desc_error[i] != 0) {
+                       PMD_DRV_LOG(ERR, "TDM fatal error reg[%d]: 0x%x", i, desc_error[i]);
+                       txgbe_do_reset(dev);
+                       return;
+               }
+       }
+
+       /* check tdm non-fatal error */
+       for (i = 0; i < 4; i++)
+               desc_error[i] = rd32(hw, TXGBE_TDM_DESC_NONFATAL(i));
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               if (desc_error[i / 32] & (1 << i % 32)) {
+                       PMD_DRV_LOG(ERR, "TDM non-fatal error, reset port[%d] queue[%d]",
+                               dev->data->port_id, i);
+                       dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STOPPED;
+                       txq = dev->data->tx_queues[i];
+                       txq->resetting = true;
+                       rte_eal_alarm_set(1000, txgbe_tx_queue_clear_error, (void *)dev);
+               }
+       }
+}
+
 /*
  * If @timeout_ms was 0, it means that it will not return until link complete.
  * It returns 1 on complete, return 0 on timeout.
@@ -3096,6 +3151,7 @@ txgbe_dev_misc_interrupt_setup(struct rte_eth_dev *dev)
        intr->mask |= mask;
        intr->mask_misc |= TXGBE_ICRMISC_GPIO;
        intr->mask_misc |= TXGBE_ICRMISC_ANDONE;
+       intr->mask_misc |= TXGBE_ICRMISC_TXDESC;
        return 0;
 }
 
@@ -3191,6 +3247,9 @@ txgbe_dev_interrupt_get_status(struct rte_eth_dev *dev,
        if (eicr & TXGBE_ICRMISC_HEAT)
                intr->flags |= TXGBE_FLAG_OVERHEAT;
 
+       if (eicr & TXGBE_ICRMISC_TXDESC)
+               intr->flags |= TXGBE_FLAG_TX_DESC_ERR;
+
        ((u32 *)hw->isb_mem)[TXGBE_ISB_MISC] = 0;
 
        return 0;
@@ -3310,6 +3369,11 @@ txgbe_dev_interrupt_action(struct rte_eth_dev *dev,
                intr->flags &= ~TXGBE_FLAG_OVERHEAT;
        }
 
+       if (intr->flags & TXGBE_FLAG_TX_DESC_ERR) {
+               txgbe_tx_ring_recovery(dev);
+               intr->flags &= ~TXGBE_FLAG_TX_DESC_ERR;
+       }
+
        PMD_DRV_LOG(DEBUG, "enable intr immediately");
        txgbe_enable_intr(dev);
        rte_intr_enable(intr_handle);
diff --git a/drivers/net/txgbe/txgbe_ethdev.h b/drivers/net/txgbe/txgbe_ethdev.h
index f0f4ced5b0..302ea9f037 100644
--- a/drivers/net/txgbe/txgbe_ethdev.h
+++ b/drivers/net/txgbe/txgbe_ethdev.h
@@ -31,6 +31,7 @@
 #define TXGBE_FLAG_NEED_LINK_CONFIG (uint32_t)(1 << 4)
 #define TXGBE_FLAG_NEED_AN_CONFIG   (uint32_t)(1 << 5)
 #define TXGBE_FLAG_OVERHEAT         (uint32_t)(1 << 6)
+#define TXGBE_FLAG_TX_DESC_ERR      (uint32_t)(1 << 7)
 
 /*
  * Defines that were not part of txgbe_type.h as they are not used by the
@@ -474,6 +475,8 @@ int txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 
 int txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
 
+void txgbe_tx_queue_clear_error(void *param);
+
 void txgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo);
 
diff --git a/drivers/net/txgbe/txgbe_rxtx.c b/drivers/net/txgbe/txgbe_rxtx.c
index c12726553c..2d2b437643 100644
--- a/drivers/net/txgbe/txgbe_rxtx.c
+++ b/drivers/net/txgbe/txgbe_rxtx.c
@@ -728,6 +728,66 @@ txgbe_get_tun_len(struct rte_mbuf *mbuf)
        return tun_len;
 }
 
+static inline void
+txgbe_fix_offload_len(union txgbe_tx_offload *ol)
+{
+       uint8_t ptid = ol->ptid;
+
+       if (ptid & TXGBE_PTID_PKT_TUN) {
+               if (ol->outer_l2_len == 0)
+                       ol->outer_l2_len = sizeof(struct rte_ether_hdr);
+               if (ol->outer_l3_len == 0) {
+                       if (ptid & TXGBE_PTID_TUN_IPV6)
+                               ol->outer_l3_len = sizeof(struct rte_ipv6_hdr);
+                       else
+                               ol->outer_l3_len = sizeof(struct rte_ipv4_hdr);
+               }
+               if ((ptid & 0xF) == 0) {
+                       ol->l3_len = 0;
+                       ol->l4_len = 0;
+               } else {
+                       goto inner;
+               }
+       }
+
+       if ((ptid & 0xF0) == TXGBE_PTID_PKT_MAC) {
+               if (ol->l2_len == 0)
+                       ol->l2_len = sizeof(struct rte_ether_hdr);
+               ol->l3_len = 0;
+               ol->l4_len = 0;
+       } else if ((ptid & 0xF0) == TXGBE_PTID_PKT_IP) {
+               if (ol->l2_len == 0)
+                       ol->l2_len = sizeof(struct rte_ether_hdr);
+inner:
+               if (ol->l3_len == 0) {
+                       if (ptid & TXGBE_PTID_PKT_IPV6)
+                               ol->l3_len = sizeof(struct rte_ipv6_hdr);
+                       else
+                               ol->l3_len = sizeof(struct rte_ipv4_hdr);
+               }
+               switch (ptid & 0x7) {
+               case 0x1:
+               case 0x2:
+                       ol->l4_len = 0;
+                       break;
+               case 0x3:
+                       if (ol->l4_len == 0)
+                               ol->l4_len =  sizeof(struct rte_udp_hdr);
+                       break;
+               case 0x4:
+                       if (ol->l4_len == 0)
+                               ol->l4_len =  sizeof(struct rte_tcp_hdr);
+                       break;
+               case 0x5:
+                       if (ol->l4_len == 0)
+                               ol->l4_len =  sizeof(struct rte_sctp_hdr);
+                       break;
+               default:
+                       break;
+               }
+       }
+}
+
 static inline uint8_t
 txgbe_parse_tun_ptid(struct rte_mbuf *tx_pkt, uint8_t tun_len)
 {
@@ -782,6 +842,10 @@ txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        uint8_t use_ipsec;
 #endif
 
+       txq = tx_queue;
+       if (txq->resetting)
+               return 0;
+
        tx_offload.data[0] = 0;
        tx_offload.data[1] = 0;
        txq = tx_queue;
@@ -826,6 +890,7 @@ txgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                        if (tx_offload.ptid & TXGBE_PTID_PKT_TUN)
                                tx_offload.ptid |= txgbe_parse_tun_ptid(tx_pkt,
                                                        
tx_offload.outer_tun_len);
+                       txgbe_fix_offload_len(&tx_offload);
 
 #ifdef RTE_LIB_SECURITY
                        if (use_ipsec) {
@@ -4570,6 +4635,11 @@ txgbe_dev_tx_init(struct rte_eth_dev *dev)
                wr32(hw, TXGBE_TXWP(txq->reg_idx), 0);
        }
 
+#ifndef RTE_LIB_SECURITY
+       for (i = 0; i < 4; i++)
+               wr32(hw, TXGBE_TDM_DESC_CHK(i), 0xFFFFFFFF);
+#endif
+
        /* Device configured with multiple TX queues. */
        txgbe_dev_mq_tx_configure(dev);
 }
@@ -4806,6 +4876,7 @@ txgbe_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        rte_wmb();
        wr32(hw, TXGBE_TXWP(txq->reg_idx), txq->tx_tail);
        dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
+       txq->resetting = false;
 
        return 0;
 }
@@ -4863,6 +4934,35 @@ txgbe_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        return 0;
 }
 
+void
+txgbe_tx_queue_clear_error(void *param)
+{
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+       struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+       struct txgbe_tx_queue *txq;
+       u32 i;
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               txq = dev->data->tx_queues[i];
+               if (!txq->resetting)
+                       continue;
+
+               txgbe_dev_save_tx_queue(hw, i);
+
+               /* tx ring reset */
+               wr32(hw, TXGBE_TDM_DESC_NONFATAL(i / 32),
+                       TXGBE_TDM_DESC_MASK(i % 32));
+
+               if (txq->ops != NULL) {
+                       txq->ops->release_mbufs(txq);
+                       txq->ops->reset(txq);
+               }
+
+               txgbe_dev_store_tx_queue(hw, i);
+               txgbe_dev_tx_queue_start(dev, i);
+       }
+}
+
 void
 txgbe_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        struct rte_eth_rxq_info *qinfo)
diff --git a/drivers/net/txgbe/txgbe_rxtx.h b/drivers/net/txgbe/txgbe_rxtx.h
index 9155eb1f70..e668b60b1e 100644
--- a/drivers/net/txgbe/txgbe_rxtx.h
+++ b/drivers/net/txgbe/txgbe_rxtx.h
@@ -412,6 +412,7 @@ struct txgbe_tx_queue {
        /**< indicates that IPsec TX feature is in use */
 #endif
        const struct rte_memzone *mz;
+       bool                resetting;
 };
 
 struct txgbe_txq_ops {
-- 
2.27.0

Reply via email to