On 27.07.23 18:37, Konstantin Khorenko wrote:
Here we've got to a situation when tasklet called usleep(), thus welcome
to the "scheduling while atomic" BUG().

BUG: scheduling while atomic: swapper/24/0/0x00000100

  [<ffffffffb41c6199>] schedule+0x29/0x70
  [<ffffffffb41c5512>] schedule_hrtimeout_range_clock+0xb2/0x150
  [<ffffffffb41c55c3>] schedule_hrtimeout_range+0x13/0x20
  [<ffffffffb41c3bcf>] usleep_range+0x4f/0x70
  [<ffffffffc08d3e58>] qed_ptt_acquire+0x38/0x100 [qed]
  [<ffffffffc08eac48>] _qed_get_vport_stats+0x458/0x580 [qed]
  [<ffffffffc08ead8c>] qed_get_vport_stats+0x1c/0xd0 [qed]
  [<ffffffffc08dffd3>] qed_get_protocol_stats+0x93/0x100 [qed]
                       qed_mcp_send_protocol_stats
           case MFW_DRV_MSG_GET_LAN_STATS:
           case MFW_DRV_MSG_GET_FCOE_STATS:
           case MFW_DRV_MSG_GET_ISCSI_STATS:
           case MFW_DRV_MSG_GET_RDMA_STATS:
  [<ffffffffc08e36d8>] qed_mcp_handle_events+0x2d8/0x890 [qed]
                       qed_int_assertion
                       qed_int_attentions
  [<ffffffffc08d9490>] qed_int_sp_dpc+0xa50/0xdc0 [qed]
  [<ffffffffb3aa7623>] tasklet_action+0x83/0x140
  [<ffffffffb41d9125>] __do_softirq+0x125/0x2bb
  [<ffffffffb41d560c>] call_softirq+0x1c/0x30
  [<ffffffffb3a30645>] do_softirq+0x65/0xa0
  [<ffffffffb3aa78d5>] irq_exit+0x105/0x110
  [<ffffffffb41d8996>] do_IRQ+0x56/0xf0

There was a similar issue in mainstream, but for qede driver and caused
by reading stats via sysfs.

3rd version of the patch fixing that issue, which i have taken as a
base: https://www.spinics.net/lists/netdev/msg901089.html

ms final commit: 42510dffd0e2 ("qed/qede: Fix scheduling while atomic")

Unfortunately last version of the patch which is accepted by mainstream
is far away from version 3, it moves stats updates to a delayed work,
which is the correct way,
but in qed driver i do not see which exactly stats should i collect in a
delayed work for each device (and devices supported by qed driver are
quite different).

Taking into account that we do not have a node access to reproduce and
verify the changes, reworking the code to use delayed work seems
impossible.

So let's live with udelay() crutch which at least is technically clear.

https://jira.vzint.dev/browse/PSBM-146761

Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com>

---
v1->v2:
  - Fixed kdoc for qed_get_protocol_stats_iscsi()

  - Added kdoc descriptions for qed_ptt_acquire_context(),
    qed_get_protocol_stats_fcoe(), qed_get_vport_stats() and
    qed_get_vport_stats_context()
---
  drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 16 ++++++++++++
  drivers/net/ethernet/qlogic/qed/qed_fcoe.c    | 19 ++++++++++----
  drivers/net/ethernet/qlogic/qed/qed_fcoe.h    | 17 ++++++++++--
  drivers/net/ethernet/qlogic/qed/qed_hw.c      | 26 ++++++++++++++++---
  drivers/net/ethernet/qlogic/qed/qed_iscsi.c   | 19 ++++++++++----
  drivers/net/ethernet/qlogic/qed/qed_iscsi.h   | 17 ++++++++----
  drivers/net/ethernet/qlogic/qed/qed_l2.c      | 19 ++++++++++----
  drivers/net/ethernet/qlogic/qed/qed_l2.h      | 24 +++++++++++++++++
  drivers/net/ethernet/qlogic/qed/qed_main.c    |  6 ++---
  9 files changed, 134 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h 
b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index e4b4e3b78e8a..440047dc858f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -210,6 +210,22 @@ void qed_hw_remove(struct qed_dev *cdev);
   */
  struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
+/**
+ * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
+ *                           atomicy.
+ *
+ * @p_hwfn: HW device data.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: struct qed_ptt.
+ *
+ * Should be called at the entry point to the driver
+ * (at the beginning of an exported function).
+ */
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
+                                       bool is_atomic);
+
  /**
   * @brief qed_ptt_release - Release PTT Window
   *
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c 
b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 46dc93d3b9b5..bbf3e4e41235 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -716,13 +716,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
  }
static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
-                             struct qed_fcoe_stats *p_stats)
+                             struct qed_fcoe_stats *p_stats,
+                             bool is_atomic)
  {
        struct qed_ptt *p_ptt;
memset(p_stats, 0, sizeof(*p_stats)); - p_ptt = qed_ptt_acquire(p_hwfn);
+       p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
if (!p_ptt) {
                DP_ERR(p_hwfn, "Failed to acquire ptt\n");
@@ -996,19 +997,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
                                        QED_SPQ_MODE_EBLOCK, NULL);
  }
+static int qed_fcoe_stats_context(struct qed_dev *cdev,
+                                 struct qed_fcoe_stats *stats,
+                                 bool is_atomic)
+{
+       return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
+}
+
  static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
  {
-       return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats);
+       return qed_fcoe_stats_context(cdev, stats, false);
  }
void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
-                                struct qed_mcp_fcoe_stats *stats)
+                                struct qed_mcp_fcoe_stats *stats,
+                                bool is_atomic)
  {
        struct qed_fcoe_stats proto_stats;
/* Retrieve FW statistics */
        memset(&proto_stats, 0, sizeof(proto_stats));
-       if (qed_fcoe_stats(cdev, &proto_stats)) {
+       if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
                DP_VERBOSE(cdev, QED_MSG_STORAGE,
                           "Failed to collect FCoE statistics\n");
                return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h 
b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
index 027a76ac839a..da14cc02a55d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
@@ -54,8 +54,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
  void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
void qed_fcoe_free(struct qed_hwfn *p_hwfn);
+/**
+ * qed_get_protocol_stats_fcoe(): Fills provided statistics
+ *                               struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
+ */
  void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
-                                struct qed_mcp_fcoe_stats *stats);
+                                struct qed_mcp_fcoe_stats *stats,
+                                bool is_atomic);
  #else /* CONFIG_QED_FCOE */
  static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
  {
@@ -66,7 +78,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
  static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
-                                              struct qed_mcp_fcoe_stats *stats)
+                                              struct qed_mcp_fcoe_stats *stats,
+                                              bool is_atomic)
  {
  }
  #endif /* CONFIG_QED_FCOE */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c 
b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 72ec1c6bdf70..86033b759a68 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -49,7 +49,10 @@
  #include "qed_reg_addr.h"
  #include "qed_sriov.h"
-#define QED_BAR_ACQUIRE_TIMEOUT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT     1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP         1000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT     100000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY         10
/* Invalid values */
  #define QED_BAR_INVALID_OFFSET          (cpu_to_le32(-1))
@@ -110,12 +113,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
  }
struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
+{
+       return qed_ptt_acquire_context(p_hwfn, false);
+}
+
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool 
is_atomic)
  {
        struct qed_ptt *p_ptt;
-       unsigned int i;
+       unsigned int i, count;
+
+       if (is_atomic)
+               count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
+       else
+               count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
/* Take the free PTT from the list */
-       for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
+       for (i = 0; i < count; i++) {
                spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
@@ -131,7 +144,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
                }
spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
-               usleep_range(1000, 2000);
+
+               if (is_atomic)
+                       udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
+               else
+                       usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
+                                    QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
        }
DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c 
b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4f8a685d1a55..40eaab4330f9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -1049,13 +1049,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn 
*p_hwfn,
  }
static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
-                              struct qed_iscsi_stats *stats)
+                              struct qed_iscsi_stats *stats,
+                              bool is_atomic)
  {
        struct qed_ptt *p_ptt;
memset(stats, 0, sizeof(*stats)); - p_ptt = qed_ptt_acquire(p_hwfn);
+       p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
        if (!p_ptt) {
                DP_ERR(p_hwfn, "Failed to acquire ptt\n");
                return -EAGAIN;
@@ -1390,9 +1391,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
                                           QED_SPQ_MODE_EBLOCK, NULL);
  }
+static int qed_iscsi_stats_context(struct qed_dev *cdev,
+                                  struct qed_iscsi_stats *stats,
+                                  bool is_atomic)
+{
+       return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats, is_atomic);
+}
+
  static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats 
*stats)
  {
-       return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+       return qed_iscsi_stats_context(cdev, stats, false);
  }
static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1413,13 +1421,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
  }
void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
-                                 struct qed_mcp_iscsi_stats *stats)
+                                 struct qed_mcp_iscsi_stats *stats,
+                                 bool is_atomic)
  {
        struct qed_iscsi_stats proto_stats;
/* Retrieve FW statistics */
        memset(&proto_stats, 0, sizeof(proto_stats));
-       if (qed_iscsi_stats(cdev, &proto_stats)) {
+       if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
                DP_VERBOSE(cdev, QED_MSG_STORAGE,
                           "Failed to collect ISCSI statistics\n");
                return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h 
b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
index 225c75b02a06..5d84c8d382f1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
@@ -64,13 +64,19 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn);
  void qed_iscsi_free(struct qed_hwfn *p_hwfn);
/**
- * @brief - Fills provided statistics struct with statistics.
+ * qed_get_protocol_stats_iscsi(): Fills provided statistics
+ *                                struct with statistics.
   *
- * @param cdev
- * @param stats - points to struct that will be filled with statistics.
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
   */
  void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
-                                 struct qed_mcp_iscsi_stats *stats);
+                                 struct qed_mcp_iscsi_stats *stats,
+                                 bool is_atomic);
  #else /* IS_ENABLED(CONFIG_QED_ISCSI) */
  static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
  {
@@ -83,7 +89,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
static inline void
  qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
-                            struct qed_mcp_iscsi_stats *stats) {}
+                            struct qed_mcp_iscsi_stats *stats,
+                            bool is_atomic) {}
  #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c 
b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index b3937935d2a8..8b3d9a06b979 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1887,7 +1887,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
  }
static void _qed_get_vport_stats(struct qed_dev *cdev,
-                                struct qed_eth_stats *stats)
+                                struct qed_eth_stats *stats,
+                                bool is_atomic)
  {
        u8 fw_vport = 0;
        int i;
@@ -1896,10 +1897,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-               struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
-                                                   :  NULL;
+               struct qed_ptt *p_ptt;
                bool b_get_port_stats;
+ p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
+                                   : NULL;
                if (IS_PF(cdev)) {
                        /* The main vport index is relative first */
                        if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
@@ -1924,6 +1926,13 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
  }
void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
+{
+       qed_get_vport_stats_context(cdev, stats, false);
+}
+
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+                                struct qed_eth_stats *stats,
+                                bool is_atomic)
  {
        u32 i;
@@ -1932,7 +1941,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
                return;
        }
- _qed_get_vport_stats(cdev, stats);
+       _qed_get_vport_stats(cdev, stats, is_atomic);
if (!cdev->reset_stats)
                return;
@@ -1984,7 +1993,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
        if (!cdev->reset_stats) {
                DP_INFO(cdev, "Reset stats not allocated\n");
        } else {
-               _qed_get_vport_stats(cdev, cdev->reset_stats);
+               _qed_get_vport_stats(cdev, cdev->reset_stats, false);
                cdev->reset_stats->common.link_change_count = 0;
        }
  }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h 
b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 7127d5aaac42..3545235a1f6e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -277,8 +277,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
                            enum spq_mode comp_mode,
                            struct qed_spq_comp_cb *p_comp_data);
+/**
+ * qed_get_vport_stats(): Fills provided statistics
+ *                       struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ *
+ * Return: Void.
+ */
  void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
+/**
+ * qed_get_vport_stats_context(): Fills provided statistics
+ *                               struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
+ */
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+                                struct qed_eth_stats *stats,
+                                bool is_atomic);
+
  void qed_reset_vport_stats(struct qed_dev *cdev);
/**
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c 
b/drivers/net/ethernet/qlogic/qed/qed_main.c
index ed052f67ce50..80d6501ea62b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2430,7 +2430,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
switch (type) {
        case QED_MCP_LAN_STATS:
-               qed_get_vport_stats(cdev, &eth_stats);
+               qed_get_vport_stats_context(cdev, &eth_stats, true);
                stats->lan_stats.ucast_rx_pkts =
                                        eth_stats.common.rx_ucast_pkts;
                stats->lan_stats.ucast_tx_pkts =
@@ -2438,10 +2438,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
                stats->lan_stats.fcs_err = -1;
                break;
        case QED_MCP_FCOE_STATS:
-               qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
+               qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
                break;
        case QED_MCP_ISCSI_STATS:
-               qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
+               qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
                break;
        default:
                DP_VERBOSE(cdev, QED_MSG_SP,


LGTM - udelay does not look good but i agree it's the best in that situation.

--
Regards,
Alexander Atanasov

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to