On 5/7/2019 3:41 PM, Jiri Pirko wrote:
> Mon, Apr 29, 2019 at 04:17:39PM CEST, a...@mellanox.com wrote:
>> TX reporter reports an error on two scenarios:
>> - TX timeout on a specific tx queue
>> - TX completion error on a specific send queue
>> Prior to this patch, no dump data was supported by the tx reporter. This
>> patch adds support for SW data dump of the related SQ context. The dump
>> is simply the SQ's raw memory snapshot taken right after the error was
>> reported, before any recovery procedure was launched. With this
>> approach, no maintenance is needed, as the driver fetches the actual data
>> according to the layout with which the SQ was compiled. By providing
>> a SW context, one can easily debug an error on a given SQ.
>>
>> In order to translate the raw memory offline into a human-readable
>> format, the user can use out-of-kernel scripts which receive the
>> following as input:
>> - Object raw memory
>> - Driver object compiled with debug info (can be taken/generated at any time 
>> from the machine)
>> - Object name
>>
>> An example of such a script's output can be seen below.
>> Note: the script is not offered as part of this patch, as it does not
>> belong in the kernel; I only describe it here to convey the general
>> idea of how/what can be fetched from a SW dump via devlink health.
>>
>> The output of the SW dump can be extracted by devlink health command:
>> $ sudo devlink health dump show pci/0000:00:0b.0 reporter tx.
>> mlx5e_txqsq: sqn: 6336
>> memory:
>>    00 00 00 00 00 00 00 00
>>    01 00 00 00 00 00 00 00
>>    00 00 00 00 00 00 00 00
>>    45 f4 88 cb 09 00 00 00
>>    00 00 00 00 00 00 00 00
>>    00 00 00 00 00 00 00 00
>>    c0 ff ff ff 1f 00 00 00
>>    f8 18 1e 89 81 88 ff ff
>>    ...
>>
>> script output below, with struct members names and actual values:
>>
>> struct  mlx5e_txqsq {
>>      short unsigned int         cc    0x5 ;
>>      unsigned int               dma_fifo_cc   0x5 ;
>>      struct  net_dim {
>>              unsigned char      state         0x1 ;
>>              struct  net_dim_stats {
>>                      int        ppms          0x0 ;
>>                      int        bpms          0x0 ;
>>                      int        epms          0x0 ;
>>              } prev_stats;
>>              struct  net_dim_sample {
>>                      long long int time       0x90766ef9d ;
>>                      unsigned int pkt_ctr     0x0 ;
>>                      unsigned int byte_ctr    0x0 ;
>>                      short unsigned int event_ctr     0x0 ;
>>              } start_sample;
>>              struct  work_struct {
>>                      struct   {
>>                              long int counter         0x1fffffffc0 ;
>>                      } data;
>>                      struct  list_head {
>>                              struct list_head * next          
>> 0xffff8881b08998f8 ;
>>                              struct list_head * prev          
>> 0xffff8881b08998f8 ;
>>                      } entry;
>>                      void       (*func)(struct work_struct *)         
>> 0xffffffffa02d0e30 ;
>>              } work;
>>              unsigned char      profile_ix    0x60 ;
>>              unsigned char      mode          0x72 ;
>>              unsigned char      tune_state    0x35 ;
>>              unsigned char      steps_right   0xa0 ;
>>              unsigned char      steps_left    0xff ;
>>              unsigned char      tired         0xff ;
>>      } dim;
>>      short unsigned int         pc    0x0 ;
>>      unsigned int               dma_fifo_pc   0x0 ;
>>      struct  mlx5e_cq {
>>              struct  mlx5_cqwq {
>>                      struct  mlx5_frag_buf_ctrl {
>>                              struct mlx5_buf_list * frags     0x500000005 ;
>>                              unsigned int sz_m1       0x0 ;
>>                              short unsigned int frag_sz_m1    0x0 ;
>>                              short unsigned int strides_offset        0x0 ;
>>                              unsigned char log_sz     0x0 ;
>>                              unsigned char log_stride         0x0 ;
>>                              unsigned char log_frag_strides   0x0 ;
>>                      } fbc;
>>                      __be32 *   db    0x0 ;
>>                      unsigned int cc          0x0 ;
>>              } wq;
>>              short unsigned int event_ctr     0x0 ;
>>              struct napi_struct * napi        0x0 ;
>>              struct  mlx5_core_cq {
>>                      unsigned int cqn         0x0 ;
>>                      int        cqe_sz        0x0 ;
>>                      __be32 *   set_ci_db     0xffff8881b1aa4988 ;
>>                      __be32 *   arm_db        0x3f000003ff ;
>>                      struct mlx5_uars_page * uar      0x6060a ;
>>                      struct  refcount_struct {
>>                              struct   {
>>                                      int    counter   0xa1814500 ;
>>                              } refs;
>>                      } refcount;
>>                      struct  completion {
>>                              unsigned int done        0x5 ;
>>                              struct  wait_queue_head {
>>                                      struct  spinlock {
>>                                              union   {
>>                                                      struct  raw_spinlock {
>>                                                              struct  
>> qspinlock {
>>                                                                      union   
>> {
>>                                                                              
>> struct   {
>>                                                                              
>>         int                                                    counter   0x5 
>> ;
>>                                                                              
>> } val;
>>                                                                              
>> struct   {
>>                                                                              
>>         unsigned char                                          locked    0x5 
>> ;
>>                                                                              
>>         unsigned char                                          pending   0x0 
>> ;
>>                                                                              
>> } ;
>>                                                                              
>> struct   {
>>                                                                              
>>         short unsigned int                                     
>> locked_pending    0x5 ;
>>                                                                              
>>         short unsigned int                                     tail      0x0 
>> ;
>>                                                                              
>> } ;
>>                                                                      } ;
>>                                                              } raw_lock;
>>                                                      } rlock;
>>                                              } ;
>>                                      } lock;
>>                                      struct  list_head {
>>                                              struct list_head * next         
>>  0xffff8881b089bb88 ;
>>                                              struct list_head * prev         
>>  0x4000000c0a ;
>>                                      } head;
>>                              } wait;
>>                      } free;
>>                      unsigned int vector      0xa1814500 ;
>>                      unsigned int irqn        0xffff8881 ;
>>                      void       (*comp)(struct mlx5_core_cq *)        
>> 0xffff8881a1814504 ;
>>                      void       (*event)(struct mlx5_core_cq *, enum 
>> mlx5_event)      0xffff8881a2cdea08 ;
>>                      unsigned int cons_index          0x1 ;
>>                      unsigned int arm_sn      0x0 ;
>>                      struct mlx5_rsc_debug * dbg      0x0 ;
>>                      int        pid   0x0 ;
>>                      struct   {
>>                              struct  list_head {
>>                                      struct list_head * next          
>> 0xffffffff ;
>>                                      struct list_head * prev          
>> 0xffffffffffffffff ;
>>                              } list;
>>                              void (*comp)(struct mlx5_core_cq *)      
>> 0xffffffffa0356940 ;
>>                              void * priv      0x0 ;
>>                      } tasklet_ctx;
>>                      int        reset_notify_added    0x0 ;
>>                      struct  list_head {
>>                              struct list_head * next          
>> 0xffffffffa0300700 ;
>>                              struct list_head * prev          0xd ;
>>                      } reset_notify;
>>                      struct mlx5_eq_comp * eq         0x0 ;
>>                      short unsigned int uid   0x9a70 ;
>>              } mcq;
>>              struct mlx5e_channel * channel   0xffff8881b0899a70 ;
>>              struct mlx5_core_dev * mdev      0x4800000001 ;
>>              struct  mlx5_wq_ctrl {
>>                      struct mlx5_core_dev * mdev      0xffffffffa02d5350 ;
>>                      struct  mlx5_frag_buf {
>>                              struct mlx5_buf_list * frags     
>> 0xffffffffa02d5460 ;
>>                              int npages       0x0 ;
>>                              int size         0x5 ;
>>                              unsigned char page_shift         0x8 ;
>>                      } buf;
>>                      struct  mlx5_db {
>>                              __be32 * db      0x1c6 ;
>>                              union   {
>>                                      struct mlx5_db_pgdir * pgdir     0x0 ;
>>                                      struct mlx5_ib_user_db_page * user_page 
>>          0x0 ;
>>                              } u;
>>                              long long unsigned int dma       
>> 0xffff8881b0899ab0 ;
>>                              int index        0x0 ;
>>                      } db;
>>              } wq_ctrl;
>>      } cq;
>>      struct  mlx5_wq_cyc {
>>              struct  mlx5_frag_buf_ctrl {
>>                      struct mlx5_buf_list * frags     0xffff8881a7600160 ;
>>                      unsigned int sz_m1       0xa7600160 ;
>>                      short unsigned int frag_sz_m1    0x8881 ;
>>                      short unsigned int strides_offset        0xffff ;
>>                      unsigned char log_sz     0x88 ;
>>                      unsigned char log_stride         0x49 ;
>>                      unsigned char log_frag_strides   0xaa ;
>>              } fbc;
>>              __be32 *           db    0x1000000000010 ;
>>              short unsigned int sz    0xc ;
>>              short unsigned int wqe_ctr       0x0 ;
>>              short unsigned int cur_sz        0x0 ;
>>      } wq;
>>      unsigned int               dma_fifo_mask         0xa1814500 ;
>>      struct mlx5e_sq_stats *    stats         0xffff8881a33a0348 ;
>>      struct   {
>>              struct mlx5e_sq_dma * dma_fifo   0x1a1814500 ;
>>              struct mlx5e_tx_wqe_info * wqe_info      0x14 ;
>>      } db;
>>      void *                     uar_map       0x0 ;
>>      struct netdev_queue *      txq   0x0 ;
>>      unsigned int               sqn   0x18c0 ;
>>      unsigned char              min_inline_mode       0x0 ;
>>      struct device *            pdev          0x0 ;
>>      unsigned int               mkey_be       0x0 ;
>>      long unsigned int          state         0x0 ;
>>      struct hwtstamp_config *   tstamp        0x0 ;
>>      struct mlx5_clock *        clock         0xffff8881b1aa6f88 ;
>>      struct  mlx5_wq_ctrl {
>>              struct mlx5_core_dev * mdev      0x3f000003ff ;
>>              struct  mlx5_frag_buf {
>>                      struct mlx5_buf_list * frags     0x6060a ;
>>                      int        npages        0xa1814604 ;
>>                      int        size          0xffff8881 ;
>>                      unsigned char page_shift         0x0 ;
>>              } buf;
>>              struct  mlx5_db {
>>                      __be32 *   db    0xfff ;
>>                      union   {
>>                              struct mlx5_db_pgdir * pgdir     0x0 ;
>>                              struct mlx5_ib_user_db_page * user_page         
>>  0x0 ;
>>                      } u;
>>                      long long unsigned int dma       0xffff888188440000 ;
>>                      int        index         0x8b074000 ;
>>              } db;
>>      } wq_ctrl;
>>      struct mlx5e_channel *     channel       0xffffc9000010d800 ;
>>      int                        txq_ix        0xa0020180 ;
>>      unsigned int               rate_limit    0xffff8881 ;
>>      struct  work_struct {
>>              struct   {
>>                      long int   counter       0x1000018c0 ;
>>              } data;
>>              struct  list_head {
>>                      struct list_head * next          0xffff8881c32b68e8 ;
>>                      struct list_head * prev          0x800 ;
>>              } entry;
>>              void               (*func)(struct work_struct *)         0x9 ;
>>      } recover_work;
>> } ;
> 
> I don't get it. You are dumping live kernel memory? There are already
> facilities in place to do that. Why replicate it?
I am dumping the driver's memory under a lock so I can ensure its
consistency (as opposed to /dev/mem).
A vmcore cannot be taken from a live kernel (without crashing it).
I need a snapshot of the memory right after the error, taken from the
driver's context.
Which other tools do you mean?
> 
> 
>>
>> Signed-off-by: Aya Levin <a...@mellanox.com>
>> ---
>> .../ethernet/mellanox/mlx5/core/en/reporter_tx.c   | 100 
>> +++++++++++++++++++++
>> 1 file changed, 100 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c 
>> b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
>> index 476dd97f7f2f..8a39f5525e57 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
>> @@ -9,6 +9,7 @@
>>
>> struct mlx5e_tx_err_ctx {
>>      int (*recover)(struct mlx5e_txqsq *sq);
>> +    int (*dump)(struct mlx5e_txqsq *sq);
>>      struct mlx5e_txqsq *sq;
>> };
>>
>> @@ -281,10 +282,109 @@ static int mlx5e_tx_reporter_diagnose(struct 
>> devlink_health_reporter *reporter,
>>      return err;
>> }
>>
>> +static int mlx5e_tx_reporter_sw_dump_from_ctx(struct mlx5e_priv *priv,
>> +                                          struct mlx5e_txqsq *sq,
>> +                                          struct devlink_fmsg *fmsg)
>> +{
>> +    u64 *ptr = (u64 *)sq;
>> +    int copy, err;
>> +    int i = 0;
>> +
>> +    if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
>> +            return 0;
>> +
>> +    err = devlink_fmsg_pair_nest_start(fmsg, "mlx5e_txqsq");
>> +    if (err)
>> +            return err;
>> +
>> +    err = devlink_fmsg_obj_nest_start(fmsg);
>> +    if (err)
>> +            return err;
>> +
>> +    err = devlink_fmsg_arr_pair_nest_start(fmsg, "memory");
>> +    if (err)
>> +            return err;
>> +
>> +    while (i < sizeof(struct mlx5e_txqsq)) {
>> +            copy = sizeof(u64);
>> +
>> +            if (i + copy > sizeof(struct mlx5e_txqsq))
>> +                    copy = sizeof(struct mlx5e_txqsq) - i;
>> +
>> +            err = devlink_fmsg_binary_put(fmsg, ptr, copy);
>> +            if (err)
>> +                    return err;
>> +            ptr++;
>> +            i += copy;
>> +    }
>> +
>> +    err = devlink_fmsg_arr_pair_nest_end(fmsg);
>> +    if (err)
>> +            return err;
>> +
>> +    err = devlink_fmsg_obj_nest_end(fmsg);
>> +    if (err)
>> +            return err;
>> +
>> +    err = devlink_fmsg_pair_nest_end(fmsg);
>> +
>> +    return err;
>> +}
>> +
>> +static int mlx5e_tx_reporter_sw_dump_all(struct mlx5e_priv *priv,
>> +                                     struct devlink_fmsg *fmsg)
>> +{
>> +    int i, err = 0;
>> +
>> +    mutex_lock(&priv->state_lock);
>> +
>> +    if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
>> +            goto unlock;
>> +
>> +    err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
>> +    if (err)
>> +            goto unlock;
>> +
>> +    for (i = 0; i < priv->channels.num * priv->channels.params.num_tc;
>> +         i++) {
>> +            err = devlink_fmsg_obj_nest_start(fmsg);
>> +            if (err)
>> +                    goto unlock;
>> +
>> +            err = mlx5e_tx_reporter_sw_dump_from_ctx(priv, priv->txq2sq[i],
>> +                                                     fmsg);
>> +            if (err)
>> +                    goto unlock;
>> +
>> +            err = devlink_fmsg_pair_nest_end(fmsg);
>> +            if (err)
>> +                    goto unlock;
>> +    }
>> +    err = devlink_fmsg_arr_pair_nest_end(fmsg);
>> +    if (err)
>> +            goto unlock;
>> +
>> +unlock:
>> +    mutex_unlock(&priv->state_lock);
>> +    return err;
>> +}
>> +
>> +static int mlx5e_tx_reporter_sw_dump(struct devlink_health_reporter 
>> *reporter,
>> +                                 struct devlink_fmsg *fmsg, void *context)
>> +{
>> +    struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
>> +    struct mlx5e_tx_err_ctx *err_ctx = context;
>> +
>> +    return err_ctx ? mlx5e_tx_reporter_sw_dump_from_ctx(priv, err_ctx->sq,
>> +                                                        fmsg) :
>> +                     mlx5e_tx_reporter_sw_dump_all(priv, fmsg);
>> +}
>> +
>> static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
>>              .name = "tx",
>>              .recover = mlx5e_tx_reporter_recover,
>>              .diagnose = mlx5e_tx_reporter_diagnose,
>> +            .dump = mlx5e_tx_reporter_sw_dump,
>> };
>>
>> #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
>> -- 
>> 2.14.1
>>

Reply via email to