Hi:
    I think the flow list may be accessed at the same time by two different
threads, which can cause errors. Does it need a lock to protect the flow
list? (A minimal sketch of the suspected race follows the backtrace below.)

                                                                                
Thanks,
Yunjian
(gdb) bt
#0 0x00007f54c9641237 in raise () from /usr/lib64/libc.so.6
#1 0x00007f54c9642928 in abort () from /usr/lib64/libc.so.6
#2 0x00000000006a8749 in PAT_abort ()
#3 0x00000000006a588d in patchIllInsHandler ()
#4 <signal handler called>
#5 0x00007f54c6acd2c8 in flow_list_destroy (dev=dev@entry=0xad8940 
<rte_eth_devices+16512>, flow=0x1444b1b00, list=0x14455e618) at 
/usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_flow.c:2150
#6 0x00007f54c6acfe1b in mlx5_flow_list_flush (dev=0xad8940 
<rte_eth_devices+16512>, list=0x14455e618) at 
/usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_flow.c:2170
#7 0x00007f54c6ac5cc4 in mlx5_traffic_disable (dev=<optimized out>) at 
/usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_trigger.c:384
#8 0x00007f54c6ac637d in mlx5_traffic_restart (dev=0xad8940 
<rte_eth_devices+16512>) at 
/usr/src/debug/dpdk-mlx4-pmd-18.11/drivers/net/mlx5/mlx5_trigger.c:400
#9 0x00007f54d1db3bba in rte_eth_dev_default_mac_addr_set (port_id=<optimized 
out>, addr=0x140200f40) at 
/usr/src/debug/dpdk-18.11/lib/librte_ethdev/rte_ethdev.c:3230
#10 0x00007f54cd8dee81 in mac_address_slaves_update 
(bonded_eth_dev=bonded_eth_dev@entry=0xad48c0 <rte_eth_devices>) at 
/usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:1842
#11 0x00007f54cd8e0c31 in bond_ethdev_lsc_event_callback (port_id=<optimized 
out>, type=<optimized out>, param=<optimized out>, ret_param=<optimized out>) 
at /usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:3070
#12 0x00007f54cd8e117b in bond_ethdev_slave_lsc_delay (cb_arg=0xad48c0 
<rte_eth_devices>) at 
/usr/src/debug/dpdk-18.11/drivers/net/bonding/rte_eth_bond_pmd.c:2298
#13 0x00007f54d25ebe5f in eal_alarm_callback (arg=<optimized out>) at 
/usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_alarm.c:90
#14 0x00007f54d25ea8aa in eal_intr_process_interrupts (nfds=<optimized out>, 
events=<optimized out>) at 
/usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:838
#15 eal_intr_handle_interrupts (totalfds=<optimized out>, pfd=21) at 
/usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:885
#16 eal_intr_thread_main (arg=<optimized out>) at 
/usr/src/debug/dpdk-18.11/lib/librte_eal/linuxapp/eal/eal_interrupts.c:965
#17 0x00007f54cade6dd5 in start_thread () from /usr/lib64/libpthread.so.0
#18 0x00007f54c970950d in clone () from /usr/lib64/libc.so.6
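
For reference, here is a minimal standalone sketch of the race I suspect
(illustrative only, not mlx5 code: plain pthreads and <sys/queue.h> stand in
for the EAL interrupt thread and the TAILQ behind priv->flows, and all names
below are hypothetical). One thread keeps inserting flows, as the
rte_flow_create() path does, while the other drains the list, as
mlx5_flow_list_flush() does, with no synchronization:

/*
 * Deliberately racy sketch: two threads mutate the same TAILQ with no lock.
 * Build with: gcc -pthread race.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct flow {
	TAILQ_ENTRY(flow) next;
};
TAILQ_HEAD(flow_list, flow);

static struct flow_list flows = TAILQ_HEAD_INITIALIZER(flows);
static volatile int creating = 1;

/* Stand-in for an application thread creating flows. */
static void *creator(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		struct flow *f = malloc(sizeof(*f));

		if (f == NULL)
			break;
		TAILQ_INSERT_TAIL(&flows, f, next); /* unsynchronized write */
	}
	creating = 0;
	return NULL;
}

/* Stand-in for the interrupt thread flushing the flow list. */
static void *flusher(void *arg)
{
	(void)arg;
	while (creating || !TAILQ_EMPTY(&flows)) {
		struct flow *f = TAILQ_FIRST(&flows);

		if (f == NULL)
			continue;
		TAILQ_REMOVE(&flows, f, next); /* unsynchronized write */
		free(f);
	}
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, creator, NULL);
	pthread_create(&t2, NULL, flusher, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	/*
	 * With both threads mutating the list concurrently, the TAILQ link
	 * pointers can be observed half-updated; given enough iterations
	 * (or a run under a race detector) this corrupts the list and
	 * crashes, the same failure mode as the abort in
	 * flow_list_destroy() above.
	 */
	printf("done (the race is nondeterministic; rerun if it did not fire)\n");
	return 0;
}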


To solve this problem (the core dump above), I modified the code as follows.
Looking forward to your reply.

From: zhaohui8 <zhaoh...@huawei.com>
---
 drivers/net/mlx5/mlx5.c         |  1 +
 drivers/net/mlx5/mlx5.h         |  1 +
 drivers/net/mlx5/mlx5_flow.c    | 34 +++++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_trigger.c | 12 +++++++++++-
 4 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9e5cab1..e8ae816 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1195,6 +1195,7 @@
                        priv->tcf_context = NULL;
                }
        }
+       rte_rwlock_init(&priv->flows_rwlock);
        TAILQ_INIT(&priv->flows);
        TAILQ_INIT(&priv->ctrl_flows);
        /* Hint libmlx5 to use PMD allocator for data plane resources */
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index bc500b2..cb8657c 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -202,6 +202,7 @@ struct priv {
        unsigned int (*reta_idx)[]; /* RETA index table. */
        unsigned int reta_idx_n; /* RETA index size. */
        struct mlx5_drop drop_queue; /* Flow drop queues. */
+       rte_rwlock_t flows_rwlock; /* Flows list lock. */
        struct mlx5_flows flows; /* RTE Flow rules. */
        struct mlx5_flows ctrl_flows; /* Control flow rules. */
        LIST_HEAD(counters, mlx5_flow_counter) flow_counters;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 97dc3e1..2c18602 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -2121,9 +2121,13 @@ struct rte_flow *
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
 {
-       return flow_list_create(dev,
+       struct rte_flow *flow;
+       rte_rwlock_write_lock(&((struct priv *)dev->data->dev_private)->flows_rwlock);
+       flow = flow_list_create(dev,
                                &((struct priv *)dev->data->dev_private)->flows,
                                attr, items, actions, error);
+       rte_rwlock_write_unlock(&((struct priv *)dev->data->dev_private)->flows_rwlock);
+       return flow;
 }
 
 /**
@@ -2235,12 +2239,13 @@ struct rte_flow *
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;
        int ret = 0;
-
+       rte_rwlock_read_lock(&priv->flows_rwlock);
        TAILQ_FOREACH(flow, &priv->flows, next) {
                DRV_LOG(DEBUG, "port %u flow %p still referenced",
                        dev->data->port_id, (void *)flow);
                ++ret;
        }
+       rte_rwlock_read_unlock(&priv->flows_rwlock);
        return ret;
 }
 
@@ -2320,10 +2325,14 @@ struct rte_flow *
        }
        for (i = 0; i != priv->reta_idx_n; ++i)
                queue[i] = (*priv->reta_idx)[i];
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        flow = flow_list_create(dev, &priv->ctrl_flows,
                                &attr, items, actions, &error);
-       if (!flow)
+       if (!flow) {
+               rte_rwlock_write_unlock(&priv->flows_rwlock);
                return -rte_errno;
+       }
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        return 0;
 }
 
@@ -2360,8 +2369,9 @@ struct rte_flow *
                  struct rte_flow_error *error __rte_unused)
 {
        struct priv *priv = dev->data->dev_private;
-
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        flow_list_destroy(dev, &priv->flows, flow);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        return 0;
 }
 
@@ -2376,8 +2386,9 @@ struct rte_flow *
                struct rte_flow_error *error __rte_unused)
 {
        struct priv *priv = dev->data->dev_private;
-
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        mlx5_flow_list_flush(dev, &priv->flows);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        return 0;
 }
 
@@ -2729,17 +2740,22 @@ struct rte_flow *
        ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
        if (ret)
                goto error;
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        flow = flow_fdir_filter_lookup(dev, fdir_flow);
        if (flow) {
                rte_errno = EEXIST;
+               rte_rwlock_write_unlock(&priv->flows_rwlock);
                goto error;
        }
        flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
                                fdir_flow->items, fdir_flow->actions, NULL);
-       if (!flow)
+       if (!flow) {
+               rte_rwlock_write_unlock(&priv->flows_rwlock);
                goto error;
+       }
        assert(!flow->fdir);
        flow->fdir = fdir_flow;
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        DRV_LOG(DEBUG, "port %u created FDIR flow %p",
                dev->data->port_id, (void *)flow);
        return 0;
@@ -2773,6 +2789,8 @@ struct rte_flow *
        ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
        if (ret)
                return -rte_errno;
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        flow = flow_fdir_filter_lookup(dev, &fdir_flow);
        if (!flow) {
                rte_errno = ENOENT;
+               rte_rwlock_write_unlock(&priv->flows_rwlock);
@@ -2781,6 +2799,7 @@ struct rte_flow *
        flow_list_destroy(dev, &priv->flows, flow);
        DRV_LOG(DEBUG, "port %u deleted FDIR flow %p",
                dev->data->port_id, (void *)flow);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        return 0;
 }
 
@@ -2817,8 +2836,9 @@ struct rte_flow *
 flow_fdir_filter_flush(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
-
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        mlx5_flow_list_flush(dev, &priv->flows);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index e2a9bb7..b95c7cf 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -188,12 +188,15 @@
                        dev->data->port_id);
                goto error;
        }
+       rte_rwlock_read_lock(&priv->flows_rwlock);
        ret = mlx5_flow_start(dev, &priv->flows);
        if (ret) {
                DRV_LOG(DEBUG, "port %u failed to set flows",
                        dev->data->port_id);
+               rte_rwlock_read_unlock(&priv->flows_rwlock);
                goto error;
        }
+       rte_rwlock_read_unlock(&priv->flows_rwlock);
        dev->tx_pkt_burst = mlx5_select_tx_function(dev);
        dev->rx_pkt_burst = mlx5_select_rx_function(dev);
        mlx5_dev_interrupt_handler_install(dev);
@@ -202,7 +205,9 @@
        ret = rte_errno; /* Save rte_errno before cleanup. */
        /* Rollback. */
        dev->data->dev_started = 0;
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        mlx5_flow_stop(dev, &priv->flows);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        mlx5_traffic_disable(dev);
        mlx5_txq_stop(dev);
        mlx5_rxq_stop(dev);
@@ -230,7 +235,9 @@
        rte_wmb();
        usleep(1000 * priv->rxqs_n);
        DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        mlx5_flow_stop(dev, &priv->flows);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        mlx5_traffic_disable(dev);
        mlx5_rx_intr_vec_disable(dev);
        mlx5_dev_interrupt_handler_uninstall(dev);
@@ -364,7 +371,9 @@
        return 0;
 error:
        ret = rte_errno; /* Save rte_errno before cleanup. */
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        mlx5_flow_list_flush(dev, &priv->ctrl_flows);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
        rte_errno = ret; /* Restore rte_errno. */
        return -rte_errno;
 }
@@ -380,8 +389,9 @@
 mlx5_traffic_disable(struct rte_eth_dev *dev)
 {
        struct priv *priv = dev->data->dev_private;
-
+       rte_rwlock_write_lock(&priv->flows_rwlock);
        mlx5_flow_list_flush(dev, &priv->ctrl_flows);
+       rte_rwlock_write_unlock(&priv->flows_rwlock);
 }
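
For clarity, the locking discipline the patch applies, shown on the same
standalone sketch from after the backtrace (pthread_rwlock_t is only a
stand-in for DPDK's rte_rwlock_t here, and the helper names are hypothetical;
the calls map one-to-one onto rte_rwlock_write_lock()/rte_rwlock_read_lock()
and their unlock counterparts): paths that mutate the list
(create/destroy/flush) take the write lock, while read-only walks such as the
one in mlx5_flow_verify() take the read lock and may run concurrently with
each other:

/*
 * Continuation of the standalone sketch above (same struct flow / flows
 * TAILQ and includes); pthread_rwlock_t stands in for DPDK's rte_rwlock_t.
 */
static pthread_rwlock_t flows_rwlock = PTHREAD_RWLOCK_INITIALIZER;

/* Writer path: what the patch does around flow_list_create()/destroy(). */
static void flow_insert_locked(struct flow *f)
{
	pthread_rwlock_wrlock(&flows_rwlock);  /* rte_rwlock_write_lock() */
	TAILQ_INSERT_TAIL(&flows, f, next);
	pthread_rwlock_unlock(&flows_rwlock);  /* rte_rwlock_write_unlock() */
}

/* Reader path: what the patch does around the walk in mlx5_flow_verify();
 * any number of readers may hold the lock at once. */
static int flow_count_locked(void)
{
	int n = 0;
	struct flow *f;

	pthread_rwlock_rdlock(&flows_rwlock);  /* rte_rwlock_read_lock() */
	TAILQ_FOREACH(f, &flows, next)
		n++;
	pthread_rwlock_unlock(&flows_rwlock);  /* rte_rwlock_read_unlock() */
	return n;
}

Since rte_rwlock_t is a busy-waiting lock, critical sections should stay as
short as these, and every early return inside a locked section needs a
matching unlock, which is easy to miss on error paths.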
 

-----Original Message-----
From: wangyunjian
Sent: February 22, 2019 15:34
To: dev@dpdk.org; shah...@mellanox.com; ys...@mellanox.com
Cc: xudingke <xudin...@huawei.com>; Zhaohui (zhaohui, Polestar) <zhaoh...@huawei.com>
Subject: [dpdk-dev] Segfault when eal thread executing mlx5 nic's lsc event
