[dpdk-dev] [PATCH] net/bnxt: fix nested lock at bond mode

2021-07-03 Thread Weifeng Li
Bnxt regist lsc callback (bond_ethdev_lsc_event_callback) when
working at bond mode. This callback will dead lock when lsc
interrupt triggerred.

lsc interrupt ->
bnxt_handle_async_event ->
bnxt_link_update_op ->
bond_ethdev_lsc_event_callback (lsc_lock) ->
bnxt_link_update_op ->
bond_ethdev_lsc_event_callback (lsc_lock dead lock)

Fixes: c2faa1d1969e ("net/bnxt: add support for LSC interrupt event")
Cc: sta...@dpdk.org

Signed-off-by: Weifeng Li 
---
 drivers/net/bnxt/bnxt_cpr.c| 2 ++
 drivers/net/bnxt/bnxt_ethdev.c | 5 -
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 2c7fd78..f4c9c72 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -111,6 +111,8 @@ void bnxt_handle_async_event(struct bnxt *bp,
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE:
/* FALLTHROUGH */
bnxt_link_update_op(bp->eth_dev, 0);
+   rte_eth_dev_callback_process(bp->eth_dev,
+   RTE_ETH_EVENT_INTR_LSC, NULL);
break;
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD:
PMD_DRV_LOG(INFO, "Async event: PF driver unloaded\n");
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 495c6cd..619b7b8 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1844,11 +1844,6 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int 
wait_to_complete)
if (new.link_status != eth_dev->data->dev_link.link_status ||
new.link_speed != eth_dev->data->dev_link.link_speed) {
rte_eth_linkstatus_set(eth_dev, &new);
-
-   rte_eth_dev_callback_process(eth_dev,
-RTE_ETH_EVENT_INTR_LSC,
-NULL);
-
bnxt_print_link_info(eth_dev);
}
 
-- 
1.8.3.1



[dpdk-dev] [PATCH v2] net/bnxt: fix nested lock at bond mode

2021-07-03 Thread Weifeng Li
Bnxt register lsc callback (bond_ethdev_lsc_event_callback) when
working at bond mode. This callback will dead lock when lsc
interrupt triggered.

lsc interrupt ->
bnxt_handle_async_event ->
bnxt_link_update_op ->
bond_ethdev_lsc_event_callback (lsc_lock) ->
bnxt_link_update_op ->
bond_ethdev_lsc_event_callback (lsc_lock dead lock)

Fixes: c2faa1d1969e ("net/bnxt: add support for LSC interrupt event")
Cc: sta...@dpdk.org

Signed-off-by: Weifeng Li 
---
v2: fix coding style issues
---
 drivers/net/bnxt/bnxt_cpr.c| 2 ++
 drivers/net/bnxt/bnxt_ethdev.c | 5 -
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 2c7fd78..f4c9c72 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -111,6 +111,8 @@ void bnxt_handle_async_event(struct bnxt *bp,
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE:
/* FALLTHROUGH */
bnxt_link_update_op(bp->eth_dev, 0);
+   rte_eth_dev_callback_process(bp->eth_dev,
+   RTE_ETH_EVENT_INTR_LSC, NULL);
break;
case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD:
PMD_DRV_LOG(INFO, "Async event: PF driver unloaded\n");
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 495c6cd..619b7b8 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1844,11 +1844,6 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int 
wait_to_complete)
if (new.link_status != eth_dev->data->dev_link.link_status ||
new.link_speed != eth_dev->data->dev_link.link_speed) {
rte_eth_linkstatus_set(eth_dev, &new);
-
-   rte_eth_dev_callback_process(eth_dev,
-RTE_ETH_EVENT_INTR_LSC,
-NULL);
-
bnxt_print_link_info(eth_dev);
}
 
-- 
1.8.3.1



[dpdk-dev] Segfault when eal thread executing mlx5 nic's lsc event

2020-10-30 Thread Weifeng LI

hi

    I am using the dpdk bond of mlx5. There is a segment error in the 
process of starting the bond port. This is because EAL interrupt thread 
is processing LSC interrupt when slave_configure is executing the 
rte_eth_dev_rss_reta_update. rte_eth_dev_rss_reta_update will also use 
mlx5 flow list.


    I've also found another discussion about this 
issue.https://mails.dpdk.org/archives/dev/2019-March/125929.html 



    Do it need a lock to protect the mlx5 flow list?


int
slave_configure(struct rte_eth_dev *bonded_eth_dev,
        struct rte_eth_dev *slave_eth_dev)

{

    ...

    /* Start device */
    errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
    if (errval != 0) {
        RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
                slave_eth_dev->data->port_id, errval);
        return -1;
    }

    /* If RSS is enabled for bonding, synchronize RETA */
    if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
        int i;
        struct bond_dev_private *internals;

        internals = bonded_eth_dev->data->dev_private;

        for (i = 0; i < internals->slave_count; i++) {
            if (internals->slaves[i].port_id == 
slave_eth_dev->data->port_id) {

                errval = rte_eth_dev_rss_reta_update(
                        slave_eth_dev->data->port_id,
                        &internals->reta_conf[0],
                        internals->slaves[i].reta_size);
                if (errval != 0) {
                    RTE_BOND_LOG(WARNING,
                         "rte_eth_dev_rss_reta_update on slave port 
%d fails (err %d)."
                         " RSS Configuration for bonding may be 
inconsistent.",

                         slave_eth_dev->data->port_id, errval);
                }
                break;
            }
        }
    }




[dpdk-dev] [PATCH v2] net/bonding: change the state machine to defaulted

2020-07-06 Thread Weifeng Li
From: Weifeng Li 

A dpdk bonding 802.3ad network as follows:
+--+ +---+
|dpdk lacp |slave1 <--> port1|switch lacp|
|  |slave2 <--> port2|   |
+--+ +---+
If a fiber optic go wrong about single pass during normal running like
this:
slave2 -> port2 ok
slave2 <- port2 error: salve2 receive no LACPDU Some packets from
   switch to dpdk will choose port2 and lost.

DPDK lacp state machine will transits to the expired state if no LACPDU
is received before the current_while_timer expires. But if no LACPDU is
received before the current_while_timer expires again, DPDK lacp state
machine has no change. Port2 can not change to inactive depend on the
received LACPDU.
According to IEEE 802.3ad, if no lacpdu is received before the
current_while_timer expires again, the state machine should transits from
expired to defaulted. Port2 will change to inactive depend on the LACPDU
with defaulted state.

This patch adds a state machine change from expired to defaulted when no
lacpdu is received before the current_while_timer expires again according
to IEEE 802.3ad:
If no LACPDU is received before the current_while timer expires again,
the state machine transits to the DEFAULTED state. The record Default
function overwrites the current operational parameters for the Partner
with administratively configured values. This allows configuration of
aggregations and individual links when no protocol partner is present,
while still permitting an active partner to override default settings.
The update_Default_Selected function sets the Selected variable FALSE
if the Link Aggregation Group has changed. Since all operational parameters
are now set to locally administered values there can be no disagreement as
to the Link Aggregation Group, so the Matched variable is set TRUE.

The relevant description is in the chapter 43.4.12 of the link below:
https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=850426

Signed-off-by: Weifeng Li 
---
v1 -> v2: adjust the formate of commit log
---
 drivers/net/bonding/eth_bond_8023ad_private.h |  2 ++
 drivers/net/bonding/rte_eth_bond_8023ad.c | 21 +
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/eth_bond_8023ad_private.h 
b/drivers/net/bonding/eth_bond_8023ad_private.h
index 6e44ffd..76f8b8d 100644
--- a/drivers/net/bonding/eth_bond_8023ad_private.h
+++ b/drivers/net/bonding/eth_bond_8023ad_private.h
@@ -50,6 +50,7 @@
 #define SM_FLAGS_MOVED  0x0100
 #define SM_FLAGS_PARTNER_SHORT_TIMEOUT  0x0200
 #define SM_FLAGS_NTT0x0400
+#define SM_FLAGS_EXPIRED0x0800
 
 #define BOND_LINK_FULL_DUPLEX_KEY   0x01
 #define BOND_LINK_SPEED_KEY_10M 0x02
@@ -103,6 +104,7 @@ struct port {
 
/** The operational Partner's port parameters */
struct port_params partner;
+   struct port_params partner_admin;
 
/* Additional port parameters not listed in documentation */
/** State machine flags */
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c 
b/drivers/net/bonding/rte_eth_bond_8023ad.c
index b77a37d..bfa418d 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -356,16 +356,28 @@ rx_machine(struct bond_dev_private *internals, uint16_t 
slave_id,
 
timer_set(&port->current_while_timer, timeout);
ACTOR_STATE_CLR(port, EXPIRED);
+   SM_FLAG_CLR(port, EXPIRED);
return; /* No state change */
}
 
/* If CURRENT state timer is not running (stopped or expired)
 * transit to EXPIRED state from DISABLED or CURRENT */
if (!timer_is_running(&port->current_while_timer)) {
-   ACTOR_STATE_SET(port, EXPIRED);
-   PARTNER_STATE_CLR(port, SYNCHRONIZATION);
-   PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
-   timer_set(&port->current_while_timer, 
internals->mode4.short_timeout);
+   if (SM_FLAG(port, EXPIRED)) {
+   port->selected = UNSELECTED;
+   memcpy(&port->partner, &port->partner_admin,
+   sizeof(struct port_params));
+   record_default(port);
+   ACTOR_STATE_CLR(port, EXPIRED);
+   timer_cancel(&port->current_while_timer);
+   } else {
+   SM_FLAG_SET(port, EXPIRED);
+   ACTOR_STATE_SET(port, EXPIRED);
+   PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+   PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
+   timer_set(&port->current_while_timer,
+  

[dpdk-dev] [PATCH] net/i40e: fix SFP I X722 with FW4.16

2021-01-10 Thread Weifeng Li
When NVM API version is 1.7 or above adminq operation to set TPID is
set as supported. This cause using adminq instead of registers.
For SFP_I_X722 FW4.16, reported NVM API version is 1.8, and this cause
adminq operation to set as supported but it is not supported on FW4.16
Additional check added for SFP_I_X722 to not enable adminq operation.

Fixes: 9efa8d28b4da ("net/i40e: fix SFP X722 with FW4.16")
Cc: sta...@dpdk.org

Signed-off-by: Weifeng Li 
---
 drivers/net/i40e/i40e_ethdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 1462248..a07879c 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -1550,7 +1550,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void 
*init_params __rte_unused)
return -EIO;
}
/* Firmware of SFP x722 does not support adminq option */
-   if (hw->device_id == I40E_DEV_ID_SFP_X722)
+   if (hw->device_id == I40E_DEV_ID_SFP_X722 ||
+   hw->device_id == I40E_DEV_ID_SFP_I_X722)
hw->flags &= ~I40E_HW_FLAG_802_1AD_CAPABLE;
 
PMD_INIT_LOG(INFO, "FW %d.%d API %d.%d NVM %02d.%02d.%02d eetrack %04x",
-- 
2.9.5



[dpdk-dev] [PATCH] net/mlx5: make flow opration thread safe

2020-11-07 Thread Weifeng Li
Does it neet a lock for flow about below scene.
Thread1: flow_list_destroyflow_list_create
Thread2: -flow_list_destroy
Maybe the same flow can be operate at the same time.

When i start mlx5 bond and trigger LSC at the same time.
It is possible to assert in mlx5_rx_queue_release func and
print "port 4 Rx queu 0 is still used by a flow and cannot
be removed". I use dpdk-testpmd to simulate the test.

Signed-off-by: Weifeng Li 
---
 drivers/net/mlx5/linux/mlx5_os.c |  1 +
 drivers/net/mlx5/mlx5.h  |  1 +
 drivers/net/mlx5/mlx5_flow.c | 13 +++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index c78d56f..59c074e 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1426,6 +1426,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
  MLX5_MAX_MAC_ADDRESSES);
priv->flows = 0;
priv->ctrl_flows = 0;
+   rte_spinlock_init(&priv->flow_lock);
rte_spinlock_init(&priv->flow_list_lock);
TAILQ_INIT(&priv->flow_meters);
TAILQ_INIT(&priv->flow_meter_profiles);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b43a8c9..860bf2f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -963,6 +963,7 @@ struct mlx5_priv {
unsigned int reta_idx_n; /* RETA index size. */
struct mlx5_drop drop_queue; /* Flow drop queues. */
uint32_t flows; /* RTE Flow rules. */
+   rte_spinlock_t flow_lock;
uint32_t ctrl_flows; /* Control flow rules. */
rte_spinlock_t flow_list_lock;
struct mlx5_obj_ops obj_ops; /* HW objects operations. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index e9c0ddd..69d8159 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5577,6 +5577,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
external, hairpin_flow, error);
if (ret < 0)
goto error_before_hairpin_split;
+   rte_spinlock_lock(&priv->flow_lock);
flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
if (!flow) {
rte_errno = ENOMEM;
@@ -5598,8 +5599,10 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
rss = flow_get_rss_action(p_actions_rx);
if (rss) {
-   if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
+   if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num)) {
+   rte_spinlock_unlock(&priv->flow_lock);
return 0;
+   }
/*
 * The following information is required by
 * mlx5_flow_hashfields_adjust() in advance.
@@ -5723,6 +5726,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
mlx5_free(default_miss_ctx.queue);
}
+   rte_spinlock_unlock(&priv->flow_lock);
return idx;
 error:
MLX5_ASSERT(flow);
@@ -5738,6 +5742,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
wks->flow_nested_idx = 0;
 error_before_hairpin_split:
rte_free(translated_actions);
+   rte_spinlock_unlock(&priv->flow_lock);
return 0;
 }
 
@@ -5877,11 +5882,14 @@ flow_list_destroy(struct rte_eth_dev *dev, uint32_t 
*list,
  uint32_t flow_idx)
 {
struct mlx5_priv *priv = dev->data->dev_private;
+   rte_spinlock_lock(&priv->flow_lock);
struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
   [MLX5_IPOOL_RTE_FLOW], flow_idx);
 
-   if (!flow)
+   if (!flow) {
+   rte_spinlock_unlock(&priv->flow_lock);
return;
+   }
/*
 * Update RX queue flags only if port is started, otherwise it is
 * already clean.
@@ -5908,6 +5916,7 @@ flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
}
flow_mreg_del_copy_action(dev, flow);
mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
+   rte_spinlock_unlock(&priv->flow_lock);
 }
 
 /**
-- 
2.9.5



[dpdk-dev] [PATCH v2] net/mlx5: make flow operation thread safe

2020-11-07 Thread Weifeng Li
Does it need a lock for flow about below scene.
Thread1: flow_list_destroyflow_list_create
Thread2: -flow_list_destroy
Maybe the same flow can be operate at the same time.

When i start mlx5 bond and trigger LSC at the same time.
It is possible to assert in mlx5_rx_queue_release func and
print "port 4 Rx queue 0 is still used by a flow and cannot
be removed". I use dpdk-testpmd to simulate the test.

Signed-off-by: Weifeng Li 
---
v2: adjust coding style issue.
---
 drivers/net/mlx5/linux/mlx5_os.c |  1 +
 drivers/net/mlx5/mlx5.h  |  1 +
 drivers/net/mlx5/mlx5_flow.c | 13 +++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index c78d56f..59c074e 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1426,6 +1426,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
  MLX5_MAX_MAC_ADDRESSES);
priv->flows = 0;
priv->ctrl_flows = 0;
+   rte_spinlock_init(&priv->flow_lock);
rte_spinlock_init(&priv->flow_list_lock);
TAILQ_INIT(&priv->flow_meters);
TAILQ_INIT(&priv->flow_meter_profiles);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b43a8c9..860bf2f 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -963,6 +963,7 @@ struct mlx5_priv {
unsigned int reta_idx_n; /* RETA index size. */
struct mlx5_drop drop_queue; /* Flow drop queues. */
uint32_t flows; /* RTE Flow rules. */
+   rte_spinlock_t flow_lock;
uint32_t ctrl_flows; /* Control flow rules. */
rte_spinlock_t flow_list_lock;
struct mlx5_obj_ops obj_ops; /* HW objects operations. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index e9c0ddd..69d8159 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5577,6 +5577,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
external, hairpin_flow, error);
if (ret < 0)
goto error_before_hairpin_split;
+   rte_spinlock_lock(&priv->flow_lock);
flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
if (!flow) {
rte_errno = ENOMEM;
@@ -5598,8 +5599,10 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
rss = flow_get_rss_action(p_actions_rx);
if (rss) {
-   if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
+   if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num)) {
+   rte_spinlock_unlock(&priv->flow_lock);
return 0;
+   }
/*
 * The following information is required by
 * mlx5_flow_hashfields_adjust() in advance.
@@ -5723,6 +5726,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
mlx5_free(default_miss_ctx.queue);
}
+   rte_spinlock_unlock(&priv->flow_lock);
return idx;
 error:
MLX5_ASSERT(flow);
@@ -5738,6 +5742,7 @@ flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
wks->flow_nested_idx = 0;
 error_before_hairpin_split:
rte_free(translated_actions);
+   rte_spinlock_unlock(&priv->flow_lock);
return 0;
 }
 
@@ -5877,11 +5882,14 @@ flow_list_destroy(struct rte_eth_dev *dev, uint32_t 
*list,
  uint32_t flow_idx)
 {
struct mlx5_priv *priv = dev->data->dev_private;
+   rte_spinlock_lock(&priv->flow_lock);
struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
   [MLX5_IPOOL_RTE_FLOW], flow_idx);
 
-   if (!flow)
+   if (!flow) {
+   rte_spinlock_unlock(&priv->flow_lock);
return;
+   }
/*
 * Update RX queue flags only if port is started, otherwise it is
 * already clean.
@@ -5908,6 +5916,7 @@ flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
}
flow_mreg_del_copy_action(dev, flow);
mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
+   rte_spinlock_unlock(&priv->flow_lock);
 }
 
 /**
-- 
2.9.5



[dpdk-dev] [PATCH v3] net/bonding: change the state machine to defaulted

2020-07-17 Thread Weifeng Li
A dpdk bonding 802.3ad network as follows:
+--++---+
|dpdk lacp |bond1.1 <--> bond2.1|switch lacp|
|  |bond1.2 <--> bond2.2|   |
+--++---+
If a fiber optic go wrong about single pass during normal running like
this:
bond1.2 -> bond2.2 ok
bond1.2 <--x-- bond2.2 error: bond1.2 receive no LACPDU Some packets from
  switch to dpdk will choose bond2.2 and lost.

DPDK lacp state machine will transits to the expired state if no LACPDU
is received before the current_while_timer expires. But if no LACPDU is
received before the current_while_timer expires again, DPDK lacp state
machine has no change. Bond2.2 can not change to inactive depend on the
received LACPDU.
According to IEEE 802.3ad, if no lacpdu is received before the
current_while_timer expires again, the state machine should transits from
expired to defaulted. Bond2.2 will change to inactive depend on the LACPDU
with defaulted state.

This patch adds a state machine change from expired to defaulted when no
lacpdu is received before the current_while_timer expires again according
to IEEE 802.3ad:
If no LACPDU is received before the current_while timer expires again,
the state machine transits to the DEFAULTED state. The record Default
function overwrites the current operational parameters for the Partner
with administratively configured values. This allows configuration of
aggregations and individual links when no protocol partner is present,
while still permitting an active partner to override default settings.
The update_Default_Selected function sets the Selected variable FALSE
if the Link Aggregation Group has changed. Since all operational parameters
are now set to locally administered values there can be no disagreement as
to the Link Aggregation Group, so the Matched variable is set TRUE.

The relevant description is in the chapter 43.4.12 of the link below:
https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=850426

Signed-off-by: Weifeng Li 
---
v1 -> v2: adjust the formate of commit log
---
v2 -> v3:
1. adjust coding style issue.
2. add description about partner_admin from IEEE spec.
---
 drivers/net/bonding/eth_bond_8023ad_private.h |  3 +++
 drivers/net/bonding/rte_eth_bond_8023ad.c | 21 +
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/eth_bond_8023ad_private.h 
b/drivers/net/bonding/eth_bond_8023ad_private.h
index 6e44ffd..9b5738a 100644
--- a/drivers/net/bonding/eth_bond_8023ad_private.h
+++ b/drivers/net/bonding/eth_bond_8023ad_private.h
@@ -50,6 +50,7 @@
 #define SM_FLAGS_MOVED  0x0100
 #define SM_FLAGS_PARTNER_SHORT_TIMEOUT  0x0200
 #define SM_FLAGS_NTT0x0400
+#define SM_FLAGS_EXPIRED0x0800
 
 #define BOND_LINK_FULL_DUPLEX_KEY   0x01
 #define BOND_LINK_SPEED_KEY_10M 0x02
@@ -103,6 +104,8 @@ struct port {
 
/** The operational Partner's port parameters */
struct port_params partner;
+   /** Partner administrative parameter values */
+   struct port_params partner_admin;
 
/* Additional port parameters not listed in documentation */
/** State machine flags */
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c 
b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 3991825..3c2c7bf 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -356,16 +356,28 @@ rx_machine(struct bond_dev_private *internals, uint16_t 
slave_id,
 
timer_set(&port->current_while_timer, timeout);
ACTOR_STATE_CLR(port, EXPIRED);
+   SM_FLAG_CLR(port, EXPIRED);
return; /* No state change */
}
 
/* If CURRENT state timer is not running (stopped or expired)
 * transit to EXPIRED state from DISABLED or CURRENT */
if (!timer_is_running(&port->current_while_timer)) {
-   ACTOR_STATE_SET(port, EXPIRED);
-   PARTNER_STATE_CLR(port, SYNCHRONIZATION);
-   PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
-   timer_set(&port->current_while_timer, 
internals->mode4.short_timeout);
+   if (SM_FLAG(port, EXPIRED)) {
+   port->selected = UNSELECTED;
+   memcpy(&port->partner, &port->partner_admin,
+   sizeof(struct port_params));
+   record_default(port);
+   ACTOR_STATE_CLR(port, EXPIRED);
+   timer_cancel(&port->current_while_timer);
+   } else {
+   SM_FLAG_SET(port, EXPIRED);
+   ACTOR_STATE_SET(port, EXPIRED);
+   PARTNER_STATE_CLR(port, SYNCHRONIZATION);
+   PAR

[dpdk-dev] [PATCH v4] net/bonding: change the state machine to defaulted

2020-07-17 Thread Weifeng Li
A dpdk bonding 802.3ad network as follows:
+--++---+
|dpdk lacp |bond1.1 <--> bond2.1|switch lacp|
|  |bond1.2 <--> bond2.2|   |
+--++---+
If a fiber optic go wrong about single pass during normal running like
this:
bond1.2 -> bond2.2 ok
bond1.2 <--x-- bond2.2 error: bond1.2 receive no LACPDU Some packets from
  switch to dpdk will choose bond2.2 and lost.

DPDK lacp state machine will transits to the expired state if no LACPDU
is received before the current_while_timer expires. But if no LACPDU is
received before the current_while_timer expires again, DPDK lacp state
machine has no change. Bond2.2 can not change to inactive depend on the
received LACPDU.
According to IEEE 802.3ad, if no lacpdu is received before the
current_while_timer expires again, the state machine should transits from
expired to defaulted. Bond2.2 will change to inactive depend on the LACPDU
with defaulted state.

This patch adds a state machine change from expired to defaulted when no
lacpdu is received before the current_while_timer expires again according
to IEEE 802.3ad:
If no LACPDU is received before the current_while timer expires again,
the state machine transits to the DEFAULTED state. The record Default
function overwrites the current operational parameters for the Partner
with administratively configured values. This allows configuration of
aggregations and individual links when no protocol partner is present,
while still permitting an active partner to override default settings.
The update_Default_Selected function sets the Selected variable FALSE
if the Link Aggregation Group has changed. Since all operational parameters
are now set to locally administered values there can be no disagreement as
to the Link Aggregation Group, so the Matched variable is set TRUE.

The relevant description is in the chapter 43.4.12 of the link below:
https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=850426

Signed-off-by: Weifeng Li 
---
v1 -> v2: adjust the formate of commit log
---
v2 -> v3:
1. adjust coding style issue.
2. add description about partner_admin from IEEE spec.
---
v3 -> v4
1. adjust coding style issue: Missing Signed-off-by
---
 drivers/net/bonding/eth_bond_8023ad_private.h |  3 +++
 drivers/net/bonding/rte_eth_bond_8023ad.c | 21 +
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/eth_bond_8023ad_private.h 
b/drivers/net/bonding/eth_bond_8023ad_private.h
index 6e44ffd..9b5738a 100644
--- a/drivers/net/bonding/eth_bond_8023ad_private.h
+++ b/drivers/net/bonding/eth_bond_8023ad_private.h
@@ -50,6 +50,7 @@
 #define SM_FLAGS_MOVED  0x0100
 #define SM_FLAGS_PARTNER_SHORT_TIMEOUT  0x0200
 #define SM_FLAGS_NTT0x0400
+#define SM_FLAGS_EXPIRED0x0800
 
 #define BOND_LINK_FULL_DUPLEX_KEY   0x01
 #define BOND_LINK_SPEED_KEY_10M 0x02
@@ -103,6 +104,8 @@ struct port {
 
/** The operational Partner's port parameters */
struct port_params partner;
+   /** Partner administrative parameter values */
+   struct port_params partner_admin;
 
/* Additional port parameters not listed in documentation */
/** State machine flags */
diff --git a/drivers/net/bonding/rte_eth_bond_8023ad.c 
b/drivers/net/bonding/rte_eth_bond_8023ad.c
index 3991825..3c2c7bf 100644
--- a/drivers/net/bonding/rte_eth_bond_8023ad.c
+++ b/drivers/net/bonding/rte_eth_bond_8023ad.c
@@ -356,16 +356,28 @@ rx_machine(struct bond_dev_private *internals, uint16_t 
slave_id,
 
timer_set(&port->current_while_timer, timeout);
ACTOR_STATE_CLR(port, EXPIRED);
+   SM_FLAG_CLR(port, EXPIRED);
return; /* No state change */
}
 
/* If CURRENT state timer is not running (stopped or expired)
 * transit to EXPIRED state from DISABLED or CURRENT */
if (!timer_is_running(&port->current_while_timer)) {
-   ACTOR_STATE_SET(port, EXPIRED);
-   PARTNER_STATE_CLR(port, SYNCHRONIZATION);
-   PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT);
-   timer_set(&port->current_while_timer, 
internals->mode4.short_timeout);
+   if (SM_FLAG(port, EXPIRED)) {
+   port->selected = UNSELECTED;
+   memcpy(&port->partner, &port->partner_admin,
+   sizeof(struct port_params));
+   record_default(port);
+   ACTOR_STATE_CLR(port, EXPIRED);
+   timer_cancel(&port->current_while_timer);
+   } else {
+   SM_FLAG_SET(port, EXPIRED);
+   ACTOR_STATE_SET(port, EXPIRED);
+   PART