If peers are "simultaneously" told to disconnect from each other,
either explicitly, or implicitly by taking down the resource,
with bad timing, one side may see its disconnect "fail" with
a result of "state change failed by peer", and interpret this as
"please outdate yourself".

Try to catch this by checking for current connection status,
and possibly retry as local-only state change instead.

Signed-off-by: Lars Ellenberg <lars.ellenb...@linbit.com>
---
 drivers/block/drbd/drbd_nl.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 1958eb33b643..82915880c5e9 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2711,8 +2711,10 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
 
 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection 
*connection, bool force)
 {
+       enum drbd_conns cstate;
        enum drbd_state_rv rv;
 
+repeat:
        rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
                        force ? CS_HARD : 0);
 
@@ -2730,6 +2732,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
 
                break;
        case SS_CW_FAILED_BY_PEER:
+               spin_lock_irq(&connection->resource->req_lock);
+               cstate = connection->cstate;
+               spin_unlock_irq(&connection->resource->req_lock);
+               if (cstate <= C_WF_CONNECTION)
+                       goto repeat;
                /* The peer probably wants to see us outdated. */
                rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
                                                        disk, D_OUTDATED), 0);
-- 
2.17.1

Reply via email to