From: Liang Zhen <liang.z...@intel.com>

A couple of changes to improve aliveness detection:
- When LNet receives a message, it can determine that the peer which
  sent this message is alive.

- When LNet receives a message from a remote network, it can determine
  that the router is alive and that the NI status on the router is UP.

Signed-off-by: Liang Zhen <liang.z...@intel.com>
Reviewed-on: http://review.whamcloud.com/12453
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5485
Reviewed-by: James Simmons <uja.o...@gmail.com>
Reviewed-by: Isaac Huang <he.hu...@intel.com>
Signed-off-by: Oleg Drokin <oleg.dro...@intel.com>
---
 drivers/staging/lustre/include/linux/lnet/lib-lnet.h | 10 ++++++++++
 drivers/staging/lustre/lnet/lnet/lib-move.c          | 13 +++++++++++++
 drivers/staging/lustre/lnet/lnet/router.c            | 17 ++++++++++++++++-
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index 99fb52a..0038d29 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -636,6 +636,7 @@ lnet_net2rnethash(__u32 net)
 }
 
 extern lnd_t the_lolnd;
+extern int avoid_asym_router_failure;
 
 int lnet_cpt_of_nid_locked(lnet_nid_t nid);
 int lnet_cpt_of_nid(lnet_nid_t nid);
@@ -851,6 +852,7 @@ int lnet_peer_buffer_credits(lnet_ni_t *ni);
 
 int lnet_router_checker_start(void);
 void lnet_router_checker_stop(void);
+void lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net);
 void lnet_swap_pinginfo(lnet_ping_info_t *info);
 
 int lnet_ping_target_init(void);
@@ -870,4 +872,12 @@ void lnet_peer_tables_destroy(void);
 int lnet_peer_tables_create(void);
 void lnet_debug_peer(lnet_nid_t nid);
 
+static inline void lnet_peer_set_alive(lnet_peer_t *lp)
+{
+       lp->lp_last_alive = lp->lp_last_query = get_seconds();
+       if (!lp->lp_alive)
+               lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
+}
+
+
 #endif
diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c
index ed6eec9..0f53c76 100644
--- a/drivers/staging/lustre/lnet/lnet/lib-move.c
+++ b/drivers/staging/lustre/lnet/lnet/lib-move.c
@@ -1877,6 +1877,19 @@ lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid,
                goto drop;
        }
 
+       if (lnet_isrouter(msg->msg_rxpeer)) {
+               lnet_peer_set_alive(msg->msg_rxpeer);
+               if (avoid_asym_router_failure &&
+                   LNET_NIDNET(src_nid) != LNET_NIDNET(from_nid)) {
+                       /* received a remote message from router, update
+                        * remote NI status on this router.
+                        * NB: multi-hop routed message will be ignored.
+                        */
+                       lnet_router_ni_update_locked(msg->msg_rxpeer,
+                                                    LNET_NIDNET(src_nid));
+               }
+       }
+
        lnet_msg_commit(msg, cpt);
 
        if (!for_me) {
diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c
index 1bbaa5b..52ec0ab 100644
--- a/drivers/staging/lustre/lnet/lnet/router.c
+++ b/drivers/staging/lustre/lnet/lnet/router.c
@@ -84,7 +84,7 @@ static int check_routers_before_use;
 module_param(check_routers_before_use, int, 0444);
 MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
 
-static int avoid_asym_router_failure = 1;
+int avoid_asym_router_failure = 1;
 module_param(avoid_asym_router_failure, int, 0644);
 MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
 
@@ -783,6 +783,21 @@ lnet_wait_known_routerstate(void)
        }
 }
 
+void
+lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net)
+{
+       lnet_route_t *rte;
+
+       if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) {
+               list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
+                       if (rte->lr_net == net) {
+                               rte->lr_downis = 0;
+                               break;
+                       }
+               }
+       }
+}
+
 static void
 lnet_update_ni_status_locked(void)
 {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to