I was able to reproduce it: I stopped corosync on node1 at 18:12:29, and /etc/pve locked up at 18:12:30.
Logs of all nodes are here: http://odisoweb1.odiso.net/test1/

I don't have a coredump, as my coworker restarted pmxcfs too quickly :/ Sorry.
I'm going to launch another test, with a coredump this time.

----- Original Message -----
From: "aderumier" <aderum...@odiso.com>
To: "Proxmox VE development discussion" <pve-devel@lists.proxmox.com>
Sent: Thursday, 24 September 2020 16:29:17
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown

Hi Fabian,

>> if you are still able to test, it would be great if you could give the
>> following packages a spin (they only contain some extra debug prints
>> on message processing/sending):

Sure, no problem, I'm going to test it tonight.

>> ideally, you could get the debug logs from all nodes, and the
>> coredump/bt from the node where pmxcfs hangs. thanks!

OK, no problem. I'll keep you posted tomorrow.

----- Original Message -----
From: "Fabian Grünbichler" <f.gruenbich...@proxmox.com>
To: "Proxmox VE development discussion" <pve-devel@lists.proxmox.com>
Sent: Thursday, 24 September 2020 16:02:04
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown

On September 22, 2020 7:43 am, Alexandre DERUMIER wrote:
> I have done a test with "kill -9 <pid of corosync>", and I get around a 20s hang
> on the other nodes, but after that it becomes available again.
>
> So it's really something that happens when corosync is in its shutdown phase
> while pmxcfs is still running.
>
> So, for now, as a workaround, I have changed
>
> /lib/systemd/system/pve-cluster.service
>
> #Wants=corosync.service
> #Before=corosync.service
> Requires=corosync.service
> After=corosync.service
>
> This way, at shutdown, pve-cluster is stopped before corosync, and if I
> restart corosync, pve-cluster is stopped first.

if you are still able to test, it would be great if you could give the
following packages a spin (they only contain some extra debug prints on
message processing/sending):

http://download.proxmox.com/temp/pmxcfs-dbg/

64eb9dbd2f60fe319abaeece89a84fd5de1f05a8c38cb9871058ec1f55025486ec15b7c0053976159fe5c2518615fd80084925abf4d3a5061ea7d6edef264c36 pve-cluster_6.1-8_amd64.deb
04b557c7f0dc1aa2846b534d6afab70c2b8d4720ac307364e015d885e2e997b6dcaa54cad673b22d626d27cb053e5723510fde15d078d5fe1f262fc5486e6cef pve-cluster-dbgsym_6.1-8_amd64.deb

ideally, you could get the debug logs from all nodes, and the coredump/bt
from the node where pmxcfs hangs.

thanks!
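For context on what the extra debug prints in the patch below instrument: dfsm_send_message_sync() blocks on a GLib condition variable until the CPG deliver callback (or dfsm_send_sync_message_abort()) advances msgcount_rcvd and broadcasts sync_cond. The following is only a stripped-down sketch of that wait/broadcast pattern, not the actual pmxcfs source; if the broadcast never arrives (for example because corosync goes away mid-shutdown and no abort path runs), the waiter blocks forever, which would match the observed /etc/pve hang.

/*
 * Simplified sketch (NOT the actual pmxcfs code) of the wait/broadcast
 * pattern around dfsm->sync_mutex / dfsm->sync_cond.
 * Build: gcc sketch.c $(pkg-config --cflags --libs glib-2.0)
 */
#include <glib.h>
#include <stdio.h>

static GMutex  sync_mutex;
static GCond   sync_cond;
static guint64 msgcount = 1;   /* count of the message we just sent */
static guint64 msgcount_rcvd;  /* last message echoed back by the deliver callback */

/* Stands in for the CPG deliver callback (or the abort path): whoever
 * advances msgcount_rcvd must broadcast, otherwise the sender below
 * waits forever -- the shape of the hang seen when corosync disappears
 * while pmxcfs is mid-sync. */
static gpointer deliver_thread(gpointer data)
{
    g_usleep(100 * 1000);          /* pretend self-delivery takes a moment */
    g_mutex_lock(&sync_mutex);
    msgcount_rcvd = msgcount;
    g_cond_broadcast(&sync_cond);
    g_mutex_unlock(&sync_mutex);
    return NULL;
}

int main(void)
{
    GThread *t = g_thread_new("deliver", deliver_thread, NULL);

    /* Sender side: same loop shape as dfsm_send_message_sync(). */
    g_mutex_lock(&sync_mutex);
    while (msgcount_rcvd < msgcount)
        g_cond_wait(&sync_cond, &sync_mutex);
    g_mutex_unlock(&sync_mutex);

    g_thread_join(t);
    printf("unblocked: msgcount_rcvd = %" G_GUINT64_FORMAT "\n", msgcount_rcvd);
    return 0;
}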
diff --git a/data/src/dfsm.c b/data/src/dfsm.c
index 529c7f9..e0bd93f 100644
--- a/data/src/dfsm.c
+++ b/data/src/dfsm.c
@@ -162,8 +162,8 @@ static void
 dfsm_send_sync_message_abort(dfsm_t *dfsm)
 {
 	g_return_if_fail(dfsm != NULL);
-
 	g_mutex_lock (&dfsm->sync_mutex);
+	cfs_dom_debug(dfsm->log_domain, "dfsm_send_sync_message_abort - %" PRIu64" / %" PRIu64, dfsm->msgcount_rcvd, dfsm->msgcount);
 	dfsm->msgcount_rcvd = dfsm->msgcount;
 	g_cond_broadcast (&dfsm->sync_cond);
 	g_mutex_unlock (&dfsm->sync_mutex);
@@ -181,6 +181,7 @@ dfsm_record_local_result(
 	g_mutex_lock (&dfsm->sync_mutex);
 
 	dfsm_result_t *rp = (dfsm_result_t *)g_hash_table_lookup(dfsm->results, &msg_count);
+	cfs_dom_debug(dfsm->log_domain, "dfsm_record_local_result - %" PRIu64": %d", msg_count, msg_result);
 	if (rp) {
 		rp->result = msg_result;
 		rp->processed = processed;
@@ -235,6 +236,8 @@ dfsm_send_state_message_full(
 	g_return_val_if_fail(DFSM_VALID_STATE_MESSAGE(type), CS_ERR_INVALID_PARAM);
 	g_return_val_if_fail(!len || iov != NULL, CS_ERR_INVALID_PARAM);
 
+	cfs_dom_debug(dfsm->log_domain, "dfsm_send_state_message_full: type %d len %d", type, len);
+
 	dfsm_message_state_header_t header;
 	header.base.type = type;
 	header.base.subtype = 0;
@@ -317,6 +320,7 @@ dfsm_send_message_sync(
 	for (int i = 0; i < len; i++)
 		real_iov[i + 1] = iov[i];
 
+	cfs_dom_debug(dfsm->log_domain, "dfsm_send_messag_sync: type NORMAL, msgtype %d, len %d", msgtype, len);
 	cs_error_t result = dfsm_send_message_full(dfsm, real_iov, len + 1, 1);
 
 	g_mutex_unlock (&dfsm->sync_mutex);
@@ -335,10 +339,12 @@ dfsm_send_message_sync(
 	if (rp) {
 		g_mutex_lock (&dfsm->sync_mutex);
 
-		while (dfsm->msgcount_rcvd < msgcount)
+		while (dfsm->msgcount_rcvd < msgcount) {
+			cfs_dom_debug(dfsm->log_domain, "dfsm_send_message_sync: waiting for received messages %" PRIu64 " / %" PRIu64, dfsm->msgcount_rcvd, msgcount);
 			g_cond_wait (&dfsm->sync_cond, &dfsm->sync_mutex);
+		}
+		cfs_dom_debug(dfsm->log_domain, "dfsm_send_message_sync: done waiting for received messages!");
 
-
 		g_hash_table_remove(dfsm->results, &rp->msgcount);
 
 		g_mutex_unlock (&dfsm->sync_mutex);
@@ -685,9 +691,13 @@ dfsm_cpg_deliver_callback(
 		return;
 	}
 
+	cfs_dom_debug(dfsm->log_domain, "received message's header type is %d", base_header->type);
+
 	if (base_header->type == DFSM_MESSAGE_NORMAL) {
 
 		dfsm_message_normal_header_t *header = (dfsm_message_normal_header_t *)msg;
+		cfs_dom_debug(dfsm->log_domain, "received normal message (type = %d, subtype = %d, %zd bytes)",
+			      base_header->type, base_header->subtype, msg_len);
 
 		if (msg_len < sizeof(dfsm_message_normal_header_t)) {
 			cfs_dom_critical(dfsm->log_domain, "received short message (type = %d, subtype = %d, %zd bytes)",
@@ -704,6 +714,8 @@ dfsm_cpg_deliver_callback(
 		} else {
 
 			int msg_res = -1;
+			cfs_dom_debug(dfsm->log_domain, "deliver message %" PRIu64 " (subtype = %d, length = %zd)",
+				      header->count, base_header->subtype, msg_len);
 			int res = dfsm->dfsm_callbacks->dfsm_deliver_fn(
 				dfsm, dfsm->data, &msg_res, nodeid, pid, base_header->subtype,
 				base_header->time, (uint8_t *)msg + sizeof(dfsm_message_normal_header_t),
@@ -724,6 +736,8 @@ dfsm_cpg_deliver_callback(
 	 */
 
 	dfsm_message_state_header_t *header = (dfsm_message_state_header_t *)msg;
+	cfs_dom_debug(dfsm->log_domain, "received state message (type = %d, subtype = %d, %zd bytes), mode is %d",
+		      base_header->type, base_header->subtype, msg_len, mode);
 
 	if (msg_len < sizeof(dfsm_message_state_header_t)) {
 		cfs_dom_critical(dfsm->log_domain, "received short state message (type = %d, subtype = %d, %zd bytes)",
@@ -744,6 +758,7 @@ dfsm_cpg_deliver_callback(
 	if (mode == DFSM_MODE_SYNCED) {
 
 		if (base_header->type == DFSM_MESSAGE_UPDATE_COMPLETE) {
+			cfs_dom_debug(dfsm->log_domain, "received update complete message");
 
 			for (int i = 0; i < dfsm->sync_info->node_count; i++)
 				dfsm->sync_info->nodes[i].synced = 1;
@@ -754,6 +769,7 @@ dfsm_cpg_deliver_callback(
 			return;
 
 		} else if (base_header->type == DFSM_MESSAGE_VERIFY_REQUEST) {
+			cfs_dom_debug(dfsm->log_domain, "received verify request message");
 
 			if (msg_len != sizeof(dfsm->csum_counter)) {
 				cfs_dom_critical(dfsm->log_domain, "cpg received verify request with wrong length (%zd bytes) form node %d/%d",
 						 msg_len, nodeid, pid);
@@ -823,7 +839,6 @@ dfsm_cpg_deliver_callback(
 	} else if (mode == DFSM_MODE_START_SYNC) {
 
 		if (base_header->type == DFSM_MESSAGE_SYNC_START) {
-
 			if (nodeid != dfsm->lowest_nodeid) {
 				cfs_dom_critical(dfsm->log_domain, "ignore sync request from wrong member %d/%d",
 						 nodeid, pid);
@@ -861,6 +876,7 @@ dfsm_cpg_deliver_callback(
 			return;
 
 		} else if (base_header->type == DFSM_MESSAGE_STATE) {
+			cfs_dom_debug(dfsm->log_domain, "received state message for %d/%d", nodeid, pid);
 
 			dfsm_node_info_t *ni;
 
@@ -906,6 +922,8 @@ dfsm_cpg_deliver_callback(
 					goto leave;
 				}
 
+			} else {
+				cfs_dom_debug(dfsm->log_domain, "haven't received all states, waiting for more");
 			}
 
 			return;
@@ -914,6 +932,7 @@
 	} else if (mode == DFSM_MODE_UPDATE) {
 
 		if (base_header->type == DFSM_MESSAGE_UPDATE) {
+			cfs_dom_debug(dfsm->log_domain, "received update message");
 
 			int res = dfsm->dfsm_callbacks->dfsm_process_update_fn(
 				dfsm, dfsm->data, dfsm->sync_info, nodeid, pid, msg, msg_len);
@@ -925,6 +944,7 @@
 
 		} else if (base_header->type == DFSM_MESSAGE_UPDATE_COMPLETE) {
+			cfs_dom_debug(dfsm->log_domain, "received update complete message");
 
 			int res = dfsm->dfsm_callbacks->dfsm_commit_fn(dfsm, dfsm->data, dfsm->sync_info);
 
@@ -1104,6 +1124,7 @@ dfsm_cpg_confchg_callback(
 	size_t joined_list_entries)
 {
 	cs_error_t result;
+	cfs_debug("dfsm_cpg_confchg_callback called");
 
 	dfsm_t *dfsm = NULL;
 	result = cpg_context_get(handle, (gpointer *)&dfsm);

_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel