On September 25, 2020 11:46 am, Alexandre DERUMIER wrote:
>>>I will rebuild once more modifying the send code a bit to log a lot more
>>>details when sending state messages, it would be great if you could
>>>repeat with that as we are still unable to reproduce the issue.
> 
> ok, no problem, I'm able to easily reproduce it, I'll do new test when you'll
> send the new version.
> 
> (and thanks again to debugging this, because It's really beyond my competence)

same procedure as last time, same place, new checksums:

6b5e2defe543a874e0f7a883e40b279c997438dde158566c4c93c11ea531aef924ed2e4eb2506b5064e49ec9bdd4ebe7acd0b9278e9286eac527b0b15a43d8d7
  pve-cluster_6.1-8_amd64.deb
d57ddc08824055826ee15c9c255690d9140e43f8d5164949108f0dc483a2d181b2bda76f0e7f47202699a062342c0cf0bba8f2ae0f7c5411af9967ef051050a0
  pve-cluster-dbgsym_6.1-8_amd64.deb

I found one (unfortunately unrelated) bug in our error handling, so 
there's that at least ;)

diff --git a/data/src/dfsm.c b/data/src/dfsm.c
index 529c7f9..f3397a0 100644
--- a/data/src/dfsm.c
+++ b/data/src/dfsm.c
@@ -162,8 +162,8 @@ static void
 dfsm_send_sync_message_abort(dfsm_t *dfsm)
 {
        g_return_if_fail(dfsm != NULL);
-
        g_mutex_lock (&dfsm->sync_mutex);
+       cfs_dom_debug(dfsm->log_domain, "dfsm_send_sync_message_abort - %" 
PRIu64" / %" PRIu64, dfsm->msgcount_rcvd, dfsm->msgcount);
        dfsm->msgcount_rcvd = dfsm->msgcount;
        g_cond_broadcast (&dfsm->sync_cond);
        g_mutex_unlock (&dfsm->sync_mutex);
@@ -181,6 +181,7 @@ dfsm_record_local_result(
 
        g_mutex_lock (&dfsm->sync_mutex);
        dfsm_result_t *rp = (dfsm_result_t *)g_hash_table_lookup(dfsm->results, 
&msg_count);
+       cfs_dom_debug(dfsm->log_domain, "dfsm_record_local_result - %" PRIu64": 
 %d", msg_count, msg_result);
        if (rp) {
                rp->result = msg_result;
                rp->processed = processed;
@@ -224,6 +225,48 @@ loop:
        return result;
 }
 
+static cs_error_t 
+dfsm_send_message_full_debug_state(
+       dfsm_t *dfsm,
+       struct iovec *iov, 
+       unsigned int len,
+       int retry)
+{
+       g_return_val_if_fail(dfsm != NULL, CS_ERR_INVALID_PARAM);
+       g_return_val_if_fail(!len || iov != NULL, CS_ERR_INVALID_PARAM);
+
+       struct timespec tvreq = { .tv_sec = 0, .tv_nsec = 100000000 };
+       cs_error_t result;
+       int retries = 0;
+       cfs_dom_message(dfsm->log_domain, "send state message debug");
+       cfs_dom_message(dfsm->log_domain, "cpg_handle %" PRIu64, 
dfsm->cpg_handle);
+       for (int i = 0; i < len; i++)
+               cfs_dom_message(dfsm->log_domain, "iov[%d] len %zd", i, 
iov[i].iov_len);
+loop:
+       cfs_dom_message(dfsm->log_domain, "send state message loop body");
+
+       result = cpg_mcast_joined(dfsm->cpg_handle, CPG_TYPE_AGREED, iov, len);
+
+       cfs_dom_message(dfsm->log_domain, "send state message result: %d", 
result);
+       if (retry && result == CS_ERR_TRY_AGAIN) {
+               nanosleep(&tvreq, NULL);
+               ++retries;
+               if ((retries % 10) == 0)
+                       cfs_dom_message(dfsm->log_domain, "cpg_send_message 
retry %d", retries);
+               if (retries < 100)
+                       goto loop;
+       }
+
+       if (retries)
+               cfs_dom_message(dfsm->log_domain, "cpg_send_message retried %d 
times", retries);
+
+       if (result != CS_OK &&
+           (!retry || result != CS_ERR_TRY_AGAIN))
+               cfs_dom_critical(dfsm->log_domain, "cpg_send_message failed: 
%d", result);
+
+       return result;
+}
+
 static cs_error_t 
 dfsm_send_state_message_full(
        dfsm_t *dfsm,
@@ -235,6 +278,8 @@ dfsm_send_state_message_full(
        g_return_val_if_fail(DFSM_VALID_STATE_MESSAGE(type), 
CS_ERR_INVALID_PARAM);
        g_return_val_if_fail(!len || iov != NULL, CS_ERR_INVALID_PARAM);
 
+       cfs_dom_debug(dfsm->log_domain, "dfsm_send_state_message_full: type %d 
len %d", type, len);
+
        dfsm_message_state_header_t header;
        header.base.type = type;
        header.base.subtype = 0;
@@ -252,7 +297,7 @@ dfsm_send_state_message_full(
        for (int i = 0; i < len; i++)
                real_iov[i + 1] = iov[i];
 
-       return dfsm_send_message_full(dfsm, real_iov, len + 1, 1);
+       return type == DFSM_MESSAGE_STATE ? 
dfsm_send_message_full_debug_state(dfsm, real_iov, len + 1, 1) : 
dfsm_send_message_full(dfsm, real_iov, len + 1, 1);
 }
 
 cs_error_t 
@@ -317,6 +362,7 @@ dfsm_send_message_sync(
        for (int i = 0; i < len; i++)
                real_iov[i + 1] = iov[i];
 
+       cfs_dom_debug(dfsm->log_domain, "dfsm_send_messag_sync: type NORMAL, 
count %" PRIu64 ", msgtype %d, len %d", msgcount, msgtype, len);
        cs_error_t result = dfsm_send_message_full(dfsm, real_iov, len + 1, 1);
 
        g_mutex_unlock (&dfsm->sync_mutex);
@@ -335,10 +381,12 @@ dfsm_send_message_sync(
        if (rp) {
                g_mutex_lock (&dfsm->sync_mutex);
 
-               while (dfsm->msgcount_rcvd < msgcount)
+               while (dfsm->msgcount_rcvd < msgcount) {
+                       cfs_dom_debug(dfsm->log_domain, 
"dfsm_send_message_sync: waiting for received messages %" PRIu64 " / %" PRIu64, 
dfsm->msgcount_rcvd, msgcount);
                        g_cond_wait (&dfsm->sync_cond, &dfsm->sync_mutex);
+               }
+               cfs_dom_debug(dfsm->log_domain, "dfsm_send_message_sync: done 
waiting for received messages!");
 
-      
                g_hash_table_remove(dfsm->results, &rp->msgcount);
                
                g_mutex_unlock (&dfsm->sync_mutex);
@@ -685,9 +733,13 @@ dfsm_cpg_deliver_callback(
                return;
        }
 
+       cfs_dom_debug(dfsm->log_domain, "received message's header type is %d", 
base_header->type);
+
        if (base_header->type == DFSM_MESSAGE_NORMAL) {
 
                dfsm_message_normal_header_t *header = 
(dfsm_message_normal_header_t *)msg;
+               cfs_dom_debug(dfsm->log_domain, "received normal message (type 
= %d, subtype = %d, %zd bytes)",
+                             base_header->type, base_header->subtype, msg_len);
 
                if (msg_len < sizeof(dfsm_message_normal_header_t)) {
                        cfs_dom_critical(dfsm->log_domain, "received short 
message (type = %d, subtype = %d, %zd bytes)",
@@ -704,6 +756,8 @@ dfsm_cpg_deliver_callback(
                } else {
 
                        int msg_res = -1;
+                       cfs_dom_debug(dfsm->log_domain, "deliver message %" 
PRIu64 " (subtype = %d, length = %zd)",
+                                     header->count, base_header->subtype, 
msg_len); 
                        int res = dfsm->dfsm_callbacks->dfsm_deliver_fn(
                                dfsm, dfsm->data, &msg_res, nodeid, pid, 
base_header->subtype, 
                                base_header->time, (uint8_t *)msg + 
sizeof(dfsm_message_normal_header_t),
@@ -724,6 +778,8 @@ dfsm_cpg_deliver_callback(
         */
 
        dfsm_message_state_header_t *header = (dfsm_message_state_header_t 
*)msg;
+       cfs_dom_debug(dfsm->log_domain, "received state message (type = %d, 
subtype = %d, %zd bytes), mode is %d",
+                        base_header->type, base_header->subtype, msg_len, 
mode);
 
        if (msg_len < sizeof(dfsm_message_state_header_t)) {
                cfs_dom_critical(dfsm->log_domain, "received short state 
message (type = %d, subtype = %d, %zd bytes)",
@@ -744,6 +800,7 @@ dfsm_cpg_deliver_callback(
 
        if (mode == DFSM_MODE_SYNCED) {
                if (base_header->type == DFSM_MESSAGE_UPDATE_COMPLETE) {
+                       cfs_dom_debug(dfsm->log_domain, "received update 
complete message");
 
                        for (int i = 0; i < dfsm->sync_info->node_count; i++)
                                dfsm->sync_info->nodes[i].synced = 1;
@@ -754,6 +811,7 @@ dfsm_cpg_deliver_callback(
                        return;
 
                } else if (base_header->type == DFSM_MESSAGE_VERIFY_REQUEST) {
+                       cfs_dom_debug(dfsm->log_domain, "received verify 
request message");
 
                        if (msg_len != sizeof(dfsm->csum_counter)) {
                                cfs_dom_critical(dfsm->log_domain, "cpg 
received verify request with wrong length (%zd bytes) form node %d/%d", 
msg_len, nodeid, pid);
@@ -823,7 +881,6 @@ dfsm_cpg_deliver_callback(
        } else if (mode == DFSM_MODE_START_SYNC) {
 
                if (base_header->type == DFSM_MESSAGE_SYNC_START) {
-
                        if (nodeid != dfsm->lowest_nodeid) {
                                cfs_dom_critical(dfsm->log_domain, "ignore sync 
request from wrong member %d/%d",
                                                 nodeid, pid);
@@ -861,6 +918,7 @@ dfsm_cpg_deliver_callback(
                        return;
 
                } else if (base_header->type == DFSM_MESSAGE_STATE) {
+                       cfs_dom_debug(dfsm->log_domain, "received state message 
for %d/%d", nodeid, pid);
 
                        dfsm_node_info_t *ni;
                        
@@ -906,6 +964,8 @@ dfsm_cpg_deliver_callback(
                                                goto leave;
                                }
 
+                       } else {
+                               cfs_dom_debug(dfsm->log_domain, "haven't 
received all states, waiting for more");
                        }
 
                        return;
@@ -914,6 +974,7 @@ dfsm_cpg_deliver_callback(
        } else if (mode == DFSM_MODE_UPDATE) {
 
                if (base_header->type == DFSM_MESSAGE_UPDATE) {
+                       cfs_dom_debug(dfsm->log_domain, "received update 
message");
                                
                        int res = dfsm->dfsm_callbacks->dfsm_process_update_fn(
                                dfsm, dfsm->data, dfsm->sync_info, nodeid, pid, 
msg, msg_len);
@@ -925,6 +986,7 @@ dfsm_cpg_deliver_callback(
 
                } else if (base_header->type == DFSM_MESSAGE_UPDATE_COMPLETE) {
 
+                       cfs_dom_debug(dfsm->log_domain, "received update 
complete message");
 
                        int res = dfsm->dfsm_callbacks->dfsm_commit_fn(dfsm, 
dfsm->data, dfsm->sync_info);
 
@@ -1104,6 +1166,7 @@ dfsm_cpg_confchg_callback(
        size_t joined_list_entries)
 {
        cs_error_t result;
+       cfs_debug("dfsm_cpg_confchg_callback called");
 
        dfsm_t *dfsm = NULL;
        result = cpg_context_get(handle, (gpointer *)&dfsm);
@@ -1190,7 +1253,7 @@ dfsm_cpg_confchg_callback(
 
                dfsm_set_mode(dfsm, DFSM_MODE_START_SYNC);
                if (lowest_nodeid == dfsm->nodeid) {
-                       if (!dfsm_send_state_message_full(dfsm, 
DFSM_MESSAGE_SYNC_START, NULL, 0)) {
+                       if (dfsm_send_state_message_full(dfsm, 
DFSM_MESSAGE_SYNC_START, NULL, 0) != CS_OK) {
                                cfs_dom_critical(dfsm->log_domain, "failed to 
send SYNC_START message");
                                goto leave;
                        }


_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to