On  2025-05-19  15:09, Maximiliano Sandoval wrote:
Signed-off-by: Maximiliano Sandoval <m.sando...@proxmox.com>
---
  src/watchdog-mux.c | 26 ++++++++++++++++++++++++++
  1 file changed, 26 insertions(+)

diff --git a/src/watchdog-mux.c b/src/watchdog-mux.c
index a9017b3..e14c768 100644
--- a/src/watchdog-mux.c
+++ b/src/watchdog-mux.c
@@ -29,15 +29,24 @@
#define JOURNALCTL_BIN "/bin/journalctl"
+#define CLIENT_WATCHDOG_TIMEOUT_WARNING 50

A comment explaining why 50 is used would be useful. Alternatively, it could be defined a bit further below, using client_watchdog_timeout as the reference:

client_watchdog_timeout_warning = client_watchdog_timeout - 10;

This way it would be clearer that we want to react during the last 10 seconds before the timeout. But that is just an idea.

+
  int watchdog_fd = -1;
  int watchdog_timeout = 10;
  int client_watchdog_timeout = 60;
  int update_watchdog = 1;
+enum warning_state_t {
+   NONE,
+   WARNING_ISSUED,
+   CRISIS_AVERTED,

I don't like the "CRISIS" vocabulary here. Why not call it "FENCE_AVERTED" or "HOST_FENCE_AVERTED"?
It sounds less sensational and names what is actually averted.

+};
+
  typedef struct {
      int fd;
      time_t time;
      int magic_close;
+    enum warning_state_t warning_state;
  } wd_client_t;
#define MAX_CLIENTS 100
@@ -54,6 +63,7 @@ alloc_client(int fd, time_t time)
              client_list[i].fd = fd;
              client_list[i].time = time;
              client_list[i].magic_close = 0;
+            client_list[i].warning_state = NONE;
              return &client_list[i];
          }
      }
@@ -244,6 +254,22 @@ main(void)
                  time_t ctime = time(NULL);
                  for (i = 0; i < MAX_CLIENTS; i++) {
                      if (client_list[i].fd != 0 && client_list[i].time != 0) {
+                        if (
+                            client_list[i].warning_state == WARNING_ISSUED
+                            && (ctime - client_list[i].time) <= 
CLIENT_WATCHDOG_TIMEOUT_WARNING
+                        ) {
+                            client_list[i].warning_state = CRISIS_AVERTED;
+                            fprintf(stderr, "phew, client watchdog was updated 
before expiring\n");
+                        }
+
+                        if (
+                            client_list[i].warning_state != WARNING_ISSUED
+                            && (ctime - client_list[i].time) > 
CLIENT_WATCHDOG_TIMEOUT_WARNING
+                        ) {
+                            client_list[i].warning_state = WARNING_ISSUED;
+                            fprintf(stderr, "client watchdog is about to 
expire\n");
+                        }
+
                          if ((ctime - client_list[i].time) > 
client_watchdog_timeout) {
                              update_watchdog = 0;
                              fprintf(stderr, "client watchdog expired - disable 
watchdog updates\n");



_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel

Reply via email to