On 2025-05-19 15:09, Maximiliano Sandoval wrote:
Signed-off-by: Maximiliano Sandoval <m.sando...@proxmox.com>
---
src/watchdog-mux.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/src/watchdog-mux.c b/src/watchdog-mux.c
index a9017b3..e14c768 100644
--- a/src/watchdog-mux.c
+++ b/src/watchdog-mux.c
@@ -29,15 +29,24 @@
#define JOURNALCTL_BIN "/bin/journalctl"
+#define CLIENT_WATCHDOG_TIMEOUT_WARNING 50
A comment explaining why 50 is used would be useful. Alternatively, it
could be defined a bit further below, using client_watchdog_timeout as
the reference:
client_watchdog_timeout_warning = client_watchdog_timeout - 10;
This way it would be clearer that we want to react during the last 10
seconds before the timeout is reached. But that is just an idea.
+
int watchdog_fd = -1;
int watchdog_timeout = 10;
int client_watchdog_timeout = 60;
int update_watchdog = 1;
+enum warning_state_t {
+ NONE,
+ WARNING_ISSUED,
+ CRISIS_AVERTED,
I don't like the "CRISIS" vocabulary here. Why not call it
"FENCE_AVERTED" or "HOST_FENCE_AVERTED"?
It sounds less sensational and refers to what is actually averted.
+};
+
typedef struct {
int fd;
time_t time;
int magic_close;
+ enum warning_state_t warning_state;
} wd_client_t;
#define MAX_CLIENTS 100
@@ -54,6 +63,7 @@ alloc_client(int fd, time_t time)
client_list[i].fd = fd;
client_list[i].time = time;
client_list[i].magic_close = 0;
+ client_list[i].warning_state = NONE;
return &client_list[i];
}
}
@@ -244,6 +254,22 @@ main(void)
time_t ctime = time(NULL);
for (i = 0; i < MAX_CLIENTS; i++) {
if (client_list[i].fd != 0 && client_list[i].time != 0) {
+ if (
+ client_list[i].warning_state == WARNING_ISSUED
+ && (ctime - client_list[i].time) <=
CLIENT_WATCHDOG_TIMEOUT_WARNING
+ ) {
+ client_list[i].warning_state = CRISIS_AVERTED;
+ fprintf(stderr, "phew, client watchdog was updated
before expiring\n");
+ }
+
+ if (
+ client_list[i].warning_state != WARNING_ISSUED
+ && (ctime - client_list[i].time) >
CLIENT_WATCHDOG_TIMEOUT_WARNING
+ ) {
+ client_list[i].warning_state = WARNING_ISSUED;
+ fprintf(stderr, "client watchdog is about to
expire\n");
+ }
+
if ((ctime - client_list[i].time) >
client_watchdog_timeout) {
update_watchdog = 0;
fprintf(stderr, "client watchdog expired - disable
watchdog updates\n");
_______________________________________________
pve-devel mailing list
pve-devel@lists.proxmox.com
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel