From: Elena Ufimtseva <elena.ufimts...@oracle.com> In order to detect remote processes which are hung, the proxy periodically sends heartbeat messages to confirm that the remote process is alive.
Signed-off-by: Jagannathan Raman <jag.ra...@oracle.com> Signed-off-by: John G Johnson <john.g.john...@oracle.com> Signed-off-by: Elena Ufimtseva <elena.ufimts...@oracle.com> --- hw/proxy/qemu-proxy.c | 86 +++++++++++++++++++++++++++++++++++ include/hw/proxy/qemu-proxy.h | 3 ++ include/io/mpqemu-link.h | 1 + io/mpqemu-link.c | 5 ++ 4 files changed, 95 insertions(+) diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c index 730e28483e..162014353f 100644 --- a/hw/proxy/qemu-proxy.c +++ b/hw/proxy/qemu-proxy.c @@ -21,6 +21,78 @@ static void probe_pci_info(PCIDevice *dev); +static void childsig_handler(int sig, siginfo_t *siginfo, void *ctx) +{ + /* TODO: Add proper handler. */ + printf("Child (pid %d) is dead? Signal is %d, Exit code is %d.\n", + siginfo->si_pid, siginfo->si_signo, siginfo->si_code); +} + +static void hb_msg(PCIProxyDev *dev) +{ + DeviceState *ds = DEVICE(dev); + MPQemuMsg msg = { 0 }; + uint64_t ret; + + if (event_notifier_get_fd(&dev->en_ping) == -1) { + return; + } + + memset(&msg, 0, sizeof(MPQemuMsg)); + + msg.num_fds = 1; + msg.cmd = PROXY_PING; + msg.bytestream = 0; + msg.size = 0; + msg.fds[0] = event_notifier_get_fd(&dev->en_ping); + + mpqemu_msg_send(&msg, dev->mpqemu_link->com); + + ret = wait_for_remote(msg.fds[0]); + + if (ret) { + printf("Lost contact with remote device %s\n", ds->id); + /* TODO: Initiate error recovery */ + } +} + +#define NOP_INTERVAL 1000 + +static void remote_ping(void *opaque) +{ + PCIProxyDev *dev = opaque; + + hb_msg(dev); + + timer_mod(dev->hb_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL); +} + +static void start_hb_timer(PCIProxyDev *dev) +{ + dev->hb_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + remote_ping, + dev); + + timer_mod(dev->hb_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL); +} + +static void stop_hb_timer(PCIProxyDev *dev) +{ + timer_del(dev->hb_timer); + timer_free(dev->hb_timer); +} + +static void set_sigchld_handler(void) +{ + struct sigaction sa_sigterm; + 
memset(&sa_sigterm, 0, sizeof(sa_sigterm)); + sa_sigterm.sa_sigaction = childsig_handler; + sa_sigterm.sa_flags = SA_SIGINFO | SA_NOCLDWAIT | SA_NOCLDSTOP; + sigaction(SIGCHLD, &sa_sigterm, NULL); +} + static int config_op_send(PCIProxyDev *dev, uint32_t addr, uint32_t *val, int l, unsigned int op) { @@ -204,6 +276,19 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) setup_irqfd(dev); probe_pci_info(PCI_DEVICE(dev)); + + set_sigchld_handler(); + + event_notifier_init(&dev->en_ping, 0); + + start_hb_timer(dev); +} + +static void pci_proxy_dev_exit(PCIDevice *pdev) +{ + PCIProxyDev *dev = PCI_PROXY_DEV(pdev); + + stop_hb_timer(dev); } static void pci_proxy_dev_class_init(ObjectClass *klass, void *data) @@ -211,6 +296,7 @@ static void pci_proxy_dev_class_init(ObjectClass *klass, void *data) PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); k->realize = pci_proxy_dev_realize; + k->exit = pci_proxy_dev_exit; k->config_read = pci_proxy_read_config; k->config_write = pci_proxy_write_config; } diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h index 0d8ec6d686..26f0a41110 100644 --- a/include/hw/proxy/qemu-proxy.h +++ b/include/hw/proxy/qemu-proxy.h @@ -55,6 +55,9 @@ struct PCIProxyDev { EventNotifier intr; EventNotifier resample; + EventNotifier en_ping; + QEMUTimer *hb_timer; + int socket; ProxyMemoryRegion region[PCI_NUM_REGIONS]; diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h index 102c736705..45ea1fcafa 100644 --- a/include/io/mpqemu-link.h +++ b/include/io/mpqemu-link.h @@ -50,6 +50,7 @@ typedef enum { SET_IRQFD, GET_PCI_INFO, RET_PCI_INFO, + PROXY_PING, MAX, } mpqemu_cmd_t; diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c index ea519a980e..91a3395566 100644 --- a/io/mpqemu-link.c +++ b/io/mpqemu-link.c @@ -394,6 +394,11 @@ bool mpqemu_msg_valid(MPQemuMsg *msg) return false; } break; + case PROXY_PING: + if (msg->size != 0) { + return false; + } + break; default: break; } -- 2.25.GIT