[OpenWrt-Devel] [PATCH] procd: stop service using SIGKILL if SIGTERM failed to do so
SIGKILL is sent if instance process is still running after seconds after SIGTERM has been sent. To prevent another daemon process being launched before old process dies, the instance is kept until SIGCHLD confirms that service has been stopped. Signed-off-by: Alin Nastac --- service/instance.c | 44 +--- service/instance.h | 1 + service/service.c | 26 -- service/service.h | 3 +++ 4 files changed, 57 insertions(+), 17 deletions(-) diff --git a/service/instance.c b/service/instance.c index 018db3c..4d340fd 100644 --- a/service/instance.c +++ b/service/instance.c @@ -55,6 +55,7 @@ enum { INSTANCE_ATTR_SECCOMP, INSTANCE_ATTR_PIDFILE, INSTANCE_ATTR_RELOADSIG, + INSTANCE_ATTR_TERMTIMEOUT, __INSTANCE_ATTR_MAX }; @@ -79,6 +80,7 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = { [INSTANCE_ATTR_SECCOMP] = { "seccomp", BLOBMSG_TYPE_STRING }, [INSTANCE_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING }, [INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 }, + [INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 }, }; enum { @@ -389,8 +391,16 @@ instance_start(struct service_instance *in) return; } - if (in->proc.pending || !in->command) + if (!in->command) { + LOG("Not starting instance %s::%s, command not set\n", in->srv->name, in->name); return; + } + + if (in->proc.pending) { + if (in->halt) + in->restart = true; + return; + } instance_free_stdio(in); if (in->_stdout.fd.fd > -2) { @@ -408,7 +418,7 @@ instance_start(struct service_instance *in) } in->restart = false; - in->halt = !in->respawn; + in->halt = false; if (!in->valid) return; @@ -494,7 +504,11 @@ instance_timeout(struct uloop_timeout *t) in = container_of(t, struct service_instance, timeout); - if (!in->halt && (in->restart || in->respawn)) + if (in->halt) { + LOG("Instance %s::%s pid %d not stopped on SIGTERM, sending SIGKILL instead\n", + in->srv->name, in->name, in->proc.pid); + kill(in->proc.pid, SIGKILL); + } else if (in->restart || in->respawn) instance_start(in); } @@ -515,8 +529,19 @@ instance_exit(struct uloop_process *p, int ret) return; uloop_timeout_cancel(&in->timeout); + service_event("instance.stop", in->srv->name, in->name); + if (in->halt) { instance_removepid(in); + if (in->restart) + instance_start(in); + else { + struct service *s = in->srv; + + avl_delete(&s->instances.avl, &in->node.avl); + instance_free(in); + service_stopped(s); + } } else if (in->restart) { instance_start(in); } else if (in->respawn) { @@ -535,7 +560,6 @@ instance_exit(struct uloop_process *p, int ret) uloop_timeout_set(&in->timeout, in->respawn_timeout * 1000); } } - service_event("instance.stop", in->srv->name, in->name); } void @@ -546,6 +570,7 @@ instance_stop(struct service_instance *in) in->halt = true; in->restart = in->respawn = false; kill(in->proc.pid, SIGTERM); + uloop_timeout_set(&in->timeout, in->term_timeout * 1000); } static void @@ -559,10 +584,10 @@ instance_restart(struct service_instance *in) return; } - in->halt = false; + in->halt = true; in->restart = true; kill(in->proc.pid, SIGTERM); - instance_removepid(in); + uloop_timeout_set(&in->timeout, in->term_timeout * 1000); } static bool @@ -796,6 +821,8 @@ instance_config_parse(struct service_instance *in) if (!instance_config_parse_command(in, tb)) return false; + if (tb[INSTANCE_ATTR_TERMTIMEOUT]) + in->term_timeout = blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]); if (tb[INSTANCE_ATTR_RESPAWN]) { int i = 0; uint32_t vals[3] = { 3600, 5, 5}; @@ -933,8 +960,9 @@ instance_update(struct service_instance *in, struct service_instance *in_new) { bool changed = instance_config_changed(in, in_new); bool running = in->proc.pending; + bool stopping = in->halt; - if (!running) { + if (!running || stopping) { instance_config_move(in, in_new); instance_start(in); } else { @@ -967,6 +995,7 @@ instance_init(struct service_instance *in, struct service *s, struct blob_attr * in->config = config; in->timeout.cb = instance_
Re: [OpenWrt-Devel] [LEDE-DEV] [PATCH] procd: stop service using SIGKILL if SIGTERM failed to do so
Hi, i know that someone else is about to send a fix for the same issue but with a different approach of fixing it. i'd like to wait for this 2nd patch to arrive before we decide which to merge John On 09/02/2017 11:02, Alin Nastac wrote: > SIGKILL is sent if instance process is still running after > seconds after SIGTERM has been sent. To prevent > another daemon process being launched before old process dies, > the instance is kept until SIGCHLD confirms that service has > been stopped. > > Signed-off-by: Alin Nastac > --- > service/instance.c | 44 +--- > service/instance.h | 1 + > service/service.c | 26 -- > service/service.h | 3 +++ > 4 files changed, 57 insertions(+), 17 deletions(-) > > diff --git a/service/instance.c b/service/instance.c > index 018db3c..4d340fd 100644 > --- a/service/instance.c > +++ b/service/instance.c > @@ -55,6 +55,7 @@ enum { > INSTANCE_ATTR_SECCOMP, > INSTANCE_ATTR_PIDFILE, > INSTANCE_ATTR_RELOADSIG, > + INSTANCE_ATTR_TERMTIMEOUT, > __INSTANCE_ATTR_MAX > }; > > @@ -79,6 +80,7 @@ static const struct blobmsg_policy > instance_attr[__INSTANCE_ATTR_MAX] = { > [INSTANCE_ATTR_SECCOMP] = { "seccomp", BLOBMSG_TYPE_STRING }, > [INSTANCE_ATTR_PIDFILE] = { "pidfile", BLOBMSG_TYPE_STRING }, > [INSTANCE_ATTR_RELOADSIG] = { "reload_signal", BLOBMSG_TYPE_INT32 }, > + [INSTANCE_ATTR_TERMTIMEOUT] = { "term_timeout", BLOBMSG_TYPE_INT32 }, > }; > > enum { > @@ -389,8 +391,16 @@ instance_start(struct service_instance *in) > return; > } > > - if (in->proc.pending || !in->command) > + if (!in->command) { > + LOG("Not starting instance %s::%s, command not set\n", > in->srv->name, in->name); > return; > + } > + > + if (in->proc.pending) { > + if (in->halt) > + in->restart = true; > + return; > + } > > instance_free_stdio(in); > if (in->_stdout.fd.fd > -2) { > @@ -408,7 +418,7 @@ instance_start(struct service_instance *in) > } > > in->restart = false; > - in->halt = !in->respawn; > + in->halt = false; > > if (!in->valid) > return; > @@ -494,7 +504,11 @@ instance_timeout(struct uloop_timeout *t) > > in = container_of(t, struct service_instance, timeout); > > - if (!in->halt && (in->restart || in->respawn)) > + if (in->halt) { > + LOG("Instance %s::%s pid %d not stopped on SIGTERM, sending > SIGKILL instead\n", > + in->srv->name, in->name, in->proc.pid); > + kill(in->proc.pid, SIGKILL); > + } else if (in->restart || in->respawn) > instance_start(in); > } > > @@ -515,8 +529,19 @@ instance_exit(struct uloop_process *p, int ret) > return; > > uloop_timeout_cancel(&in->timeout); > + service_event("instance.stop", in->srv->name, in->name); > + > if (in->halt) { > instance_removepid(in); > + if (in->restart) > + instance_start(in); > + else { > + struct service *s = in->srv; > + > + avl_delete(&s->instances.avl, &in->node.avl); > + instance_free(in); > + service_stopped(s); > + } > } else if (in->restart) { > instance_start(in); > } else if (in->respawn) { > @@ -535,7 +560,6 @@ instance_exit(struct uloop_process *p, int ret) > uloop_timeout_set(&in->timeout, in->respawn_timeout * > 1000); > } > } > - service_event("instance.stop", in->srv->name, in->name); > } > > void > @@ -546,6 +570,7 @@ instance_stop(struct service_instance *in) > in->halt = true; > in->restart = in->respawn = false; > kill(in->proc.pid, SIGTERM); > + uloop_timeout_set(&in->timeout, in->term_timeout * 1000); > } > > static void > @@ -559,10 +584,10 @@ instance_restart(struct service_instance *in) > return; > } > > - in->halt = false; > + in->halt = true; > in->restart = true; > kill(in->proc.pid, SIGTERM); > - instance_removepid(in); > + uloop_timeout_set(&in->timeout, in->term_timeout * 1000); > } > > static bool > @@ -796,6 +821,8 @@ instance_config_parse(struct service_instance *in) > if (!instance_config_parse_command(in, tb)) > return false; > > + if (tb[INSTANCE_ATTR_TERMTIMEOUT]) > + in->term_timeout = > blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]); > if (tb[INSTANCE_ATTR_RESPAWN]) { > int i = 0; > uint32_t vals[3] = { 3600, 5, 5}; > @@ -933,8 +960,9 @@ instance_update(struct service_instance *in, struct > service_instance *in_new) > { > bool changed = instance_config_changed(in, in_new); > bool running = in->p
Re: [OpenWrt-Devel] [LEDE-DEV] [PATCH] procd: stop service using SIGKILL if SIGTERM failed to do so
On Thu, Feb 9, 2017 at 11:54 AM, John Crispin wrote: > Hi, > > i know that someone else is about to send a fix for the same issue but > with a different approach of fixing it. i'd like to wait for this 2nd > patch to arrive before we decide which to merge Are you sure it wasn't me? :) You said yesterday that I should send you a patch for it. The only other approach I could think of would involve a instance_stop() that waits for the service instance to exit. I thought initially to do it like this, but decided that waiting asynchronously for stop event would be a fer better technical solution to the given issue, don't you agree? > > John > Alin ___ openwrt-devel mailing list openwrt-devel@lists.openwrt.org https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel