Forum: CFEngine Help
Subject: Re: Aborting CFEngine agent if there is an earlier instance of it
already running
Author: sauer
Link to topic: https://cfengine.com/forum/read.php?3,27239,27267#msg-27267
I had no luck with expireafter. As I recall, there was some discussion earlier
about adding a parameter in the executor to define a maximum execution time,
but I don't know where that went.
My current solution (which is more than you asked for, but you can probably
figure it out) looks like this:
########################################
# verify running processes
# cfengine_check: keeps CFEngine's own processes under control.
#   * kills listed processes whose resident size exceeds a per-process limit
#   * restarts (or kills) the daemons according to the RUN_CF_* switches found
#     in $(update.cfengine_default_file)
#   * reaps agent processes selected by cfengine_reaper_select
#   * if more than $(agent_count) instances of an agent are found, kills every
#     instance of it
# Relies on names defined outside this bundle: update.cfengine_default_file,
# update.cfengine_init_script, the srv_any class and the if_ok classes body.
bundle agent cfengine_check {

  vars:
    any::
      "agent_name"  slist => { "cf-agent", "cf-twin", "cf-promises" };
      # ps returns time in local zone, ago() uses UTC
      "agent_age"   int   => ago(0,0,0,7,0,0); #year, month, day, hour, min, sec
      "now"         int   => now();
      "agent_count" int   => "6";

    # Per-process resident size limits, keyed by process name so that
    # getindices() below yields the list of processes to police.
    # NOTE(review): the array subscripts were missing from the posted policy
    # (a plain scalar "maxsize" cannot be used with getindices). The process
    # names used here are a reconstruction -- confirm against the real policy.
    any::
      "maxsize[cf-execd]"   int => "12000"; # 12,000 KB
    srv_any::
      "maxsize[cf-serverd]" int => "30000"; # 30,000 KB on servers
    !srv_any::
      "maxsize[cf-serverd]" int => "7000";  # 7,000 KB on clients

    any::
      "sizeprocs" slist => getindices( "maxsize" );

    # Daemon name -> switch name looked up in the defaults file. The keys are
    # the process names matched by the processes promises below.
    any::
      "default_keys[cf-serverd]"  string => "RUN_CF_SERVERD";
      "default_keys[cf-execd]"    string => "RUN_CF_EXECD";
      "default_keys[cf-monitord]" string => "RUN_CF_MONITORD";
      "default_keys[cf-hub]"      string => "RUN_CF_HUB";

      "cfengine_procs" slist => getindices( "default_keys" );
      # class-safe spelling of each daemon name, used to build class names
      "can_proc[$(cfengine_procs)]" string => canonify("$(cfengine_procs)");

  classes:
      # start_<daemon>: the defaults file contains <switch>=1
      "start_$(can_proc[$(cfengine_procs)])" expression =>
        regline( "$(default_keys[$(cfengine_procs)])=1",
                 "$(update.cfengine_default_file)"
               );
      # kill_<daemon>: the defaults file does not contain <switch>=1
      "kill_$(can_proc[$(cfengine_procs)])" not =>
        regline( "$(default_keys[$(cfengine_procs)])=1",
                 "$(update.cfengine_default_file)"
               );

  processes:
      "$(sizeprocs)"
        process_select => rsize_exceeds("$(maxsize[$(sizeprocs)])"),
        signals        => { "term", "kill" },
        comment        => "Kill $(sizeprocs) if RSS > $(maxsize[$(sizeprocs)])";

      "$(cfengine_procs)"
        ifvarclass    => "start_$(can_proc[$(cfengine_procs)])",
        restart_class => "restart_$(can_proc[$(cfengine_procs)])",
        comment       => "restart cfengine if $(cfengine_procs) is dead";

      "$(cfengine_procs)"
        ifvarclass => "kill_$(can_proc[$(cfengine_procs)])",
        signals    => { "term", "kill" },
        comment    => "kill $(cfengine_procs) if it's running";

      "$(agent_name)"
        process_select => cfengine_reaper_select("$(agent_age)"),
        signals        => { "kill" },
        comment        => "clean up old $(agent_name) processes";

      "$(agent_name)"
        process_count => cfengine_reaper_count("$(agent_name)", "$(agent_count)"),
        comment       => "count $(agent_name) processes";

    cfengine_gone_nuts::
      "$(agent_name)" # cf-agent won't kill itself, so just select everything
        process_select => cfengine_reaper_select("$(now)"),
        signals        => { "kill" },
        comment        => "clean up all $(agent_name) processes, it's gone nuts";

  commands:
    # guarded so the init script is not run again once a restart succeeded
    !restarted_cfengine_procs::
      "$(update.cfengine_init_script) restart"
        ifvarclass => "restart_$(can_proc[$(cfengine_procs)])",
        classes    => if_ok("restarted_cfengine_procs");

  reports:
    restarted_cfengine_procs::
      "Restarted cfengine procs ($(cfengine_procs) was down)"
        ifvarclass => "restart_$(can_proc[$(cfengine_procs)])";

    !restarted_cfengine_procs::
      "failed restarting cfengine procs ($(cfengine_procs) down)"
        ifvarclass => "restart_$(can_proc[$(cfengine_procs)])";

    cfengine_gone_nuts::
      "$(agent_name) had gone nuts; attempted return to sanity."
        # can_proc[] is keyed by daemon name, not by agent name, so the class
        # name is built directly from $(agent_name) instead.
        ifvarclass => canonify("$(agent_name)_gone_nuts");
}
# Process selector used to reap agent processes.
# Parameter t: upper bound for the process start time, as an epoch timestamp
# (the bundle in this post passes values from ago() and now()).
# A process is selected when it is owned by root AND either its start time
# falls within [0, t] or its parent pid is 0 or 1.
body process_select cfengine_reaper_select(t)
{
  process_result => "process_owner&(stime|ppid)";

  process_owner  => { "root" };
  ppid           => irange("0","1");      # parent is 0 or 1
  stime_range    => irange("0","$(t)");   # started no later than timestamp $(t)
}
# Process counter: flags an agent that has too many instances running.
# Parameters:
#   p - name used as the prefix of the per-agent "<p>_gone_nuts" class
#   c - upper end of the acceptable count range (the range is 0 to c)
# When the number of matching processes is outside that range, both the
# generic class cfengine_gone_nuts and the per-agent class are defined.
body process_count cfengine_reaper_count(p,c)
{
  out_of_range_define => { "cfengine_gone_nuts", "$(p)_gone_nuts" };
  match_range         => "0,$(c)";
}
# Process selector: matches processes whose resident size (rsize) is at
# least $(limit). The calling bundle expresses its limits in KB.
# hpux seemingly lacks the rsize attribute, so this doesn't work there
body process_select rsize_exceeds(limit)
{
  process_result => "rsize";
  rsize          => irange("$(limit)","inf");   # rsize is $(limit) or more
}
I have the cfengine defaults file (/etc/defaults/cfengine, or whatever) defined
in a bundle named update.
I also use 7 hours for the age because my machines are between UTC-0400 and
UTC-0600, so processes which are actually only an hour old show up as being 7
hours old in the farthest-back timezone. I submitted a bug for that; I should
probably check on its status and see whether I still need to work around that
behavior. :)
_______________________________________________
Help-cfengine mailing list
[email protected]
https://cfengine.org/mailman/listinfo/help-cfengine