The following is gdb at the time of STONITH resource starting.
----
[root@dev1 ~]# gdb /usr/libexec/pacemaker/stonithd `pgrep
stonithd`
(gdb) b run_stonith_agent
Breakpoint 1 at 0x7f03f1e00d69: file st_client.c, line 479.
(gdb) c
Continuing.
Breakpoint 1, run_stonith_agent (agent=0xe0f820
"fence_legacy",
action=0xe11fb0 "monitor",
<snip>
479 {
(gdb) bt
#0 run_stonith_agent (agent=0xe0f820 "fence_legacy",
action=0xe11fb0 "monitor",
victim=0x0, device_args=Traceback (most recent call
last):0xcffe30, port_map=
Traceback (most recent call last):0xcffe80,
agent_result=0x7fff70214ef4,
output=0x0, track=0xe11d20) at st_client.c:479
#1 0x0000000000406230 in stonith_device_execute
(device=0xe10ff0)
at commands.c:140
#2 0x0000000000406404 in stonith_device_dispatch
(user_data=0xe10ff0) at commands.c:160
#3 0x00007f03f224ad00 in crm_trigger_dispatch
(source=0xe11160,
callback=
0x4063dd <stonith_device_dispatch>, userdata=0xe11160) at
mainloop.c:105
#4 0x0000003642638f0e in g_main_context_dispatch () from
/lib64/libglib-2.0.so.0
#5 0x000000364263c938 in ?? () from /lib64/libglib-2.0.so.0
#6 0x000000364263cd55 in g_main_loop_run () from
/lib64/libglib-2.0.so.0
#7 0x00000000004056dc in main (argc=1, argv=0x7fff70215278)
at
main.c:853
(gdb) n 15
Detaching after fork from child process 28915.
510 if (pid) {
(gdb) n 15
542 track->pid = pid;
(gdb) list
537 track->stdout = p_read_fd;
538 g_child_watch_add(pid, track->done, track);
539 crm_trace("Op: %s on %s, pid: %d, timeout:
%ds",
action, agent, pid, track->timeout);
540
541 if (track->timeout) {
542 track->pid = pid;
543 track->timer_sigterm =
g_timeout_add(1000*track->timeout, st_child_term, track);
544 track->timer_sigkill =
g_timeout_add(1000*(track->timeout+5), st_child_kill, track);
545
546 } else {
(gdb) n
543 track->timer_sigterm =
g_timeout_add(1000*track->timeout, st_child_term, track);
(gdb) n
544 track->timer_sigkill =
g_timeout_add(1000*(track->timeout+5), st_child_kill, track);
(gdb) p agent
$1 = 0xe0f820 "fence_legacy"
(gdb) p action
$2 = 0xe11fb0 "monitor"
(gdb) p args
$3 = 0xe11500
"plugin=external/libvirt\nhostlist=dev2\nhypervisor_uri=qemu+ssh://n8/system\noption=monitor\n"
* (gdb) p track->timeout
$4 = 61000
* (gdb) p 1000*track->timeout
$5 = 61000000
----
1. I added "sleep 3600" to status() of
/usr/lib64/stonith/plugins/external/libvirt.
[root@dev1 external]# diff -u libvirt.ORG libvirt
--- libvirt.ORG 2012-07-17 13:10:01.000000000 +0900
+++ libvirt 2012-07-30 13:36:19.661431208 +0900
@@ -221,6 +221,7 @@
;;
status)
+ sleep 3600
libvirt_check_config
libvirt_status
exit $?
2. service corosync start ; service pacemaker start
3. cibadmin -U -x test.xml
4. When I wait for 61 seconds (timeout value of start),
[root@dev1 ~]# crm_mon -rf1
============
Last updated: Mon Jul 30 13:18:48 2012
Last change: Mon Jul 30 13:15:08 2012 via cibadmin on dev1
Stack: corosync
Current DC: dev1 (-1788499776) - partition with quorum
Version: 1.1.7-33119da
2 Nodes configured, unknown expected votes
1 Resources configured.
============
Online: [ dev1 dev2 ]
Full list of resources:
f-2 (stonith:external/libvirt): Started dev1 FAILED
Migration summary:
* Node dev2:
* Node dev1:
f-2: migration-threshold=1 fail-count=1000000
Failed actions:
* f-2_start_0 (node=dev1, call=-1, rc=1, status=Timed Out):
unknown error
[root@dev1 ~]# ps -ef|egrep
"UID|corosync|pacemaker|stonith|fence|sleep"
UID PID PPID C STIME TTY TIME CMD
root 28840 1 0 13:13 ? 00:00:01 corosync
root 28858 1 0 13:13 ? 00:00:00 pacemakerd
496 28860 28858 0 13:13 ? 00:00:00
/usr/libexec/pacemaker/cib
root 28861 28858 0 13:13 ? 00:00:00
/usr/libexec/pacemaker/stonithd
root 28862 28858 73 13:13 ? 00:04:16
/usr/libexec/pacemaker/lrmd
496 28863 28858 0 13:13 ? 00:00:00
/usr/libexec/pacemaker/attrd
496 28864 28858 0 13:13 ? 00:00:00
/usr/libexec/pacemaker/pengine
496 28865 28858 51 13:13 ? 00:02:58
/usr/libexec/pacemaker/crmd
* root 28915 28861 0 13:15 ? 00:00:00 /usr/bin/perl
/usr/sbin/fence_legacy
* root 28916 28915 0 13:15 ? 00:00:00 stonith -t
external/libvirt
-E -S
* root 28921 28916 0 13:15 ? 00:00:00 /bin/sh
/usr/lib64/stonith/plugins/external/libvirt status
root 28925 28921 0 13:15 ? 00:00:00 sleep 3600
[root@dev1 ~]# top -bn1
top - 13:21:26 up 5 days, 3:23, 5 users, load average:
1.99,
1.42, 0.72
Tasks: 198 total, 3 running, 195 sleeping, 0 stopped, 0
zombie
Cpu(s): 0.7%us, 0.7%sy, 0.0%ni, 98.5%id, 0.0%wa, 0.0%hi,
0.0%si, 0.0%st
Mem: 5089052k total, 2423104k used, 2665948k free,
265756k
buffers
Swap: 1048568k total, 0k used, 1048568k free,
1717712k
cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+
COMMAND
* 28862 root 20 0 83816 3412 2572 R 98.2 0.1 6:17.18
lrmd
* 28865 hacluste 20 0 166m 6380 3428 R 98.2 0.1 4:59.84
crmd
28860 hacluste 20 0 93888 7192 4472 S 2.0 0.1 0:00.23
cib
29052 root 20 0 15024 1136 792 R 2.0 0.0 0:00.01
top
1 root 20 0 19348 1520 1212 S 0.0 0.0 0:00.77
init
2 root 20 0 0 0 0 S 0.0 0.0 0:00.00
kthreadd
3 root RT 0 0 0 0 S 0.0 0.0 0:06.85
migration/0
4 root 20 0 0 0 0 S 0.0 0.0 14:25.15
ksoftirqd/0
5 root RT 0 0 0 0 S 0.0 0.0 0:00.10
migration/0
Best Regards,
Kazunori INOUE
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker
Project Home: http://www.clusterlabs.org
Getting started:
http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org