Hi, I've installed a 2-Node Xen-Cluster with SLES 11 SP1.
After an upgrade to SLES 11 SP2, the cluster no longer works the way it did before. After some time, all LVs are reorganized and so the VMs were restarted. Any clue? CIB: node xencluster1 \ attributes standby="off" node xencluster2 \ attributes standby="off" primitive STONITH--Node1 stonith:meatware \ meta is-managed="true" target-role="Started" \ operations $id="STONITH--Node1-operations" \ op monitor interval="15" timeout="15" start-delay="15" \ params hostlist="xencluster1" primitive STONITH-Node2 stonith:meatware \ meta target-role="started" is-managed="true" \ operations $id="STONITH-Node2-operations" \ op monitor interval="15" timeout="15" start-delay="15" \ params hostlist="xencluster2" primitive clvm ocf:lvm2:clvmd \ operations $id="clvm-operations" \ op monitor interval="10" timeout="20" \ op start interval="0" timeout="90" \ op stop interval="0" timeout="100" \ params daemon_timeout="30" primitive clvm-xenvg ocf:heartbeat:LVM \ operations $id="clvm-xenvg-operations" \ op monitor interval="10" timeout="30" \ params volgrpname="XenVG" primitive cmirror ocf:lvm2:cmirrord \ operations $id="cmirror-operations" \ op monitor interval="10" timeout="20" \ op start interval="0" timeout="90" \ op stop interval="0" timeout="100" \ params daemon_timeout="30" primitive dlm ocf:pacemaker:controld \ operations $id="dlm-operations" \ op monitor interval="10" timeout="20" start-delay="0" \ op start interval="0" timeout="90" \ op stop interval="0" timeout="100" primitive fs-config-xen ocf:heartbeat:Filesystem \ meta is-managed="true" target-role="Started" \ operations $id="fs-config-xen-operations" \ op monitor interval="20" timeout="40" \ params device="/dev/mapper/XenVG-xenconfig" directory="/etc/xen/vm" fstype="ocfs2" primitive o2cb ocf:ocfs2:o2cb \ operations $id="o2cb-operations" \ op monitor interval="10" timeout="20" \ op start interval="0" timeout="90" \ op stop interval="0" timeout="100" primitive vm-cim ocf:heartbeat:Xen \ meta target-role="Started" is-managed="true" 
allow-migrate="true" \ operations $id="vm-cim-operations" \ op monitor interval="10" timeout="30" \ op start interval="0" timeout="60" \ op stop interval="0" timeout="60" \ op migrate_to interval="0" timeout="360" \ params xmfile="/etc/xen/vm/cim" primitive vm-deprepo ocf:heartbeat:Xen \ meta target-role="started" is-managed="true" allow-migrate="true" \ operations $id="vm-deprepo-operations" \ op monitor interval="10" timeout="30" \ op start interval="0" timeout="60" \ op stop interval="0" timeout="60" \ op migrate_to interval="0" timeout="360" \ params xmfile="/etc/xen/vm/debrepo" group clvm-glue dlm clvm o2cb cmirror \ meta target-role="Started" group xen-vg-fs clvm-xenvg fs-config-xen clone c-clvm-glue clvm-glue \ meta interleave="true" ordered="true" target-role="Started" clone c-xen-vg-fs xen-vg-fs \ meta interleave="true" ordered="true" is-managed="true" target-role="Started" location STONITHnode1Constraint STONITH--Node1 -inf: xencluster1 colocation colo-clvmglue-xenvgsfs inf: c-xen-vg-fs c-clvm-glue order o-cim inf: c-xen-vg-fs vm-cim order o-clvmglue-xenvgfs inf: c-clvm-glue c-xen-vg-fs order o-deprepo inf: c-xen-vg-fs vm-deprepo order o-ilmt inf: c-xen-vg-fs vm-ilmt order o-intranet inf: c-xen-vg-fs vm-intranet order o-nagois inf: c-xen-vg-fs vm-nagios order o-oneiroi inf: c-xen-vg-fs vm-oneiroi order o-otbridge inf: c-xen-vg-fs vm-otbridge order o-soa inf: c-xen-vg-fs vm-soa order o-topdesk-entw inf: c-xen-vg-fs vm-topdesk-entw order o-traumschiff inf: c-xen-vg-fs vm-traumschiff order o-virenscanner inf: c-xen-vg-fs vm-virenscanner property $id="cib-bootstrap-options" \ dc-version="1.1.6-b988976485d15cb702c9307df55512d323831a5e" \ no-quorum-policy="ignore" \ default-resource-stickiness="100000" \ last-lrm-refresh="1334518148" \ cluster-infrastructure="openais" \ expected-quorum-votes="2" \ default-action-timeout="120s" \ maintenance-mode="true" op_defaults $id="op_defaults-options" \ record-pending="false" Node 1Logs: Apr 15 22:01:10 xencluster1 
clvmd[8763]: Sending message to all cluster nodes Apr 15 22:01:10 xencluster1 clvmd[8763]: process_work_item: local Apr 15 22:01:10 xencluster1 clvmd[8763]: process_local_command: unknown (0x2d) msg=0x7f5c40010e80, msglen =32, client=0x7f5c400578d0 Apr 15 22:01:10 xencluster1 clvmd[8763]: Syncing device names Apr 15 22:01:10 xencluster1 clvmd[8763]: Reply from node 4914420a: 0 bytes Apr 15 22:01:10 xencluster1 clvmd[8763]: Got 1 replies, expecting: 2 Apr 15 22:01:10 xencluster1 clvmd[8763]: LVM thread waiting for work Apr 15 22:01:10 xencluster1 clvmd[8763]: 1226064394 got message from nodeid 1226064394 for 0. len 32 Apr 15 22:01:40 xencluster1 lrmd: [8093]: WARN: clvm-xenvg:0:monitor process (PID 31785) timed out (try 1). Killing with signal SIGTERM (15). Apr 15 22:01:40 xencluster1 lrmd: [8093]: WARN: operation monitor[92] on clvm-xenvg:0 for client 8096: pid 31785 timed out Apr 15 22:01:40 xencluster1 crmd: [8096]: ERROR: process_lrm_event: LRM operation clvm-xenvg:0_monitor_10000 (92) Timed Out (timeout=30000ms) Apr 15 22:01:42 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ac Apr 15 22:01:42 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab Apr 15 22:01:42 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ac Apr 15 22:01:42 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab Apr 15 22:01:50 xencluster1 clvmd[8763]: Send local reply Apr 15 22:01:50 xencluster1 clvmd[8763]: Read on local socket 14, len = 32 Apr 15 22:01:50 xencluster1 clvmd[8763]: check_all_clvmds_running Apr 15 22:01:50 xencluster1 clvmd[8763]: down_callback. node 1226064394, state = 3 Apr 15 22:01:50 xencluster1 clvmd[8763]: down_callback. node 1309950474, state = 3 Apr 15 22:01:50 xencluster1 clvmd[8763]: Got pre command condition... 
Apr 15 22:01:50 xencluster1 clvmd[8763]: Writing status 0 down pipe 16 Apr 15 22:01:50 xencluster1 clvmd[8763]: Waiting to do post command - state = 0 Apr 15 22:01:50 xencluster1 clvmd[8763]: read on PIPE 15: 4 bytes: status: 0 Apr 15 22:01:50 xencluster1 clvmd[8763]: background routine status was 0, sock_client=0x7f5c40057c10 Apr 15 22:01:50 xencluster1 clvmd[8763]: distribute command: XID = 15473 Apr 15 22:01:50 xencluster1 clvmd[8763]: num_nodes = 2 Apr 15 22:01:50 xencluster1 clvmd[8763]: add_to_lvmqueue: cmd=0x7f5c4000dbd0. client=0x7f5c40057c10, msg=0x7f5c40057880, len=32, csid=(nil), xid=15473 Apr 15 22:01:50 xencluster1 clvmd[8763]: Sending message to all cluster nodes Apr 15 22:01:50 xencluster1 clvmd[8763]: process_work_item: local Apr 15 22:01:50 xencluster1 clvmd[8763]: process_local_command: unknown (0x2d) msg=0x7f5c40010e80, msglen =32, client=0x7f5c40057c10 Apr 15 22:01:50 xencluster1 clvmd[8763]: Syncing device names Apr 15 22:01:50 xencluster1 clvmd[8763]: Reply from node 4914420a: 0 bytes Apr 15 22:01:50 xencluster1 clvmd[8763]: Got 1 replies, expecting: 2 Apr 15 22:01:50 xencluster1 clvmd[8763]: LVM thread waiting for work Apr 15 22:01:50 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab Apr 15 22:01:50 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ac Apr 15 22:01:50 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab ... Apr 15 22:02:19 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ac Apr 15 22:02:20 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab Apr 15 22:02:20 xencluster1 lrmd: [8093]: WARN: clvm-xenvg:0:monitor process (PID 771) timed out (try 1). Killing with signal SIGTERM (15). 
Apr 15 22:02:20 xencluster1 lrmd: [8093]: WARN: operation monitor[92] on clvm-xenvg:0 for client 8096: pid 771 timed out Apr 15 22:02:20 xencluster1 crmd: [8096]: ERROR: process_lrm_event: LRM operation clvm-xenvg:0_monitor_10000 (92) Timed Out (timeout=30000ms) Apr 15 22:02:20 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ac Apr 15 22:02:20 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab ... Apr 15 22:02:30 xencluster1 clvmd[8763]: Got new connection on fd 17 Apr 15 22:02:30 xencluster1 clvmd[8763]: Read on local socket 17, len = 28 Apr 15 22:02:30 xencluster1 clvmd[8763]: creating pipe, [18, 19] Apr 15 22:02:30 xencluster1 clvmd[8763]: Creating pre&post thread Apr 15 22:02:30 xencluster1 clvmd[8763]: Created pre&post thread, state = 0 Apr 15 22:02:30 xencluster1 clvmd[8763]: in sub thread: client = 0x7f5c40058080 Apr 15 22:02:30 xencluster1 clvmd[8763]: Sub thread ready for work. Apr 15 22:02:30 xencluster1 clvmd[8763]: doing PRE command LOCK_VG 'V_XenVG' at 1 (client=0x7f5c40058080) Apr 15 22:02:30 xencluster1 clvmd[8763]: lock_resource 'V_XenVG', flags=0, mode=3 Apr 15 22:02:30 xencluster1 clvmd[8763]: lock_resource returning 0, lock_id=3a0001 Apr 15 22:02:48 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab Apr 15 22:02:48 xencluster1 cib: [8092]: info: cib_stats: Processed 4 operations (2500.00us average, 0% utilization) in the last 10min Apr 15 22:02:48 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ac Apr 15 22:03:00 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ac Apr 15 22:03:00 xencluster1 lrmd: [8093]: WARN: clvm-xenvg:0:monitor process (PID 2256) timed out (try 1). Killing with signal SIGTERM (15). 
Apr 15 22:03:00 xencluster1 lrmd: [8093]: WARN: operation monitor[92] on clvm-xenvg:0 for client 8096: pid 2256 timed out Apr 15 22:03:00 xencluster1 crmd: [8096]: ERROR: process_lrm_event: LRM operation clvm-xenvg:0_monitor_10000 (92) Timed Out (timeout=30000ms) Apr 15 22:03:00 xencluster1 corosync[8071]: [TOTEM ] Retransmit List: 21ab Node2 Log Apr 15 22:00:51 xencluster2 clvmd[12666]: Joined child thread Apr 15 22:00:51 xencluster2 clvmd[12666]: ret == 0, errno = 0. removing client Apr 15 22:00:51 xencluster2 clvmd[12666]: add_to_lvmqueue: cmd=0x7fa3d0028710. client=0x7fa3d0028760, msg=(nil), len=0, csid=(nil), xid=14778 Apr 15 22:00:51 xencluster2 clvmd[12666]: process_work_item: free fd -1 Apr 15 22:00:51 xencluster2 clvmd[12666]: LVM thread waiting for work Apr 15 22:00:52 xencluster2 crmd: [7678]: info: crm_timer_popped: PEngine Recheck Timer (I_PE_CALC) just popped (900000ms) Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition: State transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC cause=C_TIMER_POPPED origin=crm_timer_popped ] Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition: Progressed to state S_POLICY_ENGINE after C_TIMER_POPPED Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition: All 2 cluster nodes are eligible to run resources. 
Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_pe_invoke: Query 983: Requesting the current CIB: S_POLICY_ENGINE Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_pe_invoke_callback: Invoking the PE: query=983, ref=pe_calc-dc-1334520052-604, seq=2212, quorate=1 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_config: On loss of CCM Quorum: Ignore Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation clvm:1_last_failure_0 found resource clvm:1 active on xencluster2 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation STONITH-Node2_last_failure_0 found resource STONITH-Node2 active on xencluster2 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation STONITH--Node1_last_failure_0 found resource STONITH--Node1 active on xencluster2 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation cmirror:1_last_failure_0 found resource cmirror:1 active on xencluster2 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation dlm:1_last_failure_0 found resource dlm:1 active on xencluster2 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation o2cb:1_last_failure_0 found resource o2cb:1 active on xencluster2 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation dlm:0_last_failure_0 found resource dlm:0 active on xencluster1 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation clvm:0_last_failure_0 found resource clvm:0 active on xencluster1 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation o2cb:0_last_failure_0 found resource o2cb:0 active on xencluster1 Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation clvm-xenvg:0_last_failure_0 found resource clvm-xenvg:0 active on xencluster1 Apr 15 22:00:52 xencluster2 pengine: [7677]: WARN: unpack_rsc_op: Processing failed op vm-deprepo_last_failure_0 on xencluster1: unknown error (1) Apr 
15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave STONITH-Node2 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave STONITH--Node1 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave dlm:0 (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave clvm:0 (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave o2cb:0 (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave cmirror:0 (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave dlm:1 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave clvm:1 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave o2cb:1 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave cmirror:1 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave clvm-xenvg:0 (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave fs-config-xen:0 (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave clvm-xenvg:1 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave fs-config-xen:1 (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-ilmt (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-topdesk-entw (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-otbridge (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-virenscanner (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-deprepo (Started xencluster1) Apr 15 22:00:52 
xencluster2 pengine: [7677]: notice: LogActions: Leave vm-traumschiff (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-cim (Started xencluster2) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-oneiroi (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-intranet (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-soa (Started xencluster1) Apr 15 22:00:52 xencluster2 pengine: [7677]: notice: process_pe_message: Transition 80: PEngine Input stored in: /var/lib/pengine/pe-input-271.bz2 Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition: State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE [ input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response ] Apr 15 22:00:52 xencluster2 crmd: [7678]: info: unpack_graph: Unpacked transition 80: 0 actions in 0 synapses Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_te_invoke: Processing graph 80 (ref=pe_calc-dc-1334520052-604) derived from /var/lib/pengine/pe-input-271.bz2 Apr 15 22:00:52 xencluster2 crmd: [7678]: info: run_graph: ==================================================== Apr 15 22:00:52 xencluster2 crmd: [7678]: notice: run_graph: Transition 80 (Complete=0, Pending=0, Fired=0, Skipped=0, Incomplete=0, Source=/var/lib/pengine/pe-input-271.bz2): Complete Apr 15 22:00:52 xencluster2 crmd: [7678]: info: te_graph_trigger: Transition 80 is now complete Apr 15 22:00:52 xencluster2 crmd: [7678]: info: notify_crmd: Transition 80 status: done - <null> Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition: State transition S_TRANSITION_ENGINE -> S_IDLE [ input=I_TE_SUCCESS cause=C_FSA_INTERNAL origin=notify_crmd ] Apr 15 22:00:52 xencluster2 crmd: [7678]: info: do_state_transition: Starting PEngine Recheck Timer Apr 15 22:01:00 xencluster2 clvmd[12666]: 1309950474 got message from nodeid 1226064394 for 0. 
len 32 Apr 15 22:01:00 xencluster2 clvmd[12666]: add_to_lvmqueue: cmd=0x7fa3d0028780. client=0x6934a0, msg=0x7fa3d910063c, len=32, csid=0x7fffb4e5d944, xid=0 Apr 15 22:01:00 xencluster2 clvmd[12666]: process_work_item: remote Apr 15 22:01:00 xencluster2 clvmd[12666]: process_remote_command unknown (0x2d) for clientid 0x5000000 XID 15337 on node 4914420a Apr 15 22:01:00 xencluster2 clvmd[12666]: Syncing device names Apr 15 22:01:00 xencluster2 clvmd[12666]: LVM thread waiting for work Apr 15 22:01:00 xencluster2 clvmd[12666]: 1309950474 got message from nodeid 1309950474 for 1226064394. len 18 Apr 15 22:01:00 xencluster2 clvmd[12666]: 1309950474 got message from nodeid 1226064394 for 0. len 32 Apr 15 22:01:02 xencluster2 clvmd[12666]: Joined child thread Apr 15 22:01:02 xencluster2 clvmd[12666]: ret == 0, errno = 0. removing client Apr 15 22:01:02 xencluster2 clvmd[12666]: add_to_lvmqueue: cmd=0x7fa3d004d7a0. client=0x7fa3d004d7f0, msg=(nil), len=0, csid=(nil), xid=14848 Apr 15 22:01:02 xencluster2 clvmd[12666]: process_work_item: free fd -1 Apr 15 22:01:02 xencluster2 clvmd[12666]: LVM thread waiting for work Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: stonith_command: Processed st_execute from lrmd: rc=-1 Apr 15 22:01:06 xencluster2 stonith: parse config info info=xencluster1 Apr 15 22:01:06 xencluster2 stonith: meatware device OK. Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH--Node1: Performing: stonith -t meatware -S Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH--Node1: success: 0 Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: stonith_device_execute: Nothing to do for STONITH--Node1 Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: stonith_command: Processed st_execute from lrmd: rc=-1 Apr 15 22:01:06 xencluster2 stonith: parse config info info=xencluster2 Apr 15 22:01:06 xencluster2 stonith: meatware device OK. 
Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH-Node2: Performing: stonith -t meatware -S Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH-Node2: success: 0 Apr 15 22:01:06 xencluster2 stonith-ng: [8283]: info: stonith_device_execute: Nothing to do for STONITH-Node2 Apr 15 22:01:10 xencluster2 clvmd[12666]: 1309950474 got message from nodeid 1226064394 for 0. len 32 Apr 15 22:01:10 xencluster2 clvmd[12666]: add_to_lvmqueue: cmd=0x7fa3d004d810. client=0x6934a0, msg=0x7fa3d9100cfc, len=32, csid=0x7fffb4e5d944, xid=0 Apr 15 22:01:10 xencluster2 clvmd[12666]: process_work_item: remote Apr 15 22:01:10 xencluster2 clvmd[12666]: process_remote_command unknown (0x2d) for clientid 0x5000000 XID 15407 on node 4914420a Apr 15 22:01:10 xencluster2 clvmd[12666]: Syncing device names Apr 15 22:01:10 xencluster2 clvmd[12666]: LVM thread waiting for work Apr 15 22:01:10 xencluster2 clvmd[12666]: 1309950474 got message from nodeid 1309950474 for 1226064394. len 18 Apr 15 22:01:10 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 Apr 15 22:01:10 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 Apr 15 22:01:10 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 .... 
Apr 15 22:01:11 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 Apr 15 22:01:12 xencluster2 clvmd[12666]: Got new connection on fd 5 Apr 15 22:01:12 xencluster2 clvmd[12666]: Read on local socket 5, len = 28 Apr 15 22:01:12 xencluster2 clvmd[12666]: creating pipe, [12, 13] Apr 15 22:01:12 xencluster2 clvmd[12666]: Creating pre&post thread Apr 15 22:01:12 xencluster2 clvmd[12666]: Created pre&post thread, state = 0 Apr 15 22:01:12 xencluster2 clvmd[12666]: in sub thread: client = 0x7fa3d004d810 Apr 15 22:01:12 xencluster2 clvmd[12666]: Syncing device names Apr 15 22:01:12 xencluster2 clvmd[12666]: Reply from node 4e14420a: 0 bytes Apr 15 22:01:12 xencluster2 clvmd[12666]: Got 1 replies, expecting: 2 Apr 15 22:01:12 xencluster2 clvmd[12666]: LVM thread waiting for work Apr 15 22:01:12 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 Apr 15 22:01:12 xencluster2 clvmd[12666]: 1309950474 got message from nodeid 1309950474 for 0. len 32 Apr 15 22:01:12 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 Apr 15 22:01:12 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 .... Apr 15 22:01:21 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:21 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: stonith_command: Processed st_execute from lrmd: rc=-1 Apr 15 22:01:21 xencluster2 stonith: parse config info info=xencluster1 Apr 15 22:01:21 xencluster2 stonith: meatware device OK. 
Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH--Node1: Performing: stonith -t meatware -S Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH--Node1: success: 0 Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: stonith_device_execute: Nothing to do for STONITH--Node1 Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: stonith_command: Processed st_execute from lrmd: rc=-1 Apr 15 22:01:21 xencluster2 stonith: parse config info info=xencluster2 Apr 15 22:01:21 xencluster2 stonith: meatware device OK. Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH-Node2: Performing: stonith -t meatware -S Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH-Node2: success: 0 Apr 15 22:01:21 xencluster2 stonith-ng: [8283]: info: stonith_device_execute: Nothing to do for STONITH-Node2 Apr 15 22:01:21 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:22 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:22 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 .... Apr 15 22:01:36 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:36 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: stonith_command: Processed st_execute from lrmd: rc=-1 Apr 15 22:01:36 xencluster2 stonith: parse config info info=xencluster1 Apr 15 22:01:36 xencluster2 stonith: meatware device OK. 
Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH--Node1: Performing: stonith -t meatware -S Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH--Node1: success: 0 Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: stonith_device_execute: Nothing to do for STONITH--Node1 Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: stonith_command: Processed st_execute from lrmd: rc=-1 Apr 15 22:01:36 xencluster2 stonith: parse config info info=xencluster2 Apr 15 22:01:36 xencluster2 stonith: meatware device OK. Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH-Node2: Performing: stonith -t meatware -S Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: log_operation: STONITH-Node2: success: 0 Apr 15 22:01:36 xencluster2 stonith-ng: [8283]: info: stonith_device_execute: Nothing to do for STONITH-Node2 Apr 15 22:01:36 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:37 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 .... Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: process_graph_event: Detected action clvm-xenvg:0_monitor_10000 from a different transition: 62 vs. 
80 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: abort_transition_graph: process_graph_event:476 - Triggered transition abort (complete=1, tag=lrm_rsc_op, id=clvm-xenvg:0_monitor_10000, magic=2:-2;61:62:0:72497771-fdb2-4ca7-a723-2cc360861bb0, cib=0.5816.4) : Old event Apr 15 22:01:40 xencluster2 crmd: [7678]: WARN: update_failcount: Updating failcount for clvm-xenvg:0 on xencluster1 after failed monitor: rc=-2 (update=value++, time=1334520100) Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_state_transition: State transition S_IDLE -> S_POLICY_ENGINE [ input=I_PE_CALC cause=C_FSA_INTERNAL origin=abort_transition_graph ] Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_state_transition: All 2 cluster nodes are eligible to run resources. Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_pe_invoke: Query 984: Requesting the current CIB: S_POLICY_ENGINE Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d ... Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_pe_invoke_callback: Invoking the PE: query=984, ref=pe_calc-dc-1334520100-605, seq=2212, quorate=1 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d ... 
Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_config: On loss of CCM Quorum: Ignore Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation clvm:1_last_failure_0 found resource clvm:1 active on xencluster2 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation STONITH-Node2_last_failure_0 found resource STONITH-Node2 active on xencluster2 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation STONITH--Node1_last_failure_0 found resource STONITH--Node1 active on xencluster2 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation cmirror:1_last_failure_0 found resource cmirror:1 active on xencluster2 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation dlm:1_last_failure_0 found resource dlm:1 active on xencluster2 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation o2cb:1_last_failure_0 found resource o2cb:1 active on xencluster2 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation cmirror:0_last_failure_0 found resource cmirror:0 active on xencluster1 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation dlm:0_last_failure_0 found resource dlm:0 active on xencluster1 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation clvm:0_last_failure_0 found resource clvm:0 active on xencluster1 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation o2cb:0_last_failure_0 found resource o2cb:0 active on xencluster1 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: unpack_rsc_op: Operation clvm-xenvg:0_last_failure_0 found resource clvm-xenvg:0 active on xencluster1 Apr 15 22:01:40 xencluster2 pengine: [7677]: WARN: unpack_rsc_op: Processing failed op clvm-xenvg:0_monitor_10000 on xencluster1: unknown exec error (-2) Apr 15 
22:01:40 xencluster2 pengine: [7677]: WARN: unpack_rsc_op: Processing failed op vm-deprepo_last_failure_0 on xencluster1: unknown error (1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: RecurringOp: Start recurring monitor (10s) for clvm-xenvg:0 on xencluster1 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave STONITH-Node2 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave STONITH--Node1 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave dlm:0 (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave clvm:0 (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave o2cb:0 (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave cmirror:0 (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave dlm:1 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave clvm:1 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave o2cb:1 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave cmirror:1 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Recover clvm-xenvg:0 (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart fs-config-xen:0 (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave clvm-xenvg:1 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave fs-config-xen:1 (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart vm-ilmt (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart vm-topdesk-entw (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: 
Leave vm-otbridge (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart vm-virenscanner (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart vm-deprepo (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-traumschiff (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Leave vm-cim (Started xencluster2) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart vm-oneiroi (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart vm-intranet (Started xencluster1) Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: LogActions: Restart vm-soa (Started xencluster1) Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_state_transition: State transition S_POLICY_ENGINE -> S_TRANSITION_ENGINE [ input=I_PE_SUCCESS cause=C_IPC_MESSAGE origin=handle_response ] Apr 15 22:01:40 xencluster2 crmd: [7678]: info: unpack_graph: Unpacked transition 81: 39 actions in 39 synapses Apr 15 22:01:40 xencluster2 crmd: [7678]: info: do_te_invoke: Processing graph 81 (ref=pe_calc-dc-1334520100-605) derived from /var/lib/pengine/pe-input-272.bz2 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command: Initiating action 82: stop vm-ilmt_stop_0 on xencluster1 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command: Initiating action 86: stop vm-topdesk-entw_stop_0 on xencluster1 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command: Initiating action 90: stop vm-virenscanner_stop_0 on xencluster1 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command: Initiating action 92: stop vm-deprepo_stop_0 on xencluster1 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command: Initiating action 98: stop vm-oneiroi_stop_0 on xencluster1 Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command: Initiating action 100: stop vm-intranet_stop_0 on xencluster1 
Apr 15 22:01:40 xencluster2 crmd: [7678]: info: te_rsc_command: Initiating action 102: stop vm-soa_stop_0 on xencluster1 Apr 15 22:01:40 xencluster2 pengine: [7677]: notice: process_pe_message: Transition 81: PEngine Input stored in: /var/lib/pengine/pe-input-272.bz2 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5 Apr 15 22:01:40 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5 ... Apr 15 22:01:41 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5 Apr 15 22:01:42 xencluster2 lrmd: [7675]: WARN: clvm-xenvg:1:monitor process (PID 29785) timed out (try 1). Killing with signal SIGTERM (15). Apr 15 22:01:42 xencluster2 lrmd: [7675]: WARN: operation monitor[125] on clvm-xenvg:1 for client 7678: pid 29785 timed out Apr 15 22:01:42 xencluster2 crmd: [7678]: ERROR: process_lrm_event: LRM operation clvm-xenvg:1_monitor_10000 (125) Timed Out (timeout=30000ms) Apr 15 22:01:42 xencluster2 crmd: [7678]: info: process_graph_event: Detected action clvm-xenvg:1_monitor_10000 from a different transition: 68 vs. 
81 Apr 15 22:01:42 xencluster2 crmd: [7678]: info: abort_transition_graph: process_graph_event:476 - Triggered transition abort (complete=0, tag=lrm_rsc_op, id=clvm-xenvg:1_monitor_10000, magic=2:-2;61:68:0:72497771-fdb2-4ca7-a723-2cc360861bb0, cib=0.5816.5) : Old event Apr 15 22:01:42 xencluster2 crmd: [7678]: info: update_abort_priority: Abort priority upgraded from 0 to 1000000 Apr 15 22:01:42 xencluster2 crmd: [7678]: info: update_abort_priority: Abort action done superceeded by restart Apr 15 22:01:42 xencluster2 crmd: [7678]: WARN: update_failcount: Updating failcount for clvm-xenvg:1 on xencluster2 after failed monitor: rc=-2 (update=value++, time=1334520102) Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_trigger_update: Sending flush op to all hosts for: fail-count-clvm-xenvg:1 (1) Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_perform_update: Sent update 448: fail-count-clvm-xenvg:1=1 Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_trigger_update: Sending flush op to all hosts for: last-failure-clvm-xenvg:1 (1334520102) Apr 15 22:01:42 xencluster2 crmd: [7678]: info: abort_transition_graph: te_update_diff:176 - Triggered transition abort (complete=0, tag=nvpair, id=status-xencluster2-fail-count-clvm-xenvg.1, name=fail-count-clvm-xenvg:1, value=1, magic=NA, cib=0.5816.6) : Transient attribute: update Apr 15 22:01:42 xencluster2 attrd: [7676]: notice: attrd_perform_update: Sent update 451: last-failure-clvm-xenvg:1=1334520102 Apr 15 22:01:42 xencluster2 crmd: [7678]: info: abort_transition_graph: te_update_diff:176 - Triggered transition abort (complete=0, tag=nvpair, id=status-xencluster2-last-failure-clvm-xenvg.1, name=last-failure-clvm-xenvg:1, value=1334520102, magic=NA, cib=0.5816.7) : Transient attribute: update Apr 15 22:01:42 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5 Apr 15 22:01:42 xencluster2 corosync[7666]: [TOTEM ] Retransmit List: 2196 
2197 219a 219b 219c 219d 219e 219f 21a0 21a1 21a2 21a3 21a4 21a5 -- Mit freundlichen Grüßen Frank Meier UNIX-Basis Hamm Reno Group GmbH Industriegebiet West | D-66987 Thaleischweiler-Fröschen T.+49(0)6334 444-8322 | F.+49(0)6334 444-8190 frank.me...@hr-group.de | www.reno.de ___________________________________________________________________ Sitz: Am Tie 7 | D-49086 Osnabrück Handelsregister Osnabrück HRB 19587 Geschäftsführer: Hans-Jürgen de Fries, Jens Gransee, Manfred Klumpp, Robert Reisch _______________________________________________ Pacemaker mailing list: Pacemaker@oss.clusterlabs.org http://oss.clusterlabs.org/mailman/listinfo/pacemaker Project Home: http://www.clusterlabs.org Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf Bugs: http://bugs.clusterlabs.org