Perhaps try setting the "ordered" meta attribute to true for the clone.
We had to do something similar for ocfs2 at one stage.
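
Untested, but with the configuration you posted below that would look
something like this (only the clone's meta line changes, e.g. via
"crm configure edit"):

    clone clone-set groups \
        meta interleave="true" ordered="true" target-role="Started"

With ordered="true" the clone instances are started one after another
instead of in parallel, which can help when one node's dlm/gfs_controld
needs the other node's instance to have finished joining first.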

On Thu, Sep 22, 2011 at 11:48 AM,  <xin.li...@cs2c.com.cn> wrote:
> Hi,
>
>           I hope everything is going well.
>
>           I am having problems with DRBD + dlm + gfs in Pacemaker.
>
>           I followed this
> doc: http://www.clusterlabs.org/doc/en-US/Pacemaker/1.1/html/Clusters_from_Scratch/index.html
> to build a DRBD + GFS2 cluster.
>
>           When I run DRBD (two primaries) + dlm + gfs + Filesystem, node-c hangs
> and reboots.
>
>           When I run only DRBD (two primaries) + dlm + gfs, it works well, running on
> both nodes.
>
>            Then I "mount -t gfs2 /dev/drbd0 /mnt" on node-b, and it is OK.
>
>            BUT when I run "mount -t gfs2 /dev/drbd0 /mnt" on
> node-c, node-c hangs and reboots. There is no error output on stderr.
>
>            If I do this on node-c first instead, it is OK; but then when I mount on node-b,
> node-b hangs and reboots!
>
> #################################################################
> $ crm configure show
>
> node ha-b
> node ha-c
> primitive dlm ocf:pacemaker:controld \
>     operations $id="dlm-operations" \
>     op monitor interval="10" timeout="20" start-delay="0" \
>     params args="-L -K -P -q 0 "
> primitive drbd ocf:linbit:drbd \
>     operations $id="drbd-operations" \
>     op monitor interval="20" role="Slave" timeout="20" \
>     op monitor interval="10" role="Master" timeout="20" \
>     params drbd_resource="drbd0"
> primitive gfs ocf:pacemaker:controld \
>     operations $id="gfs-operations" \
>     op monitor interval="10" timeout="20" start-delay="0" \
>     params daemon="gfs_controld.pcmk" args="-L -P -g 0"
> group groups dlm gfs
> ms ms-drbd drbd \
>     meta master-max="2" notify="true" target-role="Started"
> clone clone-set groups \
>     meta interleave="true" target-role="Started"
> colocation clone-on-drbd inf: clone-set:Started ms-drbd:Master
> order clone-after-drbd inf: ms-drbd:promote clone-set:start symmetrical=true
> property $id="cib-bootstrap-options" \
>     dc-version="1.1.6-1.el6-9971ebba4494012a93c03b40a2c58ec0eb60f50c" \
>     cluster-infrastructure="openais" \
>     expected-quorum-votes="2" \
>     no-quorum-policy="ignore" \
>     stonith-enabled="false"
> ##########################################################################
>
>
> gfs_controld.log on ha-b
> ##########################################################################
> [root@ha-b ~]# cat /var/log/cluster/gfs_controld.log
>
> Sep 22 09:08:13 gfs_controld gfs_controld 3.0.12 started
> Sep 22 09:08:13 gfs_controld Connected as node 3393650954 to cluster 'cs2c'
> Sep 22 09:08:13 gfs_controld logging mode 3 syslog f 160 p 6 logfile p 7
> /var/log/cluster/gfs_controld.log
> Sep 22 09:08:13 gfs_controld group_mode 3 compat 0
> Sep 22 09:08:13 gfs_controld setup_cpg_daemon 11
> Sep 22 09:08:13 gfs_controld gfs:controld conf 1 1 0 memb -901316342 join
> -901316342 left
> Sep 22 09:08:13 gfs_controld set_protocol member_count 1 propose daemon
> 1.1.1 kernel 1.1.1
> Sep 22 09:08:13 gfs_controld run protocol from nodeid -901316342
> Sep 22 09:08:13 gfs_controld daemon run 1.1.1 max 1.1.1 kernel run 1.1.1 max
> 1.1.1
> Sep 22 09:08:14 gfs_controld gfs:controld conf 2 1 0 memb -901316342
> -884539126 join -884539126 left
> Sep 22 09:11:57 gfs_controld client connection 5 fd 14
> Sep 22 09:11:57 gfs_controld join: /mnt gfs2 lock_dlm cs2c:liang rw
> /dev/drbd0
> Sep 22 09:11:57 gfs_controld liang join: cluster name matches: cs2c
> Sep 22 09:11:57 gfs_controld liang process_dlmcontrol register 0
> Sep 22 09:11:57 gfs_controld gfs:mount:liang conf 1 1 0 memb -901316342 join
> -901316342 left
> Sep 22 09:11:57 gfs_controld liang add_change cg 1 joined nodeid -901316342
> Sep 22 09:11:57 gfs_controld liang add_change cg 1 we joined
> Sep 22 09:11:57 gfs_controld liang add_change cg 1 counts member 1 joined 1
> remove 0 failed 0
> Sep 22 09:11:57 gfs_controld liang wait_conditions skip for zero
> started_count
> Sep 22 09:11:57 gfs_controld liang send_start cg 1 id_count 1 om 0 nm 1 oj 0
> nj 0
> Sep 22 09:11:57 gfs_controld liang receive_start -901316342:1 len 92
> Sep 22 09:11:57 gfs_controld liang match_change -901316342:1 matches cg 1
> Sep 22 09:11:57 gfs_controld liang wait_messages cg 1 got all 1
> Sep 22 09:11:57 gfs_controld liang pick_first_recovery_master low -901316342
> old 0
> Sep 22 09:11:57 gfs_controld liang sync_state all_nodes_new
> first_recovery_needed master -901316342
> Sep 22 09:11:57 gfs_controld liang create_old_nodes all new
> Sep 22 09:11:57 gfs_controld liang create_new_nodes -901316342 ro 0 spect 0
> Sep 22 09:11:57 gfs_controld liang create_failed_journals all new
> Sep 22 09:11:57 gfs_controld liang apply_recovery first start_kernel
> Sep 22 09:11:57 gfs_controld liang start_kernel cg 1 member_count 1
> Sep 22 09:11:57 gfs_controld liang set
> /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
> Sep 22 09:11:57 gfs_controld liang set open
> /sys/fs/gfs2/cs2c:liang/lock_module/block error -1 2
> Sep 22 09:11:57 gfs_controld liang client_reply_join_full ci 5 result 0
> hostdata=jid=0:id=915250580:first=1
> Sep 22 09:11:57 gfs_controld client_reply_join liang ci 5 result 0
> Sep 22 09:11:57 gfs_controld uevent add gfs2 /fs/gfs2/cs2c:liang
> Sep 22 09:11:57 gfs_controld liang ping_kernel_mount 0
> Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
> Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 0 first recovery done
> 0
> Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
> Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 1 first recovery done
> 0
> Sep 22 09:11:57 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
> Sep 22 09:11:57 gfs_controld liang recovery_uevent jid 1 first recovery done
> 0
> Sep 22 09:11:57 gfs_controld liang recovery_uevent first_done
> Sep 22 09:11:57 gfs_controld liang receive_first_recovery_done from
> -901316342 master -901316342 mount_client_notified 1
> Sep 22 09:11:57 gfs_controld liang wait_recoveries done
> Sep 22 09:11:57 gfs_controld uevent online gfs2 /fs/gfs2/cs2c:liang
> Sep 22 09:11:57 gfs_controld liang ping_kernel_mount 0
> Sep 22 09:11:57 gfs_controld mount_done: liang result 0
> Sep 22 09:11:57 gfs_controld connection 5 read error -1
> Sep 22 09:11:57 gfs_controld liang receive_mount_done from -901316342 result
> 0
> Sep 22 09:11:57 gfs_controld liang wait_recoveries done
> Sep 22 09:12:37 gfs_controld uevent remove gfs2 /fs/gfs2/cs2c:liang
> Sep 22 09:12:37 gfs_controld do_leave liang mnterr 0
> Sep 22 09:12:37 gfs_controld gfs:mount:liang conf 0 0 1 memb join left
> -901316342
> Sep 22 09:12:37 gfs_controld liang confchg for our leave
> ##########################################################################
>
>
> gfs_controld.log on ha-c
> ##########################################################################
> [root@ha-c ~]# cat /var/log/cluster/gfs_controld.log
>
> Sep 22 08:52:12 gfs_controld gfs_controld 3.0.12 started
> Sep 22 08:52:12 gfs_controld Connected as node 3410428170 to cluster 'cs2c'
> Sep 22 08:52:12 gfs_controld logging mode 3 syslog f 160 p 6 logfile p 7
> /var/log/cluster/gfs_controld.log
> Sep 22 08:52:12 gfs_controld group_mode 3 compat 0
> Sep 22 08:52:12 gfs_controld setup_cpg_daemon 11
> Sep 22 08:52:12 gfs_controld gfs:controld conf 2 1 0 memb -901316342
> -884539126 join -884539126 left
> Sep 22 08:52:12 gfs_controld run protocol from nodeid -901316342
> Sep 22 08:52:12 gfs_controld daemon run 1.1.1 max 1.1.1 kernel run 1.1.1 max
> 1.1.1
> Sep 22 08:56:52 gfs_controld client connection 5 fd 14
> Sep 22 08:56:52 gfs_controld join: /mnt gfs2 lock_dlm cs2c:liang rw
> /dev/drbd0
> Sep 22 08:56:52 gfs_controld liang join: cluster name matches: cs2c
> Sep 22 08:56:52 gfs_controld liang process_dlmcontrol register 0
> Sep 22 08:56:52 gfs_controld gfs:mount:liang conf 1 1 0 memb -884539126 join
> -884539126 left
> Sep 22 08:56:52 gfs_controld liang add_change cg 1 joined nodeid -884539126
> Sep 22 08:56:52 gfs_controld liang add_change cg 1 we joined
> Sep 22 08:56:52 gfs_controld liang add_change cg 1 counts member 1 joined 1
> remove 0 failed 0
> Sep 22 08:56:52 gfs_controld liang wait_conditions skip for zero
> started_count
> Sep 22 08:56:52 gfs_controld liang send_start cg 1 id_count 1 om 0 nm 1 oj 0
> nj 0
> Sep 22 08:56:52 gfs_controld liang receive_start -884539126:1 len 92
> Sep 22 08:56:52 gfs_controld liang match_change -884539126:1 matches cg 1
> Sep 22 08:56:52 gfs_controld liang wait_messages cg 1 got all 1
> Sep 22 08:56:52 gfs_controld liang pick_first_recovery_master low -884539126
> old 0
> Sep 22 08:56:52 gfs_controld liang sync_state all_nodes_new
> first_recovery_needed master -884539126
> Sep 22 08:56:52 gfs_controld liang create_old_nodes all new
> Sep 22 08:56:52 gfs_controld liang create_new_nodes -884539126 ro 0 spect 0
> Sep 22 08:56:52 gfs_controld liang create_failed_journals all new
> Sep 22 08:56:52 gfs_controld liang apply_recovery first start_kernel
> Sep 22 08:56:52 gfs_controld liang start_kernel cg 1 member_count 1
> Sep 22 08:56:52 gfs_controld liang set
> /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
> Sep 22 08:56:52 gfs_controld liang set open
> /sys/fs/gfs2/cs2c:liang/lock_module/block error -1 2
> Sep 22 08:56:52 gfs_controld liang client_reply_join_full ci 5 result 0
> hostdata=jid=0:id=915250580:first=1
> Sep 22 08:56:52 gfs_controld client_reply_join liang ci 5 result 0
> Sep 22 08:56:53 gfs_controld uevent add gfs2 /fs/gfs2/cs2c:liang
> Sep 22 08:56:53 gfs_controld liang ping_kernel_mount 0
> Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
> Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 0 first recovery done
> 0
> Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
> Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 1 first recovery done
> 0
> Sep 22 08:56:53 gfs_controld uevent change gfs2 /fs/gfs2/cs2c:liang
> Sep 22 08:56:53 gfs_controld liang recovery_uevent jid 1 first recovery done
> 0
> Sep 22 08:56:53 gfs_controld liang recovery_uevent first_done
> Sep 22 08:56:53 gfs_controld liang receive_first_recovery_done from
> -884539126 master -884539126 mount_client_notified 1
> Sep 22 08:56:53 gfs_controld liang wait_recoveries done
> Sep 22 08:56:53 gfs_controld uevent online gfs2 /fs/gfs2/cs2c:liang
> Sep 22 08:56:53 gfs_controld liang ping_kernel_mount 0
> Sep 22 08:56:53 gfs_controld mount_done: liang result 0
> Sep 22 08:56:53 gfs_controld connection 5 read error -1
> Sep 22 08:56:53 gfs_controld liang receive_mount_done from -884539126 result
> 0
> Sep 22 08:56:53 gfs_controld liang wait_recoveries done
> Sep 22 08:57:17 gfs_controld gfs:mount:liang conf 2 1 0 memb -901316342
> -884539126 join -901316342 left
> Sep 22 08:57:17 gfs_controld liang add_change cg 2 joined nodeid -901316342
> Sep 22 08:57:17 gfs_controld liang add_change cg 2 counts member 2 joined 1
> remove 0 failed 0
> Sep 22 08:57:17 gfs_controld liang wait_conditions skip for zero
> journals_need_recovery
> Sep 22 08:57:17 gfs_controld liang send_start cg 2 id_count 2 om 1 nm 1 oj 0
> nj 0
> Sep 22 08:57:17 gfs_controld liang receive_start -901316342:1 len 104
> Sep 22 08:57:17 gfs_controld liang match_change -901316342:1 matches cg 2
> Sep 22 08:57:17 gfs_controld liang wait_messages cg 2 need 1 of 2
> Sep 22 08:57:17 gfs_controld liang receive_start -884539126:2 len 104
> Sep 22 08:57:17 gfs_controld liang match_change -884539126:2 matches cg 2
> Sep 22 08:57:17 gfs_controld liang wait_messages cg 2 got all 2
> Sep 22 08:57:17 gfs_controld liang sync_state first_recovery_msg
> Sep 22 08:57:17 gfs_controld liang create_new_nodes -901316342 ro 0 spect 0
> Sep 22 08:57:17 gfs_controld liang wait_recoveries done
> Sep 22 08:57:22 gfs_controld gfs:controld conf 1 0 1 memb -884539126 join
> left -901316342
> Sep 22 08:57:22 gfs_controld gfs:mount:liang conf 1 0 1 memb -884539126 join
> left -901316342
> Sep 22 08:57:22 gfs_controld liang add_change cg 3 remove nodeid -901316342
> reason 3
> Sep 22 08:57:22 gfs_controld liang add_change cg 3 counts member 1 joined 0
> remove 1 failed 1
> Sep 22 08:57:22 gfs_controld liang stop_kernel
> Sep 22 08:57:22 gfs_controld liang set
> /sys/fs/gfs2/cs2c:liang/lock_module/block to 1
> Sep 22 08:57:22 gfs_controld liang check_dlm_notify nodeid -901316342 begin
> Sep 22 08:57:22 gfs_controld liang process_dlmcontrol notified nodeid
> -901316342 result 0
> Sep 22 08:57:22 gfs_controld liang check_dlm_notify done
> Sep 22 08:57:22 gfs_controld liang send_start cg 3 id_count 1 om 1 nm 0 oj 0
> nj 0
> Sep 22 08:57:22 gfs_controld liang receive_start -884539126:3 len 92
> Sep 22 08:57:22 gfs_controld liang match_change -884539126:3 matches cg 3
> Sep 22 08:57:22 gfs_controld liang wait_messages cg 3 got all 1
> Sep 22 08:57:22 gfs_controld liang sync_state first_recovery_msg
> Sep 22 08:57:22 gfs_controld liang set_failed_journals no journal for nodeid
> -901316342
> Sep 22 08:57:22 gfs_controld liang wait_recoveries done
> Sep 22 08:57:22 gfs_controld liang apply_recovery start_kernel
> Sep 22 08:57:22 gfs_controld liang start_kernel cg 3 member_count 1
> Sep 22 08:57:22 gfs_controld liang set
> /sys/fs/gfs2/cs2c:liang/lock_module/block to 0
> ##########################################################################
>
>     My experimental environment:
>
>         2 PCs:
>             node-b
>             node-c
>
>         Both run RHEL 6.1 x86_64
>
>         RPMS:
>             pacemaker-cli-1.1.6-1.el6.x86_64
>             pacemaker-doc-1.1.6-1.el6.x86_64
>             pacemaker-libs-1.1.6-1.el6.x86_64
>             pacemaker-1.1.6-1.el6.x86_64
>             pacemaker-cts-1.1.6-1.el6.x86_64
>             pacemaker-libs-devel-1.1.6-1.el6.x86_64
>
>             corosynclib-1.4.1-1.x86_64
>             corosync-1.4.1-1.x86_64
>             corosynclib-devel-1.4.1-1.x86_64
>
>             resource-agents-3.9.2-1.x86_64
>
>             cluster-glue-libs-devel-1.0.7-1.el6.x86_64
>             cluster-glue-libs-1.0.7-1.el6.x86_64
>             cluster-glue-1.0.7-1.el6.x86_64
>
>             openais-1.1.1-7.el6.x86_64
>             openaislib-1.1.1-7.el6.x86_64
>
>             dlm-pcmk-3.0.12-23.el6_0.6.x86_64
>
>             gfs-pcmk-3.0.12-23.el6_0.6.x86_64
>             gfs2-utils-3.0.12-41.el6.x86_64
>
>             clusterlib-3.0.12-41.el6.x86_64
>
>             drbd-udev-8.4.0-1.el6.x86_64
>             drbd-8.4.0-1.el6.x86_64
>             drbd-utils-8.4.0-1.el6.x86_64
>             drbd-heartbeat-8.4.0-1.el6.x86_64
>             drbd-pacemaker-8.4.0-1.el6.x86_64
>             drbd-bash-completion-8.4.0-1.el6.x86_64
>             drbd-xen-8.4.0-1.el6.x86_64
>             drbd-km-2.6.32_131.0.15.el6.x86_64-8.4.0-1.el6.x86_64
>             drbd-kernel-8.4.0-1.el6.x86_64
>
>     My conf:
>         $ cat /etc/corosync/corosync.conf
>
>         compatibility: whitetank
>
>         totem {
>             version: 2
>             secauth: off
>             threads: 0
>             rrp_mode: passive
>             interface {
>                 ringnumber: 0
>                 bindnetaddr: 10.1.71.0
>                 mcastaddr: 235.3.4.5
>                 mcastport: 9876
>             }
>
>             interface {
>                 ringnumber: 1
>                 bindnetaddr: 10.10.10.0
>                 mcastaddr: 235.3.4.6
>                 mcastport: 9877
>             }
>         }
>
>         logging {
>             fileline: off
>             to_stderr: no
>             to_logfile: yes
>             to_syslog: yes
>             logfile: /var/log/cluster/corosync.log
>             debug: off
>             timestamp: on
>             logger_subsys {
>             subsys: AMF
>             debug: off
>             }
>         }
>
>         amf {
>             mode: disabled
>         }
>
>         service {
>             name: pacemaker
>             var: 0
>             use_logd: yes
>             use_mgmtd: yes
>             clustername: cs2c
>         }
>
>
>

_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://developerbugs.linux-foundation.org/enter_bug.cgi?product=Pacemaker
