[Linux-cluster] Rebooting the Master Node in an RHCS Cluster

gcharles at ups.com gcharles at ups.com
Tue Oct 26 18:08:12 UTC 2010


Here is our cluster.conf, edited for security reasons:

----------------------------------------
<?xml version="1.0"?>
<cluster name="WFF_RHCS" config_version="34">
        <!-- Fence daemon settings. The element must be named fence_daemon:
             fenced does not read a fence_domain element, so the delays below
             would otherwise be ignored and defaults used instead. -->
        <fence_daemon clean_start="0" post_fail_delay="20" post_join_delay="3"/>
        <!-- Cluster members: three nodes with one vote each. Every node has a
             single fencing method that references the fence device named
             after that node. -->
        <clusternodes>
                <clusternode nodeid="1" name="node1" votes="1">
                        <fence>
                                <method name="1">
                                        <device name="node1-ilo-fence"/>
                                </method>
                        </fence>
                </clusternode>
                <clusternode nodeid="2" name="node2" votes="1">
                        <fence>
                                <method name="1">
                                        <device name="node2-ilo-fence"/>
                                </method>
                        </fence>
                </clusternode>
                <clusternode nodeid="3" name="node3" votes="1">
                        <fence>
                                <method name="1">
                                        <device name="node3-ilo-fence"/>
                                </method>
                        </fence>
                </clusternode>
        </clusternodes>
        <!-- One fence_ilo device per node. Hostname and credentials appear to
             be redacted placeholders in this listing. -->
        <fencedevices>
                <fencedevice name="node1-ilo-fence" agent="fence_ilo"
                             hostname="xxx.xxx.xxx.xxx" login="login" passwd="passwd"/>
                <fencedevice name="node2-ilo-fence" agent="fence_ilo"
                             hostname="xxx.xxx.xxx.xxx" login="login" passwd="passwd"/>
                <fencedevice name="node3-ilo-fence" agent="fence_ilo"
                             hostname="xxx.xxx.xxx.xxx" login="login" passwd="passwd"/>
        </fencedevices>
        <rm log_facility="local5" log_level="7">
                <!-- A single failover domain (ordered="0", restricted="0") that
                     lists all three nodes at the same priority. -->
                <failoverdomains>
                        <failoverdomain name="rhcstestdomain" ordered="0" restricted="0" nofailback="0">
                                <failoverdomainnode priority="1" name="node1"/>
                                <failoverdomainnode priority="1" name="node2"/>
                                <failoverdomainnode priority="1" name="node3"/>
                        </failoverdomain>
                </failoverdomains>
                <!-- Shared resource definitions, referenced from the service
                     elements by ref="...". LVM and file-system resources are
                     named ora-NNN-MOUNT-lv and ora-NNN-MOUNT-fs, where NNN is
                     the database number. The two u001 file systems of
                     database 001 were previously named ora-u001-001-*, which
                     did not match the refs used by service oracle_001.
                     NOTE(review): the same four fsid values repeat for every
                     database; fsid is normally expected to be unique per file
                     system. Confirm whether this matters here. -->
                <resources>
                        <script file="ora_cluster.sh" name="oracle_init_script"/>
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <!-- Database 001 -->
                        <lvm lv_name="lv_a001_001_oradata" name="ora-001-a001-lv" vg_name="/dev/vg_oracle_001"/>
                        <lvm lv_name="lv_b001_001_oradata" name="ora-001-b001-lv" vg_name="/dev/vg_oracle_001"/>
                        <lvm lv_name="lv_u001_001_oradata" name="ora-001-u001-lv" vg_name="/dev/vg_oracle_001"/>
                        <lvm lv_name="lv_u001_001_admin" name="ora-001-u001-admin-lv" vg_name="/dev/vg_oracle_001"/>
                        <fs device="/dev/vg_oracle_001/lv_a001_001_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/001" name="ora-001-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_001/lv_b001_001_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/001" name="ora-001-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_001/lv_u001_001_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/001" name="ora-001-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_001/lv_u001_001_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/001" name="ora-001-u001-admin-fs" self_fence="0"/>
                        <!-- Database 002 -->
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <lvm lv_name="lv_a001_002_oradata" name="ora-002-a001-lv" vg_name="/dev/vg_oracle_002"/>
                        <lvm lv_name="lv_b001_002_oradata" name="ora-002-b001-lv" vg_name="/dev/vg_oracle_002"/>
                        <lvm lv_name="lv_u001_002_oradata" name="ora-002-u001-lv" vg_name="/dev/vg_oracle_002"/>
                        <lvm lv_name="lv_u001_002_admin" name="ora-002-u001-admin-lv" vg_name="/dev/vg_oracle_002"/>
                        <fs device="/dev/vg_oracle_002/lv_a001_002_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/002" name="ora-002-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_002/lv_b001_002_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/002" name="ora-002-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_002/lv_u001_002_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/002" name="ora-002-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_002/lv_u001_002_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/002" name="ora-002-u001-admin-fs" self_fence="0"/>
                        <!-- Database 003 -->
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <lvm lv_name="lv_a001_003_oradata" name="ora-003-a001-lv" vg_name="/dev/vg_oracle_003"/>
                        <lvm lv_name="lv_b001_003_oradata" name="ora-003-b001-lv" vg_name="/dev/vg_oracle_003"/>
                        <lvm lv_name="lv_u001_003_oradata" name="ora-003-u001-lv" vg_name="/dev/vg_oracle_003"/>
                        <lvm lv_name="lv_u001_003_admin" name="ora-003-u001-admin-lv" vg_name="/dev/vg_oracle_003"/>
                        <fs device="/dev/vg_oracle_003/lv_a001_003_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/003" name="ora-003-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_003/lv_b001_003_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/003" name="ora-003-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_003/lv_u001_003_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/003" name="ora-003-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_003/lv_u001_003_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/003" name="ora-003-u001-admin-fs" self_fence="0"/>
                        <!-- Database 004 -->
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <lvm lv_name="lv_a001_004_oradata" name="ora-004-a001-lv" vg_name="/dev/vg_oracle_004"/>
                        <lvm lv_name="lv_b001_004_oradata" name="ora-004-b001-lv" vg_name="/dev/vg_oracle_004"/>
                        <lvm lv_name="lv_u001_004_oradata" name="ora-004-u001-lv" vg_name="/dev/vg_oracle_004"/>
                        <lvm lv_name="lv_u001_004_admin" name="ora-004-u001-admin-lv" vg_name="/dev/vg_oracle_004"/>
                        <fs device="/dev/vg_oracle_004/lv_a001_004_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/004" name="ora-004-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_004/lv_b001_004_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/004" name="ora-004-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_004/lv_u001_004_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/004" name="ora-004-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_004/lv_u001_004_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/004" name="ora-004-u001-admin-fs" self_fence="0"/>
                        <!-- Database 005 -->
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <lvm lv_name="lv_a001_005_oradata" name="ora-005-a001-lv" vg_name="/dev/vg_oracle_005"/>
                        <lvm lv_name="lv_b001_005_oradata" name="ora-005-b001-lv" vg_name="/dev/vg_oracle_005"/>
                        <lvm lv_name="lv_u001_005_oradata" name="ora-005-u001-lv" vg_name="/dev/vg_oracle_005"/>
                        <lvm lv_name="lv_u001_005_admin" name="ora-005-u001-admin-lv" vg_name="/dev/vg_oracle_005"/>
                        <fs device="/dev/vg_oracle_005/lv_a001_005_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/005" name="ora-005-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_005/lv_b001_005_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/005" name="ora-005-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_005/lv_u001_005_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/005" name="ora-005-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_005/lv_u001_005_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/005" name="ora-005-u001-admin-fs" self_fence="0"/>
                        <!-- Database 006 -->
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <lvm lv_name="lv_a001_006_oradata" name="ora-006-a001-lv" vg_name="/dev/vg_oracle_006"/>
                        <lvm lv_name="lv_b001_006_oradata" name="ora-006-b001-lv" vg_name="/dev/vg_oracle_006"/>
                        <lvm lv_name="lv_u001_006_oradata" name="ora-006-u001-lv" vg_name="/dev/vg_oracle_006"/>
                        <lvm lv_name="lv_u001_006_admin" name="ora-006-u001-admin-lv" vg_name="/dev/vg_oracle_006"/>
                        <fs device="/dev/vg_oracle_006/lv_a001_006_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/006" name="ora-006-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_006/lv_b001_006_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/006" name="ora-006-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_006/lv_u001_006_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/006" name="ora-006-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_006/lv_u001_006_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/006" name="ora-006-u001-admin-fs" self_fence="0"/>
                        <!-- Database 007 -->
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <lvm lv_name="lv_a001_007_oradata" name="ora-007-a001-lv" vg_name="/dev/vg_oracle_007"/>
                        <lvm lv_name="lv_b001_007_oradata" name="ora-007-b001-lv" vg_name="/dev/vg_oracle_007"/>
                        <lvm lv_name="lv_u001_007_oradata" name="ora-007-u001-lv" vg_name="/dev/vg_oracle_007"/>
                        <lvm lv_name="lv_u001_007_admin" name="ora-007-u001-admin-lv" vg_name="/dev/vg_oracle_007"/>
                        <fs device="/dev/vg_oracle_007/lv_a001_007_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/007" name="ora-007-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_007/lv_b001_007_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/007" name="ora-007-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_007/lv_u001_007_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/007" name="ora-007-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_007/lv_u001_007_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/007" name="ora-007-u001-admin-fs" self_fence="0"/>
                        <!-- Database 008 -->
                        <ip address="xxx.xxx.xxx.xxx" monitor_link="1"/>
                        <lvm lv_name="lv_a001_008_oradata" name="ora-008-a001-lv" vg_name="/dev/vg_oracle_008"/>
                        <lvm lv_name="lv_b001_008_oradata" name="ora-008-b001-lv" vg_name="/dev/vg_oracle_008"/>
                        <lvm lv_name="lv_u001_008_oradata" name="ora-008-u001-lv" vg_name="/dev/vg_oracle_008"/>
                        <lvm lv_name="lv_u001_008_admin" name="ora-008-u001-admin-lv" vg_name="/dev/vg_oracle_008"/>
                        <fs device="/dev/vg_oracle_008/lv_a001_008_oradata" force_fsck="0" force_unmount="1" fsid="50280" fstype="ext3" mountpoint="/a001/oradata/008" name="ora-008-a001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_008/lv_b001_008_oradata" force_fsck="0" force_unmount="1" fsid="57077" fstype="ext3" mountpoint="/b001/oradata/008" name="ora-008-b001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_008/lv_u001_008_oradata" force_fsck="0" force_unmount="1" fsid="31925" fstype="ext3" mountpoint="/u001/oradata/008" name="ora-008-u001-fs" self_fence="0"/>
                        <fs device="/dev/vg_oracle_008/lv_u001_008_admin" force_fsck="0" force_unmount="1" fsid="14919" fstype="ext3" mountpoint="/u001/app/oracle/admin/008" name="ora-008-u001-admin-fs" self_fence="0"/>
                </resources>
                <!-- oracle_009: only an IP resource is attached in this listing. -->
                <service name="oracle_009" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                </service>
                <!-- oracle_010: only an IP resource is attached in this listing. -->
                <service name="oracle_010" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                </service>
                <!-- oracle_011: only an IP resource is attached in this listing.
                     The redacted ref placeholder is written in the same form as
                     the ip address placeholders so that it matches a declared
                     ip resource. -->
                <service name="oracle_011" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                </service>
                <!-- oracle_601: an IP resource plus the shared init script. -->
                <service name="oracle_601" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_001: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_001" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-001-a001-lv">
                                <fs ref="ora-001-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-001-b001-lv">
                                <fs ref="ora-001-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-001-u001-lv">
                                <fs ref="ora-001-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-001-u001-admin-lv">
                                <fs ref="ora-001-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_002: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_002" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-002-a001-lv">
                                <fs ref="ora-002-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-002-b001-lv">
                                <fs ref="ora-002-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-002-u001-lv">
                                <fs ref="ora-002-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-002-u001-admin-lv">
                                <fs ref="ora-002-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_003: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_003" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-003-a001-lv">
                                <fs ref="ora-003-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-003-b001-lv">
                                <fs ref="ora-003-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-003-u001-lv">
                                <fs ref="ora-003-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-003-u001-admin-lv">
                                <fs ref="ora-003-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_004: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_004" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-004-a001-lv">
                                <fs ref="ora-004-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-004-b001-lv">
                                <fs ref="ora-004-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-004-u001-lv">
                                <fs ref="ora-004-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-004-u001-admin-lv">
                                <fs ref="ora-004-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_005: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_005" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-005-a001-lv">
                                <fs ref="ora-005-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-005-b001-lv">
                                <fs ref="ora-005-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-005-u001-lv">
                                <fs ref="ora-005-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-005-u001-admin-lv">
                                <fs ref="ora-005-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_006: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_006" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-006-a001-lv">
                                <fs ref="ora-006-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-006-b001-lv">
                                <fs ref="ora-006-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-006-u001-lv">
                                <fs ref="ora-006-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-006-u001-admin-lv">
                                <fs ref="ora-006-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_007: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_007" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-007-a001-lv">
                                <fs ref="ora-007-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-007-b001-lv">
                                <fs ref="ora-007-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-007-u001-lv">
                                <fs ref="ora-007-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-007-u001-admin-lv">
                                <fs ref="ora-007-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
                <!-- oracle_008: service IP, four LVM volumes (each with its file
                     system nested inside it), then the shared init script. -->
                <service name="oracle_008" domain="rhcstestdomain" autostart="1" exclusive="0"
                         recovery="restart" max_restarts="0" restart_expire_time="0">
                        <ip ref="xxx.xxx.xxx.xxx"/>
                        <lvm ref="ora-008-a001-lv">
                                <fs ref="ora-008-a001-fs"/>
                        </lvm>
                        <lvm ref="ora-008-b001-lv">
                                <fs ref="ora-008-b001-fs"/>
                        </lvm>
                        <lvm ref="ora-008-u001-lv">
                                <fs ref="ora-008-u001-fs"/>
                        </lvm>
                        <lvm ref="ora-008-u001-admin-lv">
                                <fs ref="ora-008-u001-admin-fs"/>
                        </lvm>
                        <script ref="oracle_init_script"/>
                </service>
        </rm>
        <!-- Totem protocol settings; secauth is off.
             NOTE(review): consensus (4800) is far below token (100000); later
             totem documentation expects consensus to be at least 1.2 x token.
             Confirm what applies to this release. -->
        <totem consensus="4800" join="60" token="100000" token_retransmits_before_loss_const="20" secauth="off"/>
        <!-- Quorum disk on /dev/mapper/qdisk, contributing one vote; no
             heuristics are defined inside the element. -->
        <quorumd interval="2" min_score="1" tko="15" votes="1"
                 device="/dev/mapper/qdisk" status_file="/var/log/qdisk.status"
                 log_level="7" log_facility="local5">
        </quorumd>
        <!-- Debug-level logging to syslog facility local5. -->
        <logging syslog_facility="local5" syslog_priority="debug" to_syslog="on" debug="on"/>
<!-- Close the root element: the listing ended without it, leaving the
     document not well-formed. -->
</cluster>
---------------------------------------------------------------------------------------------


Greg Charles
Mid Range Systems
gcharles at ups.com

-----Original Message-----
From: linux-cluster-bounces at redhat.com [mailto:linux-cluster-bounces at redhat.com] On Behalf Of Juan Ramon Martin Blanco
Sent: Tuesday, October 26, 2010 9:27 AM
To: linux clustering
Subject: Re: [Linux-cluster] Rebooting the Master Node in an RHCS Cluster

On Tue, Oct 26, 2010 at 2:52 PM,  <gcharles at ups.com> wrote:
> Hello,
>
> Was wondering if anyone else has ever run into this.  We have a
> three-node RHCS cluster:
>
> Three Proliant DL380-G6s, 48G memory
> Dual network, power, QLogic HBAs for redundancy
> EMC SAN
> RHEL 5.5, kernel 2.6.18-194.el5
>
> All three in an RHCS cluster, 12 Oracle database services.  The
> cluster itself runs fine under normal conditions, and all failovers
> function as expected.  There is only one failover domain configured,
> and all three nodes are members of that domain.  Four of the Oracle
> database services contain
> GFS2 file systems; the rest are ext3.
>
> The problem is when we attempt a controlled shutdown of the current
> master node.  We have tested in the following situations:
>
> 1.  Node 1 is the current master and not running any services.  Node 2
> is also not running any services.  Node 3 is running all 12 services.
> We hard-fail node 1 (by logging into the ILO and clicking on "Reset"
> in power management) and node 2 immediately takes over the master
> role and the services stay where they are and continue to function.
> I believe this is the expected behavior.
>
> 2.  Node 1 is the current master and not running any services.  Three
> services are on node 2, and node 3 is running the rest.  Again, we
> hard-fail node 1 as described above and node 2 assumes the master role
> and the services stay where they are and continue to function.
>
> 3.  Repeating the same steps as above; node 1 is the master and not
> running any services, three services on node 2 and the rest on node
> three.  This time we perform a controlled shutdown of node 1 to
> "properly" remove it from the cluster (let's say we're doing a rolling
> patch of the OS on the nodes) with the following steps on the master node:
>  - Unmount any GFS file systems.
>  - service rgmanager stop; service gfs2 stop; service gfs stop
> (clustat shows node1 Online but no rgmanager, as expected)
>  - fence_tool leave    (this removes node 1 from the fence group in
> the hopes that the other nodes don't try to fence it as it is
> rebooting)
>  - service clvmd stop
>  - cman_tool leave remove
>  - service qdiskd stop
>  - shutdown
> Everything appears normal until we execute the 'cman_tool leave
> remove'.  At that point the cluster log on node 2 and node 3 shows
> "Lost contact with quorum device" (we expect that) but also shows "Emergency stop of services"
> for all 12 services.  While access to the quorum device is restored
> almost immediately (node 2 takes over the master role), rgmanager is
> temporarily unavailable on nodes 2 and 3 while the cluster basically
> reconfigures itself, restarting all 12 services.  Eventually all 12
> services properly restart (not necessarily on the original node they
> were on) and when node 1 finishes rebooting, it properly rejoins
> itself to the cluster.  Node 2 retains itself as Master.
>
> If I do the same tests as above and reboot a node that is NOT the
> master, the services remain where they are and the cluster does not
> reconfigure itself or restart any services.
>
> My questions are: Why does the cluster reconfigure itself and restart
> ALL services regardless of what node they are on when I do a
> controlled shutdown of the current Master node?  Do I have to
> hard-reset the Master node in an RHCS cluster so the remaining
> services don't get restarted?  Why does the cluster completely
> reconfigure itself when the Master node is 'properly' removed?
>
Hi, could you please show us your cluster.conf?

Regards,
Juanra
> Thanks for your help, and any suggestions would be appreciated.
>
> Greg Charles
> Mid Range Systems
>
> gcharles at ups.com
>
>
> --
> Linux-cluster mailing list
> Linux-cluster at redhat.com
> https://www.redhat.com/mailman/listinfo/linux-cluster
>

--
Linux-cluster mailing list
Linux-cluster at redhat.com
https://www.redhat.com/mailman/listinfo/linux-cluster




More information about the Linux-cluster mailing list