[Linux-cluster] failover domain not working as expected

Terry td3201 at gmail.com
Thu Jan 8 12:07:13 UTC 2009


Hello,

I have an NFS cluster that isn't quite working as expected.  I intend
to distribute several volumes between both nodes of my cluster and in
the event one node goes down, the other picks up the full load.  I had
a situation where I had to reboot one of the nodes.  I did so and all
the services were restarted on the other node, which is great.  Then,
after a minute or so, some of the services stopped and stayed stopped.
Here are some relevant parts of my config. Does anyone see anything
unusual?

        <fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
        <clusternodes>
                <clusternode name="omadvnfs01b" nodeid="1" votes="1">
                        <fence>
                                <method name="1">
                                        <device name="omadvnfs01b-drac"/>
                                </method>
                        </fence>
                </clusternode>
                <clusternode name="omadvnfs01a" nodeid="2" votes="1">
                        <fence>
                                <method name="1">
                                        <device name="omadvnfs01a-drac"/>
                                </method>
                        </fence>
                </clusternode>
        </clusternodes>
        <cman expected_votes="1" two_node="1"/>
        <fencedevices>
                <fencedevice agent="fence_drac" ipaddr="10.98.1.211"
login="root" name="omadvnfs01a-drac" passwd="foobar"/>
                <fencedevice agent="fence_drac" ipaddr="10.98.1.212"
login="root" name="omadvnfs01b-drac" passwd="foobar"/>
        </fencedevices>
        <rm>
                <failoverdomains>
                        <failoverdomain name="fd_omadvnfs01a-nfs"
nofailback="0" ordered="1" restricted="0">
                                <failoverdomainnode name="omadvnfs01a"
priority="1"/>
                                <failoverdomainnode name="omadvnfs01b"
priority="2"/>
                        </failoverdomain>
                        <failoverdomain name="fd_omadvnfs01b-nfs"
nofailback="0" ordered="1" restricted="0">
                                <failoverdomainnode name="omadvnfs01b"
priority="1"/>
                                <failoverdomainnode name="omadvnfs01a"
priority="2"/>
                        </failoverdomain>
                </failoverdomains>
                <resources>

                <service autostart="1" domain="fd_omadvnfs01a-nfs"
exclusive="0" name="omadvnfs01-nfs-a" recovery="relocate">
                        <ip ref="10.199.1.113"/>
                        <fs fstype="ext3" ref="omadvnfs01-data01a">
                                <nfsexport ref="data01a">
                                        <nfsclient ref="omadvdss01a"/>
                                        <nfsclient ref="omadvdss01b"/>
                                        <nfsclient ref="omadvdss01c"/>
                                </nfsexport>
                        </fs>
                </service>
                <service autostart="1" domain="fd_omadvnfs01b-nfs"
exclusive="0" name="omadvnfs01-nfs-b" recovery="relocate">
                        <ip ref="10.199.1.114"/>
                        <fs fstype="ext3" ref="omadvnfs01-data01b">
                                <nfsexport ref="data01b">
                                        <nfsclient ref="omadvdss01a"/>
                                        <nfsclient ref="omadvdss01b"/>
                                        <nfsclient ref="omadvdss01c"/>
                                </nfsexport>
                        </fs>
                </service>




More information about the Linux-cluster mailing list