[Linux-cluster] Some nodes not starting groupd correctly
Pete
jpeteb at gmail.com
Fri Jun 3 14:01:53 UTC 2011
Hello,
I have a startup issue with a cluster that we've set up. We have 34 HP
G7 servers running in a cluster to share one SAN resource, an HP
(LeftHand) P4500. All the servers are running RHEL 5.4. When we reboot
the cluster, a small, random number of nodes will not mount the SAN.
On inspection, the failing nodes are members of the cluster (looking
at clustat). When I run a "service cman status" on them, they say that
groupd is not running. I'm assuming that because of this, clvmd does
not run correctly (I see a "clvmd: Can't open cluster manager socket:
No such file or directory" in the messages log), so no SAN VG and no
SAN mount.
If I do a "service cman restart; service clvmd restart; mount -a" the
SAN will mount correctly.
I've created a sample cluster.conf below. It contains only 4 nodes,
but it is otherwise identical to the one on the 34-node system. We use
IPMI for the fencing, as the HP G7 systems are iLO3, and we could not
get fence_ilo to work with them.
Any help is appreciated - thanks!
--pete
<?xml version="1.0" ?>
<!-- Sample cluster configuration for the cluster named "dasCluster": four nodes
     (g2das01x to g2das04x), each fenced through its own fence_ipmilan device. -->
<cluster config_version="1" name="dasCluster">
<!-- Fence daemon delays. NOTE(review): units are not stated in this file;
     presumably seconds. Confirm against the cluster.conf documentation. -->
<fence_daemon post_fail_delay="0" post_join_delay="6"/>
<!-- Totem timing overrides. NOTE(review): values are presumably milliseconds
     (token="60000" would then be 60 s). Confirm before tuning. -->
<totem consensus="45000" join="15000" send_join="1000" token="60000"
token_retransmits_before_loss_const="100"/>
<!-- Debug logging is switched on for the CPG and CMAN loggers, with output to stderr. -->
<logging to_stderr="yes">
<logger debug="on" ident="CPG" to_stderr="yes"/>
<logger debug="on" ident="CMAN" to_stderr="yes"/>
</logging>
<!-- Node list: every node has one vote and a single fence method ("1") that
     references the fence device with the matching number (das01 to das04). -->
<clusternodes>
<clusternode name="g2das01x" nodeid="1" votes="1">
<fence>
<method name="1">
<device name="das01"/>
</method>
</fence>
</clusternode>
<clusternode name="g2das02x" nodeid="2" votes="1">
<fence>
<method name="1">
<device name="das02"/>
</method>
</fence>
</clusternode>
<clusternode name="g2das03x" nodeid="3" votes="1">
<fence>
<method name="1">
<device name="das03"/>
</method>
</fence>
</clusternode>
<clusternode name="g2das04x" nodeid="4" votes="1">
<fence>
<method name="1">
<device name="das04"/>
</method>
</fence>
</clusternode>
</clusternodes>
<!-- Explicitly assigned cluster id. -->
<cman cluster_id="881991"/>
<!-- Fence devices: one fence_ipmilan entry per node. Each ipaddr is the node
     name with an "-ilo" suffix, and lanplus is set to 1 on all of them.
     NOTE(review): the single-letter passwd values look like placeholders
     substituted for posting. Confirm; real credentials should not be published. -->
<fencedevices>
<fencedevice action="reboot" agent="fence_ipmilan" auth="password"
ipaddr="g2das01x-ilo" lanplus="1" login="admin" name="das01"
passwd="b"/>
<fencedevice action="reboot" agent="fence_ipmilan" auth="password"
ipaddr="g2das02x-ilo" lanplus="1" login="admin" name="das02"
passwd="c"/>
<fencedevice action="reboot" agent="fence_ipmilan" auth="password"
ipaddr="g2das03x-ilo" lanplus="1" login="admin" name="das03"
passwd="d"/>
<fencedevice action="reboot" agent="fence_ipmilan" auth="password"
ipaddr="g2das04x-ilo" lanplus="1" login="admin" name="das04"
passwd="e"/>
</fencedevices>
<!-- Resource manager section: no failover domains and no resources are defined. -->
<rm>
<failoverdomains/>
<resources/>
</rm>
</cluster>
More information about the Linux-cluster
mailing list