[Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/score.c
lhh at sourceware.org
lhh at sourceware.org
Tue Mar 20 19:36:15 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: lhh at sourceware.org 2007-03-20 19:36:15
Modified files:
cman/man : qdisk.5
cman/qdisk : main.c score.c
Log message:
Fix #220211, pass 2: ensure timings are accurate and provide multi-master conflict resolution
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.7&r2=1.1.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
--- cluster/cman/man/qdisk.5 2007/02/21 20:19:46 1.1.2.4
+++ cluster/cman/man/qdisk.5 2007/03/20 19:36:14 1.1.2.5
@@ -216,7 +216,7 @@
\fItko_up\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must be seen in order to be declared
-online. Default is \fBfloor(tko/2)\fP.
+online. Default is \fBfloor(tko/3)\fP.
.in 9
\fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
\fImaster_wait\fP\fB="\fPX\fB"\fP
.in 12
This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid. Default is \fBfloor(tko/3)\fP.
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid. Default is \fBfloor(tko/2)\fP.
+This can not be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
.in 9
\fIvotes\fP\fB="\fP3\fB"\fP
--- cluster/cman/qdisk/main.c 2007/02/21 20:19:43 1.1.2.7
+++ cluster/cman/qdisk/main.c 2007/03/20 19:36:14 1.1.2.8
@@ -381,22 +381,26 @@
Returns
*/
int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
{
int x;
int masters = 0;
int ret = 0;
+ if (count)
+ *count = 0;
*low_id = ctx->qc_my_id;
for (x = 0; x < max; x++) {
/* See if this one's a master */
if (ni[x].ni_state >= S_RUN &&
- ni[x].ni_status.ps_state == S_MASTER) {
+ ni[x].ni_status.ps_state == S_MASTER &&
+ ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
if (!ret)
ret = ni[x].ni_status.ps_nodeid;
++masters;
+ continue;
}
/* See if it's us... */
@@ -424,11 +428,8 @@
*low_id = ni[x].ni_status.ps_nodeid;
}
- if (masters > 1) {
- clulog(LOG_CRIT,
- "Critical Error: More than one master found!\n");
- /* XXX Handle this how? */
- }
+ if (count)
+ *count = masters;
/*
else if (masters == 1) {
printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
{
disk_msg_t msg = {0, 0, 0};
int low_id, bid_pending = 0, score, score_max, score_req,
- upgrade = 0;
+ upgrade = 0, count;
memb_mask_t mask, master_mask;
struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
@@ -921,11 +922,26 @@
score, score_max, score_req);
ctx->qc_status = S_RUN;
upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
}
}
/* Find master */
- ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+ ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+ /* Resolve master conflict, if one exists */
+ if (count > 1 && ctx->qc_status == S_MASTER) {
+ clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+ /* Handle just like a recent upgrade */
+ ctx->qc_status = S_RUN;
+ upgrade = ctx->qc_upgrade_wait;
+ bid_pending = 0;
+ msg.m_msg = M_NONE;
+ ++msg.m_seq;
+ }
/* Figure out what to do based on what we know */
if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
}
/* Get up-tko (transition off->online) */
- ctx->qc_tko_up = (ctx->qc_tko / 2);
+ ctx->qc_tko_up = (ctx->qc_tko / 3);
snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
/* wait this many intervals after bidding for master before
becoming Caesar */
- ctx->qc_master_wait = (ctx->qc_tko / 3);
+ ctx->qc_master_wait = (ctx->qc_tko / 2);
snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
if (ccs_get(ccsfd, query, &val) == 0) {
ctx->qc_master_wait = atoi(val);
free(val);
}
- if (ctx->qc_master_wait < 2)
- ctx->qc_master_wait = 2;
+ if (ctx->qc_master_wait <= ctx->qc_tko_up)
+ ctx->qc_master_wait = ctx->qc_tko_up + 1;
/* Get votes */
snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c 2007/02/21 20:19:43 1.1.2.3
+++ cluster/cman/qdisk/score.c 2007/03/20 19:36:14 1.1.2.4
@@ -143,7 +143,7 @@
*score = 0;
*maxscore = 0;
- printf("max = %d\n", max);
+ //printf("max = %d\n", max);
/* Allow operation w/o any heuristics */
if (!max) {
*score = *maxscore = 1;
More information about the Cluster-devel mailing list