[Cluster-devel] cluster/cman man/qdisk.5 qdisk/main.c qdisk/score.c

lhh at sourceware.org lhh at sourceware.org
Tue Mar 20 19:36:15 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	lhh at sourceware.org	2007-03-20 19:36:15

Modified files:
	cman/man       : qdisk.5 
	cman/qdisk     : main.c score.c 

Log message:
	Fix #220211, pass 2: ensure timings are accurate and provide multi-master conflict resolution

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/man/qdisk.5.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.4&r2=1.1.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.7&r2=1.1.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cman/qdisk/score.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4

--- cluster/cman/man/qdisk.5	2007/02/21 20:19:46	1.1.2.4
+++ cluster/cman/man/qdisk.5	2007/03/20 19:36:14	1.1.2.5
@@ -216,7 +216,7 @@
 \fItko_up\fP\fB="\fPX\fB"\fP
 .in 12
 This is the number of cycles a node must be seen in order to be declared
-online.  Default is \fBfloor(tko/2)\fP.
+online.  Default is \fBfloor(tko/3)\fP.
 
 .in 9
 \fIupgrade_wait\fP\fB="\fP2\fB"\fP
@@ -229,8 +229,9 @@
 \fImaster_wait\fP\fB="\fPX\fB"\fP
 .in 12
 This is the number of cycles a node must wait for votes before declaring
-itself master after making a bid.  Default is \fBfloor(tko/3)\fP. 
-This can not be less than 2 and should not exceed \fBtko\fP.
+itself master after making a bid.  Default is \fBfloor(tko/2)\fP. 
+This can not be less than 2, must be greater than tko_up, and should not
+exceed \fBtko\fP.
 
 .in 9
 \fIvotes\fP\fB="\fP3\fB"\fP
--- cluster/cman/qdisk/main.c	2007/02/21 20:19:43	1.1.2.7
+++ cluster/cman/qdisk/main.c	2007/03/20 19:36:14	1.1.2.8
@@ -381,22 +381,26 @@
   Returns
  */
 int
-master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id)
+master_exists(qd_ctx *ctx, node_info_t *ni, int max, int *low_id, int *count)
 {
 	int x;
 	int masters = 0;
 	int ret = 0;
 
+	if (count)
+		*count = 0;
 	*low_id = ctx->qc_my_id;
 
 	for (x = 0; x < max; x++) {
 
 		/* See if this one's a master */
 		if (ni[x].ni_state >= S_RUN &&
-		    ni[x].ni_status.ps_state == S_MASTER) {
+		    ni[x].ni_status.ps_state == S_MASTER &&
+		    ni[x].ni_status.ps_nodeid != ctx->qc_my_id) {
 			if (!ret)
 				ret = ni[x].ni_status.ps_nodeid;
 			++masters;
+			continue;
 		}
 
 		/* See if it's us... */
@@ -424,11 +428,8 @@
 			*low_id = ni[x].ni_status.ps_nodeid;
 	}
 
-	if (masters > 1) {
-		clulog(LOG_CRIT,
-		       "Critical Error: More than one master found!\n");
-		/* XXX Handle this how? */
-	}
+	if (count)
+		*count = masters;
 	/*
  	else if (masters == 1) {
 		printf("Node %d is the master\n", ret);
@@ -849,7 +850,7 @@
 {
 	disk_msg_t msg = {0, 0, 0};
 	int low_id, bid_pending = 0, score, score_max, score_req,
-	    upgrade = 0;
+	    upgrade = 0, count;
 	memb_mask_t mask, master_mask;
 	struct timeval maxtime, oldtime, newtime, diff, sleeptime, interval;
 
@@ -921,11 +922,26 @@
 				       score, score_max, score_req);
 				ctx->qc_status = S_RUN;
 				upgrade = ctx->qc_upgrade_wait;
+				bid_pending = 0;
+				msg.m_msg = M_NONE;
+				++msg.m_seq;
 			}
 		}
 
 		/* Find master */
-		ctx->qc_master = master_exists(ctx, ni, max, &low_id);
+		ctx->qc_master = master_exists(ctx, ni, max, &low_id, &count);
+
+		/* Resolve master conflict, if one exists */
+		if (count > 1 && ctx->qc_status == S_MASTER) {
+			clulog(LOG_WARNING, "Master conflict: abdicating\n");
+
+			/* Handle just like a recent upgrade */
+			ctx->qc_status = S_RUN;
+			upgrade = ctx->qc_upgrade_wait;
+			bid_pending = 0;
+			msg.m_msg = M_NONE;
+			++msg.m_seq;
+		}
 
 		/* Figure out what to do based on what we know */
 		if (!ctx->qc_master &&
@@ -1163,7 +1179,7 @@
 	}
 
 	/* Get up-tko (transition off->online) */
-	ctx->qc_tko_up = (ctx->qc_tko / 2);
+	ctx->qc_tko_up = (ctx->qc_tko / 3);
 	snprintf(query, sizeof(query), "/cluster/quorumd/@tko_up");
 	if (ccs_get(ccsfd, query, &val) == 0) {
 		ctx->qc_tko_up = atoi(val);
@@ -1185,14 +1201,14 @@
 
 	/* wait this many intervals after bidding for master before
 	   becoming Caesar  */
-	ctx->qc_master_wait = (ctx->qc_tko / 3);
+	ctx->qc_master_wait = (ctx->qc_tko / 2);
 	snprintf(query, sizeof(query), "/cluster/quorumd/@master_wait");
 	if (ccs_get(ccsfd, query, &val) == 0) {
 		ctx->qc_master_wait = atoi(val);
 		free(val);
 	}
-	if (ctx->qc_master_wait < 2)
-		ctx->qc_master_wait = 2;
+	if (ctx->qc_master_wait <= ctx->qc_tko_up)
+		ctx->qc_master_wait = ctx->qc_tko_up + 1;
 		
 	/* Get votes */
 	snprintf(query, sizeof(query), "/cluster/quorumd/@votes");
--- cluster/cman/qdisk/score.c	2007/02/21 20:19:43	1.1.2.3
+++ cluster/cman/qdisk/score.c	2007/03/20 19:36:14	1.1.2.4
@@ -143,7 +143,7 @@
 	*score = 0;
 	*maxscore = 0;
 	
-	printf("max = %d\n", max);
+	//printf("max = %d\n", max);
 	/* Allow operation w/o any heuristics */
 	if (!max) {
 		*score = *maxscore = 1;




More information about the Cluster-devel mailing list