[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] Cluster Project branch, master, updated. cluster-2.99.05-64-gbcf6459



This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Cluster Project".

http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=bcf64592c09e6dae7ed0a8d4310c4d267a20fce8

The branch, master has been updated
       via  bcf64592c09e6dae7ed0a8d4310c4d267a20fce8 (commit)
      from  a2699239ed1ba3537865b5dcbeb160bf3d5ecfc9 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
commit bcf64592c09e6dae7ed0a8d4310c4d267a20fce8
Author: David Teigland <teigland redhat com>
Date:   Thu Jul 10 16:27:14 2008 -0500

    fenced/dlm_controld: fix quorum waiting
    
    Fix how fenced and dlm_controld check for quorum, and how they verify
    that quorum is adjusted for all the necessary failures.
    
    Fix how dlm_controld creates configfs entries for existing cman members.
    
    Signed-off-by: David Teigland <teigland redhat com>

-----------------------------------------------------------------------

Summary of changes:
 fence/fenced/cpg.c               |   17 +++++++++++++----
 group/dlm_controld/action.c      |    3 +++
 group/dlm_controld/cpg.c         |   19 ++++++++++++-------
 group/dlm_controld/member_cman.c |    3 ---
 4 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c
index c115cc9..3bc4b6a 100644
--- a/fence/fenced/cpg.c
+++ b/fence/fenced/cpg.c
@@ -495,10 +495,14 @@ static int check_quorum_done(struct fd *fd)
 	struct node_history *node;
 	int wait_count = 0;
 
-	if (!cman_quorate) {
-		log_debug("check_quorum %d", cman_quorate);
-		return 0;
-	}
+	/* We don't want to trust the cman_quorate value until we know
+	   that cman has seen the same nodes fail that we have.  So, we
+	   first make sure that all nodes we've seen fail are also
+	   failed in cman, then we can just check cman_quorate.  This
+	   assumes that we'll get to this function to do all the checks
+	   before any of the failed nodes can actually rejoin and become
+	   cman members again (if that assumption doesn't hold, perhaps
+	   do something with timestamps of join/fail). */
 
 	list_for_each_entry(node, &fd->node_history, list) {
 		if (!node->check_quorum)
@@ -516,6 +520,11 @@ static int check_quorum_done(struct fd *fd)
 	if (wait_count)
 		return 0;
 
+	if (!cman_quorate) {
+		log_debug("check_quorum not quorate");
+		return 0;
+	}
+
 	log_debug("check_quorum done");
 	return 1;
 }
diff --git a/group/dlm_controld/action.c b/group/dlm_controld/action.c
index dea521a..11f6b07 100644
--- a/group/dlm_controld/action.c
+++ b/group/dlm_controld/action.c
@@ -781,6 +781,9 @@ int setup_configfs(void)
 	if (rv < 0)
 		return rv;
 
+	/* add configfs entries for existing nodes */
+	cman_statechange();
+
 	/* the kernel has its own defaults for these values which we
 	   don't want to change unless these have been set; -1 means
 	   they have not been set on command line or config file */
diff --git a/group/dlm_controld/cpg.c b/group/dlm_controld/cpg.c
index f647b1b..8d01d64 100644
--- a/group/dlm_controld/cpg.c
+++ b/group/dlm_controld/cpg.c
@@ -344,7 +344,10 @@ static void node_history_fail(struct lockspace *ls, int nodeid,
 	if (cfgd_enable_fencing && !node->add_time)
 		node->check_fencing = 1;
 
-	if (cfgd_enable_quorum)
+	/* fenced will take care of making sure the quorum value
+	   is adjusted for all the failures */
+
+	if (cfgd_enable_quorum && !cfgd_enable_fencing)
 		node->check_quorum = 1;
 
 	node->check_fs = 1;
@@ -405,8 +408,6 @@ static int check_fencing_done(struct lockspace *ls)
 	return 1;
 }
 
-/* wait for cman to see all the same nodes failed, and to say there's quorum */
-
 static int check_quorum_done(struct lockspace *ls)
 {
 	struct node *node;
@@ -415,10 +416,9 @@ static int check_quorum_done(struct lockspace *ls)
 	if (!cfgd_enable_quorum)
 		return 1;
 
-	if (!cman_quorate) {
-		log_group(ls, "check_quorum %d", cman_quorate);
-		return 0;
-	}
+	/* wait for cman to see all the same nodes failed, so we know that
+	   cman_quorate is adjusted for the same failures we've seen
+	   (see comment in fenced about the assumption here) */
 
 	list_for_each_entry(node, &ls->node_history, list) {
 		if (!node->check_quorum)
@@ -436,6 +436,11 @@ static int check_quorum_done(struct lockspace *ls)
 	if (wait_count)
 		return 0;
 
+	if (!cman_quorate) {
+		log_group(ls, "check_quorum not quorate");
+		return 0;
+	}
+
 	log_group(ls, "check_quorum done");
 	return 1;
 }
diff --git a/group/dlm_controld/member_cman.c b/group/dlm_controld/member_cman.c
index 0b925e6..c191556 100644
--- a/group/dlm_controld/member_cman.c
+++ b/group/dlm_controld/member_cman.c
@@ -198,9 +198,6 @@ int setup_cman(void)
 	memset(&old_nodes, 0, sizeof(old_nodes));
 	cman_node_count = 0;
 	memset(&cman_nodes, 0, sizeof(cman_nodes));
-
-	/* add configfs entries for existing nodes */
-	statechange();
  out:
 	return fd;
 }


hooks/post-receive
--
Cluster Project


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]