[Cluster-devel] cluster/fence fence_node/fence_node.c fence_to ...

cfeist at sourceware.org cfeist at sourceware.org
Wed Dec 20 18:14:30 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	cfeist at sourceware.org	2006-12-20 18:14:29

Modified files:
	fence/fence_node: fence_node.c 
	fence/fence_tool: fence_tool.c 
	fence/fenced   : agent.c fd.h recover.c 

Log message:
	- Fixed a bug that caused fenced to fail to execute secondary fence
	actions if the ccs connection timed out. (#219633) From jwhiter at redhat.com

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_node/fence_node.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.4&r2=1.2.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.13&r2=1.5.2.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/agent.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.6&r2=1.7.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.4&r2=1.7.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.10.2.7&r2=1.10.2.8

--- cluster/fence/fence_node/fence_node.c	2006/07/07 19:40:22	1.2.2.4
+++ cluster/fence/fence_node/fence_node.c	2006/12/20 18:14:29	1.2.2.5
@@ -34,7 +34,7 @@
 static char *prog_name;
 static int force;
 
-int dispatch_fence_agent(int cd, char *victim);
+int dispatch_fence_agent(char *victim, int force);
 
 static void print_usage(void)
 {
@@ -104,28 +104,15 @@
 	if (!victim)
 		die("no node name specified");
 
-	if (force)
-		cd = ccs_force_connect(NULL, 0);
-	else
-		cd = ccs_connect();
-
 	openlog("fence_node", LOG_PID, LOG_USER);
 
-	if (cd < 0) {
-		syslog(LOG_ERR, "cannot connect to ccs %d\n", cd);
-		goto fail;
-	}
-
-	error = dispatch_fence_agent(cd, victim);
+	error = dispatch_fence_agent(victim, force);
 	if (error)
-		goto fail_ccs;
+		goto fail;
 
 	syslog(LOG_NOTICE, "Fence of \"%s\" was successful\n", victim);
-	ccs_disconnect(cd);
 	exit(EXIT_SUCCESS);
 
- fail_ccs:
-	ccs_disconnect(cd);
  fail:
 	syslog(LOG_ERR, "Fence of \"%s\" was unsuccessful\n", victim);
 	exit(EXIT_FAILURE);
--- cluster/fence/fence_tool/fence_tool.c	2006/10/23 16:23:56	1.5.2.13
+++ cluster/fence/fence_tool/fence_tool.c	2006/12/20 18:14:29	1.5.2.14
@@ -67,7 +67,7 @@
 int cl_sock;
 char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
 
-int dispatch_fence_agent(int cd, char *victim, int in);
+int dispatch_fence_agent(char *victim, int force);
 
 
 static int check_mounted(void)
--- cluster/fence/fenced/agent.c	2006/05/09 19:42:52	1.7.2.6
+++ cluster/fence/fenced/agent.c	2006/12/20 18:14:29	1.7.2.7
@@ -274,17 +274,44 @@
 	return error;
 }
 
-int dispatch_fence_agent(int cd, char *victim)
+int dispatch_fence_agent(char *victim, int force)
 {
 	char *method = NULL, *device = NULL;
-	int num_methods, num_devices, m, d, error = -1;
+	int num_methods, num_devices, m, d, error = -1, cd;
+
+	if (force)
+		cd = ccs_force_connect(NULL, 0);
+	else {
+		while ((cd = ccs_connect()) < 0)
+			sleep(1);
+	}
+
+	if (cd < 0) {
+		syslog(LOG_ERR, "cannot connect to ccs %d\n", cd);
+		return -1;
+	}
 
 	num_methods = count_methods(cd, victim);
 
 	for (m = 0; m < num_methods; m++) {
 
 		error = get_method(cd, victim, m, &method);
-		if (error)
+
+		/* if the connection timed out while we were trying 
+		 * to fence, try to open the connection again
+		 */
+		if (error == -EBADR) {
+			syslog(LOG_INFO, "ccs connection timed out, "
+				"retrying\n");
+
+			while ((cd = ccs_connect()) < 0)
+				sleep(1);
+			
+			error = get_method(cd, victim, m, &method);
+
+			if (error)
+				continue;
+		} else if (error)
 			continue;
 
 		/* if num_devices is zero we should return an error */
@@ -313,6 +340,8 @@
 			break;
 	}
 
+	ccs_disconnect(cd);
+
 	return error;
 }
 
--- cluster/fence/fenced/fd.h	2005/02/24 07:06:09	1.7.2.4
+++ cluster/fence/fenced/fd.h	2006/12/20 18:14:29	1.7.2.5
@@ -173,6 +173,6 @@
 void do_recovery(fd_t *fd, struct cl_service_event *ev,
 		 struct cl_cluster_node *cl_nodes);
 void do_recovery_done(fd_t *fd);
-int dispatch_fence_agent(int cd, char *victim);
+int dispatch_fence_agent(char *victim, int force);
 
 #endif				/*  __FD_DOT_H__  */
--- cluster/fence/fenced/recover.c	2005/04/20 05:51:15	1.10.2.7
+++ cluster/fence/fenced/recover.c	2006/12/20 18:14:29	1.10.2.8
@@ -12,7 +12,6 @@
 ******************************************************************************/
 
 #include "fd.h"
-#include "ccs.h"
 
 /* Fencing recovery algorithm
 
@@ -429,7 +428,7 @@
 	fd_node_t *node;
 	char *master_name;
 	uint32_t master;
-	int error, cd;
+	int error;
 
 	master = find_master_nodeid(fd, &master_name);
 
@@ -441,9 +440,6 @@
 
 	delay_fencing(fd, ev);
 
-	while ((cd = ccs_connect()) < 0)
-		sleep(1);
-
 	while (!list_empty(&fd->victims)) {
 		node = list_entry(fd->victims.next, fd_node_t, list);
 
@@ -457,7 +453,7 @@
 		log_debug("fencing node %s", node->name);
 		syslog(LOG_INFO, "fencing node \"%s\"", node->name);
 
-		error = dispatch_fence_agent(cd, node->name);
+		error = dispatch_fence_agent(node->name, 0);
 
 		syslog(LOG_INFO, "fence \"%s\" %s", node->name,
 		       error ? "failed" : "success");
@@ -468,8 +464,6 @@
 		}
 		sleep(5);
 	}
-
-	ccs_disconnect(cd);
 }
 
 static void add_victims(fd_t *fd, struct cl_service_event *ev,




More information about the Cluster-devel mailing list