[Cluster-devel] cluster/fence fence_node/fence_node.c fence_tool/fence_tool.c fenced/agent.c fenced/fd.h fenced/recover.c
cfeist at sourceware.org
cfeist at sourceware.org
Wed Dec 20 18:14:30 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: cfeist at sourceware.org 2006-12-20 18:14:29
Modified files:
fence/fence_node: fence_node.c
fence/fence_tool: fence_tool.c
fence/fenced : agent.c fd.h recover.c
Log message:
- Fixed a bug that caused fenced to fail to execute secondary fence
actions if the ccs connection timed out. (#219633) From jwhiter at redhat.com
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_node/fence_node.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.4&r2=1.2.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fence_tool/fence_tool.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.5.2.13&r2=1.5.2.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/agent.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.6&r2=1.7.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.7.2.4&r2=1.7.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.10.2.7&r2=1.10.2.8
--- cluster/fence/fence_node/fence_node.c 2006/07/07 19:40:22 1.2.2.4
+++ cluster/fence/fence_node/fence_node.c 2006/12/20 18:14:29 1.2.2.5
@@ -34,7 +34,7 @@
static char *prog_name;
static int force;
-int dispatch_fence_agent(int cd, char *victim);
+int dispatch_fence_agent(char *victim, int force);
static void print_usage(void)
{
@@ -104,28 +104,15 @@
if (!victim)
die("no node name specified");
- if (force)
- cd = ccs_force_connect(NULL, 0);
- else
- cd = ccs_connect();
-
openlog("fence_node", LOG_PID, LOG_USER);
- if (cd < 0) {
- syslog(LOG_ERR, "cannot connect to ccs %d\n", cd);
- goto fail;
- }
-
- error = dispatch_fence_agent(cd, victim);
+ error = dispatch_fence_agent(victim, force);
if (error)
- goto fail_ccs;
+ goto fail;
syslog(LOG_NOTICE, "Fence of \"%s\" was successful\n", victim);
- ccs_disconnect(cd);
exit(EXIT_SUCCESS);
- fail_ccs:
- ccs_disconnect(cd);
fail:
syslog(LOG_ERR, "Fence of \"%s\" was unsuccessful\n", victim);
exit(EXIT_FAILURE);
--- cluster/fence/fence_tool/fence_tool.c 2006/10/23 16:23:56 1.5.2.13
+++ cluster/fence/fence_tool/fence_tool.c 2006/12/20 18:14:29 1.5.2.14
@@ -67,7 +67,7 @@
int cl_sock;
char our_name[MAX_CLUSTER_MEMBER_NAME_LEN+1];
-int dispatch_fence_agent(int cd, char *victim, int in);
+int dispatch_fence_agent(char *victim, int force);
static int check_mounted(void)
--- cluster/fence/fenced/agent.c 2006/05/09 19:42:52 1.7.2.6
+++ cluster/fence/fenced/agent.c 2006/12/20 18:14:29 1.7.2.7
@@ -274,17 +274,44 @@
return error;
}
-int dispatch_fence_agent(int cd, char *victim)
+int dispatch_fence_agent(char *victim, int force)
{
char *method = NULL, *device = NULL;
- int num_methods, num_devices, m, d, error = -1;
+ int num_methods, num_devices, m, d, error = -1, cd;
+
+ if (force)
+ cd = ccs_force_connect(NULL, 0);
+ else {
+ while ((cd = ccs_connect()) < 0)
+ sleep(1);
+ }
+
+ if (cd < 0) {
+ syslog(LOG_ERR, "cannot connect to ccs %d\n", cd);
+ return -1;
+ }
num_methods = count_methods(cd, victim);
for (m = 0; m < num_methods; m++) {
error = get_method(cd, victim, m, &method);
- if (error)
+
+ /* if the connection timed out while we were trying
+ * to fence, try to open the connection again
+ */
+ if (error == -EBADR) {
+ syslog(LOG_INFO, "ccs connection timed out, "
+ "retrying\n");
+
+ while ((cd = ccs_connect()) < 0)
+ sleep(1);
+
+ error = get_method(cd, victim, m, &method);
+
+ if (error)
+ continue;
+ } else if (error)
continue;
/* if num_devices is zero we should return an error */
@@ -313,6 +340,8 @@
break;
}
+ ccs_disconnect(cd);
+
return error;
}
--- cluster/fence/fenced/fd.h 2005/02/24 07:06:09 1.7.2.4
+++ cluster/fence/fenced/fd.h 2006/12/20 18:14:29 1.7.2.5
@@ -173,6 +173,6 @@
void do_recovery(fd_t *fd, struct cl_service_event *ev,
struct cl_cluster_node *cl_nodes);
void do_recovery_done(fd_t *fd);
-int dispatch_fence_agent(int cd, char *victim);
+int dispatch_fence_agent(char *victim, int force);
#endif /* __FD_DOT_H__ */
--- cluster/fence/fenced/recover.c 2005/04/20 05:51:15 1.10.2.7
+++ cluster/fence/fenced/recover.c 2006/12/20 18:14:29 1.10.2.8
@@ -12,7 +12,6 @@
******************************************************************************/
#include "fd.h"
-#include "ccs.h"
/* Fencing recovery algorithm
@@ -429,7 +428,7 @@
fd_node_t *node;
char *master_name;
uint32_t master;
- int error, cd;
+ int error;
master = find_master_nodeid(fd, &master_name);
@@ -441,9 +440,6 @@
delay_fencing(fd, ev);
- while ((cd = ccs_connect()) < 0)
- sleep(1);
-
while (!list_empty(&fd->victims)) {
node = list_entry(fd->victims.next, fd_node_t, list);
@@ -457,7 +453,7 @@
log_debug("fencing node %s", node->name);
syslog(LOG_INFO, "fencing node \"%s\"", node->name);
- error = dispatch_fence_agent(cd, node->name);
+ error = dispatch_fence_agent(node->name, 0);
syslog(LOG_INFO, "fence \"%s\" %s", node->name,
error ? "failed" : "success");
@@ -468,8 +464,6 @@
}
sleep(5);
}
-
- ccs_disconnect(cd);
}
static void add_victims(fd_t *fd, struct cl_service_event *ev,
More information about the Cluster-devel
mailing list