[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] cluster/fence/fenced fd.h main.c recover.c



CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	lhh sourceware org	2007-01-29 20:30:25

Modified files:
	fence/fenced   : fd.h main.c recover.c 

Log message:
	Add manual override for fenced to RHEL5 branch; patch is a merge from HEAD branch

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/fd.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.24.2.1&r2=1.24.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/main.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.38.2.2&r2=1.38.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/fenced/recover.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.25.2.1&r2=1.25.2.2

--- cluster/fence/fenced/fd.h	2006/12/01 19:17:46	1.24.2.1
+++ cluster/fence/fenced/fd.h	2007/01/29 20:30:25	1.24.2.2
@@ -53,6 +53,7 @@
 #define DEFAULT_POST_JOIN_DELAY	6
 #define DEFAULT_POST_FAIL_DELAY	0
 #define DEFAULT_CLEAN_START	0
+#define DEFAULT_OVERRIDE_PATH	"/var/run/cluster/fenced_override"
 #define FENCED_SOCK_PATH	"fenced_socket"
 
 extern char			*prog_name;
@@ -131,10 +132,12 @@
 {
 	int post_join_delay;
 	int post_fail_delay;
+	char *override_path;
 	int8_t clean_start;
 	int8_t post_join_delay_opt;
 	int8_t post_fail_delay_opt;
 	int8_t clean_start_opt;
+	int8_t override_path_opt;
 };
 
 #define FDFL_RUN        (0)
--- cluster/fence/fenced/main.c	2006/12/01 15:27:50	1.38.2.2
+++ cluster/fence/fenced/main.c	2007/01/29 20:30:25	1.38.2.3
@@ -15,7 +15,7 @@
 #include "ccs.h"
 #include "copyright.cf"
 
-#define OPTION_STRING			("cj:f:Dn:hVSw")
+#define OPTION_STRING			("cj:f:Dn:O:hVSw")
 #define LOCKFILE_NAME			"/var/run/fenced.pid"
 
 struct client {
@@ -145,6 +145,23 @@
 			free(str);
 	}
 
+	if (comline.override_path_opt == FALSE) {
+		str = NULL;
+		memset(path, 0, 256);
+		sprintf(path, "/cluster/fence_daemon/@override_path");
+
+		error = ccs_get(cd, path, &str);
+		if (!error)
+			/* XXX These are not explicitly freed on exit; if
+			   we decide to make fenced handle SIGHUP at a later
+			   time, we will need to free this. */
+			comline.override_path = strdup(str);
+		else
+			comline.override_path = strdup(DEFAULT_OVERRIDE_PATH);
+		if (str)
+			free(str);
+	}
+
 	log_debug("delay post_join %ds post_fail %ds",
 		  comline.post_join_delay, comline.post_fail_delay);
 
@@ -500,6 +517,8 @@
 				   DEFAULT_POST_JOIN_DELAY);
 	printf("  -f <secs>	Post-fail fencing delay (default %d)\n",
 				   DEFAULT_POST_FAIL_DELAY);
+	printf("  -O <path>    Override path (default %s)\n",
+	       			   DEFAULT_OVERRIDE_PATH);
 	printf("  -D	       Enable debugging code and don't fork\n");
 	printf("  -h	       Print this help, then exit\n");
 	printf("  -V	       Print program version information, then exit\n");
@@ -547,6 +566,8 @@
 	int cont = TRUE;
 	int optchar;
 
+	comline->override_path_opt = FALSE;
+	comline->override_path = NULL;
 	comline->post_join_delay_opt = FALSE;
 	comline->post_fail_delay_opt = FALSE;
 	comline->clean_start_opt = FALSE;
@@ -571,6 +592,11 @@
 			comline->post_fail_delay_opt = TRUE;
 			break;
 
+		case 'O':
+			comline->override_path = strdup(optarg);
+			comline->override_path_opt = TRUE;
+			break;
+
 		case 'D':
 			daemon_debug_opt = TRUE;
 			break;
--- cluster/fence/fenced/recover.c	2006/12/01 19:17:46	1.25.2.1
+++ cluster/fence/fenced/recover.c	2007/01/29 20:30:25	1.25.2.2
@@ -13,6 +13,9 @@
 
 #include "fd.h"
 #include "ccs.h"
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/select.h>
 
 extern int our_nodeid;
 extern commandline_t comline;
@@ -212,6 +215,79 @@
 	return num_victims;
 }
 
+static inline void close_override(int *fd, char *path)	/* unlink path, close *fd if it is open, then reset *fd to -1 */
+{
+	unlink(path);		/* always attempted, even when *fd is negative; the result is ignored */
+	if (fd && *fd >= 0)	/* a negative *fd (open_override's failure value) has nothing to close */
+		close(*fd);
+	*fd = -1;		/* NOTE(review): stored without the NULL guard used two lines up; the visible caller passes &override */
+}
+
+static int open_override(char *path)	/* create a FIFO at path and return a non-blocking read descriptor for it, or -1 */
+{
+	int ret;
+	mode_t om;		/* umask in effect on entry; put back right after mkfifo */
+
+	om = umask(077);	/* clear all group/other permission bits for the mkfifo call */
+	ret = mkfifo(path, (S_IRUSR | S_IWUSR));	/* mode 0600: owner read/write only */
+	umask(om);
+
+	if (ret < 0)		/* any mkfifo failure gives up (per POSIX this includes an already-existing path) */
+		return -1;
+        return open(path, O_RDONLY | O_NONBLOCK);	/* open()'s result is passed through; O_NONBLOCK presumably avoids waiting for a writer */
+}
+
+static int check_override(int ofd, char *nodename, int timeout)	/* wait up to timeout seconds for nodename to arrive on ofd */
+{	/* returns 1 on a case-insensitive name match, 0 on timeout, no match or unusable arguments, -1 if select() or read() fails */
+	char buf[128];		/* at most 127 bytes of input are examined per call */
+	fd_set rfds;
+	struct timeval tv = {0, 0};
+	int ret, x;
+
+	if (ofd < 0 || !nodename || !strlen(nodename)) {	/* no descriptor to poll, or no name to compare against */
+		sleep(timeout);	/* still pause for the full timeout before reporting "no override" */
+		return 0;
+	}
+
+	FD_ZERO(&rfds);
+	FD_SET(ofd, &rfds);	/* ofd is the only descriptor watched */
+	tv.tv_usec = 0;		/* already 0 from the initializer */
+	tv.tv_sec = timeout;
+
+	ret = select(ofd + 1, &rfds, NULL, NULL, &tv);	/* wait for read readiness only */
+	if (ret < 0) {
+		syslog(LOG_ERR, "select: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (ret == 0)		/* timed out with nothing readable */
+		return 0;
+
+	memset(buf, 0, sizeof(buf));	/* zero-fill so whatever is read below stays NUL-terminated */
+	ret = read(ofd, buf, sizeof(buf) - 1);	/* the last byte is never written, so it remains the terminator */
+	if (ret < 0) {
+		syslog(LOG_ERR, "read: %s\n", strerror(errno));
+		return -1;
+	}
+
+	/* truncate at the first byte below 0x20 (e.g. a trailing newline) */
+	for (x = 0; x < ret; x++) {
+		if (buf[x] < 0x20) {	/* NOTE(review): where char is signed, bytes >= 0x80 also satisfy this test */
+			buf[x] = 0;
+			break;
+		}
+	}
+
+	if (!strcasecmp(nodename, buf)) {
+		/* Case insensitive, but not as nice as, say, name_equal
+		   in the other file... */
+		return 1;
+	}
+
+	return 0;		/* read() succeeded (possibly with 0 bytes) but buf is not nodename */
+}
+
+
 /* If there are victims after a node has joined, it's a good indication that
    they may be joining the cluster shortly.  If we delay a bit they might
    become members and we can avoid fencing them.  This is only really an issue
@@ -282,6 +358,7 @@
 	fd_node_t *node;
 	char *master_name;
 	int master, error, cd;
+	int override = -1;
 
 	master = find_master_nodeid(fd, &master_name);
 
@@ -318,7 +395,22 @@
 			list_del(&node->list);
 			free(node);
 		}
-		sleep(5);
+
+		if (!comline.override_path) {
+			sleep(5);
+			continue;
+		}
+
+		/* Check for manual intervention */
+		override = open_override(comline.override_path);
+		if (check_override(override, node->name, 5) > 0) {
+			syslog(LOG_WARNING, "fence \"%s\" overridden by "
+			       "administrator intervention", node->name);
+
+			list_del(&node->list);
+			free(node);
+		}
+		close_override(&override, comline.override_path);
 	}
 
 	ccs_disconnect(cd);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]