[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [Linux-cluster] CS4 Update 2 / is this problem fix more recent update ?



On Fri, 2006-08-04 at 12:44 +0200, Alain Moulle wrote:
> Hi Ron,
> 
> could you provide me the defects numbers and/or linked patches ?

Here's the current list of pending fixes:

http://bugzilla.redhat.com/bugzilla/buglist.cgi?component=rgmanager&bug_status=MODIFIED&bug_status=FAILS_QA&bug_status=ON_QA

The patch for internal self-monitoring was simply a backport from the
HEAD branch.  I've attached a hand-edited patch which should enable the
self-monitoring bit.

Additionally, there was a segfault fixed in U3.  Here's the errata
advisory, which contains links to bugzillas:

https://rhn.redhat.com/errata/RHBA-2006-0241.html

-- Lon
Index: Makefile
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/Makefile,v
retrieving revision 1.6.2.3
retrieving revision 1.6.2.5
diff -u -r1.6.2.3 -r1.6.2.5
--- Makefile	17 Oct 2005 20:23:52 -0000	1.6.2.3
+++ Makefile	26 May 2006 17:39:32 -0000	1.6.2.5
@@ -40,7 +40,8 @@
 
 clurgmgrd: rg_thread.o rg_locks.o main.o groups.o rg_state.o \
 		rg_queue.o members.o rg_forward.o reslist.o \
-		resrules.o restree.o fo_domain.o
+		resrules.o restree.o fo_domain.o nodeevent.o \
+		watchdog.o
 	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs
 
 #
Index: main.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/main.c,v
retrieving revision 1.9.2.12.2.1
retrieving revision 1.9.2.17
diff -u -r1.9.2.12.2.1 -r1.9.2.17
--- main.c	25 Jan 2006 18:52:33 -0000	1.9.2.12.2.1
+++ main.c	26 May 2006 17:39:32 -0000	1.9.2.17
@@ -44,9 +48,10 @@
 void graceful_exit(int);
 void flag_shutdown(int sig);
 void hard_exit(void);
-int send_rg_states(int);
+int send_rg_states(int );
 int check_config_update(void);
 int svc_exists(char *);
+int watchdog_init(void);
 
 int shutdown_pending = 0, running = 1, need_reconfigure = 0;
 char debug = 0; /* XXX* */
@@ -646,8 +735,11 @@
 	if (foreground)
 		clu_log_console(1);
 
-	if (!foreground && (geteuid() == 0)) 
+	if (!foreground && (geteuid() == 0)) {
 		daemon_init(argv[0]);
+		if (!debug && !watchdog_init())
+			clulog(LOG_NOTICE, "Failed to start watchdog\n");
+	}
 
 	/*
 	   We need quorum before we can read the configuration data from
Index: watchdog.c
===================================================================
RCS file: watchdog.c
diff -N watchdog.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ watchdog.c	26 May 2006 17:39:32 -0000	1.1.2.1
@@ -0,0 +1,97 @@
+/*
+  Copyright Red Hat, Inc. 2005-2006
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the
+  Free Software Foundation; either version 2, or (at your option) any
+  later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/reboot.h>
+#include <stdlib.h>
+
+#include <signals.h>
+#include <clulog.h>
+
+static pid_t child = 0;	/* assigned from fork() in watchdog_init(); pid that signal_handler() signals and watchdog_init() waits on */
+
+static void /* Signal handler: re-sends the received signal number to the pid stored in `child`. */
+signal_handler(int signum)
+{
+        kill(child, signum);	/* result of kill() is not checked */
+}
+static void /* Calls setup_signal() once for every signal number in [0, _NSIG). */
+redirect_signals(void)
+{
+        int i;
+        for (i = 0; i < _NSIG; i++) {	/* NOTE(review): range includes 0 and uncatchable signals (SIGKILL/SIGSTOP); presumably setup_signal() tolerates those - confirm */
+	        switch (i) {
+		case SIGCHLD:	/* these five are registered with SIG_DFL rather than the forwarding handler */
+		case SIGILL:
+		case SIGFPE:
+		case SIGSEGV:
+		case SIGBUS:
+		        setup_signal(i, SIG_DFL);
+			break;
+		default:	/* every other signal number is registered with signal_handler(), which passes it on to `child` */
+		        setup_signal(i, signal_handler);	/* setup_signal() comes from <signals.h>; presumably installs the handler - confirm */
+		}
+	}
+}
+
+/** Fork, and turn the calling process into a watchdog over the forked child.
+ In the forked child: returns the pre-fork pid (i.e. the watchdog's pid).  If fork() fails: returns 0.  The pre-fork process never returns from here; it loops in waitpid() on the child and ends via exit(), raise(SIGKILL) or reboot().
+*/
+int 
+watchdog_init(void)
+{
+	int status;
+	pid_t parent;
+	
+	parent = getpid();	/* taken before fork(): this process is the one that stays behind as the watchdog */
+	child = fork();
+	if (child < 0)	/* fork() failed */
+	        return 0;
+	else if (!child)	/* in the forked child: hand the watchdog's pid back to the caller */
+		return parent;
+	
+	redirect_signals();	/* only reached in the watchdog (pre-fork) process */
+	
+	while (1) {
+	        if (waitpid(child, &status, 0) <= 0)	/* any non-positive return is retried; errno is not inspected */
+		        continue;
+		
+		if (WIFEXITED(status))	/* child called exit: leave with the same exit status */
+		        exit(WEXITSTATUS(status));
+		
+		if (WIFSIGNALED(status)) {
+		        if (WTERMSIG(status) == SIGKILL) {	/* child died from SIGKILL: log and SIGKILL ourselves instead of rebooting */
+				clulog(LOG_CRIT, "Watchdog: Daemon killed, exiting\n");
+				raise(SIGKILL);
+				while(1) ;	/* spin in case raise() returns before the signal takes effect */
+			}
+			else {	/* child died from any other signal */
+#ifdef DEBUG
+			        clulog(LOG_CRIT, "Watchdog: Daemon died, but not rebooting because DEBUG is set\n");
+#else
+				clulog(LOG_CRIT, "Watchdog: Daemon died, rebooting...\n");
+				sync();	/* flush filesystem buffers before the reboot call */
+			        reboot(RB_AUTOBOOT);
+#endif
+				exit(255);	/* reached when DEBUG is defined, or if reboot() returns */
+			}
+		}
+	}
+}

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]