[Linux-cluster] [PATCH 0/4] fence

Bastian Blank bastian at waldi.eu.org
Fri Feb 18 10:23:09 UTC 2005


On Fri, Feb 18, 2005 at 10:07:27AM +0100, Bastian Blank wrote:
> > Or, I just thought of another method.  fence_tool's -w handling could
> > read fenced's unix socket and wait until it sees "finish:".  See
> > fence_tool.c:do_monitor().  do_monitor("finish:") would return when it
> > sees a line matching "finish:".
> > We could also use this method to allow "fence_tool leave -w".
> Hmm, let's think about it.

Okay, first part of this: change print_ev to report the complete event
type in one message.

It looks like this:

| fenced: 1108721677 event: join:start
| fenced: 1108721677   event_id    = 5
| fenced: 1108721677   last_stop   = 0
| fenced: 1108721677   last_start  = 5
| fenced: 1108721677   last_finish = 0
| fenced: 1108721677   node_count  = 1

and

| fenced: 1108721677 event: join:finish
| fenced: 1108721677   event_id    = 5
| fenced: 1108721677   last_stop   = 0
| fenced: 1108721677   last_start  = 5
| fenced: 1108721677   last_finish = 5
| fenced: 1108721677   node_count  = 0

This resolves a possible race when checking the type of the event via
the socket, because the start type and the event type are now reported
together in a single message.

Bastian

-- 
Warp 7 -- It's a law we can live with.
-------------- next part --------------
diff -urN -x CVS -x debian cvs-patch03-quorum/fenced/fd.h cvs-patch05-event/fenced/fd.h
--- cvs-patch03-quorum/fenced/fd.h	2005-02-17 18:43:36.000000000 +0100
+++ cvs-patch05-event/fenced/fd.h	2005-02-18 11:07:26.000000000 +0100
@@ -152,6 +152,7 @@
 	int 			last_stop;
 	int 			last_start;
 	int 			last_finish;
+	int 			last_start_type;
 
 	bool			first_recovery;
 	int 			prev_count;
diff -urN -x CVS -x debian cvs-patch03-quorum/fenced/main.c cvs-patch05-event/fenced/main.c
--- cvs-patch03-quorum/fenced/main.c	2005-02-17 18:34:17.000000000 +0100
+++ cvs-patch05-event/fenced/main.c	2005-02-18 11:13:48.000000000 +0100
@@ -180,42 +180,45 @@
 }
 #endif
 
-static void print_ev(struct cl_service_event *ev)
+static void print_ev(fd_t *fd, struct cl_service_event *ev)
 {
+	char *type = "unknown", *start_type = "unknown";
+
 	switch (ev->type) {
 	case SERVICE_EVENT_STOP:
-		log_debug("stop:");
+		type = "stop";
 		break;
 	case SERVICE_EVENT_START:
-		log_debug("start:");
+		type = "start";
 		break;
 	case SERVICE_EVENT_FINISH:
-		log_debug("finish:");
+		type = "finish";
 		break;
 	case SERVICE_EVENT_LEAVEDONE:
-		log_debug("leavedone:");
+		type = "leavedone";
 		break;
 	}
+
+	if (ev->event_id == fd->last_start)
+		switch (fd->last_start_type) {
+		case SERVICE_START_FAILED:
+			start_type = "failed";
+			break;
+		case SERVICE_START_JOIN:
+			start_type = "join";
+			break;
+		case SERVICE_START_LEAVE:
+			start_type = "leave";
+			break;
+	}
+
+	log_debug("event: %s:%s", start_type, type);
+
 	log_debug("  event_id    = %u", ev->event_id);
 	log_debug("  last_stop   = %u", ev->last_stop);
 	log_debug("  last_start  = %u", ev->last_start);
 	log_debug("  last_finish = %u", ev->last_finish);
 	log_debug("  node_count  = %u", ev->node_count);
-
-	if (ev->type != SERVICE_EVENT_START)
-		return;
-
-	switch (ev->start_type) {
-	case SERVICE_START_FAILED:
-		log_debug("  start_type  = %s", "failed");
-		break;
-	case SERVICE_START_JOIN:
-		log_debug("  start_type  = %s", "join");
-		break;
-	case SERVICE_START_LEAVE:
-		log_debug("  start_type  = %s", "leave");
-		break;
-	}
 }
 
 static void print_members(int count, struct cl_cluster_node *nodes)
@@ -235,11 +238,25 @@
 	struct cl_cluster_node *nodes;
 	int error = 0, n;
 
-	print_ev(ev);
-
-	if (ev->type == SERVICE_EVENT_START) {
+	switch (ev->type)
+	{
+	case SERVICE_EVENT_START:
 		fd->last_start = ev->event_id;
+		fd->last_start_type = ev->start_type;
+		break;
+	case SERVICE_EVENT_STOP:
+		fd->last_stop = fd->last_start;
+		break;
+	case SERVICE_EVENT_FINISH:
+		fd->last_finish = ev->event_id;
+		break;
+	}
 
+	print_ev(fd, ev);
+
+	switch (ev->type)
+	{
+	case SERVICE_EVENT_START:
 		/* space for two extra to be sure it's not too small */
 		n = ev->node_count + 2;
 
@@ -265,17 +282,15 @@
 			log_debug("process_event: start done error");
 
 		free(nodes);
-	}
+		break;
 
-	else if (ev->type == SERVICE_EVENT_LEAVEDONE)
+	case SERVICE_EVENT_LEAVEDONE:
 		leave_finished = 1;
+		break;
 
-	else if (ev->type == SERVICE_EVENT_STOP)
-		fd->last_stop = fd->last_start;
-
-	else if (ev->type == SERVICE_EVENT_FINISH) {
-		fd->last_finish = ev->event_id;
+	case SERVICE_EVENT_FINISH:
 		do_recovery_done(fd);
+		break;
 	}
 }
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/linux-cluster/attachments/20050218/d84587dd/attachment.sig>


More information about the Linux-cluster mailing list