[Linux-cluster] [PATCH 0/4] fence
Bastian Blank
bastian at waldi.eu.org
Fri Feb 18 10:23:09 UTC 2005
On Fri, Feb 18, 2005 at 10:07:27AM +0100, Bastian Blank wrote:
> > Or, I just thought of another method. fence_tool's -w handling could
> > read fenced's unix socket and wait until it sees "finish:". See
> > fence_tool.c:do_monitor(). do_monitor("finish:") would return when it
> > sees a line matching "finish:".
> > We could also use this method to allow "fence_tool leave -w".
> Hmm, let's think about it.
Okay, first part of this: change print_ev to report the complete event
type in one message.
The output looks like:
| fenced: 1108721677 event: join:start
| fenced: 1108721677 event_id = 5
| fenced: 1108721677 last_stop = 0
| fenced: 1108721677 last_start = 5
| fenced: 1108721677 last_finish = 0
| fenced: 1108721677 node_count = 1
and
| fenced: 1108721677 event: join:finish
| fenced: 1108721677 event_id = 5
| fenced: 1108721677 last_stop = 0
| fenced: 1108721677 last_start = 5
| fenced: 1108721677 last_finish = 5
| fenced: 1108721677 node_count = 0
This resolves a possible race while checking the type of the event via
the socket.
Bastian
--
Warp 7 -- It's a law we can live with.
-------------- next part --------------
diff -urN -x CVS -x debian cvs-patch03-quorum/fenced/fd.h cvs-patch05-event/fenced/fd.h
--- cvs-patch03-quorum/fenced/fd.h 2005-02-17 18:43:36.000000000 +0100
+++ cvs-patch05-event/fenced/fd.h 2005-02-18 11:07:26.000000000 +0100
@@ -152,6 +152,7 @@
int last_stop;
int last_start;
int last_finish;
+ int last_start_type;
bool first_recovery;
int prev_count;
diff -urN -x CVS -x debian cvs-patch03-quorum/fenced/main.c cvs-patch05-event/fenced/main.c
--- cvs-patch03-quorum/fenced/main.c 2005-02-17 18:34:17.000000000 +0100
+++ cvs-patch05-event/fenced/main.c 2005-02-18 11:13:48.000000000 +0100
@@ -180,42 +180,45 @@
}
#endif
-static void print_ev(struct cl_service_event *ev)
+static void print_ev(fd_t *fd, struct cl_service_event *ev)
{
+ char *type = "unknown", *start_type = "unknown";
+
switch (ev->type) {
case SERVICE_EVENT_STOP:
- log_debug("stop:");
+ type = "stop";
break;
case SERVICE_EVENT_START:
- log_debug("start:");
+ type = "start";
break;
case SERVICE_EVENT_FINISH:
- log_debug("finish:");
+ type = "finish";
break;
case SERVICE_EVENT_LEAVEDONE:
- log_debug("leavedone:");
+ type = "leavedone";
break;
}
+
+ if (ev->event_id == fd->last_start)
+ switch (fd->last_start_type) {
+ case SERVICE_START_FAILED:
+ start_type = "failed";
+ break;
+ case SERVICE_START_JOIN:
+ start_type = "join";
+ break;
+ case SERVICE_START_LEAVE:
+ start_type = "leave";
+ break;
+ }
+
+ log_debug("event: %s:%s", start_type, type);
+
log_debug(" event_id = %u", ev->event_id);
log_debug(" last_stop = %u", ev->last_stop);
log_debug(" last_start = %u", ev->last_start);
log_debug(" last_finish = %u", ev->last_finish);
log_debug(" node_count = %u", ev->node_count);
-
- if (ev->type != SERVICE_EVENT_START)
- return;
-
- switch (ev->start_type) {
- case SERVICE_START_FAILED:
- log_debug(" start_type = %s", "failed");
- break;
- case SERVICE_START_JOIN:
- log_debug(" start_type = %s", "join");
- break;
- case SERVICE_START_LEAVE:
- log_debug(" start_type = %s", "leave");
- break;
- }
}
static void print_members(int count, struct cl_cluster_node *nodes)
@@ -235,11 +238,25 @@
struct cl_cluster_node *nodes;
int error = 0, n;
- print_ev(ev);
-
- if (ev->type == SERVICE_EVENT_START) {
+ switch (ev->type)
+ {
+ case SERVICE_EVENT_START:
fd->last_start = ev->event_id;
+ fd->last_start_type = ev->start_type;
+ break;
+ case SERVICE_EVENT_STOP:
+ fd->last_stop = fd->last_start;
+ break;
+ case SERVICE_EVENT_FINISH:
+ fd->last_finish = ev->event_id;
+ break;
+ }
+ print_ev(fd, ev);
+
+ switch (ev->type)
+ {
+ case SERVICE_EVENT_START:
/* space for two extra to be sure it's not too small */
n = ev->node_count + 2;
@@ -265,17 +282,15 @@
log_debug("process_event: start done error");
free(nodes);
- }
+ break;
- else if (ev->type == SERVICE_EVENT_LEAVEDONE)
+ case SERVICE_EVENT_LEAVEDONE:
leave_finished = 1;
+ break;
- else if (ev->type == SERVICE_EVENT_STOP)
- fd->last_stop = fd->last_start;
-
- else if (ev->type == SERVICE_EVENT_FINISH) {
- fd->last_finish = ev->event_id;
+ case SERVICE_EVENT_FINISH:
do_recovery_done(fd);
+ break;
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: Digital signature
URL: <http://listman.redhat.com/archives/linux-cluster/attachments/20050218/d84587dd/attachment.sig>
More information about the Linux-cluster
mailing list