[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] Cluster Project branch, master, updated. gfs-kernel_0_1_22-196-g07e949f



This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Cluster Project".

http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=07e949fa5bc4eef61f16974bf1cc5a5adb9568a4

The branch, master has been updated
       via  07e949fa5bc4eef61f16974bf1cc5a5adb9568a4 (commit)
      from  0b3fc5e8a21cc8e2011f513892c9e65eb399e9bb (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 07e949fa5bc4eef61f16974bf1cc5a5adb9568a4
Author: David Teigland <teigland redhat com>
Date:   Wed Apr 16 12:55:59 2008 -0500

    fenced: new version
    
    In the same theme as the new version of dlm_controld.
    - uses libcpg directly without libgroup (use the -g0 option)
    - runs in backward compat mode by default, using libgroup to interact
      with old groupd/fenced (-g1 option)
    - move code that runs agents (agent.c) into libfence
    
    Signed-off-by: David Teigland <teigland redhat com>

-----------------------------------------------------------------------

Summary of changes:
 fence/fence_node/Makefile             |   15 +-
 fence/fence_node/fence_node.c         |   15 +-
 fence/fenced/Makefile                 |   12 +-
 fence/fenced/config.c                 |  140 ++++
 fence/fenced/cpg.c                    | 1175 +++++++++++++++++++++++++++++++++
 fence/fenced/fd.h                     |  294 +++++---
 fence/fenced/group.c                  |  285 +++++++-
 fence/fenced/main.c                   |  780 +++++++++++-----------
 fence/fenced/member_cman.c            |  133 ++---
 fence/fenced/recover.c                |  328 ++--------
 {gfs => fence}/include/linux_endian.h |    0 
 fence/include/list.h                  |   11 +
 {cman => fence}/lib/Makefile          |   24 +-
 fence/{fenced => lib}/agent.c         |   40 +-
 fence/lib/libfence.h                  |   36 +
 group/dlm_controld/main.c             |    2 +-
 16 files changed, 2310 insertions(+), 980 deletions(-)
 create mode 100644 fence/fenced/config.c
 create mode 100644 fence/fenced/cpg.c
 copy {gfs => fence}/include/linux_endian.h (100%)
 copy {cman => fence}/lib/Makefile (67%)
 rename fence/{fenced => lib}/agent.c (89%)
 create mode 100644 fence/lib/libfence.h

diff --git a/fence/fence_node/Makefile b/fence/fence_node/Makefile
index 3ac1092..b9c96c4 100644
--- a/fence/fence_node/Makefile
+++ b/fence/fence_node/Makefile
@@ -2,7 +2,7 @@
 ###############################################################################
 ##
 ##  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 ##  
 ##  This copyrighted material is made available to anyone wishing to use,
 ##  modify, copy, or redistribute it subject to the terms and conditions
@@ -15,7 +15,7 @@ TARGET = fence_node
 
 SBINDIRT=$(TARGET)
 
-all: depends ${TARGET}
+all: ${TARGET}
 
 include ../../make/defines.mk
 include $(OBJDIR)/make/cobj.mk
@@ -23,23 +23,20 @@ include $(OBJDIR)/make/clean.mk
 include $(OBJDIR)/make/install.mk
 include $(OBJDIR)/make/uninstall.mk
 
-OBJS=	../fenced/agent.o \
-	fence_node.o
+OBJS=	fence_node.o
 
 CFLAGS += -D_FILE_OFFSET_BITS=64
 
-CFLAGS += -I${ccsincdir} -I${cmanincdir}
+CFLAGS += -I${ccsincdir} -I../lib
 CFLAGS += -I../include
 CFLAGS += -I${incdir}
 
-LDFLAGS += -L${ccslibdir} -L${cmanlibdir} -lccs -lcman
+LDFLAGS += -L${ccslibdir} -lccs
+LDFLAGS += -L../lib -lfence
 
 ${TARGET}: ${OBJS}
 	$(CC) -o $@ $^ $(LDFLAGS)
 
-depends:
-	$(MAKE) -C ../fenced agent.o
-
 clean: generalclean
 
 -include $(OBJS:.o=.d)
diff --git a/fence/fence_node/fence_node.c b/fence/fence_node/fence_node.c
index 65d1e90..ad1234c 100644
--- a/fence/fence_node/fence_node.c
+++ b/fence/fence_node/fence_node.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **  
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -17,9 +17,10 @@
 #include <string.h>
 #include <syslog.h>
 
+#include "libfence.h"
 #include "copyright.cf"
 
-#define OPTION_STRING           ("hOuV")
+#define OPTION_STRING           ("huV")
 
 #define die(fmt, args...) \
 do \
@@ -31,9 +32,6 @@ do \
 while (0)
 
 static char *prog_name;
-static int force = 0;
-
-int dispatch_fence_agent(char *victim, int force);
 
 static void print_usage(void)
 {
@@ -44,7 +42,6 @@ static void print_usage(void)
 	printf("Options:\n");
 	printf("\n");
 	printf("  -h               Print this help, then exit\n");
-	printf("  -O               Force connection to CCS\n");
 	printf("  -V               Print program version information, then exit\n");
 	printf("\n");
 }
@@ -66,10 +63,6 @@ int main(int argc, char *argv[])
 			exit(EXIT_SUCCESS);
 			break;
 
-		case 'O':
-			force = 1;
-			break;
-
 		case 'V':
 			printf("%s %s (built %s %s)\n", prog_name,
 				RELEASE_VERSION, __DATE__, __TIME__);
@@ -105,7 +98,7 @@ int main(int argc, char *argv[])
 
 	openlog("fence_node", LOG_PID, LOG_USER);
 
-	error = dispatch_fence_agent(victim, force);
+	error = fence_node(victim);
 
 	if (error) {
 		syslog(LOG_ERR, "Fence of \"%s\" was unsuccessful\n", victim);
diff --git a/fence/fenced/Makefile b/fence/fenced/Makefile
index e9e0f54..555b430 100644
--- a/fence/fenced/Makefile
+++ b/fence/fenced/Makefile
@@ -23,18 +23,20 @@ include $(OBJDIR)/make/clean.mk
 include $(OBJDIR)/make/install.mk
 include $(OBJDIR)/make/uninstall.mk
 
-OBJS=	main.o \
-	recover.o \
+OBJS=	config.o \
+	cpg.o \
 	group.o \
+	main.o \
 	member_cman.o \
-	agent.o 
+	recover.o
 
 CFLAGS += -D_FILE_OFFSET_BITS=64
-CFLAGS += -I${ccsincdir} -I${cmanincdir}
+CFLAGS += -I${ccsincdir} -I${cmanincdir} -I${openaisincdir}
 CFLAGS += -I$(S) -I$(S)/../include -I$(SRCDIR)/group/lib
 CFLAGS += -I${incdir}
 
-LDFLAGS += -L${ccslibdir} -L${cmanlibdir} -lccs -lcman
+LDFLAGS += -L${ccslibdir} -L${cmanlibdir} -L${openaislibdir} -lccs -lcman -lcpg
+LDFLAGS += -L../lib -lfence
 LDFLAGS += -L../../group/lib -l group
 
 
diff --git a/fence/fenced/config.c b/fence/fenced/config.c
new file mode 100644
index 0000000..85f0252
--- /dev/null
+++ b/fence/fenced/config.c
@@ -0,0 +1,140 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "fd.h"
+#include "ccs.h"
+
+static int open_ccs(void)
+{
+	int i = 0, cd;
+
+	while ((cd = ccs_connect()) < 0) {
+		sleep(1);
+		if (++i > 9 && !(i % 10))
+			log_error("connect to ccs error %d, "
+				  "check ccsd or cluster status", cd);
+	}
+	return cd;
+}
+
+/* Read a non-negative int from ccs at path into *config_val (else unchanged). */
+static void read_ccs_int(int cd, char *path, int *config_val)
+{
+	char *str = NULL;
+	int val, error;
+
+	error = ccs_get(cd, path, &str);
+	if (error || !str)
+		return;		/* nothing read: keep the caller's value */
+
+	val = atoi(str);
+	free(str);		/* release before any return below */
+
+	if (val < 0) {
+		log_error("ignore invalid value %d for %s", val, path);
+		return;
+	}
+
+	*config_val = val;
+	log_debug("%s is %d", path, val);
+}
+
+#define OUR_NAME_PATH "/cluster/clusternodes/clusternode[ name=\"%s\"]/@name"
+#define GROUPD_COMPAT_PATH "/cluster/fence_daemon/@groupd_compat"
+#define CLEAN_START_PATH "/cluster/fence_daemon/@clean_start"
+#define POST_JOIN_DELAY_PATH "/cluster/fence_daemon/@post_join_delay"
+#define POST_FAIL_DELAY_PATH "/cluster/fence_daemon/@post_fail_delay"
+#define OVERRIDE_PATH_PATH "/cluster/fence_daemon/@override_path"
+#define OVERRIDE_TIME_PATH "/cluster/fence_daemon/@override_time"
+
+/* Load fenced settings and initial nodes from ccs; returns 0 or an error. */
+int read_ccs(struct fd *fd)
+{
+	char path[256], *str;
+	int error, cd, i = 0, count = 0;
+
+	cd = open_ccs();
+	if (cd < 0)
+		return cd;
+
+	/* Our own nodename must be in cluster.conf before we're allowed to
+	   join the fence domain and then mount gfs; other nodes need this to
+	   fence us. */
+
+	str = NULL;
+	memset(path, 0, 256);
+	snprintf(path, 256, OUR_NAME_PATH, our_name);
+
+	error = ccs_get(cd, path, &str);
+	if (error || !str) {
+		log_error("local cman node name \"%s\" not found in the "
+			  "configuration", our_name);
+		ccs_disconnect(cd);	/* don't leak the ccs connection */
+		return error ? error : -1;	/* never report success here */
+	}
+	free(str);
+
+	/* The comline config options are initially set to the defaults,
+	   then options are read from the command line to override the
+	   defaults.  For options not set on the command line, we look for
+	   values set in cluster.conf. */
+
+	if (!comline.groupd_compat_opt)
+		read_ccs_int(cd, GROUPD_COMPAT_PATH, &comline.groupd_compat);
+	if (!comline.clean_start_opt)
+		read_ccs_int(cd, CLEAN_START_PATH, &comline.clean_start);
+	if (!comline.post_join_delay_opt)
+		read_ccs_int(cd, POST_JOIN_DELAY_PATH, &comline.post_join_delay);
+	if (!comline.post_fail_delay_opt)
+		read_ccs_int(cd, POST_FAIL_DELAY_PATH, &comline.post_fail_delay);
+	if (!comline.override_time_opt)
+		read_ccs_int(cd, OVERRIDE_TIME_PATH, &comline.override_time);
+
+	if (!comline.override_path_opt) {
+		str = NULL;
+		memset(path, 0, 256);
+		sprintf(path, OVERRIDE_PATH_PATH);
+
+		error = ccs_get(cd, path, &str);
+		if (!error && str) {
+			free(comline.override_path);
+			comline.override_path = strdup(str);
+		}
+		/* str may still be NULL here; free(NULL) is a no-op */
+		free(str);
+	}
+
+	if (comline.clean_start) {
+		log_debug("clean start, skipping initial nodes");
+		goto out;
+	}
+
+	for (i = 1; ; i++) {
+		str = NULL;
+		memset(path, 0, 256);
+		sprintf(path, "/cluster/clusternodes/clusternode[%d]/@nodeid", i);
+
+		error = ccs_get(cd, path, &str);
+		if (error || !str)
+			break;
+
+		add_complete_node(fd, atoi(str));
+		free(str);
+		count++;
+	}
+
+	log_debug("added %d nodes from ccs", count);
+ out:
+	ccs_disconnect(cd);
+	return 0;
+}
+
diff --git a/fence/fenced/cpg.c b/fence/fenced/cpg.c
new file mode 100644
index 0000000..2b6933b
--- /dev/null
+++ b/fence/fenced/cpg.c
@@ -0,0 +1,1175 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+**
+**  This copyrighted material is made available to anyone wishing to use,
+**  modify, copy, or redistribute it subject to the terms and conditions
+**  of the GNU General Public License v.2.
+**
+*******************************************************************************
+******************************************************************************/
+
+#include "fd.h"
+
+static unsigned int protocol_active[3] = {1, 0, 0};
+
+struct member {
+	struct list_head list;
+	int nodeid;
+	int start;   /* 1 if we received a start message for this change */
+	int added;   /* 1 if added by this change */
+	int failed;  /* 1 if failed in this change */
+	int disallowed;
+	uint32_t start_flags;
+};
+
+static char *msg_name(int type)
+{
+	switch (type) {
+	case FD_MSG_START:
+		return "start";
+	case FD_MSG_VICTIM_DONE:
+		return "victim_done";
+	case FD_MSG_EXTERNAL:
+		return "external";
+	default:
+		return "unknown";
+	}
+}
+
+static char *str_nums(int *nums, int n_ints)
+{
+	static char buf[128];
+	int i, len, ret, pos = 0;
+
+	len = sizeof(buf);
+	memset(buf, 0, len);
+
+	for (i = 0; i < n_ints; i++) {
+		ret = snprintf(buf + pos, len - pos, "%d ",
+			       le32_to_cpu(nums[i]));
+		if (ret >= len - pos)
+			break;
+		pos += ret;
+	}
+
+	return buf;
+}
+
+static int _send_message(cpg_handle_t h, void *buf, int len, int type)
+{
+	struct iovec iov;
+	cpg_error_t error;
+	int retries = 0;
+
+	iov.iov_base = buf;
+	iov.iov_len = len;
+
+ retry:
+	error = cpg_mcast_joined(h, CPG_TYPE_AGREED, &iov, 1);
+	if (error == CPG_ERR_TRY_AGAIN) {
+		retries++;
+		usleep(1000);
+		if (!(retries % 100))
+			log_error("cpg_mcast_joined retry %d %s",
+				   retries, msg_name(type));
+		goto retry;
+	}
+	if (error != CPG_OK) {
+		log_error("cpg_mcast_joined error %d handle %llx %s",
+			  error, (unsigned long long)h, msg_name(type));
+		return -1;
+	}
+
+	if (retries)
+		log_debug("cpg_mcast_joined retried %d %s",
+			  retries, msg_name(type));
+
+	return 0;
+}
+
+/* header fields caller needs to set: type, to_nodeid, flags, msgdata */
+
+static void fd_send_message(struct fd *fd, char *buf, int len)
+{
+	struct fd_header *hd = (struct fd_header *) buf;
+	int type = hd->type;
+
+	hd->version[0]  = cpu_to_le16(protocol_active[0]);
+	hd->version[1]  = cpu_to_le16(protocol_active[1]);
+	hd->version[2]  = cpu_to_le16(protocol_active[2]);
+	hd->type	= cpu_to_le16(hd->type);
+	hd->nodeid      = cpu_to_le32(our_nodeid);
+	hd->to_nodeid   = cpu_to_le32(hd->to_nodeid);
+	hd->flags       = cpu_to_le32(hd->flags);
+	hd->msgdata     = cpu_to_le32(hd->msgdata);
+
+	_send_message(fd->cpg_handle, buf, len, type);
+}
+
+static struct member *find_memb(struct change *cg, int nodeid)
+{
+	struct member *memb;
+
+	list_for_each_entry(memb, &cg->members, list) {
+		if (memb->nodeid == nodeid)
+			return memb;
+	}
+	return NULL;
+}
+
+static struct fd *find_fd_handle(cpg_handle_t h)
+{
+	struct fd *fd;
+
+	list_for_each_entry(fd, &domains, list) {
+		if (fd->cpg_handle == h)
+			return fd;
+	}
+	return NULL;
+}
+
+static struct fd *find_fd_ci(int ci)
+{
+	struct fd *fd;
+
+	list_for_each_entry(fd, &domains, list) {
+		if (fd->cpg_client == ci)
+			return fd;
+	}
+	return NULL;
+}
+
+void free_cg(struct change *cg)
+{
+	struct member *memb, *safe;
+
+	list_for_each_entry_safe(memb, safe, &cg->members, list) {
+		list_del(&memb->list);
+		free(memb);
+	}
+	list_for_each_entry_safe(memb, safe, &cg->removed, list) {
+		list_del(&memb->list);
+		free(memb);
+	}
+	free(cg);
+}
+
+static struct node_history *get_node_history(struct fd *fd, int nodeid)
+{
+	struct node_history *node;
+
+	list_for_each_entry(node, &fd->node_history, list) {
+		if (node->nodeid == nodeid)
+			return node;
+	}
+	return NULL;
+}
+
+static void node_history_init(struct fd *fd, int nodeid)
+{
+	struct node_history *node;
+
+	node = get_node_history(fd, nodeid);
+	if (node)
+		return;
+
+	node = malloc(sizeof(struct node_history));
+	if (!node)
+		return;
+	memset(node, 0, sizeof(struct node_history));
+
+	node->nodeid = nodeid;
+	list_add_tail(&node->list, &fd->node_history);
+}
+
+static void node_history_start(struct fd *fd, int nodeid)
+{
+	struct node_history *node;
+	
+	node = get_node_history(fd, nodeid);
+	if (!node) {
+		log_error("node_history_start no nodeid %d", nodeid);
+		return;
+	}
+
+	node->add_time = time(NULL);
+}
+
+static void node_history_left(struct fd *fd, int nodeid)
+{
+	struct node_history *node;
+
+	node = get_node_history(fd, nodeid);
+	if (!node) {
+		log_error("node_history_left no nodeid %d", nodeid);
+		return;
+	}
+
+	node->left_time = time(NULL);
+}
+
+static void node_history_fail(struct fd *fd, int nodeid)
+{
+	struct node_history *node;
+
+	node = get_node_history(fd, nodeid);
+	if (!node) {
+		log_error("node_history_fail no nodeid %d", nodeid);
+		return;
+	}
+
+	node->fail_time = time(NULL);
+
+	node->check_quorum = 1;
+}
+
+/* The master node updates this info when it fences the victim, the other
+   domain members update it when they receive the status message from the
+   master. */
+
+void node_history_fence(struct fd *fd, int nodeid, int master, int how)
+{
+	struct node_history *node;
+
+	node = get_node_history(fd, nodeid);
+	if (!node) {
+		log_error("node_history_fence no nodeid %d", nodeid);
+		return;
+	}
+
+	node->fence_time = time(NULL);
+	node->fence_master = master;
+	node->fence_how = how;
+}
+
+/* When the fence_node command is run on a machine, it will first call
+   libfence:fence_node(victim) to do the fencing.  Afterward, it should call
+   libfenced:fence_external(victim) to tell fenced what it's done, so fenced
+   can avoid fencing the node a second time.  This will result in a message
+   being sent to all domain members which will update their node_history entry
+   for the victim.  The recover.c:fence_victims() code can check whether
+   a victim has been externally fenced since the last add_time, and if so
+   skip the fencing.  This won't always work perfectly; a node might in some
+   circumstances be fenced a second time by fenced. */
+
+static void node_history_fence_external(struct fd *fd, int nodeid, int from)
+{
+	struct node_history *node;
+
+	node = get_node_history(fd, nodeid);
+	if (!node) {
+		log_error("node_history_fence_external no nodeid %d", nodeid);
+		return;
+	}
+
+	node->fence_external_time = time(NULL);
+	node->fence_external_node = from;
+}
+
+/* call this from libfenced:fenced_external() */
+
+void send_external(struct fd *fd, int victim)
+{
+	struct fd_header *hd;
+	char *buf;
+	int len;
+
+	len = sizeof(struct fd_header);
+
+	buf = malloc(len);
+	if (!buf) {
+		return;
+	}
+	memset(buf, 0, len);
+
+	hd = (struct fd_header *)buf;
+	hd->type = FD_MSG_EXTERNAL;
+	hd->msgdata = victim;
+
+	log_debug("send_external %u", victim);
+
+	fd_send_message(fd, buf, len);
+
+	free(buf);
+}
+
+/* now, if the victim dies and the fence domain sees it fail,
+   it will be added as an fd victim, but fence_victims() will
+   call is_fenced_external() which will see that it's already
+   fenced and bypass fencing it again */
+
+static void receive_external(struct fd *fd, struct fd_header *hd, int len)
+{
+	log_debug("receive_external from %d len %d victim %d",
+		  hd->nodeid, len, hd->msgdata);
+
+	node_history_fence_external(fd, hd->msgdata, hd->nodeid);
+}
+
+int is_fenced_external(struct fd *fd, int nodeid)
+{
+	struct node_history *node;
+
+	node = get_node_history(fd, nodeid);
+	if (!node) {
+		log_error("is_fenced_external no nodeid %d", nodeid);
+		return 0;
+	}
+
+	if (node->fence_external_time > node->add_time)
+		return 1;
+	return 0;
+}
+
+/* completed victim must be removed from victims list before calling this
+   because we count the number of entries on the victims list for remaining */
+
+void send_victim_done(struct fd *fd, int victim, int how)
+{
+	struct change *cg = list_first_entry(&fd->changes, struct change, list);
+	struct fd_header *hd;
+	int n_ints, len, *p;
+	int remaining = list_count(&fd->victims);
+	char *buf;
+
+	n_ints = 3;
+	len = sizeof(struct fd_header) + (n_ints * sizeof(int));
+
+	buf = malloc(len);
+	if (!buf) {
+		return;
+	}
+	memset(buf, 0, len);
+
+	hd = (struct fd_header *)buf;
+	hd->type = FD_MSG_VICTIM_DONE;
+	hd->msgdata = cg->seq;
+
+	if (fd->init_complete)
+		hd->flags |= FD_MFLG_COMPLETE;
+
+	p = (int *)(buf + sizeof(struct fd_header));
+
+	p[0] = cpu_to_le32(victim);
+	p[1] = cpu_to_le32(how);
+	p[2] = cpu_to_le32(remaining);
+
+	log_debug("send_victim_done %u flags %x victim %d how %d remaining %d",
+		  cg->seq, hd->flags, victim, how, remaining);
+
+	fd_send_message(fd, buf, len);
+
+	free(buf);
+}
+
+/* victim_done handler: check len, drop the victim, detect end of init. */
+static void receive_victim_done(struct fd *fd, struct fd_header *hd, int len)
+{
+	struct node *node, *safe;
+	uint32_t seq = hd->msgdata;
+	int victim, how, remaining, found = 0, *nums;
+
+	log_debug("receive_victim_done %d:%u flags %x len %d", hd->nodeid, seq,
+		  hd->flags, len);
+	/* check that hd->nodeid is fd->master ? */
+
+	if (len < (int)(sizeof(struct fd_header) + 3 * sizeof(int))) {
+		log_error("receive_victim_done %d:%u bad len %d",
+			  hd->nodeid, seq, len);
+		return;
+	}
+	nums = (int *)((char *)hd + sizeof(struct fd_header));
+	victim = le32_to_cpu(nums[0]);
+	how = le32_to_cpu(nums[1]);
+	remaining = le32_to_cpu(nums[2]);
+
+	/* I don't think there's any problem with the master removing the
+	   victim when it's done instead of waiting to remove it when it
+	   receives its own victim_done message, like the other nodes do */
+
+	if (hd->nodeid == our_nodeid)
+		goto out;
+
+	/* if a domain membership change involves no victims, the master sends
+	   a victim_done message with victim nodeid -1 and remaining 0; new nodes
+	   are interested in seeing the remaining 0 so they can clear their
+	   init_victims */
+
+	if (victim == -1)
+		goto out;
+
+	list_for_each_entry(node, &fd->victims, list) {
+		if (node->nodeid == victim) {
+			log_debug("receive_victim_done remove %d how %d rem %d",
+				  victim, how, remaining);
+			node_history_fence(fd, victim, hd->nodeid, how);
+			list_del(&node->list);
+			free(node);
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found)
+		log_error("receive_victim_done victim %d not found from %d",
+			  victim, hd->nodeid);
+
+ out:
+	if (!fd->init_complete && !remaining) {
+		log_debug("receive_victim_done init_complete");
+		fd->init_complete = 1;
+		/* we may have victims from init which we can clear now */
+		list_for_each_entry_safe(node, safe, &fd->victims, list) {
+			log_debug("receive_victim_done clear victim %d init %d",
+				  node->nodeid, node->init_victim);
+			list_del(&node->list);
+			free(node);
+		}
+	}
+}
+
+static int check_quorum_done(struct fd *fd)
+{
+	struct node_history *node;
+	int wait_count = 0;
+
+	if (!cman_quorate) {
+		log_debug("check_quorum %d", cman_quorate);
+		return 0;
+	}
+
+	list_for_each_entry(node, &fd->node_history, list) {
+		if (!node->check_quorum)
+			continue;
+
+		if (!is_cman_member(node->nodeid)) {
+			node->check_quorum = 0;
+		} else {
+			log_debug("check_quorum %d is_cman_member",
+				  node->nodeid);
+			wait_count++;
+		}
+	}
+
+	if (wait_count)
+		return 0;
+
+	log_debug("check_quorum done");
+	return 1;
+}
+
+static int wait_conditions_done(struct fd *fd)
+{
+	if (!check_quorum_done(fd))
+		return 0;
+	return 1;
+}
+
+static int wait_messages_done(struct fd *fd)
+{
+	struct change *cg = list_first_entry(&fd->changes, struct change, list);
+	struct member *memb;
+	int need = 0, total = 0;
+
+	list_for_each_entry(memb, &cg->members, list) {
+		if (!memb->start)
+			need++;
+		total++;
+	}
+
+	if (need) {
+		log_debug("wait_messages_done need %d of %d", need, total);
+		return 0;
+	}
+
+	log_debug("wait_messages_done got all %d", total);
+	return 1;
+}
+
+static void cleanup_changes(struct fd *fd)
+{
+	struct change *cg = list_first_entry(&fd->changes, struct change, list);
+	struct change *safe;
+
+	list_del(&cg->list);
+	if (fd->started_change)
+		free_cg(fd->started_change);
+	fd->started_change = cg;
+
+	list_for_each_entry_safe(cg, safe, &fd->changes, list) {
+		list_del(&cg->list);
+		free_cg(cg);
+	}
+}
+
+static void set_master(struct fd *fd)
+{
+	struct change *cg = list_first_entry(&fd->changes, struct change, list);
+	struct member *memb;
+	int low = 0, complete = 0;
+
+	list_for_each_entry(memb, &cg->members, list) {
+		if (!low || memb->nodeid < low)
+			low = memb->nodeid;
+
+		if (!(memb->start_flags & FD_MFLG_COMPLETE))
+			continue;
+
+		if (!complete || memb->nodeid < complete)
+			complete = memb->nodeid;
+	}
+
+	log_debug("set_master from %d to %s node %d", fd->master,
+		  complete ? "complete" : "low",
+		  complete ? complete : low);
+
+	fd->master = complete ? complete : low;
+}
+
+/* do the change details in the message match the details of the given change */
+
+static int match_change(struct fd *fd, struct change *cg,
+			struct fd_header *hd, int len)
+{
+	struct member *memb;
+	int member_count, joined_count, remove_count, failed_count;
+	int i, n_ints, *nums, nodeid, members_mismatch = 0;
+	uint32_t seq = hd->msgdata;
+
+	nums = (int *)((char *)hd + sizeof(struct fd_header));
+	if (len < (int)(sizeof(struct fd_header) + 4 * sizeof(int)))
+		return 0;	/* too short to hold the four counts */
+
+	member_count = le32_to_cpu(nums[0]);
+	joined_count = le32_to_cpu(nums[1]);
+	remove_count = le32_to_cpu(nums[2]);
+	failed_count = le32_to_cpu(nums[3]);
+	n_ints = 4 + member_count;
+	if (len != (sizeof(struct fd_header) + (n_ints * sizeof(int)))) {
+		log_debug("match_change fail %d:%u bad len %d nums %s",
+			  hd->nodeid, seq, len, str_nums(nums, 4));
+		return 0;
+	}
+
+	/* We can ignore messages if we're not in the list of members.  The one
+	   known time this will happen is after we've joined the cpg, we can
+	   get messages for changes prior to the change in which we're added. */
+
+	for (i = 0; i < member_count; i++) {
+		if (our_nodeid == le32_to_cpu(nums[4+i]))
+			break;
+	}
+	if (i == member_count) {
+		log_debug("match_change fail %d:%u we are not in members",
+			  hd->nodeid, seq);
+		return 0;
+	}
+
+	memb = find_memb(cg, hd->nodeid);
+	if (!memb) {
+		log_debug("match_change fail %d:%u sender not member",
+			  hd->nodeid, seq);
+		return 0;
+	}
+
+	/* verify this is the right change by matching the counts
+	   and the nodeids of the current members */
+
+	if (member_count != cg->member_count ||
+	    joined_count != cg->joined_count ||
+	    remove_count != cg->remove_count ||
+	    failed_count != cg->failed_count) {
+		log_debug("match_change fail %d:%u expect counts "
+			  "%d %d %d %d nums %s",
+			  hd->nodeid, seq,
+			  cg->member_count, cg->joined_count,
+			  cg->remove_count, cg->failed_count,
+			  str_nums(nums, n_ints));
+		return 0;
+	}
+
+	for (i = 0; i < member_count; i++) {
+		nodeid = le32_to_cpu(nums[4+i]);
+		memb = find_memb(cg, nodeid);
+		if (memb)
+			continue;
+		log_debug("match_change fail %d:%u no memb %d",
+			  hd->nodeid, seq, nodeid);
+		members_mismatch = 1;
+	}
+	if (members_mismatch)
+		return 0;
+
+	return 1;
+}
+
+/* Unfortunately, there's no really simple way to match a message with the
+   specific change that it was sent for.  We hope that by passing all the
+   details of the change in the message, we will be able to uniquely match
+   it to the correct change. */
+
+/* A start message will usually be for the first (current) change on our list.
+   In some cases it will be for a non-current change, and we can ignore it:
+
+   1. A,B,C get confchg1 adding C
+   2. C sends start for confchg1
+   3. A,B,C get confchg2 adding D
+   4. A,B,C,D recv start from C for confchg1 - ignored
+   5. C,D send start for confchg2
+   6. A,B send start for confchg2
+   7. A,B,C,D recv all start messages for confchg2, and start kernel
+ 
+   In step 4, how do the nodes know whether the start message from C is
+   for confchg1 or confchg2?  Hopefully by comparing the counts and members. */
+
+static struct change *find_change(struct fd *fd, struct fd_header *hd, int len)
+{
+	struct change *cg;
+
+	list_for_each_entry_reverse(cg, &fd->changes, list) {
+		if (!match_change(fd, cg, hd, len))
+			continue;
+		return cg;
+	}
+
+	log_debug("find_change %d:%u no match", hd->nodeid, hd->msgdata);
+	return NULL;
+}
+
+/* We require new members (memb->added) to be joining the domain
+   (memb->joining).  New members that are not joining the domain can happen
+   when the cpg partitions and is then merged back together (shouldn't happen
+   in general, but is possible).  We label these new members that are not
+   joining as "disallowed", and ignore their start message. */
+
+/* Handle spurious joins by ignoring this start message if the node says it's
+   not joining (i.e. it's already a member), but we see it being added (i.e.
+   it's not already a member) */
+
+static void receive_start(struct fd *fd, struct fd_header *hd, int len)
+{
+	struct change *cg;
+	struct member *memb;
+	int joining = 0;
+	uint32_t seq = hd->msgdata;
+
+	log_debug("receive_start %d:%u flags %x len %d", hd->nodeid, seq,
+		  hd->flags, len);
+
+	cg = find_change(fd, hd, len);
+	if (!cg)
+		return;
+
+	memb = find_memb(cg, hd->nodeid);
+	if (!memb) {
+		/* this should never happen since match_change checks it */
+		log_error("receive_start no member %d", hd->nodeid);
+		return;
+	}
+
+	memb->start_flags = hd->flags;
+
+	if (memb->start_flags & FD_MFLG_JOINING)
+		joining = 1;
+
+	if ((memb->added && !joining) || (!memb->added && joining)) {
+		log_error("receive_start %d:%u disallowed added %d joining %d",
+			  hd->nodeid, seq, memb->added, joining);
+		memb->disallowed = 1;
+	} else {
+		node_history_start(fd, hd->nodeid);
+		memb->start = 1;
+	}
+}
+
+static void send_start(struct fd *fd)
+{
+	struct change *cg = list_first_entry(&fd->changes, struct change, list);
+	struct fd_header *hd;
+	struct member *memb;
+	int n_ints, len, *p, i;
+	char *buf;
+
+	n_ints = 4 + cg->member_count;
+	len = sizeof(struct fd_header) + (n_ints * sizeof(int));
+
+	buf = malloc(len);
+	if (!buf) {
+		return;
+	}
+	memset(buf, 0, len);
+
+	hd = (struct fd_header *)buf;
+	hd->type = FD_MSG_START;
+	hd->msgdata = cg->seq;
+
+	if (cg->we_joined)
+		hd->flags |= FD_MFLG_JOINING;
+	if (fd->init_complete)
+		hd->flags |= FD_MFLG_COMPLETE;
+
+	p = (int *)(buf + sizeof(struct fd_header));
+
+	/* sending all this stuff is probably unnecessary, but gives
+	   us more certainty in matching stopped messages to the correct
+	   change that they are for */
+
+	p[0] = cpu_to_le32(cg->member_count);
+	p[1] = cpu_to_le32(cg->joined_count);
+	p[2] = cpu_to_le32(cg->remove_count);
+	p[3] = cpu_to_le32(cg->failed_count);
+
+	i = 4;
+	list_for_each_entry(memb, &cg->members, list)
+		p[i++] = cpu_to_le32(memb->nodeid);
+
+	log_debug("send_start %u flags %x counts %d %d %d %d", cg->seq,
+		  hd->flags, cg->member_count, cg->joined_count,
+		  cg->remove_count, cg->failed_count);
+
+	fd_send_message(fd, buf, len);
+
+	free(buf);
+}
+
+static int nodes_added(struct fd *fd)
+{
+	struct change *cg;
+
+	list_for_each_entry(cg, &fd->changes, list) {
+		if (cg->joined_count)
+			return 1;
+	}
+	return 0;
+}
+
+/* If we're being added by the current change, we'll have an empty victims
+   list, while other previous members may already have nodes in their
+   victims list.  So, we need to assume that any node in cluster.conf that's
+   not a cluster member when we're added to the fd is already a victim.
+   We can go back on that assumption, and clear out any presumed victims, when
+   we see a message from a previous member saying that there are no current victims. */
+
+static void add_victims(struct fd *fd, struct change *cg)
+{
+	struct member *memb;
+	struct node *node;
+
+	list_for_each_entry(memb, &cg->removed, list) {
+		if (!memb->failed)
+			continue;
+		node = get_new_node(fd, memb->nodeid);
+		if (!node)
+			return;
+		list_add(&node->list, &fd->victims);
+		log_debug("add node %d to victims", node->nodeid);
+	}
+}
+
+/* with start messages from all members, we can pick which one should be master
+   and do the fencing (low nodeid with state, "COMPLETE").  as the master
+   successfully fences each victim, it sends a status message such that all
+   members remove the node from their victims list.  the status message also
+   indicates the number of remaining victims.
+
+   when a node sees via status message that there are no more outstanding
+   victims, it sets fd->init_complete.  if a node is going from !complete to
+   complete, it may still have entries on its victims list at this point from
+   startup init; it can clear them out.  this node will volunteer to be master
+   in the next round of start messages by setting COMPLETE flag.
+
+   once the master begins fencing victims, it won't process any new changes
+   until it's done.  the non-master members will process changes while the
+   master is fencing, but will wait for the master to catch up in
+   WAIT_MESSAGES.  if the master fails, the others will no longer wait for it. */
+
+static void apply_changes(struct fd *fd)
+{
+	struct change *cg;
+	/* Advance the change at the head of fd->changes through its CGST_ states. */
+	if (list_empty(&fd->changes))
+		return;
+	cg = list_first_entry(&fd->changes, struct change, list);
+	/* add_change() inserts at the head, so cg is the most recent change */
+	switch (cg->state) {
+
+	case CGST_WAIT_CONDITIONS:
+		if (wait_conditions_done(fd)) {
+			send_start(fd);
+			cg->state = CGST_WAIT_MESSAGES;
+		}
+		break;
+
+	case CGST_WAIT_MESSAGES:
+		if (wait_messages_done(fd)) {
+			set_master(fd);
+			if (fd->master == our_nodeid) {
+				if (!list_empty(&fd->victims)) {
+					delay_fencing(fd, nodes_added(fd));
+					fence_victims(fd);
+				} else {
+					send_victim_done(fd, -1, 0); /* nothing to fence */
+				}
+			} else {
+				defer_fencing(fd);
+			}
+
+			cleanup_changes(fd);
+			fd->joining_group = 0;
+		}
+		break;
+
+	default:
+		log_error("apply_changes invalid state %d", cg->state);
+	}
+}
+/* Run apply_changes() on every fence domain that has a change queued. */
+void process_fd_changes(void)
+{
+	struct fd *fd, *safe;
+
+	list_for_each_entry_safe(fd, safe, &domains, list) {
+		if (!list_empty(&fd->changes))
+			apply_changes(fd);
+	}
+}
+/* Record a cpg confchg as a new struct change at the head of fd->changes.
+   Returns 0 with *cg_out set, or -ENOMEM/-ENOENT with nothing queued. */
+static int add_change(struct fd *fd,
+		      struct cpg_address *member_list, int member_list_entries,
+		      struct cpg_address *left_list, int left_list_entries,
+		      struct cpg_address *joined_list, int joined_list_entries,
+		      struct change **cg_out)
+{
+	struct change *cg;
+	struct member *memb;
+	int i, error;
+
+	cg = malloc(sizeof(struct change));
+	if (!cg)
+		goto fail_nomem;
+	memset(cg, 0, sizeof(struct change));
+	INIT_LIST_HEAD(&cg->members);
+	INIT_LIST_HEAD(&cg->removed);
+	cg->seq = ++fd->change_seq;
+	cg->state = CGST_WAIT_CONDITIONS;
+
+	cg->member_count = member_list_entries;
+	cg->joined_count = joined_list_entries;
+	cg->remove_count = left_list_entries;
+
+	for (i = 0; i < member_list_entries; i++) {
+		memb = malloc(sizeof(struct member));
+		if (!memb)
+			goto fail_nomem;
+		memset(memb, 0, sizeof(struct member));
+		memb->nodeid = member_list[i].nodeid;
+		list_add_tail(&memb->list, &cg->members);
+	}
+
+	for (i = 0; i < left_list_entries; i++) {
+		memb = malloc(sizeof(struct member));
+		if (!memb)
+			goto fail_nomem;
+		memset(memb, 0, sizeof(struct member));
+		memb->nodeid = left_list[i].nodeid;
+		if (left_list[i].reason == CPG_REASON_NODEDOWN ||
+		    left_list[i].reason == CPG_REASON_PROCDOWN) {
+			memb->failed = 1;
+			cg->failed_count++;
+		}
+		list_add_tail(&memb->list, &cg->removed);
+
+		if (memb->failed)
+			node_history_fail(fd, memb->nodeid);
+		else
+			node_history_left(fd, memb->nodeid);
+
+		log_debug("add_change %u nodeid %d remove reason %d",
+			  cg->seq, memb->nodeid, left_list[i].reason);
+	}
+	/* each joined node is expected to be found in cg by find_memb() */
+	for (i = 0; i < joined_list_entries; i++) {
+		memb = find_memb(cg, joined_list[i].nodeid);
+		if (!memb) {
+			log_error("no member %d", joined_list[i].nodeid);
+			error = -ENOENT;
+			goto fail;
+		}
+		memb->added = 1;
+		if (memb->nodeid == our_nodeid)
+			cg->we_joined = 1;
+		else
+			node_history_init(fd, memb->nodeid);
+		log_debug("add_change %u nodeid %d joined", cg->seq,
+			  memb->nodeid);
+	}
+	/* when we are the joiner, node_history_init() runs for every member */
+	if (cg->we_joined)
+		list_for_each_entry(memb, &cg->members, list)
+			node_history_init(fd, memb->nodeid);
+
+	log_debug("add_change %u member %d joined %d remove %d failed %d",
+		  cg->seq, cg->member_count, cg->joined_count, cg->remove_count,
+		  cg->failed_count);
+
+	list_add(&cg->list, &fd->changes);
+	*cg_out = cg;
+	return 0;
+
+ fail_nomem:
+	log_error("no memory");
+	error = -ENOMEM;
+ fail:
+	if (cg)	/* NULL when the very first malloc failed; nothing to free */
+		free_cg(cg);
+	return error;
+}
+/* Return 1 if a node with this nodeid is on fd->victims, otherwise 0. */
+static int is_victim(struct fd *fd, int nodeid)
+{
+	struct node *node;
+
+	list_for_each_entry(node, &fd->victims, list) {
+		if (node->nodeid == nodeid)
+			return 1;
+	}
+	return 0;
+}
+/* Empty fd->complete: each node either becomes an init victim or is freed. */
+static void add_victims_init(struct fd *fd, struct change *cg)
+{
+	struct node *node, *safe;
+
+	/* add a victim for each node in complete list that is not a cman
+	   member, not found in cg by find_memb(), and not already a victim */
+
+	list_for_each_entry_safe(node, safe, &fd->complete, list) {
+		list_del(&node->list);	/* every node leaves fd->complete */
+
+		if (!is_cman_member(node->nodeid) &&
+		    !find_memb(cg, node->nodeid) &&
+		    !is_victim(fd, node->nodeid)) {
+			node->init_victim = 1;
+			list_add(&node->list, &fd->victims);
+			log_debug("add_victims_init %d", node->nodeid);
+		} else {
+			free(node);
+		}
+	}
+}
+/* Return 1 if our_nodeid appears in a confchg's left_list, otherwise 0. */
+static int we_left(struct cpg_address *left_list, int left_list_entries)
+{
+	int i;
+
+	for (i = 0; i < left_list_entries; i++) {
+		if (left_list[i].nodeid == our_nodeid)
+			return 1;
+	}
+	return 0;
+}
+/* cpg confchg callback: finish our own leave, or queue the change for fd. */
+static void confchg_cb(cpg_handle_t handle, struct cpg_name *group_name,
+		       struct cpg_address *member_list, int member_list_entries,
+		       struct cpg_address *left_list, int left_list_entries,
+		       struct cpg_address *joined_list, int joined_list_entries)
+{
+	struct fd *fd;
+	struct change *cg;
+	int rv;
+
+	fd = find_fd_handle(handle);
+	if (!fd) {
+		log_error("confchg_cb no fence domain for cpg %s",
+			  group_name->value);
+		return;
+	}
+
+	if (fd->leaving_group && we_left(left_list, left_list_entries)) {
+		/* we called cpg_leave(), and this should be the final
+		   cpg callback we receive */
+		log_debug("confchg for our leave");
+		cpg_finalize(fd->cpg_handle);
+		client_dead(fd->cpg_client);
+		list_del(&fd->list);
+		free_fd(fd);	/* NOTE(review): fd presumably invalid from here on */
+		return;
+	}
+
+	rv = add_change(fd, member_list, member_list_entries,
+			left_list, left_list_entries,
+			joined_list, joined_list_entries, &cg);
+	if (rv)
+		return;	/* add_change() has already logged the error */
+
+	/* failed nodes in this change become victims */
+
+	add_victims(fd, cg);
+
+	/* We need to assume non-member nodes are already victims;
+	   these initial victims are cleared when we get a status
+	   with zero remaining victims from the master.  But, if
+	   we're the master, we do end up fencing these init nodes. */
+
+	if (cg->we_joined)
+		add_victims_init(fd, cg);
+}
+/* cpg message callback: check and byte-swap the fd_header in place, then dispatch. */
+static void deliver_cb(cpg_handle_t handle, struct cpg_name *group_name,
+		       uint32_t nodeid, uint32_t pid, void *data, int len)
+{
+	struct fd *fd;
+	struct fd_header *hd;
+
+	fd = find_fd_handle(handle);
+	if (!fd) {
+		log_error("deliver_cb no fd for cpg %s", group_name->value);
+		return;
+	}
+	/* a message shorter than the header must not be read or rewritten below */
+	if (len < (int)sizeof(struct fd_header)) {
+		log_error("deliver_cb short msg len %d from %u", len, nodeid);
+		return;
+	}
+	hd = (struct fd_header *)data;
+	hd->version[0]  = le16_to_cpu(hd->version[0]);
+	hd->version[1]  = le16_to_cpu(hd->version[1]);
+	hd->version[2]  = le16_to_cpu(hd->version[2]);
+	hd->type        = le16_to_cpu(hd->type);
+	hd->nodeid      = le32_to_cpu(hd->nodeid);
+	hd->to_nodeid   = le32_to_cpu(hd->to_nodeid);
+	hd->global_id   = le32_to_cpu(hd->global_id);
+	hd->flags       = le32_to_cpu(hd->flags);
+	hd->msgdata     = le32_to_cpu(hd->msgdata);
+	/* only version[0] has to match; [1] and [2] are just logged */
+	if (hd->version[0] != protocol_active[0]) {
+		log_error("reject message from %d version %u.%u.%u vs %u.%u.%u",
+			  nodeid, hd->version[0], hd->version[1], hd->version[2],
+			  protocol_active[0], protocol_active[1], protocol_active[2]);
+		return;
+	}
+	if (hd->nodeid != nodeid) {
+		log_error("bad msg nodeid %d %d", hd->nodeid, nodeid);
+		return;
+	}
+	switch (hd->type) {
+	case FD_MSG_START:
+		receive_start(fd, hd, len);
+		break;
+	case FD_MSG_VICTIM_DONE:
+		receive_victim_done(fd, hd, len);
+		break;
+	case FD_MSG_EXTERNAL:
+		receive_external(fd, hd, len);
+		break;
+	default:
+		log_error("unknown msg type %d", hd->type);
+	}
+}
+/* callbacks registered with cpg_initialize() in fd_join() */
+static cpg_callbacks_t cpg_callbacks = {
+	.cpg_deliver_fn = deliver_cb,
+	.cpg_confchg_fn = confchg_cb,
+};
+/* client workfn for a domain's cpg fd: dispatch callbacks, then apply changes. */
+static void process_fd_cpg(int ci)
+{
+	struct fd *fd;
+	cpg_error_t error;
+
+	fd = find_fd_ci(ci);
+	if (!fd) {
+		log_error("process_fd_cpg no fence domain for ci %d", ci);
+		return;
+	}
+	error = cpg_dispatch(fd->cpg_handle, CPG_DISPATCH_ALL);
+	if (error != CPG_OK) {
+		log_error("cpg_dispatch error %d", error);
+		return;
+	}
+	fd = find_fd_ci(ci);	/* confchg_cb may have freed fd (our leave) */
+	if (fd)
+		apply_changes(fd);
+}
+/* Join the cpg "fenced:<fd->name>" and register its fd via client_add() with
+   process_fd_cpg as the work function.  Returns 0, or the cpg error after
+   freeing fd (the caller must not use fd again in that case). */
+int fd_join(struct fd *fd)
+{
+	cpg_error_t error;
+	cpg_handle_t h;
+	struct cpg_name name;
+	int i = 0, f, ci;
+
+	error = cpg_initialize(&h, &cpg_callbacks);
+	if (error != CPG_OK) {
+		log_error("cpg_initialize error %d", error);
+		goto fail_free;
+	}
+	cpg_fd_get(h, &f);
+	ci = client_add(f, process_fd_cpg, NULL);
+
+	list_add(&fd->list, &domains);
+	fd->cpg_handle = h;
+	fd->cpg_client = ci;
+	fd->cpg_fd = f;
+	fd->joining_group = 1;
+
+	memset(&name, 0, sizeof(name));
+	sprintf(name.value, "fenced:%s", fd->name);
+	name.length = strlen(name.value) + 1;
+
+ retry:
+	error = cpg_join(h, &name);
+	if (error == CPG_ERR_TRY_AGAIN) {
+		sleep(1);
+		if (!(++i % 10))
+			log_error("cpg_join error retrying");
+		goto retry;
+	}
+	if (error != CPG_OK) {
+		log_error("cpg_join error %d", error);
+		/* h is finalized once, at fail: below; not a second time here */
+		goto fail;
+	}
+
+	return 0;
+
+ fail:
+	list_del(&fd->list);
+	client_dead(ci);
+	cpg_finalize(h);
+ fail_free:
+	free(fd);
+	return error;
+}
+/* Ask cpg to remove us from "fenced:<fd->name>"; confchg_cb does the teardown. */
+int fd_leave(struct fd *fd)
+{
+	cpg_error_t error;
+	struct cpg_name name;
+	int i = 0;
+
+	fd->leaving_group = 1;	/* confchg_cb checks this to spot our own leave */
+
+	memset(&name, 0, sizeof(name));
+	sprintf(name.value, "fenced:%s", fd->name);
+	name.length = strlen(name.value) + 1;
+
+ retry:
+	error = cpg_leave(fd->cpg_handle, &name);
+	if (error == CPG_ERR_TRY_AGAIN) {
+		sleep(1);
+		if (!(++i % 10))
+			log_error("cpg_leave error retrying");
+		goto retry;
+	}
+	if (error != CPG_OK)
+		log_error("cpg_leave error %d", error);
+
+	return 0;	/* always 0: a cpg_leave failure is logged, not returned */
+}
+
diff --git a/fence/fenced/fd.h b/fence/fenced/fd.h
index 990190e..2695de8 100644
--- a/fence/fenced/fd.h
+++ b/fence/fenced/fd.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **  
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -14,12 +14,6 @@
 #ifndef __FD_DOT_H__
 #define __FD_DOT_H__
 
-#ifndef TRUE
-#define TRUE 1
-#define FALSE 0
-#endif
-
-#include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
@@ -30,82 +24,68 @@
 #include <string.h>
 #include <stdint.h>
 #include <syslog.h>
+#include <time.h>
+#include <sched.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/socket.h>
-#include <sys/time.h>
 #include <sys/socket.h>
 #include <sys/un.h>
 #include <sys/poll.h>
+#include <sys/select.h>
+#include <sys/time.h>
+
+#include <openais/saAis.h>
+#include <openais/cpg.h>
 
 #include "list.h"
-#include "libgroup.h"
+#include "linux_endian.h"
 
-#define MAX_NODENAME_LEN	255   /* should match libcman.h */
-#define MAX_GROUPNAME_LEN	32    /* should match libgroup.h */
-#define MAX_NODES		256
-#define MAXARGS                 100  /* FIXME */
-#define MAXLINE                 256
-#define MAX_CLIENTS		5
-#define DUMP_SIZE               (1024 * 1024)
+/* Max name length for a group, pointless since we only ever create the
+   "default" group.  Regardless, set arbitrary max to match dlm's
+   DLM_LOCKSPACE_LEN 64.  The libcpg limit is larger at 128; we prefix
+   the fence domain name with "fenced:" to create the cpg name. */
 
-#define DEFAULT_POST_JOIN_DELAY	6
-#define DEFAULT_POST_FAIL_DELAY	0
-#define DEFAULT_CLEAN_START	0
-#define DEFAULT_OVERRIDE_PATH	"/var/run/cluster/fenced_override"
-#define FENCED_SOCK_PATH	"fenced_socket"
+#define MAX_GROUPNAME_LEN	64
+
+/* Max name length for a node.  This should match libcman's
+   CMAN_MAX_NODENAME_LEN which is 255. */
+
+#define MAX_NODENAME_LEN	255
 
-extern char			*prog_name;
-extern int			daemon_debug_opt;
-extern char			daemon_debug_buf[256];
+/* Maximum members of the fence domain, or cluster.  Should match
+   CPG_MEMBERS_MAX in openais/cpg.h. */
+
+#define MAX_NODES		128
+
+/* Max string length printed on a line, for debugging/dump output. */
+
+#define MAXLINE			256
+
+/* Size of the circular debug buffer. */
+
+#define DUMP_SIZE		(1024 * 1024)
+
+/* group_mode */
+
+#define GROUP_LIBGROUP          2
+#define GROUP_LIBCPG            3
+
+extern int daemon_debug_opt;
+extern int daemon_quit;
+extern struct list_head domains;
+extern int cman_quorate;
+extern int our_nodeid;
+extern char our_name[MAX_NODENAME_LEN+1];
+extern char daemon_debug_buf[256];
 extern char dump_buf[DUMP_SIZE];
 extern int dump_point;
 extern int dump_wrap;
+extern int group_mode;
 
 extern void daemon_dump_save(void);
 
-/* use this one before we fork into the background */
-#define die1(fmt, args...) \
-do \
-{ \
-  fprintf(stderr, "%s: ", prog_name); \
-  fprintf(stderr, fmt "\n", ##args); \
-  exit(EXIT_FAILURE); \
-} \
-while (0)
-
-#define die(fmt, args...) \
-do \
-{ \
-  fprintf(stderr, "%s: ", prog_name); \
-  fprintf(stderr, fmt "\n", ##args); \
-  syslog(LOG_ERR, fmt, ##args); \
-  exit(EXIT_FAILURE); \
-} \
-while (0)
-
-#define ASSERT(x, todo) \
-do \
-{ \
-  if (!(x)) \
-  { \
-    {todo} \
-    die("assertion failed on line %d of file %s\n", __LINE__, __FILE__); \
-  } \
-} \
-while (0)
-
-#define FENCE_RETRY(do_this, until_this) \
-for (;;) \
-{ \
-  do { do_this; } while (0); \
-  if (until_this) \
-    break; \
-  fprintf(stderr, "fenced:  out of memory:  %s, %u\n", __FILE__, __LINE__); \
-  sleep(1); \
-}
-
 #define log_debug(fmt, args...) \
 do { \
 	snprintf(daemon_debug_buf, 255, "%ld " fmt "\n", time(NULL), ##args); \
@@ -119,82 +99,174 @@ do { \
 	syslog(LOG_ERR, fmt, ##args); \
 } while (0)
 
+/* config option defaults */
 
-struct fd;
-struct fd_node;
-struct commandline;
-
-typedef struct fd fd_t;
-typedef struct fd_node fd_node_t;
-typedef struct commandline commandline_t;
+#define DEFAULT_GROUPD_COMPAT	1
+#define DEFAULT_CLEAN_START	0
+#define DEFAULT_POST_JOIN_DELAY	6
+#define DEFAULT_POST_FAIL_DELAY	0
+#define DEFAULT_OVERRIDE_TIME   3
+#define DEFAULT_OVERRIDE_PATH	"/var/run/cluster/fenced_override"
 
 struct commandline
 {
+	int groupd_compat;
+	int clean_start;
 	int post_join_delay;
 	int post_fail_delay;
 	int override_time;
-	int pad;
 	char *override_path;
-	int8_t clean_start;
+
+	int8_t groupd_compat_opt;
+	int8_t clean_start_opt;
 	int8_t post_join_delay_opt;
 	int8_t post_fail_delay_opt;
-	int8_t clean_start_opt;
-	int8_t override_path_opt;
 	int8_t override_time_opt;
+	int8_t override_path_opt;
+};
+
+extern struct commandline comline;
+
+#define FD_MSG_START		1
+#define FD_MSG_VICTIM_DONE	2
+#define FD_MSG_EXTERNAL		3
+
+#define FD_MFLG_JOINING		1  /* accompanies start, we are joining */
+#define FD_MFLG_COMPLETE	2  /* accompanies start, we have complete info */
+/* Starts every fenced cpg message; deliver_cb() converts it from little-endian. */
+struct fd_header {
+	uint16_t version[3];
+	uint16_t type;		/* FD_MSG_ */
+	uint32_t nodeid;	/* sender */
+	uint32_t to_nodeid;     /* recipient, 0 for all */
+	uint32_t global_id;     /* global unique id for this domain */
+	uint32_t flags;		/* FD_MFLG_ */
+	uint32_t msgdata;       /* in-header payload depends on MSG type */
+	uint32_t pad1;
+	uint64_t pad2;
+};
+
+#define CGST_WAIT_CONDITIONS	1
+#define CGST_WAIT_MESSAGES	2
+/* One cpg configuration change, built by add_change() and queued on fd->changes. */
+struct change {
+	struct list_head list; /* entry in fd->changes */
+	struct list_head members; /* struct member per member_list entry */
+	struct list_head removed; /* nodes removed by this change */
+	int member_count; /* member_list_entries of the confchg */
+	int joined_count; /* joined_list_entries of the confchg */
+	int remove_count; /* left_list_entries of the confchg */
+	int failed_count; /* removed with reason NODEDOWN or PROCDOWN */
+	int state; /* CGST_ */
+	int we_joined; /* set when our_nodeid is in the joined list */
+	uint32_t seq; /* just used as a reference when debugging */
+};
 
-#define FDFL_RUN        (0)
-#define FDFL_START      (1)
-#define FDFL_FINISH     (2)
+#define VIC_DONE_AGENT		1
+#define VIC_DONE_MEMBER		2
+#define VIC_DONE_OVERRIDE	3
+#define VIC_DONE_EXTERNAL	4
+/* Per-node record; presumably linked on fd->node_history -- confirm in cpg.c. */
+struct node_history {
+	struct list_head list;
+	int nodeid;
+	int check_quorum;
+	uint64_t add_time;
+	uint64_t left_time;
+	uint64_t fail_time;
+	uint64_t fence_time;
+	uint64_t fence_external_time;
+	int fence_external_node;
+	int fence_master;
+	int fence_how; /* VIC_DONE_ */
+};
+/* A node entry on one of the fd lists (victims, complete, prev, leaving). */
+struct node {
+	struct list_head 	list;
+	int			nodeid;
+	int			init_victim;	/* set when made a victim at startup */
+	char 			name[MAX_NODENAME_LEN+1];
+};
 
 struct fd {
 	struct list_head	list;
-	int			global_id;	/* global unique fd ID */
+	char 			name[MAX_GROUPNAME_LEN+1];
+
+	/* libcpg domain membership */
+
+	cpg_handle_t		cpg_handle;
+	int			cpg_client;
+	int			cpg_fd;
+	uint32_t		change_seq;
+	struct change		*started_change;
+	struct list_head	changes;
+	struct list_head	node_history;
+	int			init_complete;
+
+	/* general domain membership */
+
+	int			master;
+	int			joining_group;
+	int			leaving_group;
+	struct list_head 	victims;
+	struct list_head	complete;
+
+	/* libgroup domain membership */
+
 	int 			last_stop;
 	int 			last_start;
 	int 			last_finish;
 	int			first_recovery;
 	int 			prev_count;
-	int			leave;
 	struct list_head 	prev;
-	struct list_head 	victims;
 	struct list_head 	leaving;
-	struct list_head	complete;
-	char 			name[MAX_GROUPNAME_LEN+1];
 };
 
-struct fd_node {
-	struct list_head 	list;
-	int			nodeid;
-	char 			name[MAX_NODENAME_LEN+1];
-};
+/* config.c */
 
+int read_ccs(struct fd *fd);
 
-/* main.c */
-fd_t *find_domain(char *name);
-
-/* recover.c */
-void add_complete_node(fd_t *fd, int nodeid, char *name);
-void do_recovery(fd_t *fd, int start_type, int member_count, int *nodeids);
-void do_recovery_done(fd_t *fd);
+/* cpg.c */
 
-/* agent.c */
-int dispatch_fence_agent(char *victim, int force);
+void free_cg(struct change *cg);
+void node_history_fence(struct fd *fd, int nodeid, int master, int how);
+void send_external(struct fd *fd, int victim);
+int is_fenced_external(struct fd *fd, int nodeid);
+void send_victim_done(struct fd *fd, int victim, int how);
+void process_fd_changes(void);
+int fd_join(struct fd *fd);
+int fd_leave(struct fd *fd);
 
 /* group.c */
+
+void process_groupd(int ci);
 int setup_groupd(void);
-void exit_groupd(void);
-int process_groupd(void);
-int in_groupd_cpg(int nodeid);
-
-/* member_xxx.c */
-int setup_member(void);
-int process_member(void);
-void exit_member(void);
-int update_cluster_members(void);
-int is_member(char *name);
-int is_fenced(char *name);
-fd_node_t *get_new_node(fd_t *fd, int nodeid, char *in_name);
-void update_cman(char *victim, char *agent);
+int fd_join_group(struct fd *fd);
+int fd_leave_group(struct fd *fd);
+
+/* main.c */
+
+void client_dead(int ci);
+int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci));
+void free_fd(struct fd *fd);
+struct fd *find_fd(char *name);
+
+/* member_cman.c */
+
+void process_cman(int ci);
+int setup_cman(void);
+int is_cman_member(int nodeid);
+char *nodeid_to_name(int nodeid);
+struct node *get_new_node(struct fd *fd, int nodeid);
+
+/* recover.c */
+
+void free_node_list(struct list_head *head);
+void add_complete_node(struct fd *fd, int nodeid);
+int list_count(struct list_head *head);
+void delay_fencing(struct fd *fd, int node_join);
+void defer_fencing(struct fd *fd);
+void fence_victims(struct fd *fd);
 
 #endif				/*  __FD_DOT_H__  */
+
diff --git a/fence/fenced/group.c b/fence/fenced/group.c
index 126212e..92386fe 100644
--- a/fence/fenced/group.c
+++ b/fence/fenced/group.c
@@ -11,6 +11,7 @@
 ******************************************************************************/
 
 #include "fd.h"
+#include "libgroup.h"
 
 #define DO_STOP 1
 #define DO_START 2
@@ -23,14 +24,14 @@
 /* save all the params from callback functions here because we can't
    do the processing within the callback function itself */
 
-group_handle_t gh;
+static group_handle_t gh;
 static int cb_action;
-static char cb_name[MAX_GROUP_NAME_LEN+1];
+static char cb_name[MAX_GROUPNAME_LEN+1];
 static int cb_event_nr;
 static int cb_id;
 static int cb_type;
 static int cb_member_count;
-static int cb_members[MAX_GROUP_MEMBERS];
+static int cb_members[MAX_NODES];
 
 
 static void stop_cbfn(group_handle_t h, void *private, char *name)
@@ -84,7 +85,7 @@ group_callbacks_t callbacks = {
 	setid_cbfn
 };
 
-char *str_members(void)
+static char *str_members(void)
 {
 	static char mbuf[MAXLINE];
 	int i, len = 0;
@@ -96,9 +97,202 @@ char *str_members(void)
 	return mbuf;
 }
 
-int process_groupd(void)
+static int id_in_nodeids(int nodeid, int count, int *nodeids)
 {
-	fd_t *fd;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		if (nodeid == nodeids[i])
+			return 1;
+	}
+	return 0;
+}
+/* Helper for set_master(): walk the fd->complete nodeids in ascending order. */
+static int next_complete_nodeid(struct fd *fd, int gt)
+{
+	struct node *node;
+	int low = -1;
+
+	/* find lowest node id in fd->complete greater than gt,
+	   if none, return -1 */
+
+	list_for_each_entry(node, &fd->complete, list) {
+		if (node->nodeid <= gt)
+			continue;
+
+		if (low == -1)
+			low = node->nodeid;
+		else if (node->nodeid < low)
+			low = node->nodeid;
+	}
+	return low;
+}
+/* Set fd->master to the lowest nodeid present on both fd->complete and fd->prev. */
+static void set_master(struct fd *fd)
+{
+	struct node *node;
+	int low = -1;
+
+	/* Find the lowest nodeid common to fd->prev (newest member list)
+	 * and fd->complete (last complete member list). */
+
+	for (;;) {
+		low = next_complete_nodeid(fd, low);
+		if (low == -1)
+			break;
+
+		list_for_each_entry(node, &fd->prev, list) {
+			if (low != node->nodeid)
+				continue;
+			goto out;
+		}
+	}
+
+	/* Special case: we're the first and only FD member */
+
+	if (fd->prev_count == 1)
+		low = our_nodeid;
+
+	/* We end up with master -1 when we're not the only node and we've just
+	   joined.  Because we've just joined we weren't in the last complete
+	   domain group and won't be chosen as master.  We defer to someone who
+	   _was_ in the last complete group.  All we know is it isn't us. */
+
+ out:
+	fd->master = low;
+}
+/* Fill fd->prev with a new node for each nodeid and record the member count. */
+static void new_prev_nodes(struct fd *fd, int member_count, int *nodeids)
+{
+	struct node *node;
+	int i;
+
+	for (i = 0; i < member_count; i++) {
+		node = get_new_node(fd, nodeids[i]);
+		if (node)	/* can be NULL; cpg.c add_victims() checks it too */
+			list_add(&node->list, &fd->prev);
+	}
+	fd->prev_count = member_count;
+}
+/* First recovery: complete-list nodes that aren't cman members become victims. */
+static void _add_first_victims(struct fd *fd)
+{
+	struct node *prev_node, *safe;
+
+	/* complete list initialised in init_nodes() to all nodes from ccs */
+	if (list_empty(&fd->complete))
+		log_debug("first complete list empty warning");
+
+	list_for_each_entry_safe(prev_node, safe, &fd->complete, list) {
+		if (!is_cman_member(prev_node->nodeid)) {
+			list_del(&prev_node->list);
+			list_add(&prev_node->list, &fd->victims);
+			log_debug("add first victim %s", prev_node->name);
+			prev_node->init_victim = 1;
+		}
+	}
+}
+/* Later recoveries: sort departed nodes onto fd->victims or fd->leaving. */
+static void _add_victims(struct fd *fd, int start_type, int member_count,
+			 int *nodeids)
+{
+	struct node *node, *safe;
+
+	/* nodes which haven't completed leaving when a failure restart happens
+	 * are dead (and need fencing) or are still members */
+
+	if (start_type == GROUP_NODE_FAILED) {
+		list_for_each_entry_safe(node, safe, &fd->leaving, list) {
+			list_del(&node->list);
+			if (id_in_nodeids(node->nodeid, member_count, nodeids))
+				list_add(&node->list, &fd->complete);
+			else {
+				list_add(&node->list, &fd->victims);
+				log_debug("add victim %u, was leaving",
+					  node->nodeid);
+			}
+		}
+	}
+
+	/* nodes in last completed group but missing from nodeids are added
+	 * to victims list or leaving list, depending on the type of start. */
+
+	if (list_empty(&fd->complete))
+		log_debug("complete list empty warning");
+
+	list_for_each_entry_safe(node, safe, &fd->complete, list) {
+		if (!id_in_nodeids(node->nodeid, member_count, nodeids)) {
+			list_del(&node->list);
+
+			if (start_type == GROUP_NODE_FAILED)
+				list_add(&node->list, &fd->victims);
+			else
+				list_add(&node->list, &fd->leaving);
+
+			log_debug("add node %u to list %u", node->nodeid,
+				  start_type);
+		}
+	}
+}
+/* Start callback work: update fd->victims, then rebuild fd->prev from nodeids. */
+static void add_victims(struct fd *fd, int start_type, int member_count,
+			int *nodeids)
+{
+	/* Reset things when the last stop aborted our first
+	 * start, i.e. there was no finish; we got a
+	 * start/stop/start immediately upon joining. */
+
+	if (!fd->last_finish && fd->last_stop) {
+		log_debug("revert aborted first start");
+		fd->last_stop = 0;
+		fd->first_recovery = 0;
+		free_node_list(&fd->prev);
+		free_node_list(&fd->victims);
+		free_node_list(&fd->leaving);
+	}
+
+	log_debug("add_victims stop %d start %d finish %d",
+		  fd->last_stop, fd->last_start, fd->last_finish);
+
+	if (!fd->first_recovery) {
+		fd->first_recovery = 1;	/* only the first start takes this path */
+		_add_first_victims(fd);
+	} else
+		_add_victims(fd, start_type, member_count, nodeids);
+
+	/* "prev" is just a temporary list of node structs matching the list of
+	   nodeids from the start; these nodes are moved to the "complete" list
+	   in the finish callback, and will be used to compare against the
+	   next set of started nodes */
+	   
+	free_node_list(&fd->prev);
+	new_prev_nodes(fd, member_count, nodeids);
+}
+/* Finish callback work: drop stale lists and make fd->prev the new fd->complete. */
+static void clear_victims(struct fd *fd)
+{
+	struct node *node, *safe;
+
+	if (fd->last_finish == fd->last_start) {
+		free_node_list(&fd->leaving);
+		free_node_list(&fd->victims);
+	}
+
+	/* Save a copy of this set of nodes which constitutes the latest
+	 * complete group.  Any of these nodes missing in the next start will
+	 * either be leaving or victims.  For the next recovery, the lowest
+	 * remaining nodeid in this group will be the master. */
+
+	free_node_list(&fd->complete);
+	list_for_each_entry_safe(node, safe, &fd->prev, list) {
+		list_del(&node->list);	/* moved, not copied: fd->prev ends empty */
+		list_add(&node->list, &fd->complete);
+	}
+}
+
+void process_groupd(int ci)
+{
+	struct fd *fd;
 	int error = -EINVAL;
 
 	group_dispatch(gh);
@@ -106,45 +300,56 @@ int process_groupd(void)
 	if (!cb_action)
 		goto out;
 
-	fd = find_domain(cb_name);
+	fd = find_fd(cb_name);
 	if (!fd)
 		goto out;
 
-	/*
-	log_debug("process %s global_id %d cb_action %d last_stop %d "
-		  "last_start %d last_finish %d first %d prev_count %d",
-		  fd->name, fd->global_id, cb_action,
-		  fd->last_stop, fd->last_start, fd->last_finish,
-		  fd->first_recovery, fd->prev_count);
-	*/
-
 	switch (cb_action) {
 	case DO_STOP:
 		log_debug("stop %s", cb_name);
 		fd->last_stop = fd->last_start;
 		group_stop_done(gh, cb_name);
 		break;
+
 	case DO_START:
 		log_debug("start %s %d members %s", cb_name, cb_event_nr,
 			  str_members());
 		fd->last_start = cb_event_nr;
-		do_recovery(fd, cb_type, cb_member_count, cb_members);
+
+		/* we don't get a start callback until there's quorum */
+
+		add_victims(fd, cb_type, cb_member_count, cb_members);
+		set_master(fd);
+		if (fd->master == our_nodeid) {
+			delay_fencing(fd, cb_type == GROUP_NODE_JOIN);
+			fence_victims(fd);
+		} else {
+			defer_fencing(fd);
+		}
+
 		group_start_done(gh, cb_name, cb_event_nr);
+		fd->joining_group = 0;
 		break;
+
 	case DO_FINISH:
 		log_debug("finish %s %d", cb_name, cb_event_nr);
 		fd->last_finish = cb_event_nr;
-		do_recovery_done(fd);
+
+		/* we get terminate callback when all have started, which means
+		   that the low node has successfully fenced all victims */
+		clear_victims(fd);
+
 		break;
+
 	case DO_TERMINATE:
 		log_debug("terminate %s", cb_name);
-		ASSERT(fd->leave,);
+		if (!fd->leaving_group)
+			log_error("process_groupd terminate not leaving");
 		list_del(&fd->list);
-		free(fd);
+		free_fd(fd);
 		break;
+
 	case DO_SETID:
-		log_debug("setid %s %d", cb_name, cb_id);
-		fd->global_id = cb_id;
 		break;
 	default:
 		error = -EINVAL;
@@ -152,7 +357,7 @@ int process_groupd(void)
 
 	cb_action = 0;
  out:
-	return error;
+	return;
 }
 
 int setup_groupd(void)
@@ -170,27 +375,39 @@ int setup_groupd(void)
 	return rv;
 }
 
+/*
 void exit_groupd(void)
 {
 	group_exit(gh);
 }
+*/
 
-int in_groupd_cpg(int nodeid)
+int fd_join_group(struct fd *fd)
 {
-	group_data_t data;
-	int i, rv;
-
-	memset(&data, 0, sizeof(data));
+	int rv;
 
-	rv = group_get_group(-1, "groupd", &data);
-	if (rv)
-		return 0;
+	list_add(&fd->list, &domains);
+	fd->joining_group = 1;
 
-	for (i = 0; i < data.member_count; i++) {
-		if (data.members[i] == nodeid)
-			return 1;
+	rv = group_join(gh, fd->name);
+	if (rv) {
+		log_error("group_join error %d", rv);
+		list_del(&fd->list);
+		free(fd);
 	}
+	return rv;
+}
 
-	return 0;
+int fd_leave_group(struct fd *fd)
+{
+	int rv;
+
+	fd->leaving_group = 1;
+
+	rv = group_leave(gh, fd->name);
+	if (rv)
+		log_error("group_leave error %d", rv);
+
+	return rv;
 }
 
diff --git a/fence/fenced/main.c b/fence/fenced/main.c
index e9ebfb8..05b0923 100644
--- a/fence/fenced/main.c
+++ b/fence/fenced/main.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -12,47 +12,23 @@
 ******************************************************************************/
 
 #include "fd.h"
-#include "ccs.h"
 #include "copyright.cf"
 
-#define OPTION_STRING			("cj:f:Dn:O:T:hVS")
-#define LOCKFILE_NAME			"/var/run/fenced.pid"
+#define FENCED_SOCK_PATH	"fenced_socket"
+#define LOCKFILE_NAME		"/var/run/fenced.pid"
+#define CLIENT_NALLOC		32
+
+static int client_maxi;
+static int client_size = 0;
+static struct client *client = NULL;
+static struct pollfd *pollfd = NULL;
 
 struct client {
 	int fd;
-	char type[32];
+	void *workfn;
+	void *deadfn;
 };
 
-extern group_handle_t gh;
-extern char *our_name;
-
-static int client_size = MAX_CLIENTS;
-static struct client client[MAX_CLIENTS];
-static struct pollfd pollfd[MAX_CLIENTS];
-static int fenced_exit;
-commandline_t comline;
-struct list_head domains;
-
-static int do_write(int fd, void *buf, size_t count)
-{
-	int rv, off = 0;
-
- retry:
-	rv = write(fd, buf + off, count);
-	if (rv == -1 && errno == EINTR)
-		goto retry;
-	if (rv < 0)
-		return rv;
-
-	if (rv != count) {
-		count -= rv;
-		off += rv;
-		goto retry;
-	}
-	return 0;
-}
-
-/*
 static int do_read(int fd, void *buf, size_t count)
 {
 	int rv, off = 0;
@@ -69,236 +45,225 @@ static int do_read(int fd, void *buf, size_t count)
 	}
 	return 0;
 }
-*/
 
-static int setup_ccs(fd_t *fd)
+static int do_write(int fd, void *buf, size_t count)
 {
-	char path[256];
-	char *name = NULL, *str = NULL;
-	int error, cd, i = 0, count = 0;
-
+	int rv, off = 0;
 
-	while ((cd = ccs_connect()) < 0) {
-		sleep(1);
-		if (++i > 9 && !(i % 10))
-			log_error("connect to ccs error %d, "
-				  "check ccsd or cluster status", cd);
+ retry:
+	rv = write(fd, buf + off, count);
+	if (rv == -1 && errno == EINTR)
+		goto retry;
+	if (rv < 0) {
+		log_error("write errno %d", errno);
+		return rv;
 	}
 
+	if (rv != count) {
+		count -= rv;
+		off += rv;
+		goto retry;
+	}
+	return 0;
+}
 
-	/* Our own nodename must be in cluster.conf before we're allowed to
-	   join the fence domain and then mount gfs; other nodes need this to
-	   fence us. */
-
-	memset(path, 0, 256);
-	snprintf(path, 256,
-		 "/cluster/clusternodes/clusternode[ name=\"%s\"]/@name",
-		 our_name);
-
-	error = ccs_get(cd, path, &str);
-	if (error)
-		die1("local cman node name \"%s\" not found in the configuration",
-		     our_name);
-
-
-	/* If an option was set on the command line, don't set it from ccs. */
-
-	if (comline.clean_start_opt == FALSE) {
-		str = NULL;
-		memset(path, 0, 256);
-		sprintf(path, "/cluster/fence_daemon/@clean_start");
+static void do_dump(int fd)
+{
+	int len;
 
-		error = ccs_get(cd, path, &str);
-		if (!error)
-			comline.clean_start = atoi(str);
-		else
-			comline.clean_start = DEFAULT_CLEAN_START;
-		if (str)
-			free(str);
-	}
+	if (dump_wrap) {
+		len = DUMP_SIZE - dump_point;
+		do_write(fd, dump_buf + dump_point, len);
+		len = dump_point;
+	} else
+		len = dump_point;
 
-	if (comline.post_join_delay_opt == FALSE) {
-		str = NULL;
-		memset(path, 0, 256);
-		sprintf(path, "/cluster/fence_daemon/@post_join_delay");
-
-		error = ccs_get(cd, path, &str);
-		if (!error)
-			comline.post_join_delay = atoi(str);
-		else
-			comline.post_join_delay = DEFAULT_POST_JOIN_DELAY;
-		if (str)
-			free(str);
-	}
+	/* NUL terminate the debug string */
+	dump_buf[dump_point] = '\0';
 
-	if (comline.post_fail_delay_opt == FALSE) {
-		str = NULL;
-		memset(path, 0, 256);
-		sprintf(path, "/cluster/fence_daemon/@post_fail_delay");
-
-		error = ccs_get(cd, path, &str);
-		if (!error)
-			comline.post_fail_delay = atoi(str);
-		else
-			comline.post_fail_delay = DEFAULT_POST_FAIL_DELAY;
-		if (str)
-			free(str);
-	}
+	do_write(fd, dump_buf, len);
+}
 
-	if (comline.override_path_opt == FALSE) {
-		str = NULL;
-		memset(path, 0, 256);
-		sprintf(path, "/cluster/fence_daemon/@override_path");
-
-		error = ccs_get(cd, path, &str);
-		if (!error)
-			/* XXX These are not explicitly freed on exit; if
-			   we decide to make fenced handle SIGHUP at a later
-			   time, we will need to free this. */
-			comline.override_path = strdup(str);
-		else
-			comline.override_path = strdup(DEFAULT_OVERRIDE_PATH);
-		if (str)
-			free(str);
-	}
+static void client_alloc(void)
+{
+	int i;
 
-	if (comline.override_time_opt == FALSE) {
-		str = NULL;
-		memset(path, 0, 256);
-		sprintf(path, "/cluster/fence_daemon/@override_time");
-
-		error = ccs_get(cd, path, &str);
-		if (!error && str)
-			comline.override_time = atoi(str);
-		if (str)
-			free(str);
-		if (comline.override_time < 3)
-			comline.override_time = 3;
+	if (!client) {
+		client = malloc(CLIENT_NALLOC * sizeof(struct client));
+		pollfd = malloc(CLIENT_NALLOC * sizeof(struct pollfd));
+	} else {
+		client = realloc(client, (client_size + CLIENT_NALLOC) *
+					 sizeof(struct client));
+		pollfd = realloc(pollfd, (client_size + CLIENT_NALLOC) *
+					 sizeof(struct pollfd));
+		if (!pollfd)
+			log_error("can't alloc for pollfd");
 	}
+	if (!client || !pollfd)
+		log_error("can't alloc for client array");
 
-	log_debug("delay post_join %ds post_fail %ds",
-		  comline.post_join_delay, comline.post_fail_delay);
-
-	if (comline.clean_start) {
-		log_debug("clean start, skipping initial nodes");
-		goto out;
+	for (i = client_size; i < client_size + CLIENT_NALLOC; i++) {
+		client[i].workfn = NULL;
+		client[i].deadfn = NULL;
+		client[i].fd = -1;
+		pollfd[i].fd = -1;
+		pollfd[i].revents = 0;
 	}
+	client_size += CLIENT_NALLOC;
+}
 
-	for (i = 1; ; i++) {
-		name = NULL;
-		memset(path, 0, 256);
-		sprintf(path, "/cluster/clusternodes/clusternode[%d]/@name", i);
+void client_dead(int ci)
+{
+	close(client[ci].fd);
+	client[ci].workfn = NULL;
+	client[ci].fd = -1;
+	pollfd[ci].fd = -1;
+}
 
-		error = ccs_get(cd, path, &name);
-		if (error || !name)
-			break;
+int client_add(int fd, void (*workfn)(int ci), void (*deadfn)(int ci))
+{
+	int i;
 
-		add_complete_node(fd, 0, name);
-		free(name);
-		count++;
+	if (!client)
+		client_alloc();
+ again:
+	for (i = 0; i < client_size; i++) {
+		if (client[i].fd == -1) {
+			client[i].workfn = workfn;
+			if (deadfn)
+				client[i].deadfn = deadfn;
+			else
+				client[i].deadfn = client_dead;
+			client[i].fd = fd;
+			pollfd[i].fd = fd;
+			pollfd[i].events = POLLIN;
+			if (i > client_maxi)
+				client_maxi = i;
+			return i;
+		}
 	}
 
-	log_debug("added %d nodes from ccs", count);
- out:
-	ccs_disconnect(cd);
-	return 0;
+	client_alloc();
+	goto again;
 }
 
-fd_t *find_domain(char *name)
+static void sigterm_handler(int sig)
 {
-	fd_t *fd;
-
-	list_for_each_entry(fd, &domains, list) {
-		if (strlen(name) == strlen(fd->name) &&
-		    !strncmp(fd->name, name, strlen(name)))
-                        return fd;
-	}
-	return NULL;
+	daemon_quit = 1;
 }
 
-static fd_t *create_domain(char *name)
+static struct fd *create_fd(char *name)
 {
-	fd_t *fd;
+	struct fd *fd;
 
 	if (strlen(name) > MAX_GROUPNAME_LEN)
 		return NULL;
 
-	fd = malloc(sizeof(fd_t));
+	fd = malloc(sizeof(struct fd));
 	if (!fd)
 		return NULL;
 
-	memset(fd, 0, sizeof(fd_t));
+	memset(fd, 0, sizeof(struct fd));
 	strcpy(fd->name, name);
 
-	fd->first_recovery = FALSE;
-	fd->last_stop = 0;
-	fd->last_start = 0;
-	fd->last_finish = 0;
-	fd->prev_count = 0;
-	INIT_LIST_HEAD(&fd->prev);
+	INIT_LIST_HEAD(&fd->changes);
+	INIT_LIST_HEAD(&fd->node_history);
 	INIT_LIST_HEAD(&fd->victims);
-	INIT_LIST_HEAD(&fd->leaving);
 	INIT_LIST_HEAD(&fd->complete);
+	INIT_LIST_HEAD(&fd->prev);
+	INIT_LIST_HEAD(&fd->leaving);
 
 	return fd;
 }
 
-int do_join(char *name)
+void free_fd(struct fd *fd)
 {
-	fd_t *fd;
+	struct change *cg, *cg_safe;
+	struct node_history *nodeh, *nodeh_safe;
+
+	list_for_each_entry_safe(cg, cg_safe, &fd->changes, list) {
+		list_del(&cg->list);
+		free_cg(cg);
+	}
+	if (fd->started_change)
+		free_cg(fd->started_change);
+
+	list_for_each_entry_safe(nodeh, nodeh_safe, &fd->node_history, list) {
+		list_del(&nodeh->list);
+		free(nodeh);
+	}
+
+	free_node_list(&fd->victims);
+	free_node_list(&fd->complete);
+	free_node_list(&fd->prev);
+	free_node_list(&fd->leaving);
+
+	free(fd);
+}
+
+struct fd *find_fd(char *name)
+{
+	struct fd *fd;
+
+	list_for_each_entry(fd, &domains, list) {
+		if (strlen(name) == strlen(fd->name) &&
+		    !strncmp(fd->name, name, strlen(name)))
+			return fd;
+	}
+	return NULL;
+}
+
+static int do_join(char *name)
+{
+	struct fd *fd;
 	int rv;
 
-	fd = find_domain(name);
+	fd = find_fd(name);
 	if (fd) {
 		log_debug("join error: domain %s exists", name);
 		rv = -EEXIST;
 		goto out;
 	}
 
-	fd = create_domain(name);
+	fd = create_fd(name);
 	if (!fd) {
 		rv = -ENOMEM;
 		goto out;
 	}
 
-	rv = setup_ccs(fd);
+	rv = read_ccs(fd);
 	if (rv) {
 		free(fd);
 		goto out;
 	}
 
-	list_add(&fd->list, &domains);
-
-	rv = group_join(gh, name);
-	if (rv) {
-		log_error("group_join error %d", rv);
-		list_del(&fd->list);
-		free(fd);
-	}
+	if (group_mode == GROUP_LIBGROUP)
+		rv = fd_join_group(fd);
+	else
+		rv = fd_join(fd);
  out:
 	return rv;
 }
 
-int do_leave(char *name)
+static int do_leave(char *name)
 {
-	fd_t *fd;
+	struct fd *fd;
 	int rv;
 
-	fd = find_domain(name);
+	fd = find_fd(name);
 	if (!fd)
 		return -EINVAL;
 
-	fd->leave = 1;
-
-	rv = group_leave(gh, name);
-	if (rv) {
-		log_error("group_leave error %d", rv);
-		fd->leave = 0;
-	}
+	if (group_mode == GROUP_LIBGROUP)
+		rv = fd_leave_group(fd);
+	else
+		rv = fd_leave(fd);
 
 	return rv;
 }
 
+#define MAXARGS 8
+
 static void make_args(char *buf, int *argc, char **argv, char sep)
 {
 	char *p = buf;
@@ -317,80 +282,29 @@ static void make_args(char *buf, int *argc, char **argv, char sep)
 	*argc = i;
 }
 
-static int client_add(int fd, int *maxi)
-{
-	int i;
-
-	for (i = 0; i < client_size; i++) {
-		if (client[i].fd == -1) {
-			client[i].fd = fd;
-			pollfd[i].fd = fd;
-			pollfd[i].events = POLLIN;
-			if (i > *maxi)
-				*maxi = i;
-			/* log_debug("client %d fd %d added", i, fd); */
-			return i;
-		}
-	}
-	log_debug("client add failed");
-	return -1;
-}
-
-static void client_dead(int ci)
-{
-	/* log_debug("client %d fd %d dead", ci, client[ci].fd); */
-	close(client[ci].fd);
-	client[ci].fd = -1;
-	pollfd[ci].fd = -1;
-}
-
-static void client_init(void)
-{
-	int i;
-
-	for (i = 0; i < client_size; i++)
-		client[i].fd = -1;
-}
-
-static int do_dump(int fd)
-{
-	int len;
-
-	if (dump_wrap) {
-		len = DUMP_SIZE - dump_point;
-		do_write(fd, dump_buf + dump_point, len);
-		len = dump_point;
-	} else
-		len = dump_point;
-
-	/* NUL terminate the debug string */
-	dump_buf[dump_point] = '\0';
-
-	do_write(fd, dump_buf, len);
-
-	return 0;
-}
+#define FENCED_MSGLEN 256
 
-static int client_process(int ci)
+static void process_connection(int ci)
 {
-	char buf[MAXLINE], *argv[MAXARGS], *cmd, *name, out[MAXLINE];
+	char buf[FENCED_MSGLEN];
+	char out[FENCED_MSGLEN];
+	char *argv[MAXARGS];
+	char *cmd, *name;
 	int argc = 0, rv;
 
-	memset(buf, 0, MAXLINE);
-	memset(out, 0, MAXLINE);
+	memset(buf, 0, sizeof(buf));
+	memset(out, 0, sizeof(out));
+	memset(argv, 0, sizeof(char *) * MAXARGS);
 
-	rv = read(client[ci].fd, buf, MAXLINE);
-	if (!rv) {
-		client_dead(ci);
-		return 0;
-	}
+	rv = do_read(client[ci].fd, buf, FENCED_MSGLEN);
 	if (rv < 0) {
-		log_debug("client %d fd %d read error %d %d", ci,
+		log_error("client %d fd %d read error %d %d", ci,
 			   client[ci].fd, rv, errno);
-		return rv;
+		client_dead(ci);
+		return;
 	}
 
-	log_debug("client %d: %s", ci, buf);
+	log_debug("ci %d read %s", ci, buf);
 
 	make_args(buf, &argc, argv, ' ');
 	cmd = argv[0];
@@ -403,19 +317,30 @@ static int client_process(int ci)
 	else if (!strcmp(cmd, "dump")) {
 		do_dump(client[ci].fd);
 		close(client[ci].fd);
-		return 0;
-	} else
-		rv = -EINVAL;
+	}
 
 	sprintf(out, "%d", rv);
-	rv = write(client[ci].fd, out, MAXLINE);
+	write(client[ci].fd, out, FENCED_MSGLEN);
 
 	/* exit: cause fenced loop to exit */
+}
 
-	return rv;
+static void process_listener(int ci)
+{
+	int fd, i;
+
+	fd = accept(client[ci].fd, NULL, NULL);
+	if (fd < 0) {
+		log_error("process_listener: accept error %d %d", fd, errno);
+		return;
+	}
+	
+	i = client_add(fd, process_connection, NULL);
+
+	log_debug("client connection %d fd %d", i, fd);
 }
 
-static int setup_listen(void)
+static int setup_listener(void)
 {
 	struct sockaddr_un addr;
 	socklen_t addrlen;
@@ -447,106 +372,89 @@ static int setup_listen(void)
 		close(s);
 		return rv;
 	}
-
 	return s;
 }
 
+static void cluster_dead(int ci)
+{
+	log_error("cluster is down, exiting");
+	exit(1);
+}
+
 static int loop(void)
 {
-	int rv, i, f, maxi = 0, listen_fd, member_fd, groupd_fd;
+	int rv, i;
+	void (*workfn) (int ci);
+	void (*deadfn) (int ci);
 
-	rv = listen_fd = setup_listen();
+	rv = setup_listener();
 	if (rv < 0)
 		goto out;
-	client_add(listen_fd, &maxi);
+	client_add(rv, process_listener, NULL);
 
-	rv = member_fd = setup_member();
+	rv = setup_cman();
 	if (rv < 0)
 		goto out;
-	client_add(member_fd, &maxi);
+	client_add(rv, process_cman, cluster_dead);
 
-	rv = groupd_fd = setup_groupd();
-	if (rv < 0)
-		goto out;
-	client_add(groupd_fd, &maxi);
+	group_mode = GROUP_LIBCPG;
 
-	log_debug("listen %d member %d groupd %d",
-		  listen_fd, member_fd, groupd_fd);
+	if (comline.groupd_compat) {
+		rv = setup_groupd();
+		if (rv < 0)
+			goto out;
+		client_add(rv, process_groupd, cluster_dead);
 
-	for (;;) {
-		rv = poll(pollfd, maxi + 1, -1);
-		if (rv < 0) {
-			if (errno == EINTR)
-				continue;
-			break;
+		group_mode = GROUP_LIBGROUP;
+
+		if (comline.groupd_compat == 2) {
+			/* set_group_mode(); */
+			group_mode = GROUP_LIBGROUP;
 		}
+	}
 
-		/* client[0] is listening for new connections */
+	if (group_mode == GROUP_LIBCPG) {
+		/*
+		rv = setup_cpg();
+		if (rv < 0)
+			goto out;
+		client_add(rv, process_cpg, cluster_dead);
+		*/
+	}
 
-		if (pollfd[0].revents & POLLIN) {
-			f = accept(client[0].fd, NULL, NULL);
-			if (f < 0)
-				log_debug("accept error %d %d", f, errno);
-			else
-				client_add(f, &maxi);
+	for (;;) {
+		rv = poll(pollfd, client_maxi + 1, -1);
+		if (rv == -1 && errno == EINTR) {
+			if (daemon_quit && list_empty(&domains)) {
+				exit(1);
+			}
+			daemon_quit = 0;
+			continue;
+		}
+		if (rv < 0) {
+			log_error("poll errno %d", errno);
+			goto out;
 		}
 
-		for (i = 1; i <= maxi; i++) {
+		for (i = 0; i <= client_maxi; i++) {
 			if (client[i].fd < 0)
 				continue;
+			if (pollfd[i].revents & POLLIN) {
+				workfn = client[i].workfn;
+				workfn(i);
+			}
 			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
-				if (pollfd[i].fd == member_fd) {
-					log_error("cluster is down, exiting");
-					exit(1);
-				}
-				if (pollfd[i].fd == groupd_fd) {
-					log_error("groupd is down, exiting");
-					exit(1);
-				}
-				client_dead(i);
-			} else if (pollfd[i].revents & POLLIN) {
-				if (pollfd[i].fd == groupd_fd)
-					process_groupd();
-				else if (pollfd[i].fd == member_fd)
-					process_member();
-				else
-					client_process(i);
+				deadfn = client[i].deadfn;
+				deadfn(i);
 			}
 		}
-
-		if (fenced_exit)
-			break;
 	}
-
-	group_exit(gh);
+	rv = 0;
  out:
+	free(pollfd);
 	return rv;
 }
 
-static void print_usage(void)
-{
-	printf("Usage:\n");
-	printf("\n");
-	printf("%s [options]\n", prog_name);
-	printf("\n");
-	printf("Options:\n");
-	printf("\n");
-	printf("  -c	       All nodes are in a clean state to start\n");
-	printf("  -j <secs>	Post-join fencing delay (default %d)\n",
-				   DEFAULT_POST_JOIN_DELAY);
-	printf("  -f <secs>	Post-fail fencing delay (default %d)\n",
-				   DEFAULT_POST_FAIL_DELAY);
-	printf("  -O <path>    Override path (default %s)\n",
-	       			   DEFAULT_OVERRIDE_PATH);
-	printf("  -D	       Enable debugging code and don't fork\n");
-	printf("  -h	       Print this help, then exit\n");
-	printf("  -V	       Print program version information, then exit\n");
-	printf("\n");
-	printf("Command line values override those in " DEFAULT_CONFIG_DIR "/" DEFAULT_CONFIG_FILE ".\n");
-	printf("For an unbounded delay use <secs> value of -1.\n");
-	printf("\n");
-}
-
 static void lockfile(void)
 {
 	int fd, error;
@@ -557,8 +465,11 @@ static void lockfile(void)
 
 	fd = open(LOCKFILE_NAME, O_CREAT|O_WRONLY,
 		  S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
-	if (fd < 0)
-		die("cannot open/create lock file %s", LOCKFILE_NAME);
+	if (fd < 0) {
+		fprintf(stderr, "cannot open/create lock file %s\n",
+			LOCKFILE_NAME);
+		exit(EXIT_FAILURE);
+	}
 
 	lock.l_type = F_WRLCK;
 	lock.l_start = 0;
@@ -566,67 +477,109 @@ static void lockfile(void)
 	lock.l_len = 0;
 
 	error = fcntl(fd, F_SETLK, &lock);
-	if (error)
-		die("fenced is already running");
+	if (error) {
+		fprintf(stderr, "is already running\n");
+		exit(EXIT_FAILURE);
+	}
 
 	error = ftruncate(fd, 0);
-	if (error)
-		die("cannot clear lock file %s", LOCKFILE_NAME);
+	if (error) {
+		fprintf(stderr, "cannot clear lock file %s\n", LOCKFILE_NAME);
+		exit(EXIT_FAILURE);
+	}
 
 	sprintf(buf, "%d\n", getpid());
 
 	error = write(fd, buf, strlen(buf));
-	if (error <= 0)
-		die("cannot write lock file %s", LOCKFILE_NAME);
+	if (error <= 0) {
+		fprintf(stderr, "cannot write lock file %s\n", LOCKFILE_NAME);
+		exit(EXIT_FAILURE);
+	}
 }
 
-static void decode_arguments(int argc, char **argv, commandline_t *comline)
+static void print_usage(void)
 {
-	int cont = TRUE;
+	printf("Usage:\n");
+	printf("\n");
+	printf("fenced [options]\n");
+	printf("\n");
+	printf("Options:\n");
+	printf("\n");
+	printf("  -D           Enable debugging code and don't fork\n");
+	printf("  -g <num>     groupd compatibility, 0 off, 1 on\n");
+	printf("               on: use libgroup, compat with cluster2/stable2/rhel5\n");
+	printf("               off: use libcpg, no backward compatability\n");
+	printf("               Default is %d\n", DEFAULT_GROUPD_COMPAT);
+	printf("  -c	       All nodes are in a clean state to start\n");
+	printf("  -j <secs>    Post-join fencing delay (default %d)\n", DEFAULT_POST_JOIN_DELAY);
+	printf("  -f <secs>    Post-fail fencing delay (default %d)\n", DEFAULT_POST_FAIL_DELAY);
+	printf("  -R <secs>    Override time (default %d)\n", DEFAULT_OVERRIDE_TIME);
+
+	printf("  -O <path>    Override path (default %s)\n", DEFAULT_OVERRIDE_PATH);
+	printf("  -h           Print this help, then exit\n");
+	printf("  -V           Print program version information, then exit\n");
+	printf("\n");
+	printf("Command line values override those in " DEFAULT_CONFIG_DIR "/" DEFAULT_CONFIG_FILE ".\n");
+	printf("For an unbounded delay use <secs> value of -1.\n");
+	printf("\n");
+}
+
+#define OPTION_STRING	"g:cj:f:R:Dn:O:T:hVS"	/* -g and -R read optarg, so each needs ':' */
+
+static void read_arguments(int argc, char **argv)
+{
+	int cont = 1;
 	int optchar;
 
-	comline->override_path_opt = FALSE;
-	comline->override_path = NULL;
-	comline->post_join_delay_opt = FALSE;
-	comline->post_fail_delay_opt = FALSE;
-	comline->clean_start_opt = FALSE;
-	comline->override_time_opt = FALSE;
-	comline->override_time = 5;	/* default */
+	comline.override_path_opt = 0;
+	comline.override_path = NULL;
+	comline.post_join_delay_opt = 0;
+	comline.post_fail_delay_opt = 0;
+	comline.clean_start_opt = 0;
+	comline.override_time_opt = 0;
+	comline.override_time = 5;	/* default */
 
 	while (cont) {
 		optchar = getopt(argc, argv, OPTION_STRING);
 
 		switch (optchar) {
 
+		case 'D':
+			daemon_debug_opt = 1;
+			break;
+
+		case 'g':
+			comline.groupd_compat = atoi(optarg);
+			comline.groupd_compat_opt = 1;
+			break;
+
 		case 'c':
-			comline->clean_start = 1;
-			comline->clean_start_opt = TRUE;
+			comline.clean_start = 1;
+			comline.clean_start_opt = 1;
 			break;
 
 		case 'j':
-			comline->post_join_delay = atoi(optarg);
-			comline->post_join_delay_opt = TRUE;
+			comline.post_join_delay = atoi(optarg);
+			comline.post_join_delay_opt = 1;
 			break;
 
 		case 'f':
-			comline->post_fail_delay = atoi(optarg);
-			comline->post_fail_delay_opt = TRUE;
-			break;
-
-		case 'O':
-			comline->override_path = strdup(optarg);
-			comline->override_path_opt = TRUE;
+			comline.post_fail_delay = atoi(optarg);
+			comline.post_fail_delay_opt = 1;
 			break;
 
 		case 'R':
-			comline->override_time = atoi(optarg);
-			if (comline->override_time < 3)
-				comline->override_time = 3;
-			comline->override_time_opt = TRUE;
+			comline.override_time = atoi(optarg);
+			if (comline.override_time < 3)
+				comline.override_time = 3;
+			comline.override_time_opt = 1;
 			break;
 
-		case 'D':
-			daemon_debug_opt = TRUE;
+		case 'O':
+			if (comline.override_path)
+				free(comline.override_path);
+			comline.override_path = strdup(optarg);
+			comline.override_path_opt = 1;
 			break;
 
 		case 'h':
@@ -648,44 +601,57 @@ static void decode_arguments(int argc, char **argv, commandline_t *comline)
 			break;
 
 		case EOF:
-			cont = FALSE;
+			cont = 0;
 			break;
 
 		default:
-			die1("unknown option: %c", optchar);
-			break;
+			fprintf(stderr, "unknown option: %c", optchar);
+			exit(EXIT_FAILURE);
 		};
 	}
 }
 
-int main(int argc, char **argv)
+static void set_oom_adj(int val)
 {
-	int error;
+	FILE *fp;
+
+	fp = fopen("/proc/self/oom_adj", "w");
+	if (!fp)
+		return;
+
+	fprintf(fp, "%i", val);
+	fclose(fp);
+}
 
-	prog_name = argv[0];
-	memset(&comline, 0, sizeof(commandline_t));
-	decode_arguments(argc, argv, &comline);
+int main(int argc, char **argv)
+{
 	INIT_LIST_HEAD(&domains);
-	client_init();
+
+	memset(&comline, 0, sizeof(comline));
+	comline.groupd_compat = DEFAULT_GROUPD_COMPAT;
+	comline.clean_start = DEFAULT_CLEAN_START;
+	comline.post_join_delay = DEFAULT_POST_JOIN_DELAY;
+	comline.post_fail_delay = DEFAULT_POST_FAIL_DELAY;
+	comline.override_time = DEFAULT_OVERRIDE_TIME;
+	comline.override_path = strdup(DEFAULT_OVERRIDE_PATH);
+
+	read_arguments(argc, argv);
 
 	if (!daemon_debug_opt) {
-		if (daemon(0,0) < 0) {
+		if (daemon(0, 0) < 0) {
 			perror("main: cannot fork");
 			exit(EXIT_FAILURE);
 		}
-		
 		chdir("/");
 		umask(0);
 		openlog("fenced", LOG_PID, LOG_DAEMON);
 	}
-
 	lockfile();
+	signal(SIGTERM, sigterm_handler);
 
-	error = loop();
+	set_oom_adj(-16);
 
-	exit_groupd();
-	exit_member();
-	return error;
+	return loop();
 }
 
 void daemon_dump_save(void)
@@ -704,10 +670,52 @@ void daemon_dump_save(void)
 	}
 }
 
-char *prog_name;
 int daemon_debug_opt;
+int daemon_quit;
+struct list_head domains;
+int cman_quorate;
+int our_nodeid;
+char our_name[MAX_NODENAME_LEN+1];
 char daemon_debug_buf[256];
 char dump_buf[DUMP_SIZE];
 int dump_point;
 int dump_wrap;
-
+int group_mode;
+struct commandline comline;
+
+#if 0
+   libfenced
+
+   struct fenced_node:
+   nodeid, name,
+   given node is pending victim?,
+   last time given node was successfully fenced, how, and by whom,
+   last failed fence time (only master will know),
+   last domain join time, last domain leave time
+
+   struct fenced_domain
+   name,
+   current number of members,
+   master nodeid
+   current number of victims,
+   current pending victim,
+   state
+
+   /* tell fenced that an external program has fenced a node, e.g. fence_node;
+      fenced will try to suppress its own fencing of this node a second time */
+   fenced_external(char *domain, int nodeid);
+
+   /* fenced gives info about a single node */
+   fenced_node_info(char *domain, int nodeid, char *name,
+		    struct fenced_node *info);
+
+   /* fenced gives info about the domain */
+   fenced_domain_info(char *domain, struct fenced_domain *info);
+
+   /* fenced copies a node struct for each member */
+   fenced_domain_members(char *domain, int num, struct fenced_node **info);
+
+   fenced_debug_dump();
+   fenced_join();
+   fenced_leave();
+#endif
diff --git a/fence/fenced/member_cman.c b/fence/fenced/member_cman.c
index 3994283..a2bb318 100644
--- a/fence/fenced/member_cman.c
+++ b/fence/fenced/member_cman.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -10,66 +10,19 @@
 *******************************************************************************
 ******************************************************************************/
 
-#include <libcman.h>
 #include "fd.h"
-
-#define BUFLEN		128
+#include <libcman.h>
 
 static cman_handle_t	ch;
-static int		cman_quorate;
 static cman_node_t	cman_nodes[MAX_NODES];
 static int		cman_node_count;
-static char		name_buf[CMAN_MAX_NODENAME_LEN+1];
-
-extern struct list_head domains;
-
-char			*our_name;
-int			our_nodeid;
-
-
-static int name_equal(char *name1, char *name2)
-{
-	char name3[BUFLEN], name4[BUFLEN];
-	int i, len1, len2;
-
-	len1 = strlen(name1);
-	len2 = strlen(name2);
-
-	if (len1 == len2 && !strncmp(name1, name2, len1))
-		return TRUE;
-
-	memset(name3, 0, BUFLEN);
-	memset(name4, 0, BUFLEN);
-
-	for (i = 0; i < BUFLEN && i < len1; i++) {
-		if (name1[i] != '.')
-			name3[i] = name1[i];
-		else
-			break;
-	}
-
-	for (i = 0; i < BUFLEN && i < len2; i++) {
-		if (name2[i] != '.')
-			name4[i] = name2[i];
-		else
-			break;
-	}
-
-	len1 = strlen(name3);
-	len2 = strlen(name4);
-
-	if (len1 == len2 && !strncmp(name3, name4, len1))
-		return TRUE;
 
-	return FALSE;
-}
-
-static cman_node_t *find_cluster_node_name(char *name)
+static cman_node_t *find_cman_node(int nodeid)
 {
 	int i;
 
 	for (i = 0; i < cman_node_count; i++) {
-		if (name_equal(cman_nodes[i].cn_name, name))
+		if (cman_nodes[i].cn_nodeid == nodeid)
 			return &cman_nodes[i];
 	}
 	return NULL;
@@ -90,6 +43,8 @@ static void statechange(void)
 
 static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
 {
+	int quorate = cman_quorate;
+
 	switch (reason) {
 	case CMAN_REASON_TRY_SHUTDOWN:
 		if (list_empty(&domains))
@@ -101,22 +56,26 @@ static void cman_callback(cman_handle_t h, void *private, int reason, int arg)
 		break;
 	case CMAN_REASON_STATECHANGE:
 		statechange();
+
+		/* domain may have been waiting for quorum */
+		if (!quorate && cman_quorate && (group_mode == GROUP_LIBCPG))
+			process_fd_changes();
 		break;
 	}
 }
 
-int process_member(void)
+void process_cman(int ci)
 {
 	int rv;
+
 	rv = cman_dispatch(ch, CMAN_DISPATCH_ALL);
 	if (rv == -1 && errno == EHOSTDOWN) {
 		log_error("cluster is down, exiting");
 		exit(1);
 	}
-	return 0;
 }
 
-int setup_member(void)
+int setup_cman(void)
 {
 	cman_node_t node;
 	int rv, fd;
@@ -148,24 +107,23 @@ int setup_member(void)
 		goto out;
 	}
 
-	memset(name_buf, 0, sizeof(name_buf));
-	strncpy(name_buf, node.cn_name, CMAN_MAX_NODENAME_LEN);
-	our_name = name_buf;
+	memset(our_name, 0, sizeof(our_name));
+	strncpy(our_name, node.cn_name, CMAN_MAX_NODENAME_LEN);
 	our_nodeid = node.cn_nodeid;
 
 	log_debug("our_nodeid %d our_name %s", our_nodeid, our_name);
-	rv = 0;
-
  out:
 	return fd;
 }
 
+/*
 void exit_member(void)
 {
 	cman_finish(ch);
 }
+*/
 
-int is_member(char *name)
+int is_cman_member(int nodeid)
 {
 	cman_node_t *cn;
 
@@ -173,59 +131,44 @@ int is_member(char *name)
 	   have done a statechange() in response to a cman callback */
 	statechange();
 
-	cn = find_cluster_node_name(name);
-	if (cn && cn->cn_member) {
-		if (in_groupd_cpg(cn->cn_nodeid))
-			return 1;
-		log_debug("node \"%s\" not in groupd cpg", name);
-		return 0;
-	}
+	cn = find_cman_node(nodeid);
+	if (cn && cn->cn_member)
+		return 1;
 
-	log_debug("node \"%s\" not a cman member, cn %d", name, cn ? 1 : 0);
+	log_debug("node %d not a cman member, cn %d", nodeid, cn ? 1 : 0);
 	return 0;
 }
 
-int is_fenced(char *name)
+char *nodeid_to_name(int nodeid)
 {
 	cman_node_t *cn;
-	char agent[255];
-	uint64_t fence_time;
-	int fenced = 0;
 
-	/* If the node is a cluster member then we won't even get called */
-	cn = find_cluster_node_name(name);
-	if (cn && cn->cn_member) {
-		return 1;
-	}
-
-	/* If this call fails (though it shouldn't) then regard the node as unfenced */
-	if (cn && cman_get_fenceinfo(ch, cn->cn_nodeid, &fence_time, &fenced, agent)) {
-		log_debug("cman_get_fenceinfo failed: %s", strerror(errno));
-		fenced = 0;
-	}
+	cn = find_cman_node(nodeid);
+	if (cn)
+		return cn->cn_name;
 
-	log_debug("node \"%s\" has%s been fenced", name, fenced?"":" not");
-	return fenced;
+	return "unknown";
 }
 
-fd_node_t *get_new_node(fd_t *fd, int nodeid, char *in_name)
+struct node *get_new_node(struct fd *fd, int nodeid)
 {
 	cman_node_t cn;
-	fd_node_t *node = NULL;
-	char *name = in_name;
+	struct node *node;
 	int rv;
 
-	if (!name) {
-		memset(&cn, 0, sizeof(cn));
-		rv = cman_get_node(ch, nodeid, &cn);
-		name = cn.cn_name;
-	}
-
 	node = malloc(sizeof(*node));
+	if (!node)
+		return NULL;
 	memset(node, 0, sizeof(*node));
 
 	node->nodeid = nodeid;
-	strcpy(node->name, name);
+
+	memset(&cn, 0, sizeof(cn));
+	rv = cman_get_node(ch, nodeid, &cn);
+	if (rv < 0)
+		log_debug("get_new_node %d no cman node %d", nodeid, rv);
+	else
+		strncpy(node->name, cn.cn_name, MAX_NODENAME_LEN);
 
 	return node;
 }
diff --git a/fence/fenced/recover.c b/fence/fenced/recover.c
index ffccd3c..430f778 100644
--- a/fence/fenced/recover.c
+++ b/fence/fenced/recover.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -12,165 +12,27 @@
 ******************************************************************************/
 
 #include "fd.h"
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/select.h>
-
-extern int our_nodeid;
-extern commandline_t comline;
-
-/* Fencing recovery algorithm
-
-   do_recovery (service event start)
-   - complete = list of nodes in previous completed fence domain
-   - cl_nodes = list of current domain members provided by start
-   - victims = list of nodes in complete that are not in cl_nodes
-   - prev = saved version of cl_nodes (in list format)
-   - fence_victims() fences nodes in victims list
-
-   do_recovery_done (service event finish)
-   - complete = prev
-
-   Notes:
-   - When fenced is started, the complete list is initialized to all
-   the nodes in cluster.conf.
-   - fence_victims actually only runs on one of the nodes in the domain
-   so that a victim isn't fenced by everyone.
-   - The node to run fence_victims is the node with lowest id that's in both
-   complete and prev lists.
-   - This node will never be a node that's just joining since by definition
-   the joining node wasn't in the last complete group.
-   - An exception to this is when there is just one node in the group
-   in which case it's chosen even if it wasn't in the last complete group.
-   - There's also a leaving list that parallels the victims list but are
-   not fenced.
-*/
-
-
-static void free_node_list(struct list_head *head)
+
+void free_node_list(struct list_head *head)
 {
-	fd_node_t *node;
+	struct node *node;
+
 	while (!list_empty(head)) {
-		node = list_entry(head->next, fd_node_t, list);
+		node = list_entry(head->next, struct node, list);
 		list_del(&node->list);
 		free(node);
 	}
 }
 
-static inline void free_victims(fd_t *fd)
+void add_complete_node(struct fd *fd, int nodeid)
 {
-	free_node_list(&fd->victims);
-}
+	struct node *node;
 
-static inline void free_leaving(fd_t *fd)
-{
-	free_node_list(&fd->leaving);
-}
-
-static inline void free_prev(fd_t *fd)
-{
-	free_node_list(&fd->prev);
-}
-
-static inline void free_complete(fd_t *fd)
-{
-	free_node_list(&fd->complete);
-}
-
-static int next_complete_nodeid(fd_t *fd, int gt)
-{
-	fd_node_t *node;
-	int low = -1;
-
-	/* find lowest node id in fd_complete greater than gt,
-	   if none, return -1 */
-
-	list_for_each_entry(node, &fd->complete, list) {
-		if (node->nodeid <= gt)
-			continue;
-
-		if (low == -1)
-			low = node->nodeid;
-		else if (node->nodeid < low)
-			low = node->nodeid;
-	}
-	return low;
-}
-
-static int find_master_nodeid(fd_t *fd, char **master_name)
-{
-	fd_node_t *node;
-	int low = -1;
-
-	/* Find the lowest nodeid common to fd->fd_prev (newest member list)
-	 * and fd->fd_complete (last complete member list). */
-
-	for (;;) {
-		low = next_complete_nodeid(fd, low);
-		if (low == -1)
-			break;
-
-		list_for_each_entry(node, &fd->prev, list) {
-			if (low != node->nodeid)
-				continue;
-			*master_name = node->name;
-			goto out;
-		}
-	}
-
-	/* Special case: we're the first and only FD member */
-
-	if (fd->prev_count == 1)
-		low = our_nodeid;
-
-	/* We end up returning -1 when we're not the only node and we've just
-	   joined.  Because we've just joined we weren't in the last complete
-	   domain group and won't be chosen as master.  We defer to someone who
-	   _was_ in the last complete group.  All we know is it isn't us. */
-
-	*master_name = "prior member";
- out:
-	return low;
-}
-
-void add_complete_node(fd_t *fd, int nodeid, char *name)
-{
-	fd_node_t *node;
-	node = get_new_node(fd, nodeid, name);
+	node = get_new_node(fd, nodeid);
 	list_add(&node->list, &fd->complete);
 }
 
-static void new_prev_nodes(fd_t *fd, int member_count, int *nodeids)
-{
-	fd_node_t *node;
-	int i;
-
-	for (i = 0; i < member_count; i++) {
-		node = get_new_node(fd, nodeids[i], NULL);
-		list_add(&node->list, &fd->prev);
-	}
-
-	fd->prev_count = member_count;
-}
-
-static void add_first_victims(fd_t *fd)
-{
-	fd_node_t *prev_node, *safe;
-
-	/* complete list initialised in init_nodes() to all nodes from ccs */
-	if (list_empty(&fd->complete))
-		log_debug("first complete list empty warning");
-
-	list_for_each_entry_safe(prev_node, safe, &fd->complete, list) {
-		if (!is_member(prev_node->name)) {
-			list_del(&prev_node->list);
-			list_add(&prev_node->list, &fd->victims);
-			log_debug("add first victim %s", prev_node->name);
-		}
-	}
-}
-
-static int list_count(struct list_head *head)
+int list_count(struct list_head *head)
 {
 	struct list_head *tmp;
 	int count = 0;
@@ -180,32 +42,31 @@ static int list_count(struct list_head *head)
 	return count;
 }
 
-static int id_in_nodeids(int nodeid, int count, int *nodeids)
+static void victim_done(struct fd *fd, int victim, int how)
 {
-	int i;
+	if (group_mode == GROUP_LIBGROUP)
+		return;
 
-	for (i = 0; i < count; i++) {
-		if (nodeid == nodeids[i])
-			return TRUE;
-	}
-	return FALSE;
+	node_history_fence(fd, victim, our_nodeid, how);
+	send_victim_done(fd, victim, how);
 }
 
 /* This routine should probe other indicators to check if victims
    can be reduced.  Right now we just check if the victim has rejoined the
    cluster. */
 
-static int reduce_victims(fd_t *fd)
+static int reduce_victims(struct fd *fd)
 {
-	fd_node_t *node, *safe;
+	struct node *node, *safe;
 	int num_victims;
 
 	num_victims = list_count(&fd->victims);
 
 	list_for_each_entry_safe(node, safe, &fd->victims, list) {
-		if (is_member(node->name)) {
-			list_del(&node->list);
+		if (is_cman_member(node->nodeid)) {
 			log_debug("reduce victim %s", node->name);
+			victim_done(fd, node->nodeid, VIC_DONE_MEMBER);
+			list_del(&node->list);
 			free(node);
 			num_victims--;
 		}
@@ -288,21 +149,20 @@ static int check_override(int ofd, char *nodename, int timeout)
 	return 0;
 }
 
-
 /* If there are victims after a node has joined, it's a good indication that
    they may be joining the cluster shortly.  If we delay a bit they might
    become members and we can avoid fencing them.  This is only really an issue
    when the fencing method reboots the victims.  Otherwise, the nodes should
    unfence themselves when they start up. */
 
-static void delay_fencing(fd_t *fd, int start_type)
+void delay_fencing(struct fd *fd, int node_join)
 {
 	struct timeval first, last, start, now;
 	int victim_count, last_count = 0, delay = 0;
-	fd_node_t *node;
+	struct node *node;
 	char *delay_type;
 
-	if (start_type == GROUP_NODE_JOIN) {
+	if (node_join) {
 		delay = comline.post_join_delay;
 		delay_type = "post_join_delay";
 	} else {
@@ -354,28 +214,36 @@ static void delay_fencing(fd_t *fd, int start_type)
 	}
 }
 
-static void fence_victims(fd_t *fd, int start_type)
+void defer_fencing(struct fd *fd)
 {
-	fd_node_t *node;
-	char *master_name;
-	int master, error;
-	int override = -1;
+	char *master_name = nodeid_to_name(fd->master);
 
-	master = find_master_nodeid(fd, &master_name);
-
-	if (master != our_nodeid) {
-		log_debug("defer fencing to %d %s", master, master_name);
-		syslog(LOG_INFO, "fencing deferred to %s", master_name);
-		return;
-	}
+	log_debug("defer fencing to %d %s", fd->master, master_name);
+	syslog(LOG_INFO, "fencing deferred to %s", master_name);
+}
 
-	delay_fencing(fd, start_type);
+void fence_victims(struct fd *fd)
+{
+	struct node *node;
+	int error;
+	int override = -1;
+	int member, fenced;
 
 	while (!list_empty(&fd->victims)) {
-		node = list_entry(fd->victims.next, fd_node_t, list);
-
-		if (is_member(node->name) || is_fenced(node->name)) {
-			log_debug("averting fence of node %s", node->name);
+		node = list_entry(fd->victims.next, struct node, list);
+
+		member = is_cman_member(node->nodeid);
+		if (group_mode == GROUP_LIBCPG)
+			fenced = is_fenced_external(fd, node->nodeid);
+		else
+			fenced = 0;
+
+		if (member || fenced) {
+			log_debug("averting fence of node %s "
+				  "member %d external %d",
+				  node->name, member, fenced);
+			victim_done(fd, node->nodeid, member ? VIC_DONE_MEMBER :
+							       VIC_DONE_EXTERNAL);
 			list_del(&node->list);
 			free(node);
 			continue;
@@ -384,12 +252,13 @@ static void fence_victims(fd_t *fd, int start_type)
 		log_debug("fencing node %s", node->name);
 		syslog(LOG_INFO, "fencing node \"%s\"", node->name);
 
-		error = dispatch_fence_agent(node->name, 0);
+		error = fence_node(node->name);
 
 		syslog(LOG_INFO, "fence \"%s\" %s", node->name,
 		       error ? "failed" : "success");
 
 		if (!error) {
+			victim_done(fd, node->nodeid, VIC_DONE_AGENT);
 			list_del(&node->list);
 			free(node);
 			continue;
@@ -406,8 +275,7 @@ static void fence_victims(fd_t *fd, int start_type)
 				   comline.override_time) > 0) {
 			syslog(LOG_WARNING, "fence \"%s\" overridden by "
 			       "administrator intervention", node->name);
-
-			update_cman(node->name, "override");
+			victim_done(fd, node->nodeid, VIC_DONE_OVERRIDE);
 			list_del(&node->list);
 			free(node);
 		}
@@ -415,97 +283,3 @@ static void fence_victims(fd_t *fd, int start_type)
 	}
 }
 
-static void add_victims(fd_t *fd, int start_type, int member_count,
-			int *nodeids)
-{
-	fd_node_t *node, *safe;
-
-	/* nodes which haven't completed leaving when a failure restart happens
-	 * are dead (and need fencing) or are still members */
-
-	if (start_type == GROUP_NODE_FAILED) {
-		list_for_each_entry_safe(node, safe, &fd->leaving, list) {
-			list_del(&node->list);
-			if (id_in_nodeids(node->nodeid, member_count, nodeids))
-				list_add(&node->list, &fd->complete);
-			else {
-				list_add(&node->list, &fd->victims);
-				log_debug("add victim %u, was leaving",
-					  node->nodeid);
-			}
-		}
-	}
-
-	/* nodes in last completed group but missing from fr_nodeids are added
-	 * to victims list or leaving list, depending on the type of start. */
-
-	if (list_empty(&fd->complete))
-		log_debug("complete list empty warning");
-
-	list_for_each_entry_safe(node, safe, &fd->complete, list) {
-		if (!id_in_nodeids(node->nodeid, member_count, nodeids)) {
-			list_del(&node->list);
-
-			if (start_type == GROUP_NODE_FAILED)
-				list_add(&node->list, &fd->victims);
-			else
-				list_add(&node->list, &fd->leaving);
-
-			log_debug("add node %u to list %u", node->nodeid,
-				  start_type);
-		}
-	}
-}
-
-void do_recovery(fd_t *fd, int start_type, int member_count, int *nodeids)
-{
-	/* Reset things when the last stop aborted our first
-	 * start, i.e. there was no finish; we got a
-	 * start/stop/start immediately upon joining. */
-
-	if (!fd->last_finish && fd->last_stop) {
-		log_debug("revert aborted first start");
-		fd->last_stop = 0;
-		fd->first_recovery = FALSE;
-		free_prev(fd);
-		free_victims(fd);
-		free_leaving(fd);
-	}
-
-	log_debug("do_recovery stop %d start %d finish %d",
-		  fd->last_stop, fd->last_start, fd->last_finish);
-
-	if (!fd->first_recovery) {
-		fd->first_recovery = TRUE;
-		add_first_victims(fd);
-	} else
-		add_victims(fd, start_type, member_count, nodeids);
-
-	free_prev(fd);
-	new_prev_nodes(fd, member_count, nodeids);
-
-	if (!list_empty(&fd->victims))
-		fence_victims(fd, start_type);
-}
-
-void do_recovery_done(fd_t *fd)
-{
-	fd_node_t *node, *safe;
-
-	if (fd->last_finish == fd->last_start) {
-		free_leaving(fd);
-		free_victims(fd);
-	}
-
-	/* Save a copy of this set of nodes which constitutes the latest
-	 * complete group.  Any of these nodes missing in the next start will
-	 * either be leaving or victims.  For the next recovery, the lowest
-	 * remaining nodeid in this group will be the master. */
-
-	free_complete(fd);
-	list_for_each_entry_safe(node, safe, &fd->prev, list) {
-		list_del(&node->list);
-		list_add(&node->list, &fd->complete);
-	}
-}
-
diff --git a/gfs/include/linux_endian.h b/fence/include/linux_endian.h
similarity index 100%
copy from gfs/include/linux_endian.h
copy to fence/include/linux_endian.h
diff --git a/fence/include/list.h b/fence/include/list.h
index 566b377..8100cbc 100644
--- a/fence/include/list.h
+++ b/fence/include/list.h
@@ -226,6 +226,17 @@ static inline void list_splice_init(struct list_head *list,
 	container_of(ptr, type, member)
 
 /**
+ * list_first_entry - get the first element from a list
+ * @ptr:        the list head to take the element from.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/**
  * list_for_each	-	iterate over a list
  * @pos:	the &struct list_head to use as a loop counter.
  * @head:	the head for your list.
diff --git a/cman/lib/Makefile b/fence/lib/Makefile
similarity index 67%
copy from cman/lib/Makefile
copy to fence/lib/Makefile
index 13c6f6e..d69d4f9 100644
--- a/cman/lib/Makefile
+++ b/fence/lib/Makefile
@@ -1,8 +1,7 @@
 ###############################################################################
 ###############################################################################
 ##
-##  Copyright (C) Sistina Software, Inc.  1997-2004  All rights reserved.
-##  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
+##  Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
 ##  
 ##  This copyrighted material is made available to anyone wishing to use,
 ##  modify, copy, or redistribute it subject to the terms and conditions
@@ -11,40 +10,35 @@
 ###############################################################################
 ###############################################################################
 
-TARGET= libcman
+TARGET= libfence
 
-LIBDIRT=$(TARGET).a \
-	$(TARGET).so.$(SOMAJOR).$(SOMINOR)
+LIBDIRT=$(TARGET).so.$(SOMAJOR).$(SOMINOR)
 
-LIBSYMT=$(TARGET).so \
-	$(TARGET).so.$(SOMAJOR)
+LIBSYMT=$(TARGET).so.$(SOMAJOR)
 
 INCDIRT=$(TARGET).h
 
 include ../../make/defines.mk
 
 SHAREDLIB=$(TARGET).so.${SOMAJOR}.${SOMINOR}
-STATICLIB=$(TARGET).a
 
-all: $(STATICLIB) $(SHAREDLIB)
+all: $(SHAREDLIB)
 
 include $(OBJDIR)/make/cobj.mk
 include $(OBJDIR)/make/clean.mk
 include $(OBJDIR)/make/install.mk
 include $(OBJDIR)/make/uninstall.mk
 
-OBJS=	$(TARGET).o
+OBJS=	agent.o
 
 CFLAGS += -fPIC
-CFLAGS += -I${cmanincdir} -I$(S)/../daemon
+CFLAGS += -I${ccsincdir}
 CFLAGS += -I${incdir}
 
-$(TARGET).a: $(OBJS)
-	${AR} r $@ $^
-	${RANLIB} $@
+LDFLAGS += -L${ccslibdir} -lccs
 
 $(TARGET).so.${SOMAJOR}.${SOMINOR}: $(OBJS)
-	$(CC) -shared -o $@ -Wl,-soname=$(TARGET).so.$(SOMAJOR) $<
+	$(CC) $(LDFLAGS) -shared -o $@ -Wl,-soname=$(TARGET).so.$(SOMAJOR) $<
 	ln -sf $(TARGET).so.$(SOMAJOR).$(SOMINOR) $(TARGET).so
 	ln -sf $(TARGET).so.$(SOMAJOR).$(SOMINOR) $(TARGET).so.$(SOMAJOR)
 
diff --git a/fence/fenced/agent.c b/fence/lib/agent.c
similarity index 89%
rename from fence/fenced/agent.c
rename to fence/lib/agent.c
index 3207fe3..de8ce11 100644
--- a/fence/fenced/agent.c
+++ b/fence/lib/agent.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -22,8 +22,6 @@
 #include <time.h>
 #include <syslog.h>
 
-#include <libcman.h>
-
 #include "ccs.h"
 
 #define MAX_METHODS		8
@@ -49,7 +47,7 @@ static void display_agent_output(char *agent, int fd)
 		snprintf(msg, 256, "agent \"%s\" reports: ", agent);
 		strcat(msg, buf);
 
-		printf("%s\n", msg);
+		/* printf("%s\n", msg); */
 		syslog(LOG_ERR, "%s", msg);
 
 		memset(buf, 0, sizeof(buf));
@@ -279,42 +277,13 @@ static int use_device(int cd, char *victim, char *method, int d,
 	return error;
 }
 
-void update_cman(char *victim, char *method)
-{
-	cman_handle_t ch;
-	struct cman_node node;
-	uint64_t the_time = time(NULL);
-
-	ch = cman_admin_init(NULL);
-	if (!ch) {
-		syslog(LOG_ERR, "Unable to connect to to cman: %m");
-		return;
-	}
-	/* Convert name to a number */
-	memset(&node, 0, sizeof(node));
-	strcpy(node.cn_name, victim);
-
-	/* Mark it as fenced */
-	if (!cman_get_node(ch, 0, &node))
-		cman_node_fenced(ch, node.cn_nodeid, the_time, method);
-	else
-		syslog(LOG_ERR, "can't get node number for node %s\n", victim);
-	cman_finish(ch);
-}
-
-int dispatch_fence_agent(char *victim, int force)
+int fence_node(char *victim)
 {
 	char *method = NULL, *device = NULL;
 	char *victim_nodename = NULL;
 	int num_methods, num_devices, m, d, error = -1, cd;
 
-	if (force)
-		cd = ccs_force_connect(NULL, 0);
-	else {
-		while ((cd = ccs_connect()) < 0)
-			sleep(1);
-	}
-
+	cd = ccs_force_connect(NULL, 0);
 	if (cd < 0) {
 		syslog(LOG_ERR, "cannot connect to ccs %d\n", cd);
 		return -1;
@@ -360,7 +329,6 @@ int dispatch_fence_agent(char *victim, int force)
 			if (error)
 				break;
 
-			update_cman(victim, device);
 			free(device);
 			device = NULL;
 		}
diff --git a/fence/lib/libfence.h b/fence/lib/libfence.h
new file mode 100644
index 0000000..6cdbd85
--- /dev/null
+++ b/fence/lib/libfence.h
@@ -0,0 +1,36 @@
+/******************************************************************************
+*******************************************************************************
+**
+**  Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+**
+**  This library is free software; you can redistribute it and/or
+**  modify it under the terms of the GNU Lesser General Public
+**  License as published by the Free Software Foundation; either
+**  version 2 of the License, or (at your option) any later version.
+**
+**  This library is distributed in the hope that it will be useful,
+**  but WITHOUT ANY WARRANTY; without even the implied warranty of
+**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+**  Lesser General Public License for more details.
+**
+**  You should have received a copy of the GNU Lesser General Public
+**  License along with this library; if not, write to the Free Software
+**  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+**
+*******************************************************************************
+******************************************************************************/
+
+#ifndef _LIBFENCE_H_
+#define _LIBFENCE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int fence_node(char *name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/group/dlm_controld/main.c b/group/dlm_controld/main.c
index 251548b..095cf8f 100644
--- a/group/dlm_controld/main.c
+++ b/group/dlm_controld/main.c
@@ -579,7 +579,7 @@ static int loop(void)
 				workfn = client[i].workfn;
 				workfn(i);
 			}
-			if (pollfd[i].revents & POLLHUP) {
+			if (pollfd[i].revents & (POLLERR | POLLHUP | POLLNVAL)) {
 				deadfn = client[i].deadfn;
 				deadfn(i);
 			}


hooks/post-receive
--
Cluster Project


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]