[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] Mark Hlawatschek <hlawatschek@atix.de>



rgmanager event scripting "RIND" v0.7

RIND is not dependencies.

Patch is against current RHEL5 branch of rgmanager and should apply.
Changes since 0.5 include:

* User request handling is centralized
* Recovery is centralized

Todo:

* Migration
* More testing
* clusvcadm doesn't get correct return codes yet
* Copyright / license stuff.  It all falls under the GPL v2, though.

Requirements:

* You need to install slang and slang-devel to build with this patch.

-- Lon
Index: include/event.h
===================================================================
RCS file: include/event.h
diff -N include/event.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ include/event.h	8 Nov 2007 21:09:00 -0000
@@ -0,0 +1,98 @@
+#ifndef _EVENT_H
+#define _EVENT_H
+
+typedef struct __rge_q {
+	char rg_name[128];
+	uint32_t rg_state;
+	uint32_t pad1;
+	int rg_owner;
+	int rg_last_owner;
+} group_event_t;
+
+typedef struct __ne_q {
+	int ne_local;
+	int ne_nodeid;
+	int ne_state;
+	int ne_clean;
+} node_event_t;
+
+typedef struct __cfg_q {
+	int cfg_version;
+	int cfg_oldversion;
+} config_event_t;
+
+typedef struct __user_q {
+	char u_name[128];
+	msgctx_t *u_ctx;
+	int u_request;
+	int u_arg1;
+	int u_arg2;
+	int u_target;		/* Node ID */
+} user_event_t;
+
+typedef enum {
+	EVENT_NONE=0,
+	EVENT_CONFIG,
+	EVENT_NODE,
+	EVENT_RG,
+	EVENT_USER
+} event_type_t;
+
+/* Data that's distributed which indicates which
+   node is the event master */
+typedef struct __rgm {
+	uint32_t m_magic;
+	uint32_t m_nodeid;
+	uint64_t m_master_time;
+	uint8_t  m_reserved[112];
+} event_master_t;
+
+#define swab_event_master_t(ptr) \
+{\
+	swab32((ptr)->m_nodeid);\
+	swab32((ptr)->m_magic);\
+	swab64((ptr)->m_master_time);\
+}
+
+#define EVENT_MASTER_MAGIC 0xfabab0de
+
+
+typedef struct _event {
+	/* Not used dynamically - part of config info */
+	list_head();
+	char *ev_name;
+	char *ev_script;
+	char *ev_script_file;
+	int ev_prio; 
+	int ev_pad;
+	/* --- end config part */
+	int ev_type;		/* config & generated by rgmanager*/
+	int ev_transaction;
+	union {
+		group_event_t group;
+		node_event_t node;
+		config_event_t config;
+		user_event_t user;
+	} ev;
+} event_t;
+
+
+#define EVENT_PRIO_COUNT 100
+
+typedef struct _event_table {
+	int max_prio;
+	int pad;
+	event_t *entries[0];
+} event_table_t;
+
+
+int construct_events(int ccsfd, event_table_t **);
+void deconstruct_events(event_table_t **);
+void print_events(event_table_t *);
+
+void config_event_q(int old_version, int new_version);
+void node_event_q(int local, int nodeID, int state, int clean);
+void rg_event_q(char *name, uint32_t state, int owner, int last);
+
+
+#endif
Index: include/resgroup.h
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/include/resgroup.h,v
retrieving revision 1.15.2.8
diff -u -p -r1.15.2.8 resgroup.h
--- include/resgroup.h	29 Jun 2007 19:22:11 -0000	1.15.2.8
+++ include/resgroup.h	8 Nov 2007 21:09:00 -0000
@@ -53,6 +53,10 @@ typedef struct {
 #define RG_MAGIC   0x11398fed
 
 #define RG_ACTION_REQUEST	/* Message header */ 0x138582
+/* Argument to RG_ACTION_REQUEST */
+#define RG_ACTION_MASTER	0xfe0db143
+#define RG_ACTION_USER		0x3f173bfd
+/* */
 #define RG_EVENT		0x138583
 
 /* Requests */
@@ -109,6 +113,7 @@ int handle_start_remote_req(char *svcNam
 #define DEFAULT_CHECK_INTERVAL		10
 
 const char *rg_state_str(int val);
+int rg_state_str_to_id(const char *val);
 const char *agent_op_str(int val);
 
 int eval_groups(int local, uint32_t nodeid, int nodeStatus);
@@ -130,7 +135,7 @@ int rt_enqueue_request(const char *resgr
        		       int max, uint32_t target, int arg0, int arg1);
 
 void send_response(int ret, int node, request_t *req);
-void send_ret(msgctx_t *ctx, char *name, int ret, int req);
+void send_ret(msgctx_t *ctx, char *name, int ret, int req, int newowner);
 
 /* do this op on all resource groups.  The handler for the request 
    will sort out whether or not it's a valid request given the state */
Index: include/reslist.h
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/include/reslist.h,v
retrieving revision 1.15.2.6
diff -u -p -r1.15.2.6 reslist.h
--- include/reslist.h	2 Aug 2007 14:46:51 -0000	1.15.2.6
+++ include/reslist.h	8 Nov 2007 21:09:01 -0000
@@ -138,7 +138,7 @@ typedef struct _fod_node {
 	list_head();
 	char	*fdn_name;
 	int	fdn_prio;
-	int	_pad_; /* align */
+	int	fdn_nodeid; /* on rhel4 this will be 64-bit int */
 } fod_node_t;
 
 typedef struct _fod {
Index: include/rg_queue.h
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/include/rg_queue.h,v
retrieving revision 1.6
diff -u -p -r1.6 rg_queue.h
--- include/rg_queue.h	19 Jul 2006 18:43:32 -0000	1.6
+++ include/rg_queue.h	8 Nov 2007 21:09:01 -0000
@@ -19,7 +19,7 @@ typedef struct _request {
 	uint32_t	rr_target;		/** Target node */
 	uint32_t	rr_arg0;		/** Integer argument */
 	uint32_t	rr_arg1;		/** Integer argument */
-	uint32_t	rr_arg3;		/** Integer argument */
+	uint32_t	rr_arg2;		/** Integer argument */
 	uint32_t	rr_line;		/** Line no */
 	msgctx_t *	rr_resp_ctx;		/** FD to send response */
 	char 		*rr_file;		/** Who made req */
@@ -42,5 +42,7 @@ int rq_queue_empty(request_t **q);
 void rq_free(request_t *foo);
 
 void forward_request(request_t *req);
+void forward_message(msgctx_t *ctx, void *msg, int nodeid);
+
 
 #endif
Index: include/sets.h
===================================================================
RCS file: include/sets.h
diff -N include/sets.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ include/sets.h	8 Nov 2007 21:09:01 -0000
@@ -0,0 +1,39 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+/**
+ @file sets.h - Header file for sets.c
+ @author Lon Hohberger <lhh at redhat.com>
+ */
+#ifndef _SETS_H
+#define _SETS_H
+
+/* #include <stdint.h> */
+typedef int set_type_t;
+
+int s_add(set_type_t *, int *, set_type_t);
+int s_union(set_type_t *, int, set_type_t *,
+	    int, set_type_t **, int *);
+
+int s_intersection(set_type_t *, int, set_type_t *,
+		   int, set_type_t **, int *);
+int s_delta(set_type_t *, int, set_type_t *,
+	    int, set_type_t **, int *);
+int s_subtract(set_type_t *, int, set_type_t *, int, set_type_t **, int *);
+int s_shuffle(set_type_t *, int);
+
+#endif
Index: src/clulib/Makefile
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/Makefile,v
retrieving revision 1.10.2.3
diff -u -p -r1.10.2.3 Makefile
--- src/clulib/Makefile	24 Jul 2007 13:53:08 -0000	1.10.2.3
+++ src/clulib/Makefile	8 Nov 2007 21:09:01 -0000
@@ -34,7 +34,7 @@ msgtest: msgtest.o libclulib.a
 libclulib.a: clulog.o daemon_init.o signals.o msgsimple.o \
 		gettid.o rg_strings.o message.o members.o fdops.o \
 		lock.o cman.o vft.o msg_cluster.o msg_socket.o \
-		wrap_lock.o tmgr.o
+		wrap_lock.o tmgr.o sets.o
 	${AR} cru $@ $^
 	ranlib $@
 
Index: src/clulib/members.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/members.c,v
retrieving revision 1.4
diff -u -p -r1.4 members.c
--- src/clulib/members.c	27 Sep 2006 16:28:41 -0000	1.4
+++ src/clulib/members.c	8 Nov 2007 21:09:01 -0000
@@ -233,6 +233,50 @@ member_set_state(int nodeid, int state)
 
 
 int
+member_low_id(void)
+{
+	int idx, lowest = -1;
+
+	/* Smallest node ID flagged cn_member; -1 without a list or members */
+	pthread_rwlock_wrlock(&memblock);
+	if (membership) {
+		for (idx = 0; idx < membership->cml_count; idx++) {
+			if (!membership->cml_members[idx].cn_member)
+				continue;
+			if (lowest == -1 ||
+			    membership->cml_members[idx].cn_nodeid < lowest)
+				lowest = membership->cml_members[idx].cn_nodeid;
+		}
+	}
+	pthread_rwlock_unlock(&memblock);
+
+	return lowest;
+}
+
+
+int
+member_high_id(void)
+{
+	int idx, highest = -1;
+
+	/* Largest node ID among entries flagged cn_member; stays -1 when
+	   there is no membership list or no entry exceeds -1 */
+	pthread_rwlock_wrlock(&memblock);
+	if (membership) {
+		for (idx = 0; idx < membership->cml_count; idx++) {
+			if (!membership->cml_members[idx].cn_member)
+				continue;
+			if (membership->cml_members[idx].cn_nodeid > highest)
+				highest = membership->cml_members[idx].cn_nodeid;
+		}
+	}
+	pthread_rwlock_unlock(&memblock);
+
+	return highest;
+}
+
+
+int
 member_online(int nodeid)
 {
 	int x = 0, ret = 0;
Index: src/clulib/rg_strings.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/rg_strings.c,v
retrieving revision 1.5.2.5
diff -u -p -r1.5.2.5 rg_strings.c
--- src/clulib/rg_strings.c	31 Jul 2007 17:54:54 -0000	1.5.2.5
+++ src/clulib/rg_strings.c	8 Nov 2007 21:09:01 -0000
@@ -126,6 +126,21 @@ rg_search_table(const struct string_val 
 }
 
 
+static inline int
+rg_search_table_by_str(const struct string_val *table, const char *val)
+{
+	const struct string_val *entry;
+
+	/* Case-insensitive lookup; the table ends at the first NULL str */
+	for (entry = table; entry->str != NULL; entry++)
+		if (strcasecmp(entry->str, val) == 0)
+			return entry->val;
+
+	return -1;
+}
+
+
+
 const char *
 rg_strerror(int val)
 {
@@ -139,6 +154,14 @@ rg_state_str(int val)
 }
 
 
+int
+rg_state_str_to_id(const char *val)
+{
+	return rg_search_table_by_str(rg_state_strings, val); /* -1 if no entry matches */
+}
+
+
+
 const char *
 rg_req_str(int val)
 {
Index: src/clulib/sets.c
===================================================================
RCS file: src/clulib/sets.c
diff -N src/clulib/sets.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/clulib/sets.c	8 Nov 2007 21:09:01 -0000
@@ -0,0 +1,370 @@
+/*
+  Copyright Red Hat, Inc. 2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as published
+  by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+/**
+ @file sets.c - Order-preserving set functions (union / intersection / delta)
+                (designed for integer types; a la int, uint64_t, etc...)
+ @author Lon Hohberger <lhh at redhat.com>
+ */
+#include <stdio.h>
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sets.h>
+#include <sys/time.h>
+
+
+/**
+ Add a value to a set.  This function disregards an add if the value is already
+ in the set.  Note that the maximum length of set s must be preallocated; this
+ function doesn't do error or bounds checking. 
+
+ @param s		Set to modify
+ @param curlen		Current length (modified if added)
+ @param val		Value to add
+ @return		0 if not added, 1 if added
+ */
+int
+s_add(set_type_t *s, int *curlen, set_type_t val)
+{
+	int pos, len = *curlen;
+
+	for (pos = 0; pos < len; pos++)
+		if (val == s[pos])
+			return 0;	/* already a member: leave set untouched */
+	s[len] = val;			/* caller guarantees room for one more */
+	*curlen = len + 1;
+	return 1;
+}
+
+
+/**
+ Union-set function.  Allocates and returns a new set which is the union of
+ the two given sets 'left' and 'right'.  Also returns the new set length.
+
+ @param left		Left set - order is preserved on this set; that is,
+			this is the set where the caller cares about ordering.
+ @param ll		Length of left set.
+ @param right		Right set - order is not preserved on this set during
+			the union operation
+ @param rl		Length of right set
+ @param ret		Return set.  Should * not * be preallocated.
+ @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+ @return 		0 on success, -1 on error
+ */
+int
+s_union(set_type_t *left, int ll, set_type_t *right, int rl,
+	set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0, total;
+
+	total = ll + rl; /* Union will never exceed both sets */
+
+	/* Zero-filled result buffer.  Request at least one element so
+	   that two empty inputs are not reported as an allocation
+	   failure (a zero-size allocation may legitimately return
+	   NULL). */
+	*ret = calloc(total > 0 ? total : 1, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* Add all the ones on the left first; this keeps their order */
+	for (l = 0; l < ll; l++)
+		s_add(*ret, &cnt, left[l]);
+
+	/* Then add the ones on the right that are not already present */
+	for (r = 0; r < rl; r++)
+		s_add(*ret, &cnt, right[r]);
+
+	*retl = cnt;
+
+	return 0;
+}
+
+
+/**
+ Intersection-set function.  Allocates and returns a new set which is the 
+ intersection of the two given sets 'left' and 'right'.  Also returns the new
+ set length.
+
+ @param left		Left set - order is preserved on this set; that is,
+			this is the set where the caller cares about ordering.
+ @param ll		Length of left set.
+ @param right		Right set - order is not preserved on this set during
+			the intersection operation
+ @param rl		Length of right set
+ @param ret		Return set.  Should * not * be preallocated.
+ @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+ @return 		0 on success, -1 on error
+ */
+int
+s_intersection(set_type_t *left, int ll, set_type_t *right, int rl,
+	       set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0, total;
+
+	total = ll; /* Intersection will never exceed one of the two set
+		       sizes */
+
+	/* Zero-filled result buffer; request at least one element so an
+	   empty left set is not reported as an allocation failure
+	   (a zero-size allocation may legitimately return NULL). */
+	*ret = calloc(total > 0 ? total : 1, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* Walk the left set in order, keeping each value found on the right */
+	for (l = 0; l < ll; l++) {
+		for (r = 0; r < rl; r++) {
+			if (left[l] != right[r])
+				continue;
+			s_add(*ret, &cnt, left[l]); /* no-op if already kept */
+			break;
+		}
+	}
+
+	*retl = cnt;
+	return 0;
+}
+
+
+/**
+ Delta-set function.  Allocates and returns a new set which is the delta (i.e.
+ numbers not in both sets) of the two given sets 'left' and 'right'.  Also
+ returns the new set length.
+
+ @param left		Left set - order is preserved on this set; that is,
+			this is the set where the caller cares about ordering.
+ @param ll		Length of left set.
+ @param right		Right set - order is not preserved on this set during
+			the delta operation
+ @param rl		Length of right set
+ @param ret		Return set.  Should * not * be preallocated.
+ @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+ @return 		0 on success, -1 on error
+ */
+int
+s_delta(set_type_t *left, int ll, set_type_t *right, int rl,
+	set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0, total, found;
+
+	total = ll + rl; /* Delta will never exceed both sets combined */
+
+	/* Zero-filled result buffer.  Request at least one element so
+	   that two empty inputs are not reported as an allocation
+	   failure (a zero-size allocation may legitimately return
+	   NULL). */
+	*ret = calloc(total > 0 ? total : 1, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* not efficient, but it works */
+	/* Add all the ones on the left that are absent from the right */
+	for (l = 0; l < ll; l++) {
+		found = 0;
+		for (r = 0; r < rl; r++) {
+			if (right[r] == left[l]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found)
+			continue;
+		s_add(*ret, &cnt, left[l]);
+	}
+
+
+	/* Add all the ones on the right that are absent from the left */
+	for (r = 0; r < rl; r++) {
+		found = 0;
+		for (l = 0; l < ll; l++) {
+			if (right[r] == left[l]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found)
+			continue;
+		s_add(*ret, &cnt, right[r]);
+	}
+
+	*retl = cnt;
+
+	return 0;
+}
+
+
+/**
+ Subtract-set function.  Allocates and returns a new set which is the
+ subtraction of the right set from the left set.
+ Also returns the new set length.
+
+ @param left		Left set - order is preserved on this set; that is,
+			this is the set where the caller cares about ordering.
+ @param ll		Length of left set.
+ @param right		Right set - order is not preserved on this set during
+			the subtract operation
+ @param rl		Length of right set
+ @param ret		Return set.  Should * not * be preallocated.
+ @param retl		Return set length.  Should be ready to accept 1 integer
+			upon calling this function
+ @return 		0 on success, -1 on error
+ */
+int
+s_subtract(set_type_t *left, int ll, set_type_t *right, int rl,
+	   set_type_t **ret, int *retl)
+{
+	int l, r, cnt = 0, total, found;
+
+	total = ll; /* Result will never exceed left set length */
+
+	/* Zero-filled result buffer.  Request at least one element so
+	   that an empty left set is not reported as an allocation
+	   failure (a zero-size allocation may legitimately return
+	   NULL). */
+	*ret = calloc(total > 0 ? total : 1, sizeof(set_type_t));
+	if (!*ret)
+		return -1;
+
+	/* not efficient, but it works: keep left values absent from right */
+	for (l = 0; l < ll; l++) {
+		found = 0;
+		for (r = 0; r < rl; r++) {
+			if (right[r] == left[l]) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found)
+			continue;
+		s_add(*ret, &cnt, left[l]);
+	}
+
+	*retl = cnt;
+
+	return 0;
+}
+
+
+/**
+ Shuffle-set function.  Weakly randomizes ordering of a set in-place.
+
+ @param set		Set to randomize
+ @param sl		Length of set
+ @return		0
+ */
+int
+s_shuffle(set_type_t *set, int sl)
+{
+	int x, newidx;
+	unsigned r_state = 0;
+	set_type_t t;
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	r_state = (int)(tv.tv_usec);
+
+	for (x = 0; x < sl; x++) {
+		newidx = (rand_r(&r_state) % sl);
+		if (newidx == x)
+			continue;
+		t = set[x];
+		set[x] = set[newidx];
+		set[newidx] = t;
+	}
+
+	return 0;
+}
+
+
+#ifdef STANDALONE
+/* Testbed */
+/*
+  gcc -o sets sets.c -DSTANDALONE -ggdb -I../../include \
+       -Wall -Werror -Wstrict-prototypes -Wextra
+ */
+int
+main(int __attribute__ ((unused)) argc, char __attribute__ ((unused)) **argv)
+{
+	set_type_t a[] = { 1, 2, 3, 3, 3, 2, 2, 3 };
+	set_type_t b[] = { 2, 3, 4 };
+	set_type_t *i;
+	int ilen = 0, x;
+
+	s_union(a, 8, b, 3, &i, &ilen);
+
+	/* Should return length of 4 - { 1 2 3 4 } */
+	printf("set_union [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	s_shuffle(i, ilen);
+	printf("shuffled [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+
+	free(i);
+
+	/* Should return length of 2 - { 2 3 } */
+	s_intersection(a, 8, b, 3, &i, &ilen);
+
+	printf("set_intersection [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	free(i);
+
+	/* Should return length of 2 - { 1 4 } */
+	s_delta(a, 8, b, 3, &i, &ilen);
+
+	printf("set_delta [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	free(i);
+
+	/* Should return length of 1 - { 1 } */
+	s_subtract(a, 8, b, 3, &i, &ilen);
+
+	printf("set_subtract [%d] = ", ilen);
+	for ( x = 0; x < ilen; x++) {
+		printf("%d ", (int)i[x]);
+	}
+	printf("\n");
+
+	free(i);
+
+
+	return 0;
+}
+#endif
Index: src/clulib/vft.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/vft.c,v
retrieving revision 1.17.2.3
diff -u -p -r1.17.2.3 vft.c
--- src/clulib/vft.c	24 Jul 2007 13:53:08 -0000	1.17.2.3
+++ src/clulib/vft.c	8 Nov 2007 21:09:01 -0000
@@ -1368,6 +1368,7 @@ vf_process_msg(msgctx_t *ctx, int nodeid
 {
 	vf_msg_t *hdrp;
 	int ret;
+	key_node_t *kn;
 
 	if ((nbytes <= 0) || (nbytes < sizeof(generic_msg_hdr)) ||
 	    (msgp->gh_command != VF_MESSAGE))
@@ -1422,8 +1423,13 @@ vf_process_msg(msgctx_t *ctx, int nodeid
 #endif
 		pthread_mutex_lock(&key_list_mutex);
 		vf_buffer_commit(msgp->gh_arg2);
-		ret = (vf_resolve_views(kn_find_trans(msgp->gh_arg2)) ?
-			VFR_COMMIT : VFR_OK);
+		kn = kn_find_trans(msgp->gh_arg2);
+		if (!kn) {
+			pthread_mutex_unlock(&key_list_mutex);
+			return VFR_OK;
+		}
+
+		ret = (vf_resolve_views(kn) ? VFR_COMMIT : VFR_OK);
 		pthread_mutex_unlock(&key_list_mutex);
 		return ret;
 
Index: src/daemons/Makefile
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/Makefile,v
retrieving revision 1.14.2.3
diff -u -p -r1.14.2.3 Makefile
--- src/daemons/Makefile	24 Jul 2007 13:53:08 -0000	1.14.2.3
+++ src/daemons/Makefile	8 Nov 2007 21:09:01 -0000
@@ -37,9 +37,14 @@ uninstall:
 
 clurgmgrd: rg_thread.o rg_locks.o main.o groups.o  \
 		rg_queue.o rg_forward.o reslist.o \
-		resrules.o restree.o fo_domain.o nodeevent.o \
-		rg_event.o watchdog.o rg_state.o ../clulib/libclulib.a
-	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs -lcman -lpthread -ldlm
+		resrules.o restree.o fo_domain.o  \
+		rg_event.o watchdog.o rg_state.o event_config.o \
+		slang_event.o service_op.o ../clulib/libclulib.a
+	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs -lcman -lpthread -ldlm -lslang
+
+rg_script_test: slang_event.o
+	$(CC) -o rg_script_test slang_event.o -I/usr/include/slang $(INCLUDE) $(CFLAGS) -lslang $(LDFLAGS)
+
 
 #
 # Our test program links against the local allocator so that
@@ -56,7 +61,8 @@ clurgmgrd: rg_thread.o rg_locks.o main.o
 # packages should run 'make check' as part of the build process.
 #
 rg_test: rg_locks-noccs.o test-noccs.o reslist-noccs.o \
-		resrules-noccs.o restree-noccs.o fo_domain-noccs.o
+		resrules-noccs.o restree-noccs.o fo_domain-noccs.o \
+		event_config-noccs.o
 	$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) -llalloc $(LDFLAGS) -lccs -lcman
 
 clurmtabd: clurmtabd.o clurmtabd_lib.o
Index: src/daemons/event_config.c
===================================================================
RCS file: src/daemons/event_config.c
diff -N src/daemons/event_config.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/daemons/event_config.c	8 Nov 2007 21:09:01 -0000
@@ -0,0 +1,540 @@
+
+/**
+  Copyright Red Hat, Inc. 2002-2007
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the
+  Free Software Foundation; either version 2, or (at your option) any
+  later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge,
+  MA 02139, USA.
+*/
+/** @file
+ * CCS event parsing, based on failover domain parsing
+ */
+#include <string.h>
+#include <list.h>
+#include <clulog.h>
+#include <resgroup.h>
+#include <reslist.h>
+#include <ccs.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <members.h>
+#include <reslist.h>
+#include <ctype.h>
+#include <event.h>
+
+#define CONFIG_NODE_ID_TO_NAME \
+   "/cluster/clusternodes/clusternode[ nodeid=\"%d\"]/@name"
+#define CONFIG_NODE_NAME_TO_ID \
+   "/cluster/clusternodes/clusternode[ name=\"%s\"]/@nodeid"
+
+void deconstruct_events(event_table_t **);
+void print_event(event_t *ev);
+
+//#define DEBUG
+
+#ifdef DEBUG
+#define ENTER() clulog(LOG_DEBUG, "ENTER: %s\n", __FUNCTION__)
+#define RETURN(val) {\
+	clulog(LOG_DEBUG, "RETURN: %s line=%d value=%d\n", __FUNCTION__, \
+	       __LINE__, (val));\
+	return(val);\
+}
+#else
+#define ENTER()
+#define RETURN(val) return(val)
+#endif
+
+#ifdef NO_CCS
+#define ccs_get(fd, query, ret) conf_get(query, ret)
+#endif
+
+/*
+   <events>
+     <event name="helpful_name_here" class="node"
+            node="nodeid|nodename" nodestate="up|down">
+	    slang_script_stuff();
+	    start_service();
+     </event>
+   </events>
+ */
+int
+event_match(event_t *pattern, event_t *actual)
+{
+	if (pattern->ev_type != EVENT_NONE &&
+	    actual->ev_type != pattern->ev_type)
+		return 0;
+
+	/* If there's no event class specified, the rest is
+	   irrelevant */
+	if (pattern->ev_type == EVENT_NONE)
+		return 1;
+
+	switch(pattern->ev_type) {
+	case EVENT_NODE:
+		if (pattern->ev.node.ne_nodeid >= 0 &&
+		    actual->ev.node.ne_nodeid !=
+				pattern->ev.node.ne_nodeid) {
+			return 0;
+		}
+		if (pattern->ev.node.ne_local >= 0 && 
+		    actual->ev.node.ne_local !=
+				pattern->ev.node.ne_local) {
+			return 0;
+		}
+		if (pattern->ev.node.ne_state >= 0 && 
+		    actual->ev.node.ne_state !=
+				pattern->ev.node.ne_state) {
+			return 0;
+		}
+		if (pattern->ev.node.ne_clean >= 0 && 
+		    actual->ev.node.ne_clean !=
+				pattern->ev.node.ne_clean) {
+			return 0;
+		}
+		return 1; /* All specified params match */
+	case EVENT_RG:
+		if (pattern->ev.group.rg_name[0] &&
+		    strcasecmp(actual->ev.group.rg_name, 
+			       pattern->ev.group.rg_name)) {
+			return 0;
+		}
+		if (pattern->ev.group.rg_state != (uint32_t)-1 && 
+		    actual->ev.group.rg_state !=
+				pattern->ev.group.rg_state) {
+			return 0;
+		}
+		if (pattern->ev.group.rg_owner >= 0 && 
+		    actual->ev.group.rg_owner !=
+				pattern->ev.group.rg_owner) {
+			return 0;
+		}
+		return 1;
+	case EVENT_CONFIG:
+		if (pattern->ev.config.cfg_version >= 0 && 
+		    actual->ev.config.cfg_version !=
+				pattern->ev.config.cfg_version) {
+			return 0;
+		}
+		if (pattern->ev.config.cfg_oldversion >= 0 && 
+		    actual->ev.config.cfg_oldversion !=
+				pattern->ev.config.cfg_oldversion) {
+			return 0;
+		}
+		return 1;
+	case EVENT_USER:
+		if (pattern->ev.user.u_name[0] &&
+		    strcasecmp(actual->ev.user.u_name, 
+			       pattern->ev.user.u_name)) {
+			return 0;
+		}
+		if (pattern->ev.user.u_request != 0 && 
+		    actual->ev.user.u_request !=
+				pattern->ev.user.u_request) {
+			return 0;
+		}
+		if (pattern->ev.user.u_target != 0 && 
+		    actual->ev.user.u_target !=
+				pattern->ev.user.u_target) {
+			return 0;
+		}
+		return 1;
+	default:
+		break;
+	}
+			
+	return 0;
+}
+
+
+char *
+ccs_node_id_to_name(int ccsfd, int nodeid)
+{
+	char path[256], *name = NULL;
+
+	/* Query the clusternode @name for this nodeid; NULL if ccs_get fails */
+	snprintf(path, sizeof(path), CONFIG_NODE_ID_TO_NAME, nodeid);
+	if (ccs_get(ccsfd, path, &name) != 0)
+		return NULL;
+	return name;
+}
+
+
+int
+ccs_node_name_to_id(int ccsfd, char *name)
+{
+	char path[256], *idstr = NULL;
+	int nodeid;
+
+	/* Query the clusternode @nodeid for this name and convert it with
+	   atoi(); returns 0 when the ccs_get lookup fails */
+	snprintf(path, sizeof(path), CONFIG_NODE_NAME_TO_ID, name);
+	if (ccs_get(ccsfd, path, &idstr) != 0)
+		return 0;
+	nodeid = atoi(idstr);
+	free(idstr);
+	return nodeid;
+}
+
+
+static void
+deconstruct_event(event_t *ev)
+{
+	/* Release every owned string (free(NULL) is a no-op), then ev itself */
+	free(ev->ev_script);
+	free(ev->ev_script_file);	/* was leaked: set by get_event/get_default_event */
+	free(ev->ev_name);
+	free(ev);
+}
+
+
+static int
+get_node_event(int ccsfd, char *base, event_t *ev)
+{
+	char xpath[256], *ret = NULL;
+
+	/* Clear out the possibilities */
+	ev->ev.node.ne_nodeid = -1;
+	ev->ev.node.ne_local = -1;
+	ev->ev.node.ne_state = -1;
+	ev->ev.node.ne_clean = -1;
+
+	snprintf(xpath, sizeof(xpath), "%s/@node_id", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev.node.ne_nodeid = atoi(ret);
+		free(ret);
+		if (ev->ev.node.ne_nodeid <= 0)
+			return -1;
+	} else {
+		/* See if there's a node name */
+		snprintf(xpath, sizeof(xpath), "%s/@node", base);
+		if (ccs_get(ccsfd, xpath, &ret) == 0) {
+			ev->ev.node.ne_nodeid =
+				ccs_node_name_to_id(ccsfd, ret);
+			free(ret);
+			if (ev->ev.node.ne_nodeid <= 0)
+				return -1;
+		}
+	}
+
+	snprintf(xpath, sizeof(xpath), "%s/@node_state", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		if (!strcasecmp(ret, "up")) {
+			ev->ev.node.ne_state = 1;
+		} else if (!strcasecmp(ret, "down")) {
+			ev->ev.node.ne_state = 0;
+		} else {
+			ev->ev.node.ne_state = !!atoi(ret);
+		}
+		free(ret);
+	}
+
+	snprintf(xpath, sizeof(xpath), "%s/@node_clean", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev.node.ne_clean = !!atoi(ret);
+		free(ret);
+	}
+
+	snprintf(xpath, sizeof(xpath), "%s/@node_local", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev.node.ne_local = !!atoi(ret);
+		free(ret);
+	}
+
+	return 0;
+}
+
+
+static int
+get_rg_event(int ccsfd, char *base, event_t *ev)
+{
+	char xpath[256], *ret = NULL;
+
+	/* Service-event pattern: fields left at these defaults match anything */
+	ev->ev.group.rg_name[0] = 0;
+	ev->ev.group.rg_state = (uint32_t)-1;
+	ev->ev.group.rg_owner = -1;
+
+	snprintf(xpath, sizeof(xpath), "%s/@service", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		snprintf(ev->ev.group.rg_name,	/* always NUL-terminated */
+			 sizeof(ev->ev.group.rg_name), "%s", ret);
+		free(ret);
+		if (!strlen(ev->ev.group.rg_name)) {
+			return -1;
+		}
+	}
+
+	snprintf(xpath, sizeof(xpath), "%s/@service_state", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		if (!isdigit((unsigned char)ret[0])) {
+			ev->ev.group.rg_state =
+				rg_state_str_to_id(ret);
+		} else {
+			ev->ev.group.rg_state = atoi(ret);
+		}
+		free(ret);
+	}
+
+	snprintf(xpath, sizeof(xpath), "%s/@service_owner", base);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		if (!isdigit((unsigned char)ret[0])) {
+			ev->ev.group.rg_owner =
+				ccs_node_name_to_id(ccsfd, ret);
+		} else {
+			ev->ev.group.rg_owner = atoi(ret); /* node ID, not a boolean */
+		}
+		free(ret);
+	}
+
+	return 0;
+}
+
+
+static int
+get_config_event(int ccsfd, char *base, event_t *ev)
+{
+	//char xpath[256], *ret = NULL;
+	return -1;
+}
+
+
+static event_t *
+get_event(int ccsfd, char *base, int idx, int *_done)
+{
+	event_t *ev;
+	char xpath[256];
+	char *ret = NULL;
+
+	*_done = 0;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@name",
+		 base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) != 0) {
+		*_done = 1;
+		return NULL;
+	}
+
+	ev = malloc(sizeof(*ev));
+	if (!ev)
+		return NULL;
+	memset(ev, 0, sizeof(*ev));
+	ev->ev_name = ret;
+
+	/* Get the script file / inline from config */
+	ret = NULL;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@file",
+		 base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev_script_file = ret;
+	} else {
+		snprintf(xpath, sizeof(xpath), "%s/event[%d]",
+		         base, idx);
+		if (ccs_get(ccsfd, xpath, &ret) == 0) {
+			ev->ev_script = ret;
+		} else {
+			goto out_fail;
+		}
+	}
+
+	/* Get the priority ordering (must be nonzero) */
+	ev->ev_prio = 99;
+	ret = NULL;
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@priority",
+		 base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev_prio = atoi(ret);
+		if (ev->ev_prio <= 0 || ev->ev_prio > EVENT_PRIO_COUNT) {
+			clulog(LOG_ERR,
+			       "event %s: priority %s invalid\n",
+			       ev->ev_name, ret);
+			goto out_fail;
+		}
+		free(ret);
+	}
+
+	/* Get the event class */
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]/@class",
+		 base, idx);
+	ret = NULL;
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		snprintf(xpath, sizeof(xpath), "%s/event[%d]",
+		 	 base, idx);
+		if (!strcasecmp(ret, "node")) {
+			ev->ev_type = EVENT_NODE;
+			if (get_node_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else if (!strcasecmp(ret, "service") ||
+			   !strcasecmp(ret, "resource") ||
+			   !strcasecmp(ret, "rg") ) {
+			ev->ev_type = EVENT_RG;
+			if (get_rg_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else if (!strcasecmp(ret, "config") ||
+			   !strcasecmp(ret, "reconfig")) {
+			ev->ev_type = EVENT_CONFIG;
+			if (get_config_event(ccsfd, xpath, ev) < 0)
+				goto out_fail;
+		} else {
+			clulog(LOG_ERR,
+			       "event %s: class %s unrecognized\n",
+			       ev->ev_name, ret);
+			goto out_fail;
+		}
+
+		free(ret);
+		ret = NULL;
+	}
+
+	return ev;
+out_fail:
+	if (ret)
+		free(ret);
+	deconstruct_event(ev);
+	return NULL;
+}
+
+
+static event_t *
+get_default_event(void)
+{
+	event_t *ev;
+	char xpath[1024];
+
+	ev = malloc(sizeof(*ev));
+	if (!ev)
+		return NULL;
+	memset(ev, 0, sizeof(*ev));
+	ev->ev_name = strdup("Default");
+
+	/* Get the script file / inline from config */
+	snprintf(xpath, sizeof(xpath), "%s/default_event_script.sl",
+		 RESOURCE_ROOTDIR);
+
+	ev->ev_prio = 100;
+	ev->ev_type = EVENT_NONE;
+	ev->ev_script_file = strdup(xpath);
+	if (!ev->ev_script_file || ! ev->ev_name) {
+		deconstruct_event(ev);
+		return NULL;
+	}
+
+	return ev;
+}
+
+
+/**
+ * Build the event table (similar API to failover domain); 0 on success, -1 if the table cannot be allocated
+ */
+int
+construct_events(int ccsfd, event_table_t **events)
+{
+	char xpath[256];
+	event_t *ev;
+	int x = 1, done = 0;
+
+	/* entries[] holds one list-head POINTER per priority (0..max_prio) */
+	*events = malloc(sizeof(event_table_t) +
+			 sizeof(event_t *) * (EVENT_PRIO_COUNT+1));
+	if (!*events)
+		return -1;
+	memset(*events, 0, sizeof(event_table_t) +
+			   sizeof(event_t *) * (EVENT_PRIO_COUNT+1));
+	(*events)->max_prio = EVENT_PRIO_COUNT;
+
+	snprintf(xpath, sizeof(xpath),
+		 RESOURCE_TREE_ROOT "/events");
+
+	do {	/* event[] queries start at index 1; get_event sets done */
+		ev = get_event(ccsfd, xpath, x++, &done);
+		if (ev)
+			list_insert(&((*events)->entries[ev->ev_prio]), ev);
+	} while (!done);
+
+	ev = get_default_event();	/* EVENT_NONE pattern: matches any event */
+	if (ev)
+		list_insert(&((*events)->entries[ev->ev_prio]), ev);
+
+	return 0;
+}
+
+
+void
+print_event(event_t *ev)
+{
+	printf("  Name: %s\n", ev->ev_name);
+
+	switch(ev->ev_type) {
+	case EVENT_NODE:
+		printf("    Node %d State %d\n", ev->ev.node.ne_nodeid,
+		       ev->ev.node.ne_state);
+		break;
+	case EVENT_RG:
+		printf("    RG %s State %s\n", ev->ev.group.rg_name,
+		       rg_state_str(ev->ev.group.rg_state));
+		break;
+	case EVENT_CONFIG:
+		printf("    Config change - unsupported\n");
+		break;
+	default:
+		printf("    (Any event)\n");
+		break;
+	}
+	
+	if (ev->ev_script) {
+		printf("    Inline script.\n");
+	} else {
+		printf("    File: %s\n", ev->ev_script_file);
+	}
+}
+
+
+void
+print_events(event_table_t *events)
+{
+	int x, y;
+	event_t *ev;
+
+	for (x = 0; x <= events->max_prio; x++) {
+		if (!events->entries[x])
+			continue;
+		printf("Event Priority Level %d:\n", x);
+		list_for(&(events->entries[x]), ev, y) {
+			print_event(ev);
+		}
+	}
+}
+
+
+void
+deconstruct_events(event_table_t **eventsp)
+{
+	int x;
+	event_table_t *events = *eventsp;
+	event_t *ev = NULL;
+
+	if (!events)
+		return;
+
+	for (x = 0; x <= events->max_prio; x++) {
+		while ((ev = (events->entries[x]))) {
+			list_remove(&(events->entries[x]), ev);
+			deconstruct_event(ev);
+		}
+	}
+
+	free(events);
+	*eventsp = NULL;
+}
+
+
Index: src/daemons/fo_domain.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/fo_domain.c,v
retrieving revision 1.11
diff -u -p -r1.11 fo_domain.c
--- src/daemons/fo_domain.c	27 Sep 2006 16:28:41 -0000	1.11
+++ src/daemons/fo_domain.c	8 Nov 2007 21:09:01 -0000
@@ -33,6 +33,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <members.h>
+#include <sets.h>
 
 
 //#define DEBUG
@@ -95,6 +96,23 @@ get_node(int ccsfd, char *base, int idx,
 	fodn->fdn_name = ret;
 	fodn->fdn_prio = 0;
 
+	snprintf(xpath, sizeof(xpath),
+		 "/cluster/clusternodes/clusternode[ name=\"%s\"]/@nodeid",
+		 ret);
+	if (ccs_get(ccsfd, xpath, &ret) != 0) {
+		clulog(LOG_WARNING, "Node %s has no nodeid attribute\n",
+		       fodn->fdn_name);
+		fodn->fdn_nodeid = -1;
+	} else {
+		/* 64-bit-ism on rhel4? */
+		fodn->fdn_nodeid = atoi(ret);
+		free(ret);	/* ccs_get() result is ours to release */
+	}
+
+	/* Unordered domains ignore priority (already set to 0 above) */
+	if (!(domain->fd_flags & FOD_ORDERED))
+		return fodn;
+
 	snprintf(xpath, sizeof(xpath), "%s/failoverdomainnode[%d]/@priority",
 		 base, idx);
 	if (ccs_get(ccsfd, xpath, &ret) != 0)
@@ -227,6 +245,11 @@ print_domains(fod_t **domains)
 {
 	fod_t *fod;
 	fod_node_t *fodn = NULL;
+	/*
+	int x;
+	int *node_set = NULL;
+	int node_set_len = 0;
+	 */
 
 	list_do(domains, fod) {
 		printf("Failover domain: %s\n", fod->fd_name);
@@ -244,9 +267,21 @@ print_domains(fod_t **domains)
 		}
 
 		list_do(&fod->fd_nodes, fodn) {
-			printf("  Node %s (priority %d)\n",
-			       fodn->fdn_name, fodn->fdn_prio);
+			printf("  Node %s (id %d, priority %d)\n",
+			       fodn->fdn_name, fodn->fdn_nodeid,
+			       fodn->fdn_prio);
 		} while (!list_done(&fod->fd_nodes, fodn));
+
+		/*
+		node_domain_set(fod, &node_set, &node_set_len);
+		printf("  Failover Order = {");
+		for (x = 0; x < node_set_len; x++) {
+			printf(" %d ", node_set[x]);
+		}
+		free(node_set);
+		printf("}\n");
+		*/
+		
 	} while (!list_done(domains, fod));
 }
 
@@ -312,6 +347,70 @@ node_in_domain(char *nodename, fod_t *do
 }
 
 
+/* Build the failover order of a domain as a malloc()ed array of node
+   IDs: priorities 1..100 ascending (ordered domains only), then the
+   unprioritized nodes, shuffling nodes that share a level.  Returns 0
+   (caller frees *ret) or -1 if an allocation fails. */
+int
+node_domain_set(fod_t *domain, int **ret, int *retlen)
+{
+	int x, i, j, ts_count;
+	int *tmpset;
+	fod_node_t *fodn;
+
+	/* Count domain length */
+	list_for(&domain->fd_nodes, fodn, x) { }
+
+	*retlen = 0;
+	*ret = malloc(sizeof(int) * x);
+	if (!(*ret))
+		return -1;
+	tmpset = malloc(sizeof(int) * x);
+	if (!tmpset) {
+		/* Test the pointer, not *tmpset; don't leak *ret */
+		free(*ret);
+		*ret = NULL;
+		return -1;
+	}
+
+	if (domain->fd_flags & FOD_ORDERED) {
+		for (i = 1; i <= 100; i++) {
+			ts_count = 0;
+			list_for(&domain->fd_nodes, fodn, x) {
+				if (fodn->fdn_prio == i) {
+					s_add(tmpset, &ts_count,
+					      fodn->fdn_nodeid);
+				}
+			}
+			if (!ts_count)
+				continue;
+
+			/* Shuffle stuff at this prio level */
+			if (ts_count > 1)
+				s_shuffle(tmpset, ts_count);
+			for (j = 0; j < ts_count; j++)
+				s_add(*ret, retlen, tmpset[j]);
+		}
+	}
+
+	/* Add unprioritized nodes */
+	ts_count = 0;
+	list_for(&domain->fd_nodes, fodn, x) {
+		if (!fodn->fdn_prio)
+			s_add(tmpset, &ts_count, fodn->fdn_nodeid);
+	}
+
+	/* Shuffle them too; a zero count skips both steps below */
+	if (ts_count > 1)
+		s_shuffle(tmpset, ts_count);
+	for (j = 0; j < ts_count; j++)
+		s_add(*ret, retlen, tmpset[j]);
+
+	free(tmpset);	/* scratch buffer; released on every exit */
+	return 0;
+}
+
+
 /**
  * See if a given nodeid should start a specified service svcid.
  *
Index: src/daemons/groups.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/groups.c,v
retrieving revision 1.25.2.12
diff -u -p -r1.25.2.12 groups.c
--- src/daemons/groups.c	2 Aug 2007 14:46:51 -0000	1.25.2.12
+++ src/daemons/groups.c	8 Nov 2007 21:09:01 -0000
@@ -29,6 +29,7 @@
 #include <list.h>
 #include <reslist.h>
 #include <assert.h>
+#include <event.h>
 
 /* Use address field in this because we never use it internally,
    and there is no extra space in the cman_node_t type.
@@ -37,6 +38,8 @@
 #define cn_svccount cn_address.cna_address[0] /* Theses are uint8_t size */
 #define cn_svcexcl  cn_address.cna_address[1]
 
+extern event_table_t *master_event_table;
+
 static int config_version = 0;
 static resource_t *_resources = NULL;
 static resource_rule_t *_rules = NULL;
@@ -82,6 +85,32 @@ node_should_start_safe(uint32_t nodeid, 
 
 
 int
+node_domain_set_safe(char *domainname, int **ret, int *retlen, int *flags)
+{
+	/* Find the domain by case-insensitive name under the resource
+	   read lock and build its node set; -1 if nothing matches. */
+	fod_t *cur, *match = NULL;
+	int idx = 0, status = -1;
+
+	pthread_rwlock_rdlock(&resource_lock);
+	list_for(&_domains, cur, idx) {
+		if (strcasecmp(cur->fd_name, domainname) == 0) {
+			match = cur;
+			break;
+		}
+	}
+
+	if (match != NULL) {
+		status = node_domain_set(match, ret, retlen);
+		*flags = match->fd_flags;
+	}
+
+	pthread_rwlock_unlock(&resource_lock);
+	return status;
+}
+
+
+int
 count_resource_groups(cluster_member_list_t *ml)
 {
 	resource_t *res;
@@ -540,6 +569,60 @@ consider_relocate(char *svcName, rg_stat
 }
 
 
+/* Return a malloc()ed, NULL-terminated array with a strdup()ed name for
+   every node on the _tree list whose name is non-empty; *len, if given,
+   receives the count.  NULL on allocation failure; caller frees all. */
+char **
+get_service_names(int *len)
+{
+	resource_node_t *cur = NULL;
+	char name[64];
+	char **names = NULL;
+	int total = 0, filled = 0, i;
+
+	pthread_rwlock_rdlock(&resource_lock);
+
+	list_do(&_tree, cur) {
+		++total;
+	} while (!list_done(&_tree, cur));
+
+	names = malloc(sizeof(char *) * (total + 1));
+	if (names == NULL)
+		goto out_fail;
+	memset(names, 0, sizeof(char *) * (total + 1));
+
+	/* total is reused as list_for's iteration counter from here on */
+	total = 0;
+	list_for(&_tree, cur, total) {
+		res_build_name(name, sizeof(name), cur->rn_resource);
+		if (name[0] == '\0')
+			continue;
+
+		names[filled] = strdup(name);
+		if (names[filled] == NULL)
+			goto out_fail;
+		filled++;
+	}
+
+	if (len != NULL)
+		*len = filled;
+	pthread_rwlock_unlock(&resource_lock);
+	return names;
+
+out_fail:
+	pthread_rwlock_unlock(&resource_lock);
+	/* Release whatever was duplicated before the failure */
+	for (i = 0; i < filled; i++)
+		free(names[i]);
+	if (names != NULL)
+		free(names);
+	return NULL;
+}
+
+
+
+
+
 /**
  * Called to decide what services to start locally during a node_event.
  * Originally a part of node_event, it is now its own function to cut down
@@ -1406,7 +1489,7 @@ do_condstarts(void)
 
 
 int
-check_config_update(void)
+check_config_update(int *new, int *old)
 {
 	int newver = 0, fd, ret = 0;
 	char *val = NULL;
@@ -1426,6 +1509,8 @@ check_config_update(void)
 	pthread_mutex_lock(&config_mutex);
 	if (newver && newver != config_version)
 		ret = 1;
+	if (new) *new = newver;
+	if (old) *old = config_version;
 	pthread_mutex_unlock(&config_mutex);
 	ccs_unlock(fd);
 
@@ -1449,12 +1534,14 @@ dump_config_version(FILE *fp)
 int
 init_resource_groups(int reconfigure)
 {
-	int fd, x;
+	int fd, x, y, cnt;
 
+	event_table_t *evt = NULL;
 	resource_t *reslist = NULL, *res;
 	resource_rule_t *rulelist = NULL, *rule;
 	resource_node_t *tree = NULL;
 	fod_t *domains = NULL, *fod;
+	event_t *evp;
 	char *val;
 
 	if (reconfigure)
@@ -1515,6 +1602,24 @@ init_resource_groups(int reconfigure)
 	x = 0;
 	list_do(&domains, fod) { ++x; } while (!list_done(&domains, fod));
 	clulog(LOG_DEBUG, "%d domains defined\n", x);
+	construct_events(fd, &evt);
+	cnt = 0;
+	if (evt) {
+		for (x=0; x <= evt->max_prio; x++) {
+			if (!evt->entries[x])
+				continue;
+
+			/* Tally the events queued at this priority */
+			y = 0;
+			list_do(&evt->entries[x], evp) {
+				++y;
+			} while (!list_done(&evt->entries[x], evp));
+
+			cnt += y;
+		}
+	}
+	clulog(LOG_DEBUG, "%d events defined\n", cnt);
+	
 
 	/* Reconfiguration done */
 	ccs_unlock(fd);
@@ -1543,6 +1648,9 @@ init_resource_groups(int reconfigure)
 	if (_domains)
 		deconstruct_domains(&_domains);
 	_domains = domains;
+	if (master_event_table)
+		deconstruct_events(&master_event_table);
+	master_event_table = evt;
 	pthread_rwlock_unlock(&resource_lock);
 
 	if (reconfigure) {
@@ -1583,6 +1691,60 @@ get_recovery_policy(char *rg_name, char 
 }
 
 
+/* Copy attribute 'prop' of service 'rg_name' into buf, NUL-terminated
+   whenever buflen > 0.  Returns 0 even if nothing is found (buf = ""). */
+int
+get_service_property(char *rg_name, char *prop, char *buf, size_t buflen)
+{
+	int ret = 0;
+	resource_t *res;
+	char *val;
+
+	memset(buf, 0, buflen);
+
+#if 0
+	if (!strcmp(prop, "domain")) {
+		/* not needed */
+		strncpy(buf, "", buflen);
+	} else if (!strcmp(prop, "autostart")) {
+		strncpy(buf, "1", buflen);
+	} else if (!strcmp(prop, "hardrecovery")) {
+		strncpy(buf, "0", buflen);
+	} else if (!strcmp(prop, "exclusive")) {
+		strncpy(buf, "0", buflen);
+	} else if (!strcmp(prop, "nfslock")) {
+		strncpy(buf, "0", buflen);
+	} else if (!strcmp(prop, "recovery")) {
+		strncpy(buf, "restart", buflen);
+	} else if (!strcmp(prop, "depend")) {
+		/* not needed */
+		strncpy(buf, "", buflen);
+	} else {
+		/* not found / no defaults */
+		ret = -1;
+	}
+#endif
+	pthread_rwlock_rdlock(&resource_lock);
+	res = find_root_by_ref(&_resources, rg_name);
+	if (res) {
+		val = res_attr_value(res, prop);
+		if (val && buflen) {
+			/* buf was zeroed above; buflen-1 keeps its NUL */
+			strncpy(buf, val, buflen - 1);
+		}
+	}
+	pthread_rwlock_unlock(&resource_lock);
+
+#if 0
+	if (ret == 0)
+		printf("%s(%s, %s) = %s\n", __FUNCTION__, rg_name, prop, buf);
+	else 
+		printf("%s(%s, %s) = NOT FOUND\n", __FUNCTION__, rg_name, prop);
+#endif
+	return ret;
+}
+
+
 void
 kill_resource_groups(void)
 {
Index: src/daemons/main.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/main.c,v
retrieving revision 1.34.2.9
diff -u -p -r1.34.2.9 main.c
--- src/daemons/main.c	21 Aug 2007 16:39:02 -0000	1.34.2.9
+++ src/daemons/main.c	8 Nov 2007 21:09:01 -0000
@@ -54,9 +54,10 @@ void set_my_id(int);
 void flag_shutdown(int sig);
 void hard_exit(void);
 int send_rg_states(msgctx_t *, int);
-int check_config_update(void);
+int check_config_update(int *, int *);
 int svc_exists(char *);
 int watchdog_init(void);
+int32_t master_event_callback(char *key, uint64_t viewno, void *data, uint32_t datalen);
 
 int shutdown_pending = 0, running = 1, need_reconfigure = 0;
 char debug = 0; /* XXX* */
@@ -65,7 +66,6 @@ static int port = RG_PORT;
 static char *rgmanager_lsname = "rgmanager"; /* XXX default */
 
 int next_node_id(cluster_member_list_t *membership, int me);
-int rg_event_q(char *svcName, uint32_t state, int owner);
 void malloc_dump_table(FILE *, size_t, size_t);
 
 void
@@ -166,15 +166,25 @@ membership_update(void)
 	old_membership = member_list();
 	new_ml = get_member_list(h);
 
-	for (x = 0; x < new_ml->cml_count; x++) {
+	for(x=0; new_ml && x<new_ml->cml_count;x++) {
+		if (new_ml->cml_members[x].cn_nodeid == 0) {
+		    new_ml->cml_members[x].cn_member = 0;
+		}
+	}
 
-		if (new_ml->cml_members[x].cn_member == 0)
+	for (x = 0; new_ml && x < new_ml->cml_count; x++) {
+
+		if (new_ml->cml_members[x].cn_member == 0) {
+			printf("skipping %d - node not member\n",
+			       new_ml->cml_members[x].cn_nodeid);
 			continue;
+		}
 		if (new_ml->cml_members[x].cn_nodeid == my_id())
 			continue;
 
 #ifdef DEBUG
-		printf("Checking for listening status of %d\n", new_ml->cml_members[x].cn_nodeid);
+		printf("Checking for listening status of %d\n",
+		       new_ml->cml_members[x].cn_nodeid);
 #endif
 
 		do {
@@ -185,6 +195,7 @@ membership_update(void)
 				clulog(LOG_DEBUG, "Node %d is not listening\n",
 					new_ml->cml_members[x].cn_nodeid);
 				new_ml->cml_members[x].cn_member = 0;
+				break;
 			} else if (quorate < 0) {
 				perror("cman_is_listening");
 				usleep(50000);
@@ -201,7 +212,6 @@ membership_update(void)
 
 	cman_finish(h);
 	member_list_update(new_ml);
-	member_set_state(0, 0);		/* Mark qdisk as dead */
 
 	/*
 	 * Handle nodes lost.  Do our local node event first.
@@ -397,7 +407,7 @@ do_lockreq(msgctx_t *ctx, int req)
 int
 dispatch_msg(msgctx_t *ctx, int nodeid, int need_close)
 {
-	int ret = 0, sz = -1;
+	int ret = 0, sz = -1, nid;
 	char msgbuf[4096];
 	generic_msg_hdr	*msg_hdr = (generic_msg_hdr *)msgbuf;
 	SmMessageSt	*msg_sm = (SmMessageSt *)msgbuf;
@@ -494,7 +504,30 @@ dispatch_msg(msgctx_t *ctx, int nodeid, 
 			goto out;
 		}
 
-		/* Queue request */
+		if (central_events_enabled() &&
+		    msg_sm->sm_hdr.gh_arg1 != RG_ACTION_MASTER) {
+			
+			/* Centralized processing or request is from
+			   clusvcadm */
+			nid = event_master();
+			if (nid != my_id()) {
+				/* Forward the message to the event master */
+				forward_message(ctx, msg_sm, nid);
+			} else {
+				/* for us: queue it */
+				user_event_q(msg_sm->sm_data.d_svcName,
+					     msg_sm->sm_data.d_action,
+					     msg_sm->sm_hdr.gh_arg1,
+					     msg_sm->sm_hdr.gh_arg2,
+					     msg_sm->sm_data.d_svcOwner,
+					     ctx);
+			}
+
+			return 0;
+		}
+
+		/* Distributed processing and/or request is from master node
+		   -- Queue request */
 		rt_enqueue_request(msg_sm->sm_data.d_svcName,
 		  		   msg_sm->sm_data.d_action,
 		  		   ctx, 0, msg_sm->sm_data.d_svcOwner,
@@ -520,7 +553,8 @@ dispatch_msg(msgctx_t *ctx, int nodeid, 
 		/* Send to our rg event handler */
 		rg_event_q(msg_sm->sm_data.d_svcName,
 			   msg_sm->sm_data.d_action,
-			   msg_sm->sm_data.d_svcOwner);
+			   msg_sm->sm_hdr.gh_arg1,
+			   msg_sm->sm_hdr.gh_arg2);
 		break;
 
 	case RG_EXITING:
@@ -658,7 +692,7 @@ dump_internal_state(char *loc)
 int
 event_loop(msgctx_t *localctx, msgctx_t *clusterctx)
 {
-	int n, max, ret;
+	int n, max, ret, oldver, newver;
 	fd_set rfds;
 	msgctx_t *newctx;
 	struct timeval tv;
@@ -727,10 +761,10 @@ event_loop(msgctx_t *localctx, msgctx_t 
 	if (!running)
 		return 0;
 
-	if (need_reconfigure || check_config_update()) {
+	if (need_reconfigure || check_config_update(&newver, &oldver)) {
 		need_reconfigure = 0;
 		configure_rgmanager(-1, 0);
-		init_resource_groups(1);
+		config_event_q(oldver, newver);
 		return 0;
 	}
 
@@ -812,8 +846,15 @@ configure_rgmanager(int ccsfd, int dbg)
 	}
 
 	if (ccs_get(ccsfd, "/cluster/rm/@transition_throttling", &v) == 0) {
-		if (!dbg)
-			set_transition_throttling(atoi(v));
+		set_transition_throttling(atoi(v));
+		free(v);
+	}
+
+	if (ccs_get(ccsfd, "/cluster/rm/@central_processing", &v) == 0) {
+		set_central_events(atoi(v));
+		if (atoi(v))
+			clulog(LOG_NOTICE,
+			       "Centralized Event Processing enabled\n");
 		free(v);
 	}
 
@@ -1007,6 +1048,7 @@ main(int argc, char **argv)
 	}
 
 	vf_key_init("rg_lockdown", 10, NULL, lock_commit_cb);
+	vf_key_init("Transition-Master", 10, NULL, master_event_callback);
 #endif
 
 	/*
Index: src/daemons/restree.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/restree.c,v
retrieving revision 1.23.2.12
diff -u -p -r1.23.2.12 restree.c
--- src/daemons/restree.c	25 Sep 2007 21:09:23 -0000	1.23.2.12
+++ src/daemons/restree.c	8 Nov 2007 21:09:01 -0000
@@ -1023,7 +1023,8 @@ do_status(resource_node_t *node)
 
 		/* Ok, it's a 'status' action. See if enough time has
 		   elapsed for a given type of status action */
-		if (delta < node->rn_actions[x].ra_interval)
+		if (delta < node->rn_actions[x].ra_interval ||
+		    !node->rn_actions[x].ra_interval)
 			continue;
 
 		if (idx == -1 ||
Index: src/daemons/rg_event.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_event.c,v
retrieving revision 1.1.2.1
diff -u -p -r1.1.2.1 rg_event.c
--- src/daemons/rg_event.c	24 Jul 2007 13:53:08 -0000	1.1.2.1
+++ src/daemons/rg_event.c	8 Nov 2007 21:09:01 -0000
@@ -23,81 +23,492 @@
 #include <libcman.h>
 #include <ccs.h>
 #include <clulog.h>
-
-typedef struct __rge_q {
-	list_head();
-	char rg_name[128];
-	uint32_t rg_state;
-	int rg_owner;
-} rgevent_t;
+#include <lock.h>
+#include <event.h>
+#include <stdint.h>
+#include <vf.h>
+#include <members.h>
 
 
 /**
  * resource group event queue.
  */
-static rgevent_t *rg_ev_queue = NULL;
-static pthread_mutex_t rg_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_t rg_ev_thread = 0;
+static event_t *event_queue = NULL;
+#ifdef WRAP_LOCKS
+static pthread_mutex_t event_queue_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+static pthread_mutex_t mi_mutex = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
+#else
+static pthread_mutex_t event_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t mi_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+static pthread_t event_thread = 0;
+static int transition_throttling = 5;
+static int central_events = 0;
+
+extern int running;
+extern int shutdown_pending;
+static int _master = 0;
+static struct dlm_lksb _master_lock;
+static int _xid = 0;
+static event_master_t *mi = NULL;
+
+void hard_exit(void);
+int init_resource_groups(int);
+void flag_shutdown(int sig);
+void flag_reconfigure(int sig);
 
-void group_event(char *name, uint32_t state, int owner);
+event_table_t *master_event_table = NULL;
+
+
+void
+set_transition_throttling(int nsecs)
+{
+	/* Seconds the event thread waits on start-up so events can
+	   queue; a negative request is clamped to zero. */
+	transition_throttling = (nsecs < 0) ? 0 : nsecs;
+}
+
+
+void
+set_central_events(int flag)
+{
+	central_events = flag;	/* nonzero turns on centralized events */
+}
+
+
+int
+central_events_enabled(void)
+{
+	return central_events;	/* as last given to set_central_events() */
+}
+
+
+/**
+  Handle a cluster member going up->down or down->up: initialize services
+  (local node up), exit uncleanly (local node down), or pass a remote
+  node's transition to eval_groups.  No-op once 'running' is cleared.
+  @param local		Nonzero if the event concerns this node.
+  @param nodeID		ID of the member which has come up/gone down.
+  @param nodeStatus	New state of the member (0 is down).
+  @param clean		Accepted but not used by this function.
+  @see eval_groups
+ */
+void
+node_event(int local, int nodeID, int nodeStatus, int clean)
+{
+	if (!running)
+		return;
+
+	if (local) {
+
+		/* Local Node Event */
+		if (nodeStatus == 0) {
+			clulog(LOG_ERR, "Exiting uncleanly\n");
+			hard_exit();
+		}
+
+		if (!rg_initialized()) {
+			if (init_resource_groups(0) != 0) {
+				clulog(LOG_ERR,
+				       "#36: Cannot initialize services\n");
+				hard_exit();
+			}
+		}
+
+		if (shutdown_pending) {
+			clulog(LOG_NOTICE, "Processing delayed exit signal\n");
+			running = 0;
+			return;
+		}
+		setup_signal(SIGINT, flag_shutdown);
+		setup_signal(SIGTERM, flag_shutdown);
+		setup_signal(SIGHUP, flag_reconfigure);
+
+		eval_groups(1, nodeID, 1);
+		return;
+	}
+
+	/*
+	 * Nothing to do for events from other nodes if we are not ready.
+	 */
+	if (!rg_initialized()) {
+		clulog(LOG_DEBUG, "Services not initialized.\n");
+		return;
+	}
+
+	eval_groups(0, nodeID, nodeStatus);
+}
+
+
+/* Return 1 if the configuration names a fence device for nodeid (or
+   if ccsd cannot be reached, to err on the safe side), else 0. */
+int
+node_has_fencing(int nodeid)
+{
+	char query[1024];
+	char *devname = NULL;
+	int fd, found;
+
+	fd = ccs_connect();
+	if (fd < 0) {
+		clulog(LOG_ERR, "Unable to connect to ccsd; cannot handle"
+		       " node event!\n");
+		/* Assume node has fencing */
+		return 1;
+	}
+
+	snprintf(query, sizeof(query),
+		 "/cluster/clusternodes/clusternode[ nodeid=\"%d\"]"
+		 "/fence/method/device/@name", nodeid);
+	found = (ccs_get(fd, query, &devname) == 0);
+
+	if (devname != NULL)
+		free(devname);
+	ccs_disconnect(fd);
+	return found;
+}
+
+
+/* Ask cman whether nodeid has been fenced: nonzero if so, 0 if not or
+   if cman cannot be queried (the caller polls, so it simply retries). */
+int
+node_fenced(int nodeid)
+{
+	cman_handle_t ch = cman_init(NULL);
+	uint64_t fence_time;
+	int fenced = 0;
+	if (!ch)	/* no connection: never hand cman a NULL handle */
+		return 0;
+	if (cman_get_fenceinfo(ch, nodeid, &fence_time, &fenced, NULL) < 0)
+		fenced = 0;
+	cman_finish(ch);
+	return fenced;
+}
+
+
+/* VF commit callback for "Transition-Master": validate and publish the
+   record; returns 0 if accepted, 1 if rejected.  NOTE(review): 'data' is
+   kept and later free()d -- presumably ownership passes here; confirm. */
+int32_t
+master_event_callback(char *key, uint64_t viewno,
+		      void *data, uint32_t datalen)
+{
+	event_master_t *m = data;
+
+	if (datalen != (uint32_t)sizeof(*m)) {
+		clulog(LOG_ERR, "%s: wrong size\n", __FUNCTION__);
+		return 1;
+	}
+
+	swab_event_master_t(m);
+	if (m->m_magic != EVENT_MASTER_MAGIC) {
+		clulog(LOG_ERR, "%s: wrong magic\n", __FUNCTION__);
+		return 1;
+	}
+
+	if (m->m_nodeid == my_id())
+		clulog(LOG_DEBUG, "Master Commit: I am master\n");
+	else
+		clulog(LOG_DEBUG, "Master Commit: %d is master\n", m->m_nodeid);
+	pthread_mutex_lock(&mi_mutex);
+	if (mi)
+		free(mi);
+	mi = m;
+	pthread_mutex_unlock(&mi_mutex);
+	return 0;
+}
+
+
+/* Read the "Transition-Master" record from the cluster and cache it.
+   Returns the master's node ID, or -1 if no valid record was found. */
+int
+find_master(void)
+{
+	event_master_t *masterinfo = NULL;
+	cluster_member_list_t *m = member_list();
+	uint32_t sz;
+	uint64_t vn;
+	int master_id;
+
+	if (vf_read(m, "Transition-Master", &vn,
+		    (void **)&masterinfo, &sz) < 0) {
+		clulog(LOG_ERR, "Unable to discover master status\n");
+		masterinfo = NULL;
+	}
+	free_member_list(m);
+	if (masterinfo && (sz >= sizeof(*masterinfo))) {
+		swab_event_master_t(masterinfo);
+		if (masterinfo->m_magic == EVENT_MASTER_MAGIC) {
+			clulog(LOG_DEBUG, "Master Locate: %d is master\n",
+			       masterinfo->m_nodeid);
+			/* Grab the ID first: mi may be freed once published */
+			master_id = masterinfo->m_nodeid;
+			pthread_mutex_lock(&mi_mutex);
+			if (mi)
+				free(mi);
+			mi = masterinfo;
+			pthread_mutex_unlock(&mi_mutex);
+			return master_id;
+		}
+	}
+	free(masterinfo);	/* rejected (or NULL) record: don't leak it */
+	return -1;
+}
 
 
+/* Return the node ID of the event master, taking the "Transition-Master"
+   lock and advertising ourselves if nobody holds it.  -1 if unknown. */
+int
+event_master(void)
+{
+	cluster_member_list_t *m = NULL;
+	event_master_t masterinfo;
+	int master_id = -1;
+
+	/* We hold this forever. */
+	if (_master)
+		return my_id();
+
+	pthread_mutex_lock(&mi_mutex);
+	if (mi) {
+		master_id = mi->m_nodeid;
+		pthread_mutex_unlock(&mi_mutex);
+		/* Log the copy: mi may be replaced once unlocked */
+		clulog(LOG_DEBUG, "%d is master\n", master_id);
+		return master_id;
+	}
+	pthread_mutex_unlock(&mi_mutex);
+
+	memset(&_master_lock, 0, sizeof(_master_lock));
+	if (clu_lock(LKM_EXMODE, &_master_lock, LKF_NOQUEUE,
+		     "Transition-Master") < 0) {
+		/* not us, find out who is master */
+		return find_master();
+	}
+	if (_master_lock.sb_status != 0)
+		return -1;
+
+	_master = 1;
+	m = member_list();
+	memset(&masterinfo, 0, sizeof(masterinfo));
+	masterinfo.m_magic = EVENT_MASTER_MAGIC;
+	masterinfo.m_nodeid = my_id();
+	masterinfo.m_master_time = (uint64_t)time(NULL);
+	swab_event_master_t(&masterinfo);
+
+	if (vf_write(m, VFF_IGN_CONN_ERRORS | VFF_RETRY,
+		     "Transition-Master", &masterinfo,
+		     sizeof(masterinfo)) < 0) {
+		clulog(LOG_ERR, "Unable to advertise master"
+		       " status to all nodes\n");
+	}
+	free_member_list(m);
+	return my_id();
+}
+
+
+
+void group_event(char *name, uint32_t state, int owner);
+
 void *
-rg_event_thread(void *arg)
+_event_thread_f(void *arg)
 {
-	rgevent_t *ev;
+	event_t *ev;
+	int notice = 0, count = 0;
 
 	while (1) {
-		pthread_mutex_lock(&rg_queue_mutex);
-		ev = rg_ev_queue;
+		pthread_mutex_lock(&event_queue_mutex);
+		ev = event_queue;
 		if (ev)
-			list_remove(&rg_ev_queue, ev);
+			list_remove(&event_queue, ev);
 		else
 			break; /* We're outta here */
-		pthread_mutex_unlock(&rg_queue_mutex);
 
-		group_event(ev->rg_name, ev->rg_state, ev->rg_owner);
+		++count;
+		/* Event thread usually doesn't hang around.  When it's
+	   	   spawned, sleep for this many seconds in order to let
+	   	   some events queue up */
+		if ((count==1) && transition_throttling && !central_events)
+			sleep(transition_throttling);
+
+		pthread_mutex_unlock(&event_queue_mutex);
+
+		if (ev->ev_type == EVENT_CONFIG) {
+			/*
+			clulog(LOG_NOTICE, "Config Event: %d -> %d\n",
+			       ev->ev.config.cfg_oldversion,
+			       ev->ev.config.cfg_version);
+			 */
+			init_resource_groups(1);
+			free(ev);
+			continue;
+		}
+
+		if (central_events) {
+			/* If the master node died or there isn't
+			   one yet, take the master lock. */
+			if (event_master() == my_id()) {
+				slang_process_event(master_event_table,
+						    ev);
+			} else {
+				//printf("I am not the transition master\n");
+			}
+			free(ev);
+			continue;
+		}
+
+		if (ev->ev_type == EVENT_RG) {
+			/*
+			clulog(LOG_NOTICE, "RG Event: %s %s %d\n",
+			       ev->ev.group.rg_name,
+			       rg_state_str(ev->ev.group.rg_state),
+			       ev->ev.group.rg_owner);
+			 */
+			group_event(ev->ev.group.rg_name,
+				    ev->ev.group.rg_state,
+				    ev->ev.group.rg_owner);
+		} else if (ev->ev_type == EVENT_NODE) {
+			/*
+			clulog(LOG_NOTICE, "Node Event: %s %d %s %s\n",
+			       ev->ev.node.ne_local?"Local":"Remote",
+			       ev->ev.node.ne_nodeid,
+			       ev->ev.node.ne_state?"UP":"DOWN",
+			       ev->ev.node.ne_clean?"Clean":"Dirty")
+			 */
+
+			if (ev->ev.node.ne_state == 0 &&
+			    !ev->ev.node.ne_clean &&
+			    node_has_fencing(ev->ev.node.ne_nodeid)) {
+				notice = 0;
+				while (!node_fenced(ev->ev.node.ne_nodeid)) {
+					if (!notice) {
+						notice = 1;
+						clulog(LOG_INFO, "Waiting for "
+						       "node #%d to be fenced\n",
+						       ev->ev.node.ne_nodeid);
+					}
+					sleep(2);
+				}
+
+				if (notice)
+					clulog(LOG_INFO, "Node #%d fenced; "
+					       "continuing\n",
+					       ev->ev.node.ne_nodeid);
+			}
+
+			node_event(ev->ev.node.ne_local,
+				   ev->ev.node.ne_nodeid,
+				   ev->ev.node.ne_state,
+				   ev->ev.node.ne_clean);
+		}
 
 		free(ev);
 	}
 
+	clulog(LOG_DEBUG, "%d events processed\n", count);
 	/* Mutex held */
-	rg_ev_thread = 0;
-	pthread_mutex_unlock(&rg_queue_mutex);
+	event_thread = 0;
+	pthread_mutex_unlock(&event_queue_mutex);
 	pthread_exit(NULL);
 }
 
 
-void
-rg_event_q(char *name, uint32_t state, int owner)
+static void
+insert_event(event_t *ev)
 {
-	rgevent_t *ev;
 	pthread_attr_t attrs;
+	pthread_mutex_lock (&event_queue_mutex);
+	ev->ev_transaction = ++_xid;
+	list_insert(&event_queue, ev);
+	if (event_thread == 0) {
+        	pthread_attr_init(&attrs);
+        	pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+        	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+		pthread_attr_setstacksize(&attrs, 262144);
+
+		pthread_create(&event_thread, &attrs, _event_thread_f, NULL);
+        	pthread_attr_destroy(&attrs);
+	}
+	pthread_mutex_unlock (&event_queue_mutex);
+}
+
+
+static event_t *
+new_event(void)
+{
+	event_t *ev;
 
 	while (1) {
-		ev = malloc(sizeof(rgevent_t));
+		ev = malloc(sizeof(*ev));
 		if (ev) {
 			break;
 		}
 		sleep(1);
 	}
-
 	memset(ev,0,sizeof(*ev));
+	ev->ev_type = EVENT_NONE;
 
-	strncpy(ev->rg_name, name, 128);
-	ev->rg_state = state;
-	ev->rg_owner = owner;
-
-	pthread_mutex_lock (&rg_queue_mutex);
-	list_insert(&rg_ev_queue, ev);
-	if (rg_ev_thread == 0) {
-        	pthread_attr_init(&attrs);
-        	pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
-        	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
-		pthread_attr_setstacksize(&attrs, 262144);
+	return ev;
+}
 
-		pthread_create(&rg_ev_thread, &attrs, rg_event_thread, NULL);
-        	pthread_attr_destroy(&attrs);
-	}
-	pthread_mutex_unlock (&rg_queue_mutex);
+
+
+
+
+void
+rg_event_q(char *name, uint32_t state, int owner, int last)
+{
+	event_t *ev = new_event();
+
+	ev->ev_type = EVENT_RG;
+	/* ev is zeroed: copying size-1 bytes keeps rg_name terminated */
+	strncpy(ev->ev.group.rg_name, name, sizeof(ev->ev.group.rg_name) - 1);
+	ev->ev.group.rg_state = state;
+	ev->ev.group.rg_owner = owner;
+	ev->ev.group.rg_last_owner = last;
+
+	insert_event(ev);
 }
+
+
+void
+node_event_q(int local, int nodeID, int state, int clean)
+{
+	/* Queue a node up/down transition for the event thread */
+	event_t *node_ev = new_event();
+	node_ev->ev_type = EVENT_NODE;
+	node_ev->ev.node.ne_local = local;
+	node_ev->ev.node.ne_nodeid = nodeID;
+	node_ev->ev.node.ne_state = state;
+	node_ev->ev.node.ne_clean = clean;
+	insert_event(node_ev);
+}
+
+
+void
+config_event_q(int old_version, int new_version)
+{
+	/* Queue a configuration version change for the event thread */
+	event_t *cfg_ev = new_event();
+	cfg_ev->ev_type = EVENT_CONFIG;
+	cfg_ev->ev.config.cfg_oldversion = old_version;
+	cfg_ev->ev.config.cfg_version = new_version;
+	insert_event(cfg_ev);
+}
+
+void
+user_event_q(char *svc, int request,
+	     int arg1, int arg2, int target, msgctx_t *ctx)
+{
+	/* ev is zeroed: copying size-1 bytes keeps u_name terminated */
+	event_t *ev = new_event();
+	ev->ev_type = EVENT_USER;
+	strncpy(ev->ev.user.u_name, svc, sizeof(ev->ev.user.u_name) - 1);
+	ev->ev.user.u_request = request;
+	ev->ev.user.u_arg1 = arg1;
+	ev->ev.user.u_arg2 = arg2;
+	ev->ev.user.u_target = target;
+	ev->ev.user.u_ctx = ctx;
+	insert_event(ev);
+}
+
Index: src/daemons/rg_forward.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_forward.c,v
retrieving revision 1.8.2.3
diff -u -p -r1.8.2.3 rg_forward.c
--- src/daemons/rg_forward.c	2 Aug 2007 14:46:51 -0000	1.8.2.3
+++ src/daemons/rg_forward.c	8 Nov 2007 21:09:01 -0000
@@ -27,11 +27,21 @@
 #include <members.h>
 
 
+struct fw_message {
+	msgctx_t *ctx;		/* where the reply to the requester goes */
+	SmMessageSt msg;	/* copy of the request being forwarded */
+	int nodeid;		/* node the request is forwarded to */
+};
+
+
 void
-build_message(SmMessageSt *msgp, int action, char *svcName, int target)
+build_message(SmMessageSt *msgp, int action, char *svcName, int target,
+	      int arg1, int arg2)
 {
 	msgp->sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
 	msgp->sm_hdr.gh_command = RG_ACTION_REQUEST;
+	msgp->sm_hdr.gh_arg1 = arg1;
+	msgp->sm_hdr.gh_arg2 = arg2;
 	msgp->sm_hdr.gh_length = sizeof(*msgp);
 	msgp->sm_data.d_action = action;
 	strncpy(msgp->sm_data.d_svcName, svcName,
@@ -90,7 +100,8 @@ forwarding_thread(void *arg)
 	}
 
 	/* Construct message */
-	build_message(&msg, req->rr_request, req->rr_group, req->rr_target);
+	build_message(&msg, req->rr_request, req->rr_group, req->rr_target,
+		      req->rr_arg0, req->rr_arg1);
 
 	if (msg_open(MSG_CLUSTER, rgs.rs_owner, RG_PORT, ctx, 10) < 0) {
 		clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
@@ -166,3 +177,121 @@ forward_request(request_t *req)
         pthread_attr_destroy(&attrs);
 }
 
+
+
+/* Thread body: relay the request in 'arg' (a malloc()ed struct
+   fw_message, freed here) to arg->nodeid and pass the reply's return
+   code back on arg->ctx; RG_EAGAIN if forwarding fails, RG_ENODE if
+   the target is found offline while waiting. */
+void *
+forwarding_thread_v2(void *arg)
+{
+	msgctx_t *ctx = NULL, *resp_ctx = NULL;
+	cluster_member_list_t *m = NULL;
+	SmMessageSt *msgp = NULL, msg;
+	int response_code = RG_EAGAIN, ret, target = -1, new_owner = 0;
+	int retries = 0, action;
+	struct fw_message *fwmsg = (struct fw_message *)arg;
+
+	msgp = &fwmsg->msg;
+	resp_ctx = fwmsg->ctx;
+	target = fwmsg->nodeid;
+	action = msgp->sm_data.d_action; /* saved before the swab below */
+
+	clulog(LOG_DEBUG, "FW: Forwarding SM request to %d\n", target);
+
+	ctx = msg_new_ctx();
+	if (ctx == NULL) {
+		clulog(LOG_DEBUG, "FW: Failed to allocate socket context: %s\n",
+		       strerror(errno));
+		goto out_fail;
+	}
+	if (msg_open(MSG_CLUSTER, target, RG_PORT, ctx, 10) < 0) {
+		clulog(LOG_DEBUG, "FW: Failed to open channel to %d CTX: %p\n",
+		       target, ctx);
+		goto out_fail;
+	}
+
+	/* swap + send; compare as int so a negative error is caught */
+	swab_SmMessageSt(msgp);
+	if (msg_send(ctx, msgp, sizeof(*msgp)) < (int)sizeof(*msgp)) {
+		clulog(LOG_DEBUG, "FW: Failed to send message to %d CTX: %p\n",
+		       target, ctx);
+		goto out_fail;
+	}
+
+        /*
+	 * Ok, we're forwarding a message to another node.  Keep tabs on
+	 * the node to make sure it doesn't die.  Basically, wake up every
+	 * now and again to make sure it's still online.  If it isn't, send
+	 * a response back to the caller.
+	 */
+	do {
+		ret = msg_receive(ctx, &msg, sizeof(msg), 10);
+		if (ret < (int)sizeof(msg)) {
+			if (ret < 0 && errno == ETIMEDOUT) {
+				m = member_list();
+				if (!memb_online(m, target)) {
+					response_code = RG_ENODE;
+					goto out_fail;
+				}
+				free_member_list(m);
+				m = NULL;
+				continue;
+			}
+			if (ret == 0)
+				continue;
+		}
+		break;
+	} while(++retries < 60); /* old 600 second rule */
+
+	/* Only trust msg if a complete reply actually arrived */
+	if (ret >= (int)sizeof(msg)) {
+		swab_SmMessageSt(&msg);
+		response_code = msg.sm_data.d_ret;
+	}
+
+out_fail:
+	if (resp_ctx) {
+		send_ret(resp_ctx, msgp->sm_data.d_svcName, response_code,
+			 action, new_owner);
+		msg_close(resp_ctx);
+		msg_free_ctx(resp_ctx);
+	}
+	free(fwmsg);	/* msgp points into fwmsg: free only after use */
+	if (ctx) {
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+	if (m)
+		free_member_list(m);
+	pthread_exit(NULL);
+}
+
+
+/* Relay msgp to nodeid from a detached thread; ctx is always consumed. */
+void
+forward_message(msgctx_t *ctx, void *msgp, int nodeid)
+{
+	pthread_t newthread;
+	pthread_attr_t attrs;
+	struct fw_message *fwmsg = malloc(sizeof(*fwmsg));
+	int err = -1;
+	if (fwmsg) {
+		memcpy(&fwmsg->msg, msgp, sizeof(fwmsg->msg));
+		fwmsg->ctx = ctx;
+		fwmsg->nodeid = nodeid;
+		pthread_attr_init(&attrs);
+		pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+		pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+		pthread_attr_setstacksize(&attrs, 262144);
+		err = pthread_create(&newthread, &attrs,
+				     forwarding_thread_v2, fwmsg);
+		pthread_attr_destroy(&attrs);
+	}
+	if (err) {	/* no thread took ownership: release everything */
+		free(fwmsg);
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+}
Index: src/daemons/rg_state.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_state.c,v
retrieving revision 1.24.2.13
diff -u -p -r1.24.2.13 rg_state.c
--- src/daemons/rg_state.c	30 Aug 2007 16:03:03 -0000	1.24.2.13
+++ src/daemons/rg_state.c	8 Nov 2007 21:09:01 -0000
@@ -87,7 +87,7 @@ next_node_id(cluster_member_list_t *memb
 
 
 void
-broadcast_event(char *svcName, uint32_t state)
+broadcast_event(char *svcName, uint32_t state, int owner, int last)
 {
 	SmMessageSt msgp;
 	msgctx_t everyone;
@@ -95,10 +95,12 @@ broadcast_event(char *svcName, uint32_t 
 	msgp.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
 	msgp.sm_hdr.gh_command = RG_EVENT;
 	msgp.sm_hdr.gh_length = sizeof(msgp);
+	msgp.sm_hdr.gh_arg1 = owner; 
+	msgp.sm_hdr.gh_arg2 = last; 
 	msgp.sm_data.d_action = state;
 	strncpy(msgp.sm_data.d_svcName, svcName,
 		sizeof(msgp.sm_data.d_svcName));
-	msgp.sm_data.d_svcOwner = 0;
+	msgp.sm_data.d_svcOwner = owner;
 	msgp.sm_data.d_ret = 0;
 
 	swab_SmMessageSt(&msgp);
@@ -201,7 +203,7 @@ _rg_unlock_dbg(void *p, char *file, int 
 
 
 void
-send_ret(msgctx_t *ctx, char *name, int ret, int orig_request)
+send_ret(msgctx_t *ctx, char *name, int ret, int orig_request, int new_owner)
 {
 	SmMessageSt msg, *msgp = &msg;
 	if (!ctx)
@@ -213,7 +215,9 @@ send_ret(msgctx_t *ctx, char *name, int 
 	msgp->sm_data.d_action = orig_request;
 	strncpy(msgp->sm_data.d_svcName, name,
 		sizeof(msgp->sm_data.d_svcName));
-	msgp->sm_data.d_svcOwner = my_id(); /* XXX Broken */
+	if (!new_owner)
+		new_owner = my_id();
+	msgp->sm_data.d_svcOwner = new_owner; /* XXX Broken */
 	msgp->sm_data.d_ret = ret;
 
 	swab_SmMessageSt(msgp);
@@ -652,7 +656,7 @@ svc_advise_start(rg_state_t *svcStatus, 
 		/*
 		 * Starting failed service...
 		 */
-		if (req == RG_START_RECOVER) {
+		if (req == RG_START_RECOVER || central_events_enabled()) {
 			clulog(LOG_NOTICE,
 			       "Recovering failed service %s\n",
 			       svcName);
@@ -684,7 +688,7 @@ svc_advise_start(rg_state_t *svcStatus, 
 	
 	case RG_STATE_DISABLED:
 	case RG_STATE_UNINITIALIZED:
-		if (req == RG_ENABLE) {
+		if (req == RG_ENABLE || req == RG_START_REMOTE) {
 			/* Don't actually enable if the RG is locked! */
 			if (rg_locked()) {
 				ret = 3;
@@ -808,7 +812,8 @@ svc_start(char *svcName, int req)
 		       "Service %s started\n",
 		       svcName);
 
-		broadcast_event(svcName, RG_STATE_STARTED);
+		broadcast_event(svcName, RG_STATE_STARTED, svcStatus.rs_owner,
+				svcStatus.rs_last_owner);
 	} else {
 		clulog(LOG_WARNING,
 		       "#68: Failed to start %s; return value: %d\n",
@@ -1264,8 +1269,8 @@ _svc_stop(char *svcName, int req, int re
 
 	clulog(LOG_NOTICE, "Stopping service %s\n", svcName);
 
-	if (recover)
-		svcStatus.rs_state = RG_STATE_ERROR;
+	if (recover) 
+	       	svcStatus.rs_state = RG_STATE_ERROR;
 	else
 		svcStatus.rs_state = RG_STATE_STOPPING;
 	svcStatus.rs_transition = (uint64_t)time(NULL);
@@ -1346,7 +1351,7 @@ _svc_stop_finish(char *svcName, int fail
 	}
 	rg_unlock(&lockp);
 
-	broadcast_event(svcName, newstate);
+	broadcast_event(svcName, newstate, -1, svcStatus.rs_last_owner);
 
 	return 0;
 }
@@ -1427,7 +1432,8 @@ svc_fail(char *svcName)
 	}
 	rg_unlock(&lockp);
 
-	broadcast_event(svcName, RG_STATE_FAILED);
+	broadcast_event(svcName, RG_STATE_FAILED, -1,
+			svcStatus.rs_last_owner);
 
 	return 0;
 }
@@ -1436,8 +1442,8 @@ svc_fail(char *svcName)
 /*
  * Send a message to the target node to start the service.
  */
-static int
-relocate_service(char *svcName, int request, uint32_t target)
+int
+svc_start_remote(char *svcName, int request, uint32_t target)
 {
 	SmMessageSt msg_relo;
 	int msg_ret;
@@ -1447,6 +1453,8 @@ relocate_service(char *svcName, int requ
 	/* Build the message header */
 	msg_relo.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
 	msg_relo.sm_hdr.gh_command = RG_ACTION_REQUEST;
+	/* XXX XXX */
+	msg_relo.sm_hdr.gh_arg1 = RG_ACTION_MASTER;
 	msg_relo.sm_hdr.gh_length = sizeof (SmMessageSt);
 	msg_relo.sm_data.d_action = request;
 	strncpy(msg_relo.sm_data.d_svcName, svcName,
@@ -1469,13 +1477,13 @@ relocate_service(char *svcName, int requ
 	if (msg_send(&ctx, &msg_relo, sizeof (SmMessageSt)) < 
 	    sizeof (SmMessageSt)) {
 		clulog(LOG_ERR,
-		       "#59: Error sending relocate request to member #%d\n",
+		       "#59: Error sending remote-start request to member #%d\n",
 		       target);
 		msg_close(&ctx);
 		return -1;
 	}
 
-	clulog(LOG_DEBUG, "Sent relocate request to %d\n", (int)target);
+	clulog(LOG_DEBUG, "Sent remote-start request to %d\n", (int)target);
 
 	/* Check the response */
 	do {
@@ -1648,7 +1656,7 @@ handle_relocate_req(char *svcName, int r
 		 	 * It's legal to start the service on the given
 		 	 * node.  Try to do so.
 		 	 */
-			if (relocate_service(svcName, request, target) == 0) {
+			if (svc_start_remote(svcName, request, target) == 0) {
 				*new_owner = target;
 				/*
 				 * Great! We're done...
@@ -1678,7 +1686,7 @@ handle_relocate_req(char *svcName, int r
 		if (target == me)
 			goto exhausted;
 
-		ret = relocate_service(svcName, request, target);
+		ret = svc_start_remote(svcName, request, target);
 		switch (ret) {
 		case RG_ERUN:
 			/* Someone stole the service while we were 
@@ -2002,7 +2010,7 @@ handle_fd_start_req(char *svcName, int r
 			ret = RG_EFAIL;
 			goto out;
 		} else {
-			ret = relocate_service(svcName, RG_START_REMOTE, target);
+			ret = svc_start_remote(svcName, RG_START_REMOTE, target);
 		}
 
 		switch(ret) {
@@ -2022,7 +2030,7 @@ handle_fd_start_req(char *svcName, int r
 		default:
 			clulog(LOG_ERR,
 			       "#6X: Invalid reply [%d] from member %d during"
-			       " relocate operation!\n", ret, target);
+			       " operation!\n", ret, target);
 		}
 	}
 
Index: src/daemons/rg_thread.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_thread.c,v
retrieving revision 1.15.2.9
diff -u -p -r1.15.2.9 rg_thread.c
--- src/daemons/rg_thread.c	24 Jul 2007 13:58:47 -0000	1.15.2.9
+++ src/daemons/rg_thread.c	8 Nov 2007 21:09:01 -0000
@@ -16,12 +16,12 @@
   Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
   MA 02139, USA.
 */
+#include <message.h>
 #include <resgroup.h>
 #include <rg_locks.h>
 #include <gettid.h>
 #include <rg_queue.h>
 #include <assert.h>
-#include <message.h>
 
 /**
  * Resource thread list entry.
@@ -54,6 +54,7 @@ static int spawn_if_needed(const char *r
 int rt_enqueue_request(const char *resgroupname, int request,
 		       msgctx_t *response_ctx, int max, uint32_t target,
 		       int arg0, int arg1);
+int central_events_enabled(void);
 
 
 /**
@@ -446,6 +447,11 @@ resgroup_thread_main(void *arg)
 
 			error = svc_stop(myname, RG_STOP_RECOVER);
 			if (error == 0) {
+				/* Stop generates an event - whatever the
+				   result.  If central events are enabled
+				   don't bother trying to recover */
+				if (central_events_enabled())
+					break;
 				error = handle_recover_req(myname, &newowner);
 				if (error == 0)
 					ret = RG_SUCCESS;
@@ -701,7 +707,7 @@ rt_enqueue_request(const char *resgroupn
 		case RG_START:
 		case RG_ENABLE:
 			send_ret(response_ctx, resgroup->rt_name, RG_EDEADLCK,
-				 request);
+				 request, 0);
 			msg_close(response_ctx);
 			msg_free_ctx(response_ctx);
 			break;
Index: src/daemons/service_op.c
===================================================================
RCS file: src/daemons/service_op.c
diff -N src/daemons/service_op.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/daemons/service_op.c	8 Nov 2007 21:09:01 -0000
@@ -0,0 +1,167 @@
+#include <assert.h>
+#include <platform.h>
+#include <message.h>
+#include <members.h>
+#include <stdio.h>
+#include <string.h>
+#include <resgroup.h>
+#include <clulog.h>
+#include <lock.h>
+#include <rg_locks.h>
+#include <ccs.h>
+#include <rg_queue.h>
+#include <msgsimple.h>
+#include <res-ocf.h>
+#include <event.h>
+
+
+/*
+ * Send a message to the target node to start the service.
+ */
+int svc_start_remote(char *svcName, int request, uint32_t target);
+void svc_report_failure(char *);
+int get_service_state_internal(char *svcName, rg_state_t *svcStatus);
+
+
+/**
+ * Ask each node in target_list, in order, to start svcName.  RG_EDEPEND and
+ * RG_EFAIL replies move on to the next node; any other reply ends the loop.
+ */
+int
+service_op_start(char *svcName,
+		 int *target_list,
+		 int target_list_len,
+		 int *new_owner)
+{
+	int target;
+	int ret, x;
+	rg_state_t svcStatus;
+	
+	if (get_service_state_internal(svcName, &svcStatus) < 0) {
+		return RG_EFAIL;
+	}
+
+	if (svcStatus.rs_state == RG_STATE_FAILED ||
+	    svcStatus.rs_state == RG_STATE_UNINITIALIZED)
+		return RG_EINVAL;
+
+	for (x = 0; x < target_list_len; x++) {
+
+		target = target_list[x];
+		ret = svc_start_remote(svcName, RG_START_REMOTE,
+				       target);
+		switch (ret) {
+		case RG_ERUN:
+			/* Someone stole the service while we were
+			   trying to start it; report its current owner */
+			get_rg_state_local(svcName, &svcStatus);
+			if (new_owner)
+				*new_owner = svcStatus.rs_owner;
+			return 0;
+		case RG_EDEPEND:
+		case RG_EFAIL:
+			continue;
+		case RG_EABORT:
+			svc_report_failure(svcName);
+			return RG_EFAIL;
+		default:
+			/* deliberate fallthrough */
+			clulog(LOG_ERR,
+			       "#61: Invalid reply from member %d during"
+			       " start operation!\n", target);
+		case RG_NO:
+			/* state uncertain: returns 0, *new_owner is not set */
+			clulog(LOG_CRIT, "State Uncertain: svc:%s "
+			       "nid:%d req:%s ret:%d\n", svcName,
+			       target, rg_req_str(RG_START_REMOTE), ret);
+			return 0;
+		case 0:
+			if (new_owner)
+				*new_owner = target;
+			clulog(LOG_NOTICE, "Service %s is now running "
+			       "on member %d\n", svcName, (int)target);
+			return 0;
+		}
+	}
+
+	return RG_EFAIL;	/* list empty, or every node tried was skipped */
+}
+
+/* Ask the local node to stop/disable svcName.  Returns -1 if the request
+   can't be sent, 0 on a malformed reply, else the d_ret from the reply. */
+int
+service_op_stop(char *svcName, int do_disable, int event_type)
+{
+	SmMessageSt msg;
+	int msg_ret;
+	msgctx_t ctx;
+
+	/* Build the message header */
+	msg.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
+	msg.sm_hdr.gh_command = RG_ACTION_REQUEST;
+	msg.sm_hdr.gh_arg1 = RG_ACTION_MASTER;
+	msg.sm_hdr.gh_length = sizeof (SmMessageSt);
+
+	/* A plain stop caused by a user event is sent as RG_STOP_USER */
+	msg.sm_data.d_action = ((!do_disable) ? RG_STOP:RG_DISABLE);
+	if (msg.sm_data.d_action == RG_STOP && event_type == EVENT_USER)
+		msg.sm_data.d_action = RG_STOP_USER;
+
+	strncpy(msg.sm_data.d_svcName, svcName,
+		sizeof(msg.sm_data.d_svcName));
+	msg.sm_data.d_ret = 0;
+	msg.sm_data.d_svcOwner = 0;
+
+	/* Open a connection to the local node - it will decide what to
+	   do in this case. XXX inefficient; should queue requests
+	   locally and immediately forward requests otherwise */
+	if (msg_open(MSG_CLUSTER, my_id(), RG_PORT, &ctx, 2)< 0) {
+		clulog(LOG_ERR,
+		       "#58: Failed opening connection to member #%d\n",
+		       my_id());
+		return -1;
+	}
+
+	/* Encode */
+	swab_SmMessageSt(&msg);
+
+	/* Send the request.  Compare as int: against an unsigned sizeof,
+	   a -1 error return would never look "too short". */
+	if (msg_send(&ctx, &msg, sizeof (SmMessageSt)) <
+	    (int)sizeof (SmMessageSt)) {
+		clulog(LOG_ERR, "Failed to send complete message\n");
+		msg_close(&ctx);
+		return -1;
+	}
+
+	/* Check the response */
+	do {
+		msg_ret = msg_receive(&ctx, &msg,
+				      sizeof (SmMessageSt), 10);
+		if ((msg_ret == -1 && errno != ETIMEDOUT) ||
+		    (msg_ret >= 0)) {
+			break;
+		}
+	} while(1);
+
+	/* Finished with the connection, whatever was received */
+	msg_close(&ctx);
+
+	if (msg_ret != sizeof (SmMessageSt)) {
+		clulog(LOG_WARNING, "Strange response size: %d vs %d\n",
+		       msg_ret, (int)sizeof(SmMessageSt));
+		return 0;	/* XXX really UNKNOWN */
+	}
+
+	/* Decode */
+	swab_SmMessageSt(&msg);
+
+	return msg.sm_data.d_ret;
+}
+
+
+/*
+   TODO
+   service_op_migrate()
+ */
+
Index: src/daemons/slang_event.c
===================================================================
RCS file: src/daemons/slang_event.c
diff -N src/daemons/slang_event.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/daemons/slang_event.c	8 Nov 2007 21:09:01 -0000
@@ -0,0 +1,1128 @@
+#include <platform.h>
+#include <resgroup.h>
+#include <reslist.h>
+#include <clulog.h>
+#include <members.h>
+#include <list.h>
+#include <assert.h>
+#include <event.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <slang/slang.h>
+#include <sys/syslog.h>
+#include <malloc.h>
+#include <clulog.h>
+#include <sets.h>
+
+static int _test = 0;
+static int __sl_initialized = 0;
+
+static char **_service_list = NULL;
+static int _service_list_len = 0;
+
+char **get_service_names(int *len); /* from groups.c */
+int get_service_property(char *rg_name, char *prop, char *buf, size_t buflen);
+void push_int_array(int *stuff, int len);
+
+
+/* ================================================================
+ * Node states 
+ * ================================================================ */
+static const int
+   _ns_online = 1,
+   _ns_offline = 0;
+
+/* Failure / success */
+static const int
+   _rg_fail = -1,
+   _rg_success = 0;
+
+
+/* ================================================================
+ * Event information 
+ * ================================================================ */
+static const int
+   _ev_none = EVENT_NONE,
+   _ev_node = EVENT_NODE,
+   _ev_service = EVENT_RG,
+   _ev_config = EVENT_CONFIG,
+   _ev_user = EVENT_USER;
+
+static int
+   _stop_processing = 0,
+   _my_node_id = 0,
+   _node_state = 0,
+   _node_id = 0,
+   _node_clean = 0,
+   _service_owner = 0,
+   _service_last_owner = 0,
+   _user_request = 0,
+   _user_arg1 = 0,
+   _user_arg2 = 0,
+   _rg_err = 0,
+   _event_type = 0;
+
+static char
+   *_node_name = NULL,
+   *_service_name = NULL,
+   *_service_state = NULL,
+   *_rg_err_str = "No Error";
+
+static int
+   _user_enable = RG_ENABLE,
+   _user_disable = RG_DISABLE,
+   _user_stop = RG_STOP_USER,		/* From clusvcadm */
+   _user_relo = RG_RELOCATE,
+   _user_restart = RG_RESTART,
+   _user_migrate = RG_MIGRATE;
+
+/* Variables exported to S/Lang; each type tag must match the C object */
+SLang_Intrin_Var_Type rgmanager_vars[] =
+{
+	/* Log levels (constants) */
+
+	/* Node state information */
+	MAKE_VARIABLE("NODE_ONLINE",	&_ns_online,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("NODE_OFFLINE",	&_ns_offline,	SLANG_INT_TYPE, 1),
+
+	/* Node event information */
+	MAKE_VARIABLE("node_self",	&_my_node_id,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("node_state",	&_node_state,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("node_id",	&_node_id,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("node_name",	&_node_name,	SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("node_clean",	&_node_clean,	SLANG_INT_TYPE, 1),
+
+	/* Service event information */
+	MAKE_VARIABLE("service_name",	&_service_name,	SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("service_state",	&_service_state,SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("service_owner",	&_service_owner,SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("service_last_owner", &_service_last_owner,
+						SLANG_INT_TYPE, 1),
+
+	/* User event information */
+	MAKE_VARIABLE("user_request",	&_user_request,	SLANG_INT_TYPE,1),
+	MAKE_VARIABLE("user_arg1",	&_user_arg1,	SLANG_INT_TYPE,1),
+	MAKE_VARIABLE("user_arg2",	&_user_arg2,	SLANG_INT_TYPE,1),
+	MAKE_VARIABLE("user_service",	&_service_name, SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("user_target",	&_service_owner,SLANG_INT_TYPE, 1),
+
+	/* General event information */
+	MAKE_VARIABLE("event_type",	&_event_type,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_NONE",	&_ev_none,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_NODE",	&_ev_node,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_CONFIG",	&_ev_config,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_SERVICE",	&_ev_service,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("EVENT_USER",	&_ev_user,	SLANG_INT_TYPE, 1),
+
+	/* User request constants */
+	MAKE_VARIABLE("USER_ENABLE",	&_user_enable,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_DISABLE",	&_user_disable,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_STOP",	&_user_stop,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_RELOCATE",	&_user_relo,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_RESTART",	&_user_restart,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("USER_MIGRATE",	&_user_migrate,	SLANG_INT_TYPE, 1),
+
+	/* Errors */
+	MAKE_VARIABLE("rg_error",	&_rg_err,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("rg_error_string",&_rg_err_str,	SLANG_STRING_TYPE,1),
+	MAKE_VARIABLE("FAIL",		&_rg_fail,	SLANG_INT_TYPE, 1),
+	MAKE_VARIABLE("SUCCESS",	&_rg_success,	SLANG_INT_TYPE, 1),
+
+	SLANG_END_TABLE
+};
+
+/* Record an error code together with its name (the stringified argument) */
+#define rg_error(errortype) \
+do { \
+	_rg_err = errortype; \
+	_rg_err_str = #errortype; \
+} while(0)
+
+/* Read svcName's state; an uninitialized one is set from "autostart" */
+int
+get_service_state_internal(char *svcName, rg_state_t *svcStatus)
+{
+	struct dlm_lksb lock;
+	char buf[32];
+
+	get_rg_state_local(svcName, svcStatus);
+	if (svcStatus->rs_state == RG_STATE_UNINITIALIZED) {
+		if (rg_lock(svcName, &lock) < 0) {
+			printf("Failed to get lock?!\n");
+			/* lock not taken; nothing to undo */
+			return -1;
+		}
+
+		if (get_rg_state(svcName, svcStatus) < 0) {
+			printf("Failed to get state?!\n");
+			/* release the lock on the way out */
+			rg_unlock(&lock);
+			return -1;
+		}
+
+		if (get_service_property(svcName, "autostart",
+					 buf, sizeof(buf)) == 0) {
+			if (buf[0] == '0' || !strcasecmp(buf, "no")) {
+				svcStatus->rs_state = RG_STATE_DISABLED;
+			} else {
+				svcStatus->rs_state = RG_STATE_STOPPED;
+			}
+		}
+		/* NOTE(review): rs_state from the locked re-read isn't re-checked */
+		set_rg_state(svcName, svcStatus);
+
+		rg_unlock(&lock);
+	}
+
+	return 0;
+}
+
+
+/*
+   (restarts, last_owner, owner, state) = service_status(servicename)
+ */
+void
+sl_service_status(char *svcName)
+{
+	rg_state_t svcStatus;
+	char *state_str;
+	/* On any failure below, fewer than four values end up on the stack */
+	if (get_service_state_internal(svcName, &svcStatus) < 0) {
+		clulog(LOG_ERR,
+		       "Failed to get service state for %s", svcName);
+		return;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_restarts) < 0) {
+		printf(" Failed to push restarts \n");
+		return;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_last_owner) < 0) {
+		/* NOTE: restarts is already on the stack here */
+		printf(" Failed to push last owner \n");
+		return;
+	}
+
+	switch(svcStatus.rs_state) {
+	case RG_STATE_DISABLED:
+	case RG_STATE_STOPPED:
+	case RG_STATE_FAILED:
+	case RG_STATE_RECOVER:
+	case RG_STATE_ERROR:
+		/* There is no owner for these states.  Ever.  */
+		svcStatus.rs_owner = -1;
+	}
+
+	if (SLang_push_integer(svcStatus.rs_owner) < 0) {
+		/* NOTE: two values are already on the stack here */
+		printf(" Failed to push owner \n");
+		return;
+	}
+
+	state_str = strdup(rg_state_str(svcStatus.rs_state));
+	if (!state_str) {
+		printf("Failed to strdup?!\n");
+		/* NOTE: three values are already on the stack here */
+		return;
+	}
+
+	if (SLang_push_malloced_string(state_str) < 0) {
+		printf(" Failed to push state \n");
+		free(state_str);
+	}
+
+}
+
+
+/**
+  (nofailback, restricted, ordered, nodelist) = service_domain_info(svcName);
+
+  Pushes the failover domain flags of a service, each as 0 or 1
+  (FOD_NOFAILBACK, FOD_RESTRICTED and FOD_ORDERED respectively), then
+  the domain's node list.
+
+  If the service has no "domain" property, or the domain can not be
+  resolved, four integer zeroes are pushed instead so that the calling
+  script always receives exactly four values.
+ */
+void
+sl_domain_info(char *svcName)
+{
+	int *nodelist = NULL, listlen;
+	char buf[64];
+	int flags = 0;
+
+	if (get_service_property(svcName, "domain", buf, sizeof(buf)) < 0) {
+		/* nofailback not set */
+		SLang_push_integer(0);
+		/* not restricted */
+		SLang_push_integer(0);
+		/* not ordered */
+		SLang_push_integer(0);
+		/* no nodes */
+		SLang_push_integer(0);
+
+		/*
+		 * buf holds nothing useful when the lookup failed, so
+		 * stop here instead of going on to the domain lookup
+		 * (which would also push another set of values).
+		 */
+		return;
+	}
+
+	if (node_domain_set_safe(buf, &nodelist, &listlen, &flags) < 0) {
+		/* Domain could not be resolved */
+		SLang_push_integer(0);
+		SLang_push_integer(0);
+		SLang_push_integer(0);
+		SLang_push_integer(0);
+		return;
+	}
+
+	/* Flags are pushed as 0/1 values */
+	SLang_push_integer(!!(flags & FOD_NOFAILBACK));
+	SLang_push_integer(!!(flags & FOD_RESTRICTED));
+	SLang_push_integer(!!(flags & FOD_ORDERED));
+
+	/* push_int_array() copies the entries, so the list can be freed */
+	push_int_array(nodelist, listlen);
+	free(nodelist);
+}
+
+/* Pop an integer or a 1-D integer array into a malloc'd int list */
+static int
+get_int_array(int **nodelist, int *len)
+{
+	SLang_Array_Type *a = NULL;
+	SLindex_Type i;
+	int *nodes = NULL, t, ret = -1;
+
+	if (!nodelist || !len)
+		return -1;
+
+	t = SLang_peek_at_stack();
+	if (t == SLANG_INT_TYPE) {
+		/* A lone integer is returned as a one-element list */
+		/* (callers in this file free() *nodelist in both cases) */
+
+		nodes = malloc(sizeof(int) * 1);
+		if (!nodes)
+			goto out;
+		if (SLang_pop_integer(&nodes[0]) < 0)
+			goto out;
+
+		*len = 1;
+		ret = 0;
+
+	} else if (t == SLANG_ARRAY_TYPE) {
+		if (SLang_pop_array_of_type(&a, SLANG_INT_TYPE) < 0)
+			goto out;
+		if (a->num_dims > 1)
+			goto out;
+		if (a->dims[0] < 0)
+			goto out;
+		nodes = malloc(sizeof(int) * a->dims[0]);
+		if (!nodes)
+			goto out;
+		for (i = 0; i < a->dims[0]; i++)
+			SLang_get_array_element(a, &i, &nodes[i]);
+
+		*len = a->dims[0];
+		ret = 0;
+	}
+	/* Any other type: nothing is popped and -1 is returned */
+out:
+	if (a)
+		SLang_free_array(a);
+	if (ret == 0) {
+		*nodelist = nodes;
+	} else {
+		if (nodes)
+			free(nodes);
+	}
+	
+	return ret;
+}
+
+
+/**
+  value = service_property(service_name, property)
+ */
+char *
+sl_service_property(char *svcName, char *prop)
+{
+	char buf[96];
+
+	if (get_service_property(svcName, prop, buf, sizeof(buf)) < 0)
+		return NULL;
+
+	/* NOTE(review): confirm S/Lang releases this copy, else it leaks */
+	return strdup(buf);
+}
+
+
+/**
+  usage:
+
+  ret = service_stop(name [, disable_flag]);
+ */
+int
+sl_stop_service(void)
+{
+	char *svcname = NULL;
+	int nargs, t, ret = -1;
+	int do_disable = 0;
+
+	nargs = SLang_Num_Function_Args;
+
+	/* Takes one or two arguments */
+	if (nargs <= 0 || nargs > 2)
+		return -1;
+
+	if (nargs == 2) {
+		t = SLang_peek_at_stack();
+		if (t != SLANG_INT_TYPE) {
+			/* disable_flag must be an integer; */
+			/* the call fails with -1 otherwise */
+			goto out;
+		}
+
+		if (SLang_pop_integer(&do_disable) < 0)
+			goto out;
+
+		/* only the service name is left on the stack now */
+		--nargs;
+	}
+
+	if (nargs == 1) {
+		t = SLang_peek_at_stack();
+		if (t != SLANG_STRING_TYPE) {
+			/* the service name must be a string; */
+			/* the call fails with -1 otherwise */
+			goto out;
+		}
+		if (SLpop_string(&svcname) < 0)
+			goto out;
+
+		/* NOTE(review): sl_clulog() uses SLfree() for such strings */
+	}
+
+	/* Hand the request to service_op_stop() */
+	ret = service_op_stop(svcname, do_disable, _event_type);
+out:
+	if (svcname)
+		free(svcname);
+	return ret;
+}
+
+
+/**
+  usage:
+
+  ret = service_start(name, <array>ordered_node_list_allowed,
+  		      <array>node_list_illegal)
+ */
+int
+sl_start_service(void)
+{
+	char *svcname = NULL;
+	int *pref_list = NULL, pref_list_len = 0;
+	int *illegal_list = NULL, illegal_list_len = 0;
+	int nargs, t, x, ret = -1;
+
+	nargs = SLang_Num_Function_Args;
+
+	/* Takes one, two, or three */
+	if (nargs <= 0 || nargs > 3)
+		return -1;
+
+	if (nargs == 3) {
+		if (get_int_array(&illegal_list, &illegal_list_len) < 0)
+			goto out;
+
+		/*
+		 * The illegal-node list is popped here so that the
+		 * remaining arguments line up, but it is not passed
+		 * on: service_op_start() below only receives the
+		 * preferred list.
+		 */
+
+		--nargs;
+	}
+
+	if (nargs == 2) {
+		if (get_int_array(&pref_list, &pref_list_len) < 0)
+			goto out;
+
+		/*
+		 * pref_list is the ordered list of nodes to try.  A
+		 * single integer is accepted as well as an array
+		 * (see get_int_array()).  With only one argument the
+		 * list stays empty.
+		 */
+
+		--nargs;
+	}
+
+	if (nargs == 1) {
+		/* Just get the service name */
+		t = SLang_peek_at_stack();
+		if (t != SLANG_STRING_TYPE) {
+			/* the service name must be a string; */
+			/* the call fails with -1 otherwise */
+			goto out;
+		}
+		if (SLpop_string(&svcname) < 0)
+			goto out;
+
+		/* NOTE(review): sl_clulog() uses SLfree() for such strings */
+	}
+
+	/* x receives the new owner, which is not used afterwards */
+	ret = service_op_start(svcname, pref_list,
+			       pref_list_len, &x); ;
+
+out:
+	if (svcname)
+		free(svcname);
+	if (illegal_list)
+		free(illegal_list);
+	if (pref_list)
+		free(pref_list);
+	return ret;
+}
+
+
+/* Build a 1-D S/Lang integer array from stuff[0..len-1] and push it */
+void
+push_int_array(int *stuff, int len)
+{
+	SLindex_Type count = len, idx;
+	SLang_Array_Type *out;
+	int val;
+
+	out = SLang_create_array(SLANG_INT_TYPE, 0, NULL, &count, 1);
+	if (out == NULL)
+		return;
+
+	for (idx = 0; idx < len; idx++) {
+		val = stuff[idx];
+		SLang_set_array_element(out, &idx, &val);
+	}
+
+	SLang_push_array(out, 1);
+}
+
+
+
+/* Pushes an array of the IDs of the rgmanager-visible nodes that are online.
+   Nothing is pushed if the member list or memory is unavailable. */
+void
+sl_nodes_online(void)
+{
+	int i, *nodes, nodecount = 0;
+	cluster_member_list_t *membership = member_list();
+
+	if (!membership)
+		return;
+	nodes = malloc(sizeof(int) * membership->cml_count);
+	if (!nodes) {
+		free_member_list(membership);	/* don't leak the list */
+		return;
+	}
+
+	for (i = 0; i < membership->cml_count; i++) {
+		if (membership->cml_members[i].cn_member &&
+		    membership->cml_members[i].cn_nodeid != 0) {
+			nodes[nodecount] = membership->cml_members[i].cn_nodeid;
+			++nodecount;
+		}
+	}
+	free_member_list(membership);
+	push_int_array(nodes, nodecount);
+	free(nodes);
+}
+
+
+/*
+   Pushes an array of the rgmanager-defined service names (type:name
+   format) taken from _service_list, which slang_process_event() fills
+   and frees once per event.
+ */
+void
+sl_service_list(void)
+{
+	SLindex_Type count = _service_list_len, idx;
+	SLang_Array_Type *names;
+
+	names = SLang_create_array(SLANG_STRING_TYPE, 0, NULL, &count, 1);
+	if (names == NULL)
+		return;
+
+	for (idx = 0; idx < _service_list_len; idx++)
+		SLang_set_array_element(names, &idx, &_service_list[idx]);
+
+	SLang_push_array(names, 1);
+}
+
+
+/*
+   union(list1, list2): pops two node lists, pushes the result of s_union()
+ */
+void
+sl_union(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *merged = NULL, merged_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it is popped first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_union(left, left_len, right, right_len, &merged, &merged_len);
+	push_int_array(merged, merged_len);
+	/* free(NULL) is harmless, so no NULL checks are needed */
+	free(left);
+	free(right);
+	free(merged);
+}
+
+/*
+   intersection(list1, list2): pops two node lists, pushes s_intersection()
+ */
+void
+sl_intersection(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *common = NULL, common_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it is popped first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_intersection(left, left_len, right, right_len, &common, &common_len);
+	push_int_array(common, common_len);
+	/* free(NULL) is harmless, so no NULL checks are needed */
+	free(left);
+	free(right);
+	free(common);
+}
+
+
+/*
+   delta(list1, list2): pops two node lists, pushes the result of s_delta()
+ */
+void
+sl_delta(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *diff = NULL, diff_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it is popped first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_delta(left, left_len, right, right_len, &diff, &diff_len);
+	push_int_array(diff, diff_len);
+	/* free(NULL) is harmless, so no NULL checks are needed */
+	free(left);
+	free(right);
+	free(diff);
+}
+
+
+/*
+   subtract(list1, list2): pops two node lists, pushes s_subtract() of them
+ */
+void
+sl_subtract(void)
+{
+	int *left = NULL, left_len = 0;
+	int *right = NULL, right_len = 0;
+	int *rest = NULL, rest_len = 0;
+
+	if (SLang_Num_Function_Args != 2)
+		return;
+
+	/* The second argument is on top of the stack, so it is popped first */
+	get_int_array(&right, &right_len);
+	get_int_array(&left, &left_len);
+
+	s_subtract(left, left_len, right, right_len, &rest, &rest_len);
+	push_int_array(rest, rest_len);
+	/* free(NULL) is harmless, so no NULL checks are needed */
+	free(left);
+	free(right);
+	free(rest);
+}
+
+
+/**
+  Logging: start at the end of the arg list and work backwards, prepending
+  each argument to the message.  This does not support standard clulog /
+  printf formatting; rather, we just allow integers / strings / integer
+  arrays to be mixed on the stack, figure out the type, convert it to a
+  string, and prepend it on to our log message.
+
+  The log level is not an argument.  It is chosen by the name of the
+  intrinsic that is called:
+     debug     (LOG_DEBUG)
+     ...
+     emerg     (LOG_EMERG)
+
+  These match up with the clulog / syslog levels.  Any number of arguments
+  may be provided.  Examples are:
+
+    info("String", 1, "string2");
+
+  Result:  String1string2
+
+    info("String ", 1, " string2");
+
+  Result:  String 1 string2
+ */
+/*
+   Render array as "[ a b c ]" in buf.  Returns the string length, or -1
+   if buflen is too small.
+ */
+static int
+array_to_string(char *buf, int buflen, int *array, int arraylen)
+{
+	char piece[16];
+	int idx, n, left = buflen;
+
+	n = snprintf(buf, buflen, "[ ");
+	if (n == buflen)
+		return -1;
+	left -= n;
+
+	for (idx = 0; idx < arraylen; idx++) {
+		n = snprintf(piece, sizeof(piece)-1, "%d ", array[idx]);
+		left -= n;
+		if (left <= 0)
+			return -1;
+		strncat(buf, piece, n);
+	}
+
+	n = snprintf(piece, sizeof(piece), "]");
+	left -= n;
+	if (left <= 0)
+		return -1;
+	strncat(buf, piece, n);
+	return (buflen - left);
+}
+
+
+/*
+   Common back end of the logging intrinsics: pops every argument of the
+   current call (last argument first), converts it to text and prepends it
+   to the message, then logs the result at the given level.  Nothing is
+   logged if an argument cannot be converted or the message gets too long.
+ */
+void
+sl_clulog(int level)
+{
+	int t, nargs, n;
+	int s_intval;
+	char *s_strval;
+	int *nodes = 0, nlen = 0;
+	char logbuf[512];
+	char tmp[256];
+	size_t remain = sizeof(logbuf)-2, len;
+
+	nargs = SLang_Num_Function_Args;
+	if (nargs < 1)
+		return;
+
+	/* The message is assembled backwards from the trailing "\n\0" */
+	logbuf[sizeof(logbuf)-1] = 0;
+	logbuf[sizeof(logbuf)-2] = '\n';
+
+	while (nargs && (t = SLang_peek_at_stack()) >= 0 && remain) {
+		switch(t) {
+		case SLANG_ARRAY_TYPE:
+			if (get_int_array(&nodes, &nlen) < 0)
+				return;
+			n = array_to_string(tmp, sizeof(tmp),
+					    nodes, nlen);
+			free(nodes);
+			break;
+		case SLANG_INT_TYPE:
+			if (SLang_pop_integer(&s_intval) < 0)
+				return;
+			n = snprintf(tmp, sizeof(tmp), "%d", s_intval);
+			break;
+		case SLANG_STRING_TYPE:
+			if (SLpop_string(&s_strval) < 0)
+				return;
+			n = snprintf(tmp, sizeof(tmp), "%s", s_strval);
+			SLfree(s_strval);
+			break;
+		default:
+			/* Drop the value so the next peek sees a new one */
+			SLdo_pop();
+			n = snprintf(tmp, sizeof(tmp),
+				     "{UnknownType %d}", t);
+			break;
+		}
+
+		/* n is signed so that a -1 error result is noticed */
+		if (n < 0)
+			return;
+		/* snprintf() returns the untruncated length; tmp holds less */
+		len = ((size_t)n < sizeof(tmp)) ? (size_t)n : sizeof(tmp) - 1;
+
+		--nargs;
+		if (len > remain)
+			return;
+		remain -= len;
+
+		memcpy(&logbuf[remain], tmp, len);
+	}
+
+	/* The text comes from a script: never use it as the format string */
+	clulog(level, "%s", &logbuf[remain]);
+}
+
+/* debug(...): log the call's arguments at LOG_DEBUG */
+void
+sl_log_debug(void)
+{
+	sl_clulog(LOG_DEBUG);
+}
+
+/* info(...): log the call's arguments at LOG_INFO */
+void
+sl_log_info(void)
+{
+	sl_clulog(LOG_INFO);
+}
+
+/* notice(...): log the call's arguments at LOG_NOTICE */
+void
+sl_log_notice(void)
+{
+	sl_clulog(LOG_NOTICE);
+}
+
+/* warning(...): log the call's arguments at LOG_WARNING */
+void
+sl_log_warning(void)
+{
+	sl_clulog(LOG_WARNING);
+}
+
+/* err(...): log the call's arguments at LOG_ERR */
+void
+sl_log_err(void)
+{
+	sl_clulog(LOG_ERR);
+}
+
+/* crit(...): log the call's arguments at LOG_CRIT */
+void
+sl_log_crit(void)
+{
+	sl_clulog(LOG_CRIT);
+}
+
+/* alert(...): log the call's arguments at LOG_ALERT */
+void
+sl_log_alert(void)
+{
+	sl_clulog(LOG_ALERT);
+}
+
+/* emerg(...): log the call's arguments at LOG_EMERG */
+void
+sl_log_emerg(void)
+{
+	sl_clulog(LOG_EMERG);
+}
+
+/* stop_processing(): ask slang_process_event() to skip the remaining
+   handlers for the current event */
+void
+sl_die(void)
+{
+	_stop_processing = 1;
+}
+
+/* Intrinsic functions made visible to S/Lang by do_init_slang() */
+SLang_Intrin_Fun_Type rgmanager_slang[] =
+{
+	MAKE_INTRINSIC_0("nodes_online", sl_nodes_online, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("service_list", sl_service_list, SLANG_VOID_TYPE),
+	/* Per-service queries and operations */
+	MAKE_INTRINSIC_SS("service_property", sl_service_property,
+			  SLANG_STRING_TYPE),
+	MAKE_INTRINSIC_S("service_domain_info", sl_domain_info, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("service_stop", sl_stop_service, SLANG_INT_TYPE),
+	MAKE_INTRINSIC_0("service_start", sl_start_service, SLANG_INT_TYPE),
+	MAKE_INTRINSIC_S("service_status", sl_service_status,
+			 SLANG_VOID_TYPE),
+
+	/* Node list manipulation */
+	MAKE_INTRINSIC_0("union", sl_union, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("intersection", sl_intersection, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("delta", sl_delta, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("subtract", sl_subtract, SLANG_VOID_TYPE),
+
+	/* Logging */
+	MAKE_INTRINSIC_0("debug", sl_log_debug, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("info", sl_log_info, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("notice", sl_log_notice, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("warning", sl_log_warning, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("err", sl_log_err, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("crit", sl_log_crit, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("alert", sl_log_alert, SLANG_VOID_TYPE),
+	MAKE_INTRINSIC_0("emerg", sl_log_emerg, SLANG_VOID_TYPE),
+	/* Lets a handler end processing of the current event */
+	MAKE_INTRINSIC_0("stop_processing", sl_die, SLANG_VOID_TYPE),
+	SLANG_END_INTRIN_FUN_TABLE
+};
+
+
+
+/* ================================================================
+ * S/Lang initialization
+ * ================================================================ */
+int
+do_init_slang(void)
+{
+	/* Returns 0 on success, 1 if any step of the setup fails */
+	if (SLang_init_slang() < 0 || SLang_init_slfile() < 0)
+		return 1;
+	if (SLadd_intrin_fun_table(rgmanager_slang, NULL) < 0) {
+		printf("Death by chocolate\n");
+		return 1;
+	}
+	if (SLadd_intrin_var_table (rgmanager_vars, NULL) < 0) {
+		printf("Death by cherries\n");
+		return 1;
+	}
+
+	SLpath_set_load_path(RESOURCE_ROOTDIR);
+	_my_node_id = my_id();
+	__sl_initialized = 1;
+
+	return 0;
+}
+
+
+/* Run an event script: the file if one is given, else the inline text */
+int
+do_slang_run(const char *file, const char *script)
+{
+	int rc;
+
+	rc = file ? SLang_load_file((char *)file)
+		  : SLang_load_string((char *)script);
+
+	/* Reset the interpreter after a failed load */
+	if (rc < 0)
+		SLang_restart(1);
+
+	return rc;
+}
+
+/* Run a node-event script with the node_* variables describing the node */
+int
+S_node_event(const char *file, const char *script, int nodeid,
+	     int state, int clean)
+{
+	int ret;
+	const char *name;
+	cluster_member_list_t *membership = member_list();
+
+	/* No member list or unknown node: node_name stays NULL, not strdup(NULL) */
+	if (membership) {
+		name = memb_id_to_name(membership, nodeid);
+		_node_name = name ? strdup(name) : NULL;
+		free_member_list(membership);
+	}
+	_node_state = state;
+	_node_clean = clean;
+	_node_id = nodeid;
+	ret = do_slang_run(file, script);
+
+	_node_state = 0;
+	_node_clean = 0;
+	_node_id = 0;
+	free(_node_name);
+	_node_name = NULL;
+	return ret;
+}
+
+
+/*
+   Run a service-event script with the service_* variables set for the event
+ */
+int
+S_service_event(const char *file, const char *script, char *name,
+	        int state, int owner, int last_owner)
+{
+	int rc;
+
+	_service_name = name;
+	_service_state = (char *)rg_state_str(state);
+	_service_last_owner = last_owner;
+
+	/* There is no owner for these states.  Ever.  */
+	if (state == RG_STATE_DISABLED || state == RG_STATE_STOPPED ||
+	    state == RG_STATE_FAILED || state == RG_STATE_RECOVER ||
+	    state == RG_STATE_ERROR)
+		_service_owner = -1;
+	else
+		_service_owner = owner;
+
+	rc = do_slang_run(file, script);
+
+	_service_name = NULL;
+	_service_state = NULL;
+	_service_owner = 0;
+	_service_last_owner = 0;
+
+	return rc;
+}
+
+/* Run a user-request script, then reply to the requester via send_ret() */
+int
+S_user_event(const char *file, const char *script, char *name,
+	     int request, int arg1, int arg2, int target, msgctx_t *ctx)
+{
+	int ret = RG_SUCCESS;
+
+	_service_name = name;
+	_service_owner = target;
+	_user_request = request;
+	_user_arg1 = arg1;
+	_user_arg2 = arg2;
+
+	ret = do_slang_run(file, script);
+
+	_service_name = NULL;
+	_service_owner = 0;
+	_user_request = 0;
+	_user_arg1 = 0;
+	_user_arg2 = 0;
+
+	/* XXX Send response code to caller - that 0 should be the
+	   new service owner, if there is one  */
+	if (ctx) {
+		send_ret(ctx, name, ret, request, 0); /* (ret, orig_request) */
+		msg_close(ctx);
+		msg_free_ctx(ctx);
+	}
+
+	return ret;
+}
+
+/* Run pattern's script for ev through the runner for ev's type; returns 0 */
+int
+slang_do_script(event_t *pattern, event_t *ev)
+{
+	_event_type = ev->ev_type;
+	switch(ev->ev_type) {
+	case EVENT_NODE:
+		S_node_event(
+				pattern->ev_script_file,
+				pattern->ev_script,
+				ev->ev.node.ne_nodeid,
+				ev->ev.node.ne_state,
+				ev->ev.node.ne_clean);
+		break;
+	case EVENT_RG:
+		S_service_event(
+				pattern->ev_script_file,
+				pattern->ev_script,
+				ev->ev.group.rg_name,
+				ev->ev.group.rg_state,
+				ev->ev.group.rg_owner,
+				ev->ev.group.rg_last_owner);
+		break;
+	case EVENT_USER:
+		S_user_event(
+				pattern->ev_script_file,
+				pattern->ev_script,
+				ev->ev.user.u_name,
+				ev->ev.user.u_request,
+				ev->ev.user.u_arg1,
+				ev->ev.user.u_arg2,
+				ev->ev.user.u_target,
+				ev->ev.user.u_ctx);
+		break;
+	default:	/* other event types: no script is run */
+		break;
+	}
+	_event_type = EVENT_NONE;
+	return 0;	/* the runners' results are not propagated */
+}
+
+
+
+/**
+  Run every handler in the event table that matches ev: priorities 1 to
+  max_prio first, then the default level 0.  The caller is responsible
+  for freeing the event - do not free (ev) ...
+ */
+int
+slang_process_event(event_table_t *event_table, event_t *ev)
+{
+	int x, y;
+	event_t *pattern;
+
+	if (!__sl_initialized)
+		do_init_slang();
+	/* NOTE(review): the result of do_init_slang() is not checked */
+	/* Get the service list once before processing events */
+	if (!_service_list || !_service_list_len)
+		_service_list = get_service_names(&_service_list_len);
+	/* A handler can set this again through stop_processing() */
+	_stop_processing = 0;
+	for (x = 1; x <= event_table->max_prio; x++) {
+		list_for(&event_table->entries[x], pattern, y) {
+			if (event_match(pattern, ev))
+				slang_do_script(pattern, ev);
+			if (_stop_processing)
+				goto out;
+		}
+	}
+
+	/* Default level = 0 */
+	list_for(&event_table->entries[0], pattern, y) {
+		if (event_match(pattern, ev))
+			slang_do_script(pattern, ev);
+		if (_stop_processing)
+			goto out;
+	}
+
+out:
+	/* Free the service list */
+	if (_service_list) {
+		for(x = 0; x < _service_list_len; x++) {
+			free(_service_list[x]);
+		}
+		free(_service_list);
+		_service_list = NULL;
+		_service_list_len = 0;
+	}
+
+	return 0;
+}
Index: src/daemons/test.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/test.c,v
retrieving revision 1.6.2.5
diff -u -p -r1.6.2.5 test.c
--- src/daemons/test.c	31 Jul 2007 17:54:54 -0000	1.6.2.5
+++ src/daemons/test.c	8 Nov 2007 21:09:01 -0000
@@ -27,6 +27,7 @@
 #include <list.h>
 #include <reslist.h>
 #include <pthread.h>
+#include <event.h>
 
 #ifndef NO_CCS
 #error "Can not be built with CCS support."
@@ -130,6 +131,7 @@ test_func(int argc, char **argv)
 	resource_t *reslist = NULL, *curres;
 	resource_node_t *tree = NULL, *tmp, *rn = NULL;
 	int ccsfd, ret = 0, rules = 0;
+	event_table_t *events = NULL;
 
 	fprintf(stderr,"Running in test mode.\n");
 
@@ -142,6 +144,7 @@ test_func(int argc, char **argv)
 
 	load_resource_rules(agentpath, &rulelist);
 	construct_domains(ccsfd, &domains);
+	construct_events(ccsfd, &events);
 	load_resources(ccsfd, &reslist, &rulelist);
 	build_resource_tree(ccsfd, &tree, &rulelist, &reslist);
 
@@ -176,6 +179,11 @@ test_func(int argc, char **argv)
 			printf("=== Failover Domains ===\n");
 			print_domains(&domains);
 		}
+
+		if (events) {
+			printf("=== Event Triggers ===\n");
+			print_events(events);
+		}
 	}
 
 	ccs_unlock(ccsfd);
@@ -246,6 +254,7 @@ test_func(int argc, char **argv)
 	}
 
 out:
+	deconstruct_events(&events);
 	deconstruct_domains(&domains);
 	destroy_resource_tree(&tree);
 	destroy_resources(&reslist);
Index: src/resources/Makefile
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/resources/Makefile,v
retrieving revision 1.13.2.6
diff -u -p -r1.13.2.6 Makefile
--- src/resources/Makefile	12 Jul 2007 11:23:16 -0000	1.13.2.6
+++ src/resources/Makefile	8 Nov 2007 21:09:01 -0000
@@ -34,6 +34,9 @@ UTIL_TARGETS= \
 	utils/httpd-parse-config.pl utils/tomcat-parse-config.pl \
 	utils/member_util.sh
 
+EVENT_TARGETS= \
+	default_event_script.sl
+
 all:
 
 install: all
@@ -44,6 +47,7 @@ install: all
 	install $(TARGETS) ${sharedir}
 	install $(UTIL_TARGETS) ${sharedir}/utils
 	install -m 644 $(METADATA) ${sharedir}
+	install -m 644 $(EVENT_TARGETS) ${sharedir}
 
 uninstall:
 	${UNINSTALL} ${UTIL_TARGETS} ${sharedir}/utils
Index: src/resources/default_event_script.sl
===================================================================
RCS file: src/resources/default_event_script.sl
diff -N src/resources/default_event_script.sl
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/resources/default_event_script.sl	8 Nov 2007 21:09:01 -0000
@@ -0,0 +1,292 @@
+%
+% Returns 1 if node is an element of node_list, 0 otherwise.
+%
+define node_in_set(node_list, node)
+{
+	variable idx = 0;
+	variable count = length(node_list);
+
+	while (idx < count) {
+		if (node == node_list[idx])
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
+%
+% Starts service on the nodes in node_list (best candidate first).
+% If the service is currently owned by a node that is in node_list but
+% is not its first entry, the service is stopped before being started.
+%
+% Returns:
+%   -1  if the service cannot be run (dependency without an owner,
+%       empty node_list, failed/disabled state) or the stop failed
+%    0  if the service is already owned by the first node in node_list
+%   otherwise the return value of service_start()
+%
+define move_or_start(service, node_list)
+{
+	variable parent;
+	variable cur_owner, cur_state;
+
+	% A service naming a dependency may only run while that
+	% dependency has an owner.
+	parent = service_property(service, "depend");
+	if (parent != "") {
+		(cur_owner, cur_state) = service_status(parent);
+		if (cur_owner < 0) {
+			debug(service, " is not runnable; dependency not met");
+			return -1;
+		}
+	}
+
+	(cur_owner, cur_state) = service_status(service);
+	debug("Evaluating ", service, " state=", cur_state, " owner=", cur_owner);
+
+	if (length(node_list) == 0) {
+		debug(service, " is not runnable");
+		return -1;
+	}
+
+	%
+	% Removing the "failed" check will -not- make it possible to
+	% recover failed services from event scripts; it would only
+	% produce a misleading log message about starting the service.
+	%
+	if (cur_state == "failed") {
+		debug(service, " is not runnable");
+		return -1;
+	}
+
+	%
+	% Disabled services are only started on behalf of a user
+	% request.  Enabling them from other events is possible, but
+	% not recommended.
+	%
+	if ((cur_state == "disabled") and (event_type != EVENT_USER)) {
+		debug(service, " is not runnable");
+		return -1;
+	}
+
+	if (cur_owner == node_list[0]) {
+		debug(service, " is already running on best node");
+		return 0;
+	}
+
+	if ((cur_owner < 0) or (node_in_set(node_list, cur_owner) != 1)) {
+		notice("Starting ", service, " on ", node_list);
+	} else {
+		% Running on an allowed, but not the best, node
+		notice("Moving ", service, " from ", cur_owner,
+		       " to ", node_list);
+		if (service_stop(service) < 0) {
+			return -1;
+		}
+	}
+
+	return service_start(service, node_list);
+}
+
+
+%
+% Builds the list of online nodes which may run the given service, best
+% candidate first.
+%
+% Members of the service's failover domain that are online come first;
+% for an unrestricted domain the remaining online nodes follow them.
+% If the domain is unordered or has nofailback set, and the current
+% owner is one of the candidates, the owner is moved to the front so
+% the service stays where it is.
+%
+define allowed_nodes(service)
+{
+	variable candidates;
+	variable up_nodes;
+	variable domain_nodes;
+	variable ordered, restricted, nofailback;
+	variable cur_state, cur_owner;
+
+	(nofailback, restricted, ordered, domain_nodes) =
+			service_domain_info(service);
+	(cur_owner, cur_state) = service_status(service);
+	up_nodes = nodes_online();
+
+	% Online members of the failover domain, in domain order
+	candidates = intersection(domain_nodes, up_nodes);
+	if (restricted != 1) {
+		% Unrestricted domain: the union with the online nodes
+		% simply reorders the online node list so that domain
+		% members come first.
+		candidates = union(candidates, up_nodes);
+	}
+
+	% Ordered domain with failback permitted: domain order decides
+	if ((nofailback != 1) and (ordered != 0)) {
+		return candidates;
+	}
+
+	% union() gives priority to its left argument, so placing the
+	% current owner on the left keeps the service on its node.
+	if ((cur_owner >= 0) and (node_in_set(candidates, cur_owner) != 0)) {
+		return union(cur_owner, candidates);
+	}
+
+	return candidates;
+}
+
+
+%
+% Default handler for node events: re-evaluates every configured
+% service against its current list of allowed nodes.  The result of
+% each individual move_or_start() call is discarded.
+%
+define default_node_event_handler()
+{
+	variable names = service_list();
+	variable count = length(names);
+	variable idx = 0;
+
+	% debug("Executing default node event handler");
+	while (idx < count) {
+		()=move_or_start(names[idx], allowed_nodes(names[idx]));
+		idx++;
+	}
+}
+
+
+%
+% Default handler for service events.  Works from the service event
+% globals service_name, service_state, service_owner and
+% service_last_owner.
+%
+%   "failed"      - nothing is done.
+%   "recovering"  - the service's "recovery" property is applied:
+%                     disable  - stop the service into the disabled state
+%                     restart  - start it, preferring the last owner
+%                     other    - (relocate) start it elsewhere, using the
+%                                last owner only as a last resort
+%   anything else - every other service whose "depend" property names
+%                   this service is started (this service is "started")
+%                   or stopped (this service no longer has an owner).
+%
+define default_service_event_handler()
+{
+	variable services = service_list();
+	variable x;
+	variable depends;
+	variable policy;
+	variable nodes;
+
+	% debug("Executing default service event handler");
+	if (service_state == "failed") {
+		return;
+	}
+
+	if (service_state == "recovering") {
+
+		policy = service_property(service_name, "recovery");
+		debug("Recovering",
+		      " Service: ", service_name,
+		      " Last owner: ", service_last_owner,
+		      " Policy: ", policy);
+
+		if (policy == "disable") {
+			() = service_stop(service_name, 1);
+			return;
+		}
+
+		% The policy-adjusted list must be the one handed to
+		% move_or_start(); otherwise the policy has no effect on
+		% node preference.
+		nodes = allowed_nodes(service_name);
+		if (policy == "restart") {
+			% Prefer the node the service last ran on
+			nodes = union(service_last_owner, nodes);
+		} else {
+			% relocate: try every other allowed node first and
+			% fall back to the last owner only at the very end
+			nodes = union(subtract(nodes, service_last_owner),
+				      service_last_owner);
+		}
+
+		()=move_or_start(service_name, nodes);
+
+		return;
+	}
+
+	for (x = 0; x < length(services); x++) {
+		if (service_name == services[x]) {
+			% don't do anything to ourself!
+			continue;
+		}
+
+		%
+		% Simplistic dependency handling
+		%
+		depends = service_property(services[x], "depend");
+
+		% Not dependent on the service this event is about
+		if (depends != service_name) {
+			continue;
+		}
+
+		if (service_state == "started") {
+			info("Dependency met; starting ", services[x]);
+			nodes = allowed_nodes(services[x]);
+			()=move_or_start(services[x], nodes);
+		}
+
+		% service died - stop service(s) that depend on the dead
+		% service
+		if (service_owner < 0) {
+			info("Dependency lost; stopping ", services[x]);
+			% Discard the result explicitly so that it is not
+			% left behind on the S-Lang stack.
+			()=service_stop(services[x]);
+		}
+
+	}
+}
+
+%
+% Default handler for configuration events.  Intentionally does nothing.
+%
+define default_config_event_handler()
+{
+	% debug("Executing default config event handler");
+	return;
+}
+
+%
+% Default handler for user requests.  Acts on service_name according
+% to user_request:
+%
+%   USER_RESTART   - stop the service, then start it again with the
+%                    current owner (if any) as the preferred node
+%   USER_RELOCATE  - stop the service if it has an owner, then start it,
+%                    preferring user_target when one is given
+%   USER_ENABLE    - start the service, preferring user_target when one
+%                    is given
+%   USER_DISABLE   - stop the service into the disabled state
+%   USER_STOP      - stop the service
+%
+% Returns the result of the last service operation attempted, or -1 if
+% a required stop failed or the request is not one of the above.
+%
+define default_user_event_handler()
+{
+	% Requests not handled below (e.g. migrate) report failure rather
+	% than returning an unset variable.
+	variable ret = -1;
+	variable nodes;
+	variable owner, state;
+
+	nodes = allowed_nodes(service_name);
+	(owner, state) = service_status(service_name);
+
+	if (user_request == USER_RESTART) {
+
+		% Restart in place: the current owner goes to the front
+		if (owner >= 0) {
+			nodes = union(owner, nodes);
+		}
+
+		notice("Stopping ", service_name, " for relocate to ", nodes);
+
+		ret = service_stop(service_name);
+		notice("service_stop returned ", ret);
+		if (ret < 0) {
+			notice("Stop failed :( ");
+			return -1;
+		}
+
+		ret = move_or_start(service_name, nodes);
+
+	} else if ((user_request == USER_RELOCATE) or
+		   (user_request == USER_ENABLE)) {
+
+		if (user_target > 0) {
+			% Honor the requested node only if it is one of
+			% the allowed nodes; otherwise warn and carry on
+			% with the unmodified list.
+			if (node_in_set(nodes, user_target) == 1) {
+				nodes = union(user_target, nodes);
+			} else {
+				warning("User specified node ", user_target,
+					" is offline");
+			}
+		}
+
+		if ((owner >= 0) and (user_request == USER_RELOCATE)) {
+			if (service_stop(service_name) < 0) {
+				return -1;
+			}
+		}
+
+		ret = move_or_start(service_name, nodes);
+
+	} else if (user_request == USER_DISABLE) {
+
+		ret = service_stop(service_name, 1);
+
+	} else if (user_request == USER_STOP) {
+
+		ret = service_stop(service_name);
+
+	}
+	% todo - migrate
+
+	return ret;
+}
+
+% Hand the event to the default handler for its class
+if (event_type == EVENT_NODE) {
+	default_node_event_handler();
+} else if (event_type == EVENT_SERVICE) {
+	default_service_event_handler();
+} else if (event_type == EVENT_CONFIG) {
+	default_config_event_handler();
+} else if (event_type == EVENT_USER) {
+	()=default_user_event_handler();
+}
+
Index: src/resources/service.sh
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/resources/service.sh,v
retrieving revision 1.7.2.5
diff -u -p -r1.7.2.5 service.sh
--- src/resources/service.sh	31 Jul 2007 17:54:55 -0000	1.7.2.5
+++ src/resources/service.sh	8 Nov 2007 21:09:01 -0000
@@ -77,7 +77,7 @@ meta_data()
             <shortdesc lang="en">
 	    	Automatic start after quorum formation
             </shortdesc>
-            <content type="boolean"/>
+            <content type="boolean" default="1"/>
         </parameter>
 
         <parameter name="hardrecovery" reconfig="1">
@@ -92,7 +92,7 @@ meta_data()
             <shortdesc lang="en">
 	    	Reboot if stop phase fails
             </shortdesc>
-            <content type="boolean"/>
+            <content type="boolean" default="0"/>
         </parameter>
 
         <parameter name="exclusive">
@@ -109,7 +109,7 @@ meta_data()
             <shortdesc lang="en">
 	        Exclusive resource group
             </shortdesc>
-            <content type="boolean"/>
+            <content type="boolean" default="0"/>
         </parameter>
 
 	<parameter name="nfslock">
@@ -125,7 +125,7 @@ meta_data()
 	    <shortdesc lang="en">
 	        Enable NFS lock workarounds
 	    </shortdesc>
-	    <content type="boolean"/>
+	    <content type="boolean" default="0"/>
 	</parameter>
                 
         <parameter name="recovery" reconfig="1">
@@ -141,7 +141,7 @@ meta_data()
             <shortdesc lang="en">
 	    	Failure recovery policy
             </shortdesc>
-            <content type="string"/>
+            <content type="string" default="restart"/>
         </parameter>
 
         <parameter name="depend">
TODO:
* Return correct error codes to clusvcadm (currently it always returns
  "Unknown")
* Write glue for 'migrate' operations and migrate-enabled services

Basic configuration specification:

  <rm>
    <events>
      <event class="node"/>        <!-- all node events -->
      <event class="node"
             node="bar"/>     <!-- events concerning 'bar' -->
      <event class="node"
             node="foo"
             node_state="up"/>     <!-- 'up' events for 'foo' -->
      <event class="node"
             node_id="3"
             node_state="down"/>   <!-- 'down' events for node ID 3 -->

          (note, all service ops and such deal with node ID, not
           with node names)

      <event class="service"/>     <!-- all service events-->
      <event class="service"
             service_name="A"/>    <!-- events concerning 'A' -->
      <event class="service"
             service_name="B"
	     service_state="started"/> <!-- when 'B' is started... -->
      <event class="service"
             service_name="B"
	     service_state="started"
	     service_owner="3"/> <!-- when 'B' is started on node 3... -->

      <event class="service"
             priority="1"
             service_name="B"
	     service_state="started"
	     service_owner="3"/> <!-- when 'B' is started on node 3, do this
				      before the other event handlers ... -->


    </events>
    ...
  </rm>

General globals available from all scripts:

   node_self - local node ID
   event_type - event class, either:
       EVENT_NONE - unspecified / unknown
       EVENT_NODE - node transition
       EVENT_SERVICE - service transition
       EVENT_USER - a user-generated request
       EVENT_CONFIG - [NOT CONFIGURABLE]

Node event globals (i.e. when event_type == EVENT_NODE):
  
   node_id - node ID which is transitioning
   node_name - name of node which is transitioning
   node_state - new node state (NODE_ONLINE or NODE_OFFLINE, or if you prefer,
                1 or 0, respectively)
   node_clean - 0 if the node has not been fenced, 1 if the node has been
                fenced

Service event globals (i.e. when event_type == EVENT_SERVICE):

   service_name - Name of service which transitioned
   service_state - new state of service
   service_owner - new owner of service (or <0 if service is no longer
		   running)
   service_last_owner - Last owner of service if known.  Used for when
                   service_state = "recovering" generally, in order to
                   apply restart/relocate/disable policy.

User event globals (i.e. when event_type == EVENT_USER):

   service_name - service to perform request upon
   user_request - request to perform (USER_ENABLE, USER_DISABLE,
                   USER_STOP, USER_RELOCATE, USER_RESTART,
                   [TODO] USER_MIGRATE)
   user_target - target node ID if applicable


Scripting functions - Informational:

  node_list = nodes_online();

	Returns a list of all online nodes.

  service_list = service_list();

	Returns a list of all configured services.

  (restarts, last_owner, owner, state) = service_status(service_name);

	Returns the state, owner, last_owner, and restarts.  Note that
	all return values are optional, but are right-justified per S-Lang
	specification.  This means if you only want the 'state', you can use:
	
	(state) = service_status(service_name);

	However, if you need the restart count, you must provide all four 
	return values as above.

  (nofailback, restricted, ordered, node_list) =
		service_domain_info(service_name);

	Returns the failover domain specification, if it exists, for the
	specified service name.  The node list returned is an ordered list
	according to priority levels.  In the case of unordered domains, 
	the ordering of the returned list is pseudo-random.

Scripting functions - Operational:

  err = service_start(service_name, node_list, [avoid_list]);

	Start a non-running, (but runnable, i.e. not failed)
	service on the first node in node_list.  Failing that, start it on
	the second node in node_list and so forth.  One may also specify
	an avoid list, but it's better to just use the subtract() function
	below.

  err = service_stop(service_name, [0 = stop, 1 = disable]);

	Stop a running service.  The second parameter is optional, and if
	non-zero is specified, the service will enter the disabled state.

  ... stuff that's not done but needs to be:

  err = service_relocate(service_name, node_list);

	Move a running service to the specified node_list in order of
	preference.  In the case of VMs, this is actually a migrate-or-
	relocate operation.

Utility functions - Node list manipulation

  node_list = union(left_node_list, right_node_list);

	Calculates the union between the two node lists, removing duplicates
	and preserving ordering according to left_node_list.  Any added
	values from right_node_list will appear in their order, but
	after left_node_list in the returned list.

  node_list = intersection(left_node_list, right_node_list);

	Calculates the intersection (items in both lists) between the two
	node lists, removing duplicates and preserving ordering according
	to left_node_list.  Any added values from right_node_list will
	appear in their order, but after left_node_list in the returned list.

  node_list = delta(left_node_list, right_node_list);

	Calculates the delta (items not in both lists) between the two
	node lists, removing duplicates and preserving ordering according
	to left_node_list.  Any added values from right_node_list will
	appear in their order, but after left_node_list in the returned list.

  node_list = subtract(left_node_list, right_node_list);

	Removes any duplicates as well as items specified in right_node_list
	from left_node_list.  Example:

	all_nodes = nodes_online();
	allowed_nodes = subtract(all_nodes, node_to_avoid);

Utility functions - Logging:

  debug(item1, item2, ...);	LOG_DEBUG level
  info(...);			LOG_INFO level
  notice(...);			LOG_NOTICE level
  warning(...);			LOG_WARNING level
  err(...);			LOG_ERR level
  crit(...);			LOG_CRIT level
  alert(...);			LOG_ALERT level
  emerg(...);			LOG_EMERG level

	items - These can be strings, integer lists, or integers.  Logging
		string lists is not supported.

	level - the level is consistent with syslog(8)

  stop_processing();

	Calling this function will prevent further event scripts from being
	executed on a particular event.  Call this function if, for example,
	you do not wish for the default event handler to process the event.

	Note: This does NOT terminate the caller script; that is, the
	script being executed will run to completion.

Event scripts are written in a language called S-Lang; documentation specifics
about the language are available at http://www.s-lang.org

Example script (creating a follows-but-avoid-after-start behavior):
%
% If the main queue server and replication queue server are on the same
% node, relocate the replication server somewhere else if possible.
%
% Also handles the case where the node owning the main queue server goes
% offline while the replication server is running: the main queue server
% is then started on the replication server's node.
%
define my_sap_event_trigger()
{
	variable state, owner_rep, owner_main;
	variable nodes, allowed;

	%
	% If this was a service event, don't execute the default event
	% script trigger after this script completes.
	%
	if (event_type == EVENT_SERVICE) {
		stop_processing();
	}

	(owner_main, state) = service_status("service:main_queue");
	(owner_rep, state) = service_status("service:replication_server");

	if ((event_type == EVENT_NODE) and (owner_main == node_id) and
	    (node_state == NODE_OFFLINE) and (owner_rep >= 0)) {
		%
		% uh oh, the owner of the main server died.  Restart it
		% on the node running the replication server
		%
		notice("Starting Main Queue Server on node ", owner_rep);
		()=service_start("service:main_queue", owner_rep);
		return;
	}

	%
	% S-Lang doesn't short-circuit prior to 2.1.0
	%
	if ((owner_main >= 0) and
	    ((owner_main == owner_rep) or (owner_rep < 0))) {

		%
		% Get all online nodes
		%
		nodes = nodes_online();

		%
		% Drop out the owner of the main server
		%
		allowed = subtract(nodes, owner_main);
		if ((owner_rep >= 0) and (length(allowed) == 0)) {
			%
			% Only one node is online and the rep server is
			% already running.  Don't do anything else.
			%
			return;
		}

		if ((length(allowed) == 0) and (owner_rep < 0)) {
			%
			% Only node online is the owner ... go ahead
			% and start it, even though it doesn't increase
			% availability to do so.
			%
			allowed = owner_main;
		}

		%
		% Move the replication server off the node that is
		% running the main server if a node's available.
		%
		if (owner_rep >= 0) {
			()=service_stop("service:replication_server");
		}
		()=service_start("service:replication_server", allowed);
	}

	return;
}

my_sap_event_trigger();


Relevant <rm> section from cluster.conf:

        <rm central_processing="1">
                <events>
                        <event name="main-start" class="service"
				service="service:main_queue"
				service_state="started"
				file="/tmp/sap.sl"/>
                        <event name="rep-start" class="service"
				service="service:replication_server"
				service_state="started"
				file="/tmp/sap.sl"/>
                        <event name="node-up" node_state="up"
				class="node"
				file="/tmp/sap.sl"/>

                </events>
                <failoverdomains>
                        <failoverdomain name="all" ordered="1" restricted="1">
                                <failoverdomainnode name="molly" priority="2"/>
                                <failoverdomainnode name="frederick" priority="1"/>
                        </failoverdomain>
                </failoverdomains>
                <resources/>
                <service name="main_queue"/>
                <service name="replication_server" autostart="0"/>
		<!-- replication server is started when main-server start
		     event completes -->
        </rm>



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]