[Cluster-devel] cluster/rgmanager ChangeLog include/resgroup.h ...

lhh at sourceware.org lhh at sourceware.org
Fri Aug 18 15:26:25 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	lhh at sourceware.org	2006-08-18 15:26:23

Modified files:
	rgmanager      : ChangeLog 
	rgmanager/include: resgroup.h 
	rgmanager/src/clulib: ckpt_state.c 
	rgmanager/src/daemons: groups.c main.c rg_state.c 
	rgmanager/src/resources: clusterfs.sh fs.sh nfsclient.sh 
	                         ra-api-1-modified.dtd script.sh 

Log message:
	2006-08-18 Lon Hohberger <lhh at redhat.com>
	* include/resgroup.h: Change ordering and add magic field to
	rgmanager state field (warning: breaks compatibility from 08/08 CVS!)
	* src/clulib/ckpt_state.c, src/daemons/rg_state.c: Fix bug
	preventing correct operation of ckpt operation after initial boot.
	Get rid of debug info.
	* src/daemons/groups.c, main.c: Fix #202499 - shutdown while handling
	transitions sometimes allows services to restart (due to not locking
	RGs locally)
	* src/resources/clusterfs.sh, fs.sh, nfsclient.sh: Add proper
	warning messages if status check fails
	* src/resources/ra-api-1-modified.dtd: Allow 'migrate' option

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.19&r2=1.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&r1=1.12&r2=1.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/ckpt_state.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.20&r2=1.21
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.28&r2=1.29
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&r1=1.19&r2=1.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/clusterfs.sh.diff?cvsroot=cluster&r1=1.10&r2=1.11
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&r1=1.16&r2=1.17
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&r1=1.12&r2=1.13
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ra-api-1-modified.dtd.diff?cvsroot=cluster&r1=1.3&r2=1.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/script.sh.diff?cvsroot=cluster&r1=1.7&r2=1.8

--- cluster/rgmanager/ChangeLog	2006/08/09 21:48:34	1.19
+++ cluster/rgmanager/ChangeLog	2006/08/18 15:26:21	1.20
@@ -1,3 +1,16 @@
+2006-08-18 Lon Hohberger <lhh at redhat.com>
+	* include/resgroup.h: Change ordering and add magic field to
+	rgmanager state field (warning: breaks compatibility from 08/08 CVS!)
+	* src/clulib/ckpt_state.c, src/daemons/rg_state.c: Fix bug
+	preventing correct operation of ckpt operation after initial boot.
+	Get rid of debug info.
+	* src/daemons/groups.c, main.c: Fix #202499 - shutdown while handling
+	transitions sometimes allows services to restart (due to not locking
+	RGs locally)
+	* src/resources/clusterfs.sh, fs.sh, nfsclient.sh: Add proper
+	warning messages if status check fails
+	* src/resources/ra-api-1-modified.dtd: Allow 'migrate' option
+
 2006-08-08 Lon Hohberger <lhh at redhat.com>
 	* src/clulib/members.c: Fix gained/lost list creation so that the
 	count is actually nonzero (#201713)
--- cluster/rgmanager/include/resgroup.h	2006/07/19 18:43:32	1.12
+++ cluster/rgmanager/include/resgroup.h	2006/08/18 15:26:22	1.13
@@ -27,31 +27,30 @@
  */
 typedef struct {
 	char		rs_name[64];	/**< Service name */
+	uint32_t	rs_id;		/**< Service ID */
+	uint32_t	rs_magic;	/**< Magic ID */
 	uint32_t	rs_owner;	/**< Member ID running service. */
 	uint32_t	rs_last_owner;	/**< Last member to run the service. */
 	uint32_t	rs_state;	/**< State of service. */
 	uint32_t	rs_restarts;	/**< Number of cluster-induced 
 					     restarts */
 	uint64_t	rs_transition;	/**< Last service transition time */
-	uint32_t	rs_id;		/**< Service ID */
-	uint32_t	rs_pad;		/**< pad to 64-bit boundary */
 } rg_state_t;
 
 #define swab_rg_state_t(ptr) \
 {\
+	swab32((ptr)->rs_id);\
+	swab32((ptr)->rs_magic);\
 	swab32((ptr)->rs_owner);\
 	swab32((ptr)->rs_last_owner);\
 	swab32((ptr)->rs_state);\
 	swab32((ptr)->rs_restarts);\
 	swab64((ptr)->rs_transition);\
-	swab32((ptr)->rs_pad);\
 }
 
 
 #define RG_PORT    177
-#define RG_VF_PORT 178
-#define RG_PURPOSE 0x11398fed
-#define RG_SERVICE_GROUP "usrm::manager"
+#define RG_MAGIC   0x11398fed
 
 #define RG_ACTION_REQUEST	/* Message header */ 0x138582
 #define RG_EVENT		0x138583
--- cluster/rgmanager/src/clulib/ckpt_state.c	2006/08/07 22:05:01	1.1
+++ cluster/rgmanager/src/clulib/ckpt_state.c	2006/08/18 15:26:22	1.2
@@ -75,8 +75,10 @@
 {
 	SaCkptCheckpointCreationAttributesT attrs;
 	SaCkptCheckpointOpenFlagsT flags;
+#if 0
 	SaCkptCheckpointDescriptorT status;
-	SaAisErrorT err;
+#endif
+	SaAisErrorT err = SA_AIS_OK;
 	key_node_t *newnode = NULL;
 	
 	newnode = kn_find_key(keyid);
@@ -111,6 +113,7 @@
 				   &newnode->kn_cph);
 
 	if (err == SA_AIS_OK) {
+#if 0
 		saCkptCheckpointStatusGet(newnode->kn_cph,
 					  &status);
 
@@ -141,12 +144,10 @@
 			(int)status.checkpointCreationAttributes.maxSectionIdSize);
 		printf("Section count = %d\n", status.numberOfSections);
 		printf("\n");
-		
+#endif
 		goto good;
 	}
 
-	printf("Retrying w/ create\n");
-
 	attrs.creationFlags = SA_CKPT_WR_ALL_REPLICAS;
 	attrs.checkpointSize = (SaSizeT)maxsize;
 	attrs.retentionDuration = SA_TIME_ONE_HOUR;
@@ -175,7 +176,9 @@
 	newnode->kn_ready = 1;
 	newnode->kn_next = key_list;
 	key_list = newnode;
+#if 0
 	printf("Opened ckpt %s\n", keyid);
+#endif
 
 	return err;
 }
--- cluster/rgmanager/src/daemons/groups.c	2006/07/19 18:43:32	1.20
+++ cluster/rgmanager/src/daemons/groups.c	2006/08/18 15:26:22	1.21
@@ -418,7 +418,7 @@
 	int ret;
 
 	if (rg_locked()) {
-		clulog(LOG_NOTICE,
+		clulog(LOG_DEBUG,
 			"Resource groups locked; not evaluating\n");
 		return -EAGAIN;
 	}
--- cluster/rgmanager/src/daemons/main.c	2006/08/09 21:48:34	1.28
+++ cluster/rgmanager/src/daemons/main.c	2006/08/18 15:26:22	1.29
@@ -792,6 +792,7 @@
 void *
 shutdown_thread(void *arg)
 {
+	rg_lockall(L_SYS);
 	rg_doall(RG_STOP_EXITING, 1, NULL);
 	running = 0;
 
--- cluster/rgmanager/src/daemons/rg_state.c	2006/08/07 22:05:01	1.19
+++ cluster/rgmanager/src/daemons/rg_state.c	2006/08/18 15:26:22	1.20
@@ -306,11 +306,12 @@
 		if (errno == ENOENT) {
 			ds_key_init(res, DS_MIN_SIZE, 10);
 		} else {
+			perror("ds_read");
 			return -1;
 		}
 	}
 
-	if (datalen < 0) {
+	if (datalen <= 0) {
 
 		ret = init_rg(name, svcblk);
 		if (ret < 0) {
@@ -326,6 +327,7 @@
 	}
 
 	memcpy(svcblk, data, sizeof(*svcblk));
+
 	return 0;
 #else
 	membership = member_list();
--- cluster/rgmanager/src/resources/clusterfs.sh	2006/06/02 17:37:10	1.10
+++ cluster/rgmanager/src/resources/clusterfs.sh	2006/08/18 15:26:22	1.11
@@ -889,12 +889,16 @@
 	;;
 status|monitor)
   	isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint}
- 	[ $? -ne $YES ] && exit $OCF_ERR_GENERIC
+ 	if [ $? -ne $YES ]; then
+		ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}"
+		exit $OCF_ERR_GENERIC
+	fi
 
  	isAlive ${OCF_RESKEY_mountpoint}
- 	[ $? -ne $YES ] && exit $OCF_ERR_GENERIC
- 	
-	exit 0
+ 	[ $? -eq $YES ] && exit 0
+
+	ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!"
+	exit $OCF_ERR_GENERIC
 	;;
 restart)
 	stopFilesystem
--- cluster/rgmanager/src/resources/fs.sh	2006/06/02 17:37:10	1.16
+++ cluster/rgmanager/src/resources/fs.sh	2006/08/18 15:26:22	1.17
@@ -243,7 +243,7 @@
 {
 	if [ -z "$OCF_RESKEY_mountpoint" ]; then
 		ocf_log err "No mount point specified."
-		return 1
+		return $OCF_ERR_ARGS
 	fi
 
 	if ! [ -e "$OCF_RESKEY_mountpoint" ]; then
@@ -514,7 +514,7 @@
 	dev=$(real_device $1)
 	if [ -z "$dev" ]; then
 		ocf_log err \
-			"isMounted: Could not match $1 with a real device"
+			"fs (isMounted): Could not match $1 with a real device"
 		return $FAIL
 	fi
 	mp=$2
@@ -553,14 +553,14 @@
 	declare rw
 	
 	if [ $# -ne 1 ]; then
-	        logAndPrint $LOG_ERR "Usage: isAlive mount_point"
+	        ocf_log err "Usage: isAlive mount_point"
 		return $FAIL
 	fi
 	mount_point=$1
 	
 	test -d $mount_point
 	if [ $? -ne 0 ]; then
-		logAndPrint $LOG_ERR "$mount_point is not a directory"
+		ocf_log err "fs (isAlive): $mount_point is not a directory"
 		return $FAIL
 	fi
 	
@@ -707,6 +707,7 @@
 	return $ret
 }
 
+
 activeMonitor() {
 	declare monpath=$OCF_RESKEY_mountpoint/.clumanager
 	declare p
@@ -733,7 +734,7 @@
 	case $1 in
 	start)
 		ocf_log info "Starting active monitoring of $OCF_RESKEY_mountpoint"
-		mkdir -p $(dirname $monpath) || return 1
+		mkdir -p $(dirname $monpath) || return $OCF_ERR_GENERIC
 		devmon $args -p $monpath/devmon.data -P $monpath/devmon.pid
 		;;
 	stop)
@@ -794,7 +795,7 @@
 
 	if [ -z "`which quotaon`" ]; then
 		ocf_log err "quotaon not found in $PATH"
-		return 1
+		return $OCF_ERR_GENERIC
 	fi
 
 	for mopt in `echo $opts | sed -e s/,/\ /g`; do
@@ -1211,29 +1212,35 @@
 	;;
 status|monitor)
   	isMounted ${OCF_RESKEY_device} ${OCF_RESKEY_mountpoint}
- 	[ $? -ne $YES ] && exit $OCF_ERR_GENERIC
+ 	if [ $? -ne $YES ]; then
+		ocf_log err "fs:${OCF_RESKEY_name}: ${OCF_RESKEY_device} is not mounted on ${OCF_RESKEY_mountpoint}"
+		exit $OCF_ERR_GENERIC
+	fi
 
 	if [ "$OCF_RESKEY_active_monitor" = "yes" ] ||
 	   [ "$OCF_RESKEY_active_monitor" = "1" ]; then
 
-		activeMonitor status || exit $OCF_ERR_GENERIC
-		exit 0
+	   	activeMonitor status
+		[ $? -eq 0 ] && exit 0
+		ocf_log err "fs:${OCF_RESKEY_name}: Active Monitoring reported a failure"
+		exit $OCF_ERR_GENERIC
 	fi
  	
  	isAlive ${OCF_RESKEY_mountpoint}
- 	[ $? -ne $YES ] && exit $OCF_ERR_GENERIC
- 	
-	exit 0
+ 	[ $? -eq $YES ] && exit 0
+
+	ocf_log err "fs:${OCF_RESKEY_name}: Mount point is not accessible!"
+	exit $OCF_ERR_GENERIC
 	;;
 restart)
 	stopFilesystem
 	if [ $? -ne 0 ]; then
-		exit 1
+		exit $OCF_ERR_GENERIC
 	fi
 
 	startFilesystem
 	if [ $? -ne 0 ]; then
-		exit 1
+		exit $OCF_ERR_GENERIC
 	fi
 
 	exit 0
--- cluster/rgmanager/src/resources/nfsclient.sh	2006/08/02 17:24:31	1.12
+++ cluster/rgmanager/src/resources/nfsclient.sh	2006/08/18 15:26:22	1.13
@@ -320,7 +320,11 @@
 		sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g') 
         exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \
 		"^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target_regexp}" 
+
 	rv=$? 
+	if [ $rv -ne 0 ]; then
+		ocf_log err "nfsclient:$OCF_RESKEY_name is missing!"
+	fi
 	;;
 
 recover)
--- cluster/rgmanager/src/resources/ra-api-1-modified.dtd	2006/07/19 18:43:32	1.3
+++ cluster/rgmanager/src/resources/ra-api-1-modified.dtd	2006/08/18 15:26:22	1.4
@@ -25,7 +25,8 @@
 	primary (1|0)   "0"
 	required (1|0)	"0"
 	inherit CDATA	""
-	unique	(1|0)	"0">
+	unique	(1|0)	"0"
+	reconfig (1|0)  "0">
 
 <!ELEMENT longdesc ANY>
 <!ATTLIST longdesc
@@ -42,7 +43,7 @@
 
 <!ELEMENT action EMPTY>
 <!ATTLIST action
-	name	(start|stop|recover|status|monitor|reload|meta-data|verify-all|migrate)	#REQUIRED
+	name	(start|stop|recover|status|reconfig|monitor|reload|meta-data|verify-all|migrate)	#REQUIRED
 	timeout		CDATA	#REQUIRED
 	interval 	CDATA	#IMPLIED
 	start-delay 	CDATA	#IMPLIED
--- cluster/rgmanager/src/resources/script.sh	2005/11/21 21:48:42	1.7
+++ cluster/rgmanager/src/resources/script.sh	2006/08/18 15:26:23	1.8
@@ -110,4 +110,10 @@
 
 # Don't need to catch return codes; this one will work.
 ocf_log info "Executing ${OCF_RESKEY_file} $1"
-exec /bin/sh ${OCF_RESKEY_file} $1
+${OCF_RESKEY_file} $1
+
+declare -i rv=$?
+if [ $rv -ne 0 ]; then
+	ocf_log err "script:$OCF_RESKEY_name: $1 of $OCF_RESKEY_file failed (returned $rv)"
+	return $OCF_ERR_GENERIC
+fi




More information about the Cluster-devel mailing list