[Cluster-devel] cluster/rgmanager ChangeLog README include/lis ...
lhh at sourceware.org
lhh at sourceware.org
Fri Jun 16 20:07:52 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Branch: STABLE
Changes by: lhh at sourceware.org 2006-06-16 20:07:47
Modified files:
rgmanager : ChangeLog README
rgmanager/include: list.h resgroup.h rg_locks.h
rgmanager/init.d: rgmanager
rgmanager/man : clusvcadm.8
rgmanager/src/clulib: clulog.c msgsimple.c vft.c
rgmanager/src/daemons: Makefile fo_domain.c groups.c main.c
restree.c rg_locks.c rg_state.c
rg_thread.c
rgmanager/src/resources: Makefile clusterfs.sh fs.sh ip.sh
nfsclient.sh nfsexport.sh
ocf-shellfuncs service.sh
rgmanager/src/utils: Makefile clustat.c clusvcadm.c
Added files:
rgmanager/src/daemons: nodeevent.c watchdog.c
rgmanager/src/resources: svclib_nfslock
Log message:
Merge from RHEL4 branch
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.13&r2=1.5.2.13.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/README.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2&r2=1.2.8.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/list.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1&r2=1.2.2.1.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.5.6.1&r2=1.3.2.5.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/rg_locks.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1&r2=1.1.8.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/init.d/rgmanager.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1&r2=1.3.2.1.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/man/clusvcadm.8.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1&r2=1.1.2.1.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/clulog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.6.1&r2=1.2.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/msgsimple.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4&r2=1.4.8.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.2&r2=1.7.2.3.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/nodeevent.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/watchdog.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.6.2.2.6.1&r2=1.6.2.2.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/fo_domain.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.2&r2=1.5.2.2.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.8.2.10.6.3&r2=1.8.2.10.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.9.2.8.6.6&r2=1.9.2.8.6.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/restree.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.10.2.2.6.5&r2=1.10.2.2.6.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_locks.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.2&r2=1.4.2.2.6.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_state.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.8.6.3&r2=1.4.2.8.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/rg_thread.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.7.2.3.6.2&r2=1.7.2.3.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/svclib_nfslock.diff?cvsroot=cluster&only_with_tag=STABLE&r1=NONE&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.3.6.2&r2=1.4.2.3.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/clusterfs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.4.4&r2=1.1.2.3.4.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/fs.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.6.4.4&r2=1.4.2.6.4.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ip.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.4.4.9&r2=1.5.2.4.4.10
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsclient.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.2.6.4&r2=1.3.2.2.6.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/nfsexport.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.4.2.1.6.3&r2=1.4.2.1.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/ocf-shellfuncs.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1&r2=1.2.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/service.sh.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.1.6.2&r2=1.1.2.1.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/Makefile.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1.6.2&r2=1.3.2.1.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.5.2.3.6.7&r2=1.5.2.3.6.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clusvcadm.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.3.6.3&r2=1.2.2.3.6.4
--- cluster/rgmanager/ChangeLog 2005/03/21 22:01:30 1.5.2.13
+++ cluster/rgmanager/ChangeLog 2006/06/16 20:07:45 1.5.2.13.6.1
@@ -1,3 +1,87 @@
+2006-06-16 Lon Hohberger <lhh at redhat.com>
+ * src/daemons/fo_domain.c, groups.c: Get rid of compiler warnings
+ * src/daemons/rg_state.c: Change clu_lock_verbose to use the NULL
+ lock/convert mechanism offered by DLM to work around #193128
+ * src/resources/fs.sh, clusterfs.sh, nfsexport.sh, nfsclient.sh,
+ service.sh, svclib_nfslock: Finish up initial NFS workaround.
+
+2006-05-23 Lon Hohberger <lhh at redhat.com>
+ * src/daemons/members.c: Zap pad fields on copy-out
+ * src/daemons/main.c: Give notice if skipping an event because of
+ locked services. Call the self-watchdog init function
+ * src/daemons/watchdog.c: Add Stanko Kupcevic's self-watchdog from
+ CVS head (fixes #193247)
+ * src/daemons/groups.c: Add debug messages. Actually count
+ resgroups during node transition handling
+ * src/daemons/rg_state.c: allow failover of stopping services if
+ the owner died (#193255)
+ * src/utils/clustat.c: fix typo, misc. usability problems (#192999)
+
+2006-05-16 Lon Hohberger <lhh at redhat.com>
+ * src/resources/nfsclient.sh: Fix 189218 - nfsclient not matching
+ wildcards correctly when checking status. Allow disabling of
+ recovery for services where the nfs clients are ordered (this will
+ cause a full service restart, but works)
+ * src/resources/clusterfs.sh, fs.sh, svclib_nfslock, service.sh:
+ Implement rudimentary atomic bomb-style NFS lock reclaim handling
+ Needs compatible and correctly configured version of nfs-utils
+ installed and running on the system. For clusterfs.sh, ensure
+ that we flush buffers during service tear-down - regardless of
+ whether or not we unmount the file system.
+ * src/utils/clunfslock.sh: HA-callout program (/usr/sbin/clunfslock)
+ for use with the rpc.statd -H parameter. Copies the client to all
+ cluster-managed mounted file systems so that it will get lock
+ reclaim notification on failover.
+
+2006-05-09 Lon Hohberger <lhh at redhat.com>
+ * include/list.h: Prevent dereferencing curr if it's null for some
+ reason
+ * include/resgroup.h: Clean up alignment, add rgmanager lock/unlock
+ message types
+ * src/daemons/Makefile: Add nodeevent.o to the build for rgmanager
+ * src/clulib/msgsimple.c: Misc code path cleanups
+ * src/clulib/vft.c: Add local reads for fast clustat operation.
+ * src/daemons/groups.c: Count all resource groups for all nodes
+ in one pass, rather than one node per pass. Split queueing of
+ status checks off so we never block the main thread. Mark services
+ which have autostart=0 in the config as "disabled" to help remove
+ confusion between "disabled", "stopped", and the no-longer-needed
+ "stopped but behave like disabled" states. bz #182454 /
+ #190234 / #190408
+ * src/daemons/fo_domain.c: Add patch from Josef Whiter to
+ implement no-failback option for a given FO domain - bz #189841
+ * src/daemons/main.c: Queue node events for another thread to
+ handle, so we never block the main thread. Also, implement
+ cluster-wide service lock/unlock feature from clumanager 1.2.x
+ - bz #175010
+ * src/daemons/nodeevent.c: Split out node event queueing / handling
+ in to a separate thread so the main thread does not block
+ * src/daemons/rg_state.c: Return error codes if resource groups
+ are locked.
+ * src/daemons/rg_thread.c: Fix assertion failure causing segfault
+ in extremely rare cases. Quash the rg queue during shutdown.
+ - bz #181539
+ * src/daemons/rg_state.c: Add fast local service state query to
+ reduce unnecessary lock contention
+ * src/daemons/groups.c: Handle request for expedited information
+ from clustat.
+ * src/daemons/main.c: Pass arg1 to send_rg_states() to enable fast
+ clustat operation.
+ * src/resources/fs.sh: Implement user/group quota support if
+ enabled in the file system options
+ * src/utils/clustat.c: Misc. error handling. Add single service /
+ member output and add -Q to the help information. #185952.
+ Added -f flag.
+ * src/utils/clusvcadm.c: Implement client-side of #175010
+ * src/utils/clustat.c: show transition time in clustat -x
+ - bz #191398
+ * src/resources/fs.sh: enable user/group quotas if enabled in the
+ options attribute - bz #191182
+ * init.d/rgmanager: fix typo - bz #191205
+
+
+-------------
+
2005-03-21 Lon Hohberger <lhh at redhat.com>
* init.d/rgmanager, Makefile: Fix up init script and add Makefile
so that the init script is properly installed #142754
--- cluster/rgmanager/README 2004/08/30 17:49:10 1.2
+++ cluster/rgmanager/README 2006/06/16 20:07:45 1.2.8.1
@@ -1,7 +1,3 @@
-WARNING
-
-This code is not ready for production use.
-
This is a clustered resource group manager layered on top of Magma, a
single API which can talk to multiple cluster infrastructures via their
native APIs. This resource manager requires both magma and one or more
--- cluster/rgmanager/include/list.h 2005/02/28 23:13:49 1.2.2.1
+++ cluster/rgmanager/include/list.h 2006/06/16 20:07:46 1.2.2.1.6.1
@@ -50,7 +50,7 @@
if (*list && (curr = *list)) do
#define list_done(list, curr) \
- (((curr = (void *)le(curr)->le_next)) && (curr == *list))
+ (curr && (((curr = (void *)le(curr)->le_next)) && (curr == *list)))
/*
list_do(list, node) {
--- cluster/rgmanager/include/resgroup.h 2005/07/28 21:19:02 1.3.2.5.6.1
+++ cluster/rgmanager/include/resgroup.h 2006/06/16 20:07:46 1.3.2.5.6.2
@@ -55,25 +55,29 @@
#define RG_ACTION_REQUEST /* Message header */ 0x138582
-#define RG_SUCCESS 0
-#define RG_FAIL 1
-#define RG_START 2
-#define RG_STOP 3
-#define RG_STATUS 4
-#define RG_DISABLE 5
-#define RG_STOP_RECOVER 6
-#define RG_START_RECOVER 7
-#define RG_RESTART 8
-#define RG_EXITING 9
-#define RG_INIT 10
-#define RG_ENABLE 11
-#define RG_STATUS_INQUIRY 12
-#define RG_RELOCATE 13
-#define RG_CONDSTOP 14
-#define RG_CONDSTART 15
-#define RG_START_REMOTE 16 /* Part of a relocate */
-#define RG_STOP_USER 17 /* User-stop request */
-#define RG_NONE 999
+#define RG_SUCCESS 0
+#define RG_FAIL 1
+#define RG_START 2
+#define RG_STOP 3
+#define RG_STATUS 4
+#define RG_DISABLE 5
+#define RG_STOP_RECOVER 6
+#define RG_START_RECOVER 7
+#define RG_RESTART 8
+#define RG_EXITING 9
+#define RG_INIT 10
+#define RG_ENABLE 11
+#define RG_STATUS_INQUIRY 12
+#define RG_RELOCATE 13
+#define RG_CONDSTOP 14
+#define RG_CONDSTART 15
+#define RG_START_REMOTE 16 /* Part of a relocate */
+#define RG_STOP_USER 17 /* User-stop request */
+#define RG_STOP_EXITING 18 /* Exiting. */
+#define RG_LOCK 19
+#define RG_UNLOCK 20
+#define RG_QUERY_LOCK 21
+#define RG_NONE 999
extern const char *rg_req_strings[];
@@ -181,7 +185,7 @@
*/
#define FOD_ORDERED (1<<0)
#define FOD_RESTRICTED (1<<1)
-
+#define FOD_NOFAILBACK (1<<2)
//#define DEBUG
#ifdef DEBUG
--- cluster/rgmanager/include/rg_locks.h 2004/08/13 15:36:50 1.1
+++ cluster/rgmanager/include/rg_locks.h 2006/06/16 20:07:46 1.1.8.1
@@ -4,9 +4,8 @@
int rg_running(void);
int rg_locked(void);
-int rg_lockall(void);
-int rg_unlockall(void);
-int rg_wait_unlockall(void);
+int rg_lockall(int flag);
+int rg_unlockall(int flag);
int rg_quorate(void);
int rg_set_quorate(void);
--- cluster/rgmanager/init.d/rgmanager 2005/03/21 21:02:58 1.3.2.1
+++ cluster/rgmanager/init.d/rgmanager 2006/06/16 20:07:46 1.3.2.1.6.1
@@ -16,7 +16,7 @@
# Grab the network config file
. /etc/sysconfig/network
-# Grap cluster start config if it exists
+# Grab cluster start config if it exists
[ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster
PATH=/sbin:/bin:/usr/sbin:/usr/bin
--- cluster/rgmanager/man/clusvcadm.8 2005/01/18 22:35:35 1.1.2.1
+++ cluster/rgmanager/man/clusvcadm.8 2006/06/16 20:07:46 1.1.2.1.6.1
@@ -46,9 +46,9 @@
.I
service
.IP \-l
-Lock the cluster's service managers. This should only be used if the
+Lock the local resource group manager. This should only be used if the
administrator intends to perform a global, cluster-wide shutdown. This
-prevents ALL service operations on ALL currently running members, thus,
+prevents starting resource groups on the local node, ensuring that
services will not fail over during the shutdown of the cluster. Generally,
administrators should use the
.B
--- cluster/rgmanager/src/clulib/clulog.c 2005/12/06 18:43:45 1.2.2.1.6.1
+++ cluster/rgmanager/src/clulib/clulog.c 2006/06/16 20:07:46 1.2.2.1.6.2
@@ -20,7 +20,7 @@
/** @file
* Library routines for communicating with the logging daemon.
*
- * $Id: clulog.c,v 1.2.2.1.6.1 2005/12/06 18:43:45 lhh Exp $
+ * $Id: clulog.c,v 1.2.2.1.6.2 2006/06/16 20:07:46 lhh Exp $
*
* Author: Jeff Moyer <moyer at missioncriticallinux.com>
*/
@@ -50,7 +50,7 @@
#include <string.h>
-static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.1 $";
+static const char *version __attribute__ ((unused)) = "$Revision: 1.2.2.1.6.2 $";
#ifdef DEBUG
#include <assert.h>
--- cluster/rgmanager/src/clulib/msgsimple.c 2004/11/11 19:46:18 1.4
+++ cluster/rgmanager/src/clulib/msgsimple.c 2006/06/16 20:07:46 1.4.8.1
@@ -83,15 +83,19 @@
/*
* Peek at the header. We need the size of the inbound buffer!
*/
+ errno = EAGAIN;
ret = msg_peek(fd, &peek_msg, sizeof (generic_msg_hdr));
if (ret != sizeof (generic_msg_hdr)) {
if (ret == -1) {
if (errno != ECONNRESET)
fprintf(stderr, "fd%d peek: %s\n", fd,
strerror(errno));
+ //perror("msg_peek");
} else if (ret != 0) /* Blank message = probably closed socket */
fprintf(stderr, "fd%d peek: %d/%d bytes\n", fd,
ret, (int)sizeof (generic_msg_hdr));
+ else if (ret == 0)
+ errno = EAGAIN;
return -1;
}
--- cluster/rgmanager/src/clulib/vft.c 2006/01/20 16:27:29 1.7.2.3.6.2
+++ cluster/rgmanager/src/clulib/vft.c 2006/06/16 20:07:46 1.7.2.3.6.3
@@ -1598,6 +1598,47 @@
}
+int
+vf_read_local(char *keyid, uint64_t *view, void **data, uint32_t *datalen)
+{
+ key_node_t *key_node = NULL;
+
+ pthread_mutex_lock(&vf_mutex);
+ pthread_mutex_lock(&key_list_mutex);
+
+ key_node = kn_find_key(keyid);
+ if (!key_node) {
+ pthread_mutex_unlock(&key_list_mutex);
+ pthread_mutex_unlock(&vf_mutex);
+ printf("no key for %s\n", keyid);
+ return VFR_NODATA;
+ }
+
+ if (!key_node->kn_data || !key_node->kn_datalen) {
+ pthread_mutex_unlock(&key_list_mutex);
+ pthread_mutex_unlock(&vf_mutex);
+ return VFR_NODATA;
+ }
+
+ *data = malloc(key_node->kn_datalen);
+ if (! *data) {
+ pthread_mutex_unlock(&key_list_mutex);
+ pthread_mutex_unlock(&vf_mutex);
+ printf("Couldn't malloc %s\n", keyid);
+ return VFR_ERROR;
+ }
+
+ memcpy(*data, key_node->kn_data, key_node->kn_datalen);
+ *datalen = key_node->kn_datalen;
+ *view = key_node->kn_viewno;
+
+ pthread_mutex_unlock(&key_list_mutex);
+ pthread_mutex_unlock(&vf_mutex);
+
+ return VFR_OK;
+}
+
+
static int
vf_send_current(int fd, char *keyid)
{
/cvs/cluster/cluster/rgmanager/src/daemons/nodeevent.c,v --> standard output
revision 1.2.2.1
--- cluster/rgmanager/src/daemons/nodeevent.c
+++ - 2006-06-16 20:07:48.351799000 +0000
@@ -0,0 +1,103 @@
+/*
+ Copyright Red Hat, Inc. 2006
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+#include <resgroup.h>
+#include <rg_locks.h>
+#include <gettid.h>
+#include <assert.h>
+
+typedef struct __ne_q {
+ list_head();
+ int ne_local;
+ uint64_t ne_nodeid;
+ int ne_state;
+} nevent_t;
+
+int node_event(int, uint64_t, int);
+
+/**
+ * Node event queue.
+ */
+static nevent_t *event_queue = NULL;
+static pthread_mutex_t ne_queue_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_t ne_thread = 0;
+int ne_queue_request(int local, uint64_t nodeid, int state);
+
+
+void *
+node_event_thread(void *arg)
+{
+ nevent_t *ev;
+
+ while (1) {
+ pthread_mutex_lock(&ne_queue_mutex);
+ ev = event_queue;
+ if (ev)
+ list_remove(&event_queue, ev);
+ else
+ break; /* We're outta here */
+ pthread_mutex_unlock(&ne_queue_mutex);
+
+ node_event(ev->ne_local, ev->ne_nodeid, ev->ne_state);
+
+ free(ev);
+ }
+
+ /* Mutex held */
+ ne_thread = 0;
+ rg_dec_threads();
+ pthread_mutex_unlock(&ne_queue_mutex);
+ return NULL;
+}
+
+
+void
+node_event_q(int local, uint64_t nodeID, int state)
+{
+ nevent_t *ev;
+ pthread_attr_t attrs;
+
+ while (1) {
+ ev = malloc(sizeof(nevent_t));
+ if (ev) {
+ break;
+ }
+ sleep(1);
+ }
+
+ memset(ev,0,sizeof(*ev));
+
+ ev->ne_state = state;
+ ev->ne_local = local;
+ ev->ne_nodeid = nodeID;
+
+ pthread_mutex_lock (&ne_queue_mutex);
+ list_insert(&event_queue, ev);
+ if (ne_thread == 0) {
+ pthread_attr_init(&attrs);
+ pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+ pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+ pthread_attr_setstacksize(&attrs, 262144);
+
+ pthread_create(&ne_thread, &attrs, node_event_thread, NULL);
+ pthread_attr_destroy(&attrs);
+
+ rg_inc_threads();
+ }
+ pthread_mutex_unlock (&ne_queue_mutex);
+}
/cvs/cluster/cluster/rgmanager/src/daemons/watchdog.c,v --> standard output
revision 1.2.2.1
--- cluster/rgmanager/src/daemons/watchdog.c
+++ - 2006-06-16 20:07:48.438384000 +0000
@@ -0,0 +1,97 @@
+/*
+ Copyright Red Hat, Inc. 2005-2006
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; see the file COPYING. If not, write to the
+ Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
+ MA 02139, USA.
+*/
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/reboot.h>
+#include <stdlib.h>
+
+#include <signals.h>
+#include <clulog.h>
+
+static pid_t child = 0;
+
+static void
+signal_handler(int signum)
+{
+ kill(child, signum);
+}
+static void
+redirect_signals(void)
+{
+ int i;
+ for (i = 0; i < _NSIG; i++) {
+ switch (i) {
+ case SIGCHLD:
+ case SIGILL:
+ case SIGFPE:
+ case SIGSEGV:
+ case SIGBUS:
+ setup_signal(i, SIG_DFL);
+ break;
+ default:
+ setup_signal(i, signal_handler);
+ }
+ }
+}
+
+/**
+ return watchdog's pid, or 0 on failure
+*/
+int
+watchdog_init(void)
+{
+ int status;
+ pid_t parent;
+
+ parent = getpid();
+ child = fork();
+ if (child < 0)
+ return 0;
+ else if (!child)
+ return parent;
+
+ redirect_signals();
+
+ while (1) {
+ if (waitpid(child, &status, 0) <= 0)
+ continue;
+
+ if (WIFEXITED(status))
+ exit(WEXITSTATUS(status));
+
+ if (WIFSIGNALED(status)) {
+ if (WTERMSIG(status) == SIGKILL) {
+ clulog(LOG_CRIT, "Watchdog: Daemon killed, exiting\n");
+ raise(SIGKILL);
+ while(1) ;
+ }
+ else {
+#ifdef DEBUG
+ clulog(LOG_CRIT, "Watchdog: Daemon died, but not rebooting because DEBUG is set\n");
+#else
+ clulog(LOG_CRIT, "Watchdog: Daemon died, rebooting...\n");
+ sync();
+ reboot(RB_AUTOBOOT);
+#endif
+ exit(255);
+ }
+ }
+ }
+}
--- cluster/rgmanager/src/daemons/Makefile 2005/10/17 20:30:45 1.6.2.2.6.1
+++ cluster/rgmanager/src/daemons/Makefile 2006/06/16 20:07:46 1.6.2.2.6.2
@@ -40,7 +40,8 @@
clurgmgrd: rg_thread.o rg_locks.o main.o groups.o rg_state.o \
rg_queue.o members.o rg_forward.o reslist.o \
- resrules.o restree.o fo_domain.o
+ resrules.o restree.o fo_domain.o nodeevent.o \
+ watchdog.o
$(CC) -o $@ $^ $(INCLUDE) $(CFLAGS) $(LDFLAGS) -lccs
#
--- cluster/rgmanager/src/daemons/fo_domain.c 2005/01/25 20:05:44 1.5.2.2
+++ cluster/rgmanager/src/daemons/fo_domain.c 2006/06/16 20:07:46 1.5.2.2.6.1
@@ -19,6 +19,9 @@
/** @file
* Fail-over Domain & Preferred Node Ordering Driver. Ripped right from
* the clumanager 1.2 code base.
+ *
+ * April 2006 - Nofailback option added to restrict failover behavior in ordered
+ * + restricted failover domains by Josef Whiter
*/
#include <string.h>
#include <list.h>
@@ -153,6 +156,13 @@
free(ret);
}
+ snprintf(xpath, sizeof(xpath), "%s/failoverdomain[%d]/@nofailback",
+ base, idx);
+ if (ccs_get(ccsfd, xpath, &ret) == 0) {
+ if (atoi(ret) != 0)
+ fod->fd_flags |= FOD_NOFAILBACK;
+ free(ret);
+ }
snprintf(xpath, sizeof(xpath), "%s/failoverdomain[%d]",
base, idx);
@@ -226,7 +236,9 @@
if (fod->fd_flags & FOD_ORDERED)
printf("Ordered ");
if (fod->fd_flags & FOD_RESTRICTED)
- printf("Restricted");
+ printf("Restricted ");
+ if (fod->fd_flags & FOD_NOFAILBACK)
+ printf("No Failback");
printf("\n");
}
@@ -316,8 +328,14 @@
char domainname[128];
int ordered = 0;
int restricted = 0;
+ int nofailback = 0;
fod_t *fod = NULL;
int found = 0;
+ int owned_by_node = 0, started = 0, no_owner = 0;
+#ifndef NO_CCS
+ rg_state_t svc_state;
+ void *lockp;
+#endif
ENTER();
@@ -370,6 +388,11 @@
}
/*
+	 * Determine whether this domain has failback turned on or not.
+ */
+ nofailback = !!(fod->fd_flags & FOD_NOFAILBACK);
+
+ /*
* Determine whether this domain is restricted or not...
*/
restricted = !!(fod->fd_flags & FOD_RESTRICTED);
@@ -379,6 +402,37 @@
*/
ordered = !!(fod->fd_flags & FOD_ORDERED);
+#ifndef NO_CCS
+ if(nofailback) {
+ if (rg_lock(rg_name, &lockp) != 0) {
+ clulog(LOG_WARNING, "Error getting a lock\n");
+ RETURN(FOD_BEST);
+ }
+
+ if (get_rg_state(rg_name, &svc_state) == FAIL) {
+ /*
+ * Couldn't get the service state, thats odd
+ */
+ clulog(LOG_WARNING, "Problem getting state information for "
+ "%s\n", rg_name);
+ rg_unlock(rg_name, lockp);
+ RETURN(FOD_BEST);
+ }
+ rg_unlock(rg_name, lockp);
+
+ /*
+ * Check to see if the service is started and if we are the owner in case of
+ * restricted+owner+no failback
+ */
+ if (svc_state.rs_state == RG_STATE_STARTED)
+ started = 1;
+ if (svc_state.rs_owner == nodeid)
+ owned_by_node = 1;
+ if (!memb_online(membership, svc_state.rs_owner))
+ no_owner = 1;
+ }
+#endif
+
switch (node_in_domain(nodename, fod, membership)) {
case 0:
/*
@@ -429,6 +483,17 @@
"lowest-ordered\n", nodeid);
#endif
if (ordered) {
+ /*
+ * If we are ordered we want to see if failback is
+ * turned on
+ */
+ if (nofailback && started && owned_by_node && !no_owner) {
+#ifdef DEBUG
+ clulog(LOG_DEBUG,"Ordered mode and no "
+ "failback -> BEST\n");
+#endif
+ RETURN(FOD_BEST);
+ }
#ifdef DEBUG
clulog(LOG_DEBUG,"Ordered mode -> BETTER\n");
#endif
@@ -444,6 +509,16 @@
* Node is a member of the domain and is the lowest-ordered,
* online member.
*/
+
+ if(nofailback && started && !owned_by_node && !no_owner) {
+#ifdef DEBUG
+ clulog(LOG_DEBUG, "Member #%d is the lowest-ordered "
+ "memeber of the domain, but is not the owner "
+ "-> BETTER\n", nodeid);
+#endif
+ RETURN(FOD_BETTER);
+ }
+
/* In this case, we can ignore 'ordered' */
#ifdef DEBUG
clulog(LOG_DEBUG, "Member #%d is the lowest-ordered member "
--- cluster/rgmanager/src/daemons/groups.c 2006/01/20 16:27:29 1.8.2.10.6.3
+++ cluster/rgmanager/src/daemons/groups.c 2006/06/16 20:07:46 1.8.2.10.6.4
@@ -28,6 +28,10 @@
#include <clulog.h>
#include <list.h>
#include <reslist.h>
+#include <assert.h>
+
+#define cm_svccount cm_pad[0] /* These are uint8_t in size */
+#define cm_svcexcl cm_pad[1]
static int config_version = 0;
@@ -40,6 +44,12 @@
pthread_rwlock_t resource_lock = PTHREAD_RWLOCK_INITIALIZER;
+struct status_arg {
+ int fd;
+ int fast;
+};
+
+
/**
See if a given node ID should start a resource, given cluster membership
@@ -60,16 +70,21 @@
int
-count_resource_groups(uint64_t nodeid, int *excl)
+count_resource_groups(cluster_member_list_t *ml)
{
resource_t *res;
char *rgname, *val;
- int count = 0, exclusive = 0;
+ int x;
rg_state_t st;
void *lockp;
+ cluster_member_t *mp;
- if (excl)
- *excl = 0;
+ for (x = 0; x < ml->cml_count; x++) {
+ ml->cml_members[x].cm_svccount = 0;
+ ml->cml_members[x].cm_svcexcl = 0;
+ }
+
+ pthread_rwlock_rdlock(&resource_lock);
list_do(&_resources, res) {
if (res->r_rule->rr_root == 0)
@@ -77,34 +92,43 @@
rgname = res->r_attrs[0].ra_value;
- if (rg_lock(rgname, &lockp) < 0)
+ if (rg_lock(rgname, &lockp) < 0) {
+ clulog(LOG_ERR, "#XX: Unable to obtain cluster "
+ "lock @ %s:%d: %s\n", __FILE__, __LINE__,
+ strerror(errno));
continue;
+ }
if (get_rg_state(rgname, &st) < 0) {
+ clulog(LOG_ERR, "#34: Cannot get status "
+ "for service %s\n", rgname);
rg_unlock(rgname, lockp);
continue;
}
+
rg_unlock(rgname, lockp);
- if (st.rs_owner != nodeid ||
- (st.rs_state == RG_STATE_STARTED &&
- st.rs_state == RG_STATE_STARTING))
+ if (st.rs_state != RG_STATE_STARTED &&
+ st.rs_state != RG_STATE_STARTING)
continue;
- if (excl) {
- /* Count exclusive resources */
- val = res_attr_value(res, "exclusive");
- exclusive = val && ((!strcmp(val, "yes") ||
- (atoi(val)>0)));
- }
+ mp = memb_id_to_p(ml, st.rs_owner);
+ if (!mp)
+ continue;
+
+ ++mp->cm_svccount;
- ++count;
- if (exclusive && excl)
- ++(*excl);
+ val = res_attr_value(res, "exclusive");
+ if (val && ((!strcmp(val, "yes") ||
+ (atoi(val)>0))) ) {
+ ++mp->cm_svcexcl;
+ }
} while (!list_done(&_resources, res));
- return count;
+ pthread_rwlock_unlock(&resource_lock);
+
+ return 0;
}
@@ -125,7 +149,13 @@
uint64_t highnode = owner, nodeid;
char *val;
resource_t *res;
- int exclusive, count, excl;
+ int exclusive;
+
+ if (lock)
+ pthread_rwlock_rdlock(&resource_lock);
+ count_resource_groups(allowed);
+ if (lock)
+ pthread_rwlock_unlock(&resource_lock);
for (x=0; x < allowed->cml_count; x++) {
if (allowed->cml_members[x].cm_state != STATE_UP)
@@ -141,7 +171,8 @@
pthread_rwlock_rdlock(&resource_lock);
score = node_should_start(nodeid, allowed, rg_name, &_domains);
if (!score) { /* Illegal -- failover domain constraint */
- pthread_rwlock_unlock(&resource_lock);
+ if (lock)
+ pthread_rwlock_unlock(&resource_lock);
continue;
}
@@ -153,19 +184,18 @@
val = res_attr_value(res, "exclusive");
exclusive = val && ((!strcmp(val, "yes") || (atoi(val)>0)));
- count = count_resource_groups(nodeid, &excl);
-
if (lock)
pthread_rwlock_unlock(&resource_lock);
if (exclusive) {
- if (count > 0) {
+
+ if (allowed->cml_members[x].cm_svccount > 0) {
/* Definitely not this guy */
continue;
} else {
score += 2;
}
- } else if (excl) {
+ } else if (allowed->cml_members[x].cm_svcexcl) {
/* This guy has an exclusive resource group.
Can't relocate / failover to him. */
continue;
@@ -192,14 +222,19 @@
cluster_member_list_t *membership)
{
char *val;
- int autostart, exclusive, count = 0, excl = 0;
+ cluster_member_t *mp;
+ int autostart, exclusive;
+ void *lockp;
+
+ mp = memb_id_to_p(membership, my_id());
+ assert(mp);
/*
* Service must be not be running elsewhere to consider for a
* local start.
*/
if (svcStatus->rs_state == RG_STATE_STARTED &&
- svcStatus->rs_state == my_id())
+ svcStatus->rs_owner == mp->cm_id)
return;
if (svcStatus->rs_state == RG_STATE_DISABLED)
@@ -218,6 +253,32 @@
"Skipping RG %s: Autostart disabled\n",
svcName);
*/
+ /*
+ Mark non-autostart services as disabled to avoid
+ confusion!
+ */
+ if (rg_lock(svcName, &lockp) < 0) {
+ clulog(LOG_ERR, "#XX: Unable to obtain cluster "
+ "lock @ %s:%d: %s\n", __FILE__, __LINE__,
+ strerror(errno));
+ return;
+ }
+
+ if (get_rg_state(svcName, svcStatus) != 0) {
+ clulog(LOG_ERR, "#34: Cannot get status "
+ "for service %s\n", svcName);
+ rg_unlock(svcName, lockp);
+ return;
+ }
+
+ if (svcStatus->rs_transition == 0 &&
+ svcStatus->rs_state == RG_STATE_STOPPED) {
+ svcStatus->rs_state = RG_STATE_DISABLED;
+ set_rg_state(svcName, svcStatus);
+ }
+
+ rg_unlock(svcName, lockp);
+
return;
}
}
@@ -225,17 +286,10 @@
val = res_attr_value(node->rn_resource, "exclusive");
exclusive = val && ((!strcmp(val, "yes") || (atoi(val)>0)));
- /*
- Count the normal + exclusive resource groups running locally
- */
- count = count_resource_groups(my_id(), &excl);
-
- if (exclusive && count_resource_groups(my_id(), NULL)) {
- /*
+ if (exclusive && mp->cm_svccount) {
clulog(LOG_DEBUG,
"Skipping RG %s: Exclusive and I am running services\n",
svcName);
- */
return;
}
@@ -243,12 +297,10 @@
Don't start other services if I'm running an exclusive
service.
*/
- if (excl) {
- /*
+ if (mp->cm_svcexcl) {
clulog(LOG_DEBUG,
"Skipping RG %s: I am running an exclusive service\n",
svcName);
- */
return;
}
@@ -256,9 +308,10 @@
* Start any stopped services, or started services
* that are owned by a down node.
*/
- if (node_should_start(my_id(), membership, svcName, &_domains) ==
+ if (node_should_start(mp->cm_id, membership, svcName, &_domains) ==
FOD_BEST)
- rt_enqueue_request(svcName, RG_START, -1, 0, my_id(), 0, 0);
+ rt_enqueue_request(svcName, RG_START, -1, 0, mp->cm_id,
+ 0, 0);
}
@@ -267,6 +320,7 @@
cluster_member_list_t *membership)
{
int a, b;
+
/*
Service must be running locally in order to consider for
a relocate
@@ -291,7 +345,6 @@
if (a <= b)
return;
-
clulog(LOG_DEBUG, "Relocating group %s to better node %s\n",
svcName,
memb_id_to_name(membership, nodeid));
@@ -318,12 +371,18 @@
int ret;
if (rg_locked()) {
- clulog(LOG_NOTICE, "Services locked\n");
+ clulog(LOG_NOTICE,
+ "Resource groups locked; not evaluating\n");
return -EAGAIN;
}
- membership = member_list();
+ membership = member_list();
+
pthread_rwlock_rdlock(&resource_lock);
+
+ /* Requires read lock */
+ count_resource_groups(membership);
+
list_do(&_tree, node) {
if (node->rn_resource->r_rule->rr_root == 0)
@@ -372,7 +431,8 @@
rg_state_str(svcStatus.rs_state),
nodeName);
- if (local && (nodeStatus == STATE_UP)) {
+ if ((local && (nodeStatus == STATE_UP)) ||
+ svcStatus.rs_state == RG_STATE_STOPPED) {
consider_start(node, svcName, &svcStatus, membership);
@@ -401,6 +461,9 @@
pthread_rwlock_unlock(&resource_lock);
cml_free(membership);
+ clulog(LOG_DEBUG, "Event (%d:%d:%d) Processed\n", local,
+ (int)nodeid, nodeStatus);
+
return 0;
}
@@ -513,8 +576,9 @@
@param rgname Resource group name whose state we want to send.
@see send_rg_states
*/
+int get_rg_state_local(char *, rg_state_t *);
void
-send_rg_state(int fd, char *rgname)
+send_rg_state(int fd, char *rgname, int fast)
{
rg_state_msg_t msg, *msgp = &msg;
void *lockp;
@@ -523,18 +587,23 @@
msgp->rsm_hdr.gh_length = sizeof(msg);
msgp->rsm_hdr.gh_command = RG_STATUS;
- if (rg_lock(rgname, &lockp) < 0)
- return;
-
- if (get_rg_state(rgname, &msgp->rsm_state) < 0) {
+ /* try fast read -- only if it fails and fast is not
+ specified should we do the full locked read */
+ if (get_rg_state_local(rgname, &msgp->rsm_state) != 0 &&
+ !fast) {
+ if (rg_lock(rgname, &lockp) < 0)
+ return;
+ if (get_rg_state(rgname, &msgp->rsm_state) < 0) {
+ rg_unlock(rgname, lockp);
+ return;
+ }
rg_unlock(rgname, lockp);
- return;
}
- rg_unlock(rgname, lockp);
swab_rg_state_msg_t(msgp);
- msg_send(fd, msgp, sizeof(msg));
+ if (msg_send(fd, msgp, sizeof(msg)) < 0)
+ perror("msg_send");
}
@@ -545,8 +614,10 @@
static void *
status_check_thread(void *arg)
{
- int fd = *(int *)arg;
+ int fd = ((struct status_arg *)arg)->fd;
+ int fast = ((struct status_arg *)arg)->fast;
resource_t *res;
+ generic_msg_hdr hdr;
free(arg);
@@ -556,12 +627,17 @@
if (res->r_rule->rr_root == 0)
continue;
- send_rg_state(fd, res->r_attrs[0].ra_value);
+ send_rg_state(fd, res->r_attrs[0].ra_value, fast);
} while (!list_done(&_resources, res));
pthread_rwlock_unlock(&resource_lock);
msg_send_simple(fd, RG_SUCCESS, 0, 0);
+
+ /* XXX wait for client to tell us it's done; I don't know why
+ this is needed when doing fast I/O, but it is. */
+ msg_receive_timeout(fd, &hdr, sizeof(hdr), 10);
+
msg_close(fd);
return NULL;
@@ -575,26 +651,27 @@
@return 0
*/
int
-send_rg_states(int fd)
+send_rg_states(int fd, int fast)
{
- int *fdp;
+ struct status_arg *arg;
pthread_t newthread;
pthread_attr_t attrs;
- fdp = malloc(sizeof(int));
- if (!fdp) {
+ arg = malloc(sizeof(struct status_arg));
+ if (!arg) {
msg_send_simple(fd, RG_FAIL, 0, 0);
return -1;
}
- *fdp = fd;
+ arg->fd = fd;
+ arg->fast = fast;
pthread_attr_init(&attrs);
pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
pthread_attr_setstacksize(&attrs, 65535);
- pthread_create(&newthread, &attrs, status_check_thread, fdp);
+ pthread_create(&newthread, &attrs, status_check_thread, arg);
pthread_attr_destroy(&attrs);
return 0;
@@ -631,6 +708,7 @@
{
resource_node_t *curr;
char *name;
+ rg_state_t svcblk;
pthread_rwlock_rdlock(&resource_lock);
list_do(&_tree, curr) {
@@ -644,11 +722,27 @@
if (debugfmt)
clulog(LOG_DEBUG, debugfmt, name);
+ /* Optimization: Don't bother even queueing the request
+ during the exit case if we don't own it */
+ if (request == RG_STOP_EXITING) {
+ if (get_rg_state_local(name, &svcblk) < 0)
+ continue;
+
+ /* Always run stop if we're the owner, regardless
+ of state; otherwise, don't run stop */
+ if (svcblk.rs_owner != my_id())
+ continue;
+ }
+
rt_enqueue_request(name, request, -1, 0,
NODE_ID_NONE, 0, 0);
} while (!list_done(&_tree, curr));
pthread_rwlock_unlock(&resource_lock);
+
+ /* XXX during shutdown, if we're doing a simultaneous shutdown,
+ this will cause this rgmanager to hang waiting for all the
+ other rgmanagers to complete. */
if (block)
rg_wait_threads();
}
@@ -657,13 +751,12 @@
/**
Stop changed resources.
*/
-void
-do_status_checks(void)
+void *
+q_status_checks(void *arg)
{
resource_node_t *curr;
char *name;
rg_state_t svcblk;
- void *lockp;
pthread_rwlock_rdlock(&resource_lock);
list_do(&_tree, curr) {
@@ -674,30 +767,42 @@
/* Group name */
name = curr->rn_resource->r_attrs->ra_value;
- /* If we're not running it, no need to CONDSTOP */
- if (rg_lock(name, &lockp) != 0)
- continue;
- if (get_rg_state(name, &svcblk) < 0) {
- rg_unlock(name, lockp);
+ /* Local check - no one will make us take a service */
+ if (get_rg_state_local(name, &svcblk) < 0) {
continue;
}
- rg_unlock(name, lockp);
if (svcblk.rs_owner != my_id() ||
svcblk.rs_state != RG_STATE_STARTED)
continue;
- /*clulog(LOG_DEBUG, "Checking status of %s\n", name);*/
-
rt_enqueue_request(name, RG_STATUS,
-1, 0, NODE_ID_NONE, 0, 0);
} while (!list_done(&_tree, curr));
pthread_rwlock_unlock(&resource_lock);
- /*rg_wait_threads();*/
+
+ return NULL;
}
+
+void
+do_status_checks(void)
+{
+ pthread_attr_t attrs;
+ pthread_t newthread;
+
+ pthread_attr_init(&attrs);
+ pthread_attr_setinheritsched(&attrs, PTHREAD_INHERIT_SCHED);
+ pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
+ pthread_attr_setstacksize(&attrs, 65535);
+
+ pthread_create(&newthread, &attrs, q_status_checks, NULL);
+ pthread_attr_destroy(&attrs);
+}
+
+
/**
Stop changed resources.
*/
@@ -708,7 +813,6 @@
char *name;
rg_state_t svcblk;
int need_kill;
- void *lockp;
clulog(LOG_INFO, "Stopping changed resources.\n");
@@ -722,13 +826,9 @@
name = curr->rn_resource->r_attrs->ra_value;
/* If we're not running it, no need to CONDSTOP */
- if (rg_lock(name, &lockp) != 0)
- continue;
- if (get_rg_state(name, &svcblk) < 0) {
- rg_unlock(name, lockp);
+ if (get_rg_state_local(name, &svcblk) < 0) {
continue;
}
- rg_unlock(name, lockp);
if (svcblk.rs_owner != my_id())
continue;
@@ -757,9 +857,9 @@
do_condstarts(void)
{
resource_node_t *curr;
- char *name;
+ char *name, *val;
rg_state_t svcblk;
- int need_init, new_groups = 0;
+ int need_init, new_groups = 0, autostart;
void *lockp;
clulog(LOG_INFO, "Starting changed resources.\n");
@@ -779,19 +879,13 @@
if (curr->rn_resource->r_flags & RF_NEEDSTART)
need_init = 1;
- if (rg_lock(name, &lockp) != 0)
- continue;
-
- if (get_rg_state(name, &svcblk) < 0) {
- rg_unlock(name, lockp);
+ if (get_rg_state_local(name, &svcblk) < 0) {
continue;
}
if (!need_init && svcblk.rs_owner != my_id()) {
- rg_unlock(name, lockp);
continue;
}
- rg_unlock(name, lockp);
if (need_init) {
++new_groups;
@@ -842,7 +936,14 @@
}
/* Set it up for an auto-start */
- svcblk.rs_state = RG_STATE_STOPPED;
+ val = res_attr_value(curr->rn_resource, "autostart");
+ autostart = !(val && ((!strcmp(val, "no") ||
+ (atoi(val)==0))));
+ if (autostart)
+ svcblk.rs_state = RG_STATE_STOPPED;
+ else
+ svcblk.rs_state = RG_STATE_DISABLED;
+
set_rg_state(name, &svcblk);
rg_unlock(name, lockp);
--- cluster/rgmanager/src/daemons/main.c 2006/01/24 19:46:59 1.9.2.8.6.6
+++ cluster/rgmanager/src/daemons/main.c 2006/06/16 20:07:46 1.9.2.8.6.7
@@ -34,8 +34,12 @@
#include <rg_queue.h>
#include <malloc.h>
+#define L_SYS (1<<1)
+#define L_USER (1<<0)
+
int configure_logging(int ccsfd);
+void node_event_q(int, uint64_t, int);
int daemon_init(char *);
int init_resource_groups(int);
void kill_resource_groups(void);
@@ -44,9 +48,10 @@
void graceful_exit(int);
void flag_shutdown(int sig);
void hard_exit(void);
-int send_rg_states(int);
+int send_rg_states(int, int);
int check_config_update(void);
int svc_exists(char *);
+int watchdog_init(void);
int shutdown_pending = 0, running = 1, need_reconfigure = 0;
char debug = 0; /* XXX* */
@@ -134,6 +139,9 @@
void
node_event(int local, uint64_t nodeID, int nodeStatus)
{
+ if (!running)
+ return;
+
if (local) {
/* Local Node Event */
@@ -216,8 +224,15 @@
clulog(LOG_INFO, "State change: %s DOWN\n",
node_delta->cml_members[x].cm_name);
- node_event(0, node_delta->cml_members[x].cm_id,
- STATE_DOWN);
+ /* Don't bother evaluating anything if resource groups are
+ locked. This is just a performance thing */
+ if (!rg_locked()) {
+ node_event_q(0, node_delta->cml_members[x].cm_id,
+ STATE_DOWN);
+ } else {
+ clulog(LOG_NOTICE, "Not taking action - services"
+ " locked\n");
+ }
}
/* Free nodes */
@@ -231,7 +246,7 @@
me = memb_online(node_delta, my_id());
if (me) {
clulog(LOG_INFO, "State change: Local UP\n");
- node_event(1, my_id(), STATE_UP);
+ node_event_q(1, my_id(), STATE_UP);
}
for (x=0; node_delta && x < node_delta->cml_count; x++) {
@@ -245,14 +260,69 @@
clulog(LOG_INFO, "State change: %s UP\n",
node_delta->cml_members[x].cm_name);
- node_event(0, node_delta->cml_members[x].cm_id,
- STATE_UP);
+ node_event_q(0, node_delta->cml_members[x].cm_id,
+ STATE_UP);
}
cml_free(node_delta);
cml_free(new_ml);
- rg_unlockall();
+ rg_unlockall(L_SYS);
+
+ return 0;
+}
+
+
+int
+lock_commit_cb(char *key, uint64_t viewno, void *data, uint32_t datalen)
+{
+ char lockstate;
+
+ if (datalen != 1) {
+ clulog(LOG_WARNING, "%s: invalid data length!\n", __FUNCTION__);
+ free(data);
+ return 0;
+ }
+
+ lockstate = *(char *)data;
+ free(data);
+
+ if (lockstate == 0) {
+ rg_unlockall(L_USER); /* Doing this multiple times
+ has no effect */
+ clulog(LOG_NOTICE, "Resource Groups Unlocked\n");
+ return 0;
+ }
+
+ if (lockstate == 1) {
+ rg_lockall(L_USER); /* Doing this multiple times
+ has no effect */
+ clulog(LOG_NOTICE, "Resource Groups Locked\n");
+ return 0;
+ }
+
+ clulog(LOG_DEBUG, "Invalid lock state in callback: %d\n", lockstate);
+ return 0;
+}
+
+
+int
+do_lockreq(int fd, int req)
+{
+ int ret;
+ char state;
+ cluster_member_list_t *m = member_list();
+
+ state = (req==RG_LOCK)?1:0;
+ ret = vf_write(m, VFF_IGN_CONN_ERRORS, "rg_lockdown", &state, 1);
+ cml_free(m);
+
+ if (ret == 0) {
+ msg_send_simple(fd, RG_SUCCESS, 0, 0);
+ } else {
+ msg_send_simple(fd, RG_FAIL, 0, 0);
+ }
+
return 0;
}
@@ -292,9 +362,38 @@
switch (msg_hdr.gh_command) {
case RG_STATUS:
clulog(LOG_DEBUG, "Sending service states to fd%d\n",fd);
- send_rg_states(fd);
+ send_rg_states(fd, msg_hdr.gh_arg1);
break;
+
+ case RG_LOCK:
+ msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1);
+ if (rg_quorate()) {
+ do_lockreq(fd, RG_LOCK);
+ }
+
+ msg_close(fd);
+ break;
+
+ case RG_UNLOCK:
+ msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1);
+ if (rg_quorate()) {
+ do_lockreq(fd, RG_UNLOCK);
+ }
+
+ msg_close(fd);
+ break;
+
+ case RG_QUERY_LOCK:
+ msg_receive_timeout(fd, &msg_hdr, sizeof(msg_hdr), 1);
+ if (rg_quorate()) {
+ ret = (rg_locked() & L_USER) ? RG_LOCK : RG_UNLOCK;
+ msg_send_simple(fd, ret, 0, 0);
+ }
+ msg_close(fd);
+ break;
+
+
case RG_ACTION_REQUEST:
ret = msg_receive_timeout(fd, &msg_sm, sizeof(msg_sm),
@@ -308,17 +407,6 @@
/* Decode SmMessageSt message */
swab_SmMessageSt(&msg_sm);
- if (rg_locked()) {
- msg_sm.sm_data.d_ret = RG_EAGAIN;
- /* Encode before responding... */
- swab_SmMessageSt(&msg_sm);
-
- if (msg_send(fd, &msg_sm, sizeof (SmMessageSt)) !=
- sizeof (SmMessageSt))
- clulog(LOG_ERR, "#40: Error replying to "
- "action request.\n");
- }
-
if (!svc_exists(msg_sm.sm_data.d_svcName)) {
msg_sm.sm_data.d_ret = RG_ENOSERVICE;
/* No such service! */
@@ -375,18 +463,19 @@
break;
case CE_SUSPEND:
clulog(LOG_DEBUG, "Suspend Event\n");
- rg_lockall();
+ rg_lockall(L_SYS);
break;
case CE_MEMB_CHANGE:
clulog(LOG_DEBUG, "Membership Change Event\n");
- if (rg_quorate()) {
- rg_unlockall();
+ if (rg_quorate() && running) {
+ rg_unlockall(L_SYS);
membership_update();
}
break;
case CE_QUORATE:
rg_set_quorate();
- rg_unlockall();
+ rg_unlockall(L_SYS);
+ rg_unlockall(L_USER);
clulog(LOG_NOTICE, "Quorum Achieved\n");
membership_update();
break;
@@ -394,7 +483,7 @@
clulog(LOG_EMERG, "#1: Quorum Dissolved\n");
rg_set_inquorate();
member_list_update(NULL); /* Clear member list */
- rg_lockall();
+ rg_lockall(L_SYS);
rg_doall(RG_INIT, 1, "Emergency stop of %s");
rg_set_uninitialized();
break;
@@ -430,7 +519,7 @@
*/
}
- while (tv.tv_sec || tv.tv_usec) {
+ while (running && (tv.tv_sec || tv.tv_usec)) {
FD_ZERO(&rfds);
max = msg_fill_fdset(&rfds, MSG_LISTEN, RG_PURPOSE);
FD_SET(clusterfd, &rfds);
@@ -489,8 +578,6 @@
return 0;
}
-
-
return 0;
}
@@ -512,7 +599,7 @@
void
hard_exit(void)
{
- rg_lockall();
+ rg_lockall(L_SYS);
rg_doall(RG_INIT, 1, "Emergency stop of %s");
vf_shutdown();
exit(1);
@@ -522,8 +609,8 @@
void
cleanup(int cluster_fd)
{
- rg_lockall();
- rg_doall(RG_STOP, 1, NULL);
+ rg_lockall(L_SYS);
+ rg_doall(RG_STOP_EXITING, 1, NULL);
vf_shutdown();
kill_resource_groups();
member_list_update(NULL);
@@ -648,8 +735,11 @@
if (foreground)
clu_log_console(1);
- if (!foreground && (geteuid() == 0))
+ if (!foreground && (geteuid() == 0)) {
daemon_init(argv[0]);
+ if (!debug && !watchdog_init())
+ clulog(LOG_NOTICE, "Failed to start watchdog\n");
+ }
/*
We need quorum before we can read the configuration data from
@@ -723,6 +813,8 @@
return -1;
}
+ vf_key_init("rg_lockdown", 10, NULL, lock_commit_cb);
+
if (clu_login(cluster_fd, RG_SERVICE_GROUP) == -1) {
if (errno != ENOSYS) {
clu_log_console(1);
@@ -756,7 +848,6 @@
/*malloc_dump_table(); */ /* Only works if alloc.c us used */
/*malloc_stats();*/
- /*malloc_dump_table(1352, 1352);*/
exit(0);
}
--- cluster/rgmanager/src/daemons/restree.c 2006/01/20 16:27:29 1.10.2.2.6.5
+++ cluster/rgmanager/src/daemons/restree.c 2006/06/16 20:07:46 1.10.2.2.6.6
@@ -19,8 +19,8 @@
#include <libxml/parser.h>
#include <libxml/xmlmemory.h>
#include <libxml/xpath.h>
-#include <magma.h>
#include <ccs.h>
+#include <rg_locks.h>
#include <stdlib.h>
#include <stdio.h>
#include <resgroup.h>
@@ -54,6 +54,7 @@
/* XXX from reslist.c */
void * act_dup(resource_act_t *acts);
+time_t get_time(char *action, int depth, resource_node_t *node);
const char *res_ops[] = {
@@ -512,7 +513,7 @@
node->rn_parent = parent;
node->rn_resource = curres;
node->rn_state = RES_STOPPED;
- node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
+ node->rn_actions = (resource_act_t *)act_dup(curres->r_actions);
curres->r_refs++;
list_insert(tree, node);
@@ -862,6 +863,44 @@
void
+set_time(char *action, int depth, resource_node_t *node)
+{
+ time_t now;
+ int x = 0;
+
+ time(&now);
+
+ for (; node->rn_actions[x].ra_name; x++) {
+
+ if (strcmp(node->rn_actions[x].ra_name, action) ||
+ node->rn_actions[x].ra_depth != depth)
+ continue;
+
+ node->rn_actions[x].ra_last = now;
+ break;
+ }
+}
+
+
+time_t
+get_time(char *action, int depth, resource_node_t *node)
+{
+ int x = 0;
+
+ for (; node->rn_actions[x].ra_name; x++) {
+
+ if (strcmp(node->rn_actions[x].ra_name, action) ||
+ node->rn_actions[x].ra_depth != depth)
+ continue;
+
+ return node->rn_actions[x].ra_last;
+ }
+
+ return (time_t)0;
+}
+
+
+void
clear_checks(resource_node_t *node)
{
time_t now;
@@ -899,8 +938,8 @@
@see _res_op_by_level res_exec
*/
int
-_res_op(resource_node_t **tree, resource_t *first, char *type,
- void * __attribute__((unused))ret, int realop)
+_res_op(resource_node_t **tree, resource_t *first,
+ char *type, void * __attribute__((unused))ret, int realop)
{
int rv, me;
resource_node_t *node;
@@ -965,13 +1004,20 @@
/* Start starts before children */
if (me && (op == RS_START)) {
node->rn_flags &= ~RF_NEEDSTART;
+
rv = res_exec(node, op, 0);
- if (rv != 0)
+ if (rv != 0) {
+ node->rn_state = RES_FAILED;
return rv;
+ }
- time(&node->rn_resource->r_started);
+ set_time("start", 0, node);
clear_checks(node);
- ++node->rn_resource->r_incarnations;
+
+ if (node->rn_state != RES_STARTED) {
+ ++node->rn_resource->r_incarnations;
+ node->rn_state = RES_STARTED;
+ }
}
if (node->rn_child) {
@@ -983,13 +1029,18 @@
/* Stop/status/etc stops after children have stopped */
if (me && (op == RS_STOP)) {
node->rn_flags &= ~RF_NEEDSTOP;
- --node->rn_resource->r_incarnations;
rv = res_exec(node, op, 0);
if (rv != 0) {
- ++node->rn_resource->r_incarnations;
+ node->rn_state = RES_FAILED;
return rv;
}
+
+ if (node->rn_state != RES_STOPPED) {
+ --node->rn_resource->r_incarnations;
+ node->rn_state = RES_STOPPED;
+ }
+
} else if (me && (op == RS_STATUS)) {
rv = do_status(node);
--- cluster/rgmanager/src/daemons/rg_locks.c 2005/03/02 07:07:01 1.4.2.2
+++ cluster/rgmanager/src/daemons/rg_locks.c 2006/06/16 20:07:46 1.4.2.2.6.1
@@ -167,11 +167,11 @@
int
-rg_lockall(void)
+rg_lockall(int flag)
{
pthread_mutex_lock(&locks_mutex);
if (!__rg_lock)
- __rg_lock = 1;
+ __rg_lock |= flag;
pthread_mutex_unlock(&locks_mutex);
return 0;
}
@@ -189,11 +189,11 @@
int
-rg_unlockall(void)
+rg_unlockall(int flag)
{
pthread_mutex_lock(&locks_mutex);
if (__rg_lock)
- __rg_lock = 0;
+ __rg_lock &= ~flag;
pthread_cond_broadcast(&unlock_cond);
pthread_mutex_unlock(&locks_mutex);
return 0;
@@ -201,21 +201,6 @@
int
-rg_wait_unlockall(void)
-{
- pthread_mutex_lock(&locks_mutex);
- if (!__rg_lock) {
- pthread_mutex_unlock(&locks_mutex);
- return 0;
- }
-
- pthread_cond_wait(&unlock_cond, &locks_mutex);
- pthread_mutex_unlock(&locks_mutex);
- return 0;
-}
-
-
-int
rg_set_quorate(void)
{
pthread_mutex_lock(&locks_mutex);
--- cluster/rgmanager/src/daemons/rg_state.c 2006/02/02 19:00:02 1.4.2.8.6.3
+++ cluster/rgmanager/src/daemons/rg_state.c 2006/06/16 20:07:46 1.4.2.8.6.4
@@ -117,6 +117,7 @@
struct timeval start, now;
uint64_t nodeid, *p;
int flags;
+ int conv = 0, err;
int block = !(dflt_flags & CLK_NOWAIT);
/* Holder not supported for this call */
@@ -128,6 +129,37 @@
gettimeofday(&start, NULL);
start.tv_sec += 30;
}
+
+ /* Ripped from global.c in magma */
+ if (!(dflt_flags & CLK_CONVERT) &&
+ (block || ((dflt_flags & CLK_EX) == 0))) {
+ /* Acquire NULL lock */
+ ret = clu_lock(resource, CLK_NULL, lockpp);
+ err = errno;
+ if (ret == 0) {
+ if ((flags & CLK_EX) == 0) {
+ /* User only wanted a NULL lock... */
+ return 0;
+ }
+ /*
+ Ok, NULL lock was taken, rest of blocking
+ call should be done using lock conversions.
+ */
+ flags |= CLK_CONVERT;
+ conv = 1;
+ } else {
+ switch(err) {
+ case EINVAL:
+ /* Oops, null locks don't work on this
+ plugin; use normal spam mode */
+ break;
+ default:
+ errno = err;
+ return -1;
+ }
+ }
+ }
+
while (1) {
if (block) {
gettimeofday(&now, NULL);
@@ -144,9 +176,15 @@
}
}
- ret = clu_lock(resource, flags | CLK_NOWAIT, lockpp);
+ *lockpp = NULL;
- if ((ret != 0) && (errno == EAGAIN) && block) {
+ /* Take the lock (convert if possible). */
+ ret = clu_lock(resource, flags | CLK_NOWAIT |
+ ((conv && !timed_out) ? CLK_CONVERT : 0),
+ lockpp);
+ err = errno;
+
+ if ((ret != 0) && (err == EAGAIN) && block) {
if (timed_out) {
p = (uint64_t *)*lockpp;
if (p) {
@@ -175,6 +213,16 @@
break;
}
+ /* Fatal error. If we took an automatic NL lock with the hopes of
+ converting it, release the lock before returning */
+ if (conv == 1 && ret < 0) {
+ clu_unlock(resource, *lockpp);
+ *lockpp = NULL;
+ }
+
+ if (ret < 0)
+ errno = err;
+
return ret;
}
@@ -369,6 +417,46 @@
}
+int vf_read_local(char *, uint64_t *, void *, uint32_t *);
+int
+get_rg_state_local(char *name, rg_state_t *svcblk)
+{
+ char res[256];
+ int ret;
+ void *data = NULL;
+ uint32_t datalen = 0;
+ uint64_t viewno;
+
+ /* ... */
+ if (name)
+ strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
+
+ snprintf(res, sizeof(res),"usrm::rg=\"%s\"", svcblk->rs_name);
+ ret = vf_read_local(res, &viewno, &data, &datalen);
+
+ if (ret != VFR_OK || datalen == 0 ||
+ datalen != sizeof(*svcblk)) {
+ if (data)
+ free(data);
+
+ svcblk->rs_owner = NODE_ID_NONE;
+ svcblk->rs_last_owner = NODE_ID_NONE;
+ svcblk->rs_state = RG_STATE_UNINITIALIZED;
+ svcblk->rs_restarts = 0;
+ svcblk->rs_transition = 0;
+ strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
+
+ return FAIL;
+ }
+
+ /* Copy out the data. */
+ memcpy(svcblk, data, sizeof(*svcblk));
+ free(data);
+
+ return 0;
+}
+
+
/**
* Advise service manager as to whether or not to stop a service, given
* that we already know it's legal to run the service.
@@ -380,6 +468,7 @@
* 1 = STOP service - return whatever it returns.
* 2 = DO NOT stop service, return 0 (success)
* 3 = DO NOT stop service, return RG_EFORWARD
+ * 4 = DO NOT stop service, return RG_EAGAIN
*/
int
svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req)
@@ -504,6 +593,7 @@
* @return 0 = DO NOT start service, return FAIL
* 1 = START service - return whatever it returns.
* 2 = DO NOT start service, return 0
+ * 3 = DO NOT start service, return RG_EAGAIN
*/
int
svc_advise_start(rg_state_t *svcStatus, char *svcName, int req)
@@ -519,10 +609,6 @@
break;
case RG_STATE_STOPPING:
- clulog(LOG_DEBUG, "RG %s is stopping\n", svcName);
- ret = 2;
- break;
-
case RG_STATE_STARTED:
case RG_STATE_CHECK:
case RG_STATE_STARTING:
@@ -548,7 +634,14 @@
break;
}
+ /* We are allowed to do something with the service. Make
+ sure we're not locked */
if (svcStatus->rs_owner == NODE_ID_NONE) {
+ if (rg_locked()) {
+ ret = 3;
+ break;
+ }
+
clulog(LOG_NOTICE,
"Starting stopped service%s\n",
svcName);
@@ -556,6 +649,13 @@
break;
}
+ if (rg_locked()) {
+ clulog(LOG_WARNING, "Not initiating failover of %s: "
+ "Resource groups locked!\n", svcName);
+ ret = 3;
+ break;
+ }
+
/*
* Service is running but owner is down -> FAILOVER
*/
@@ -588,6 +688,12 @@
break;
case RG_STATE_STOPPED:
+ /* Don't actually enable if the RG is locked! */
+ if (rg_locked()) {
+ ret = 3;
+ break;
+ }
+
clulog(LOG_NOTICE, "Starting stopped service %s\n",
svcName);
ret = 1;
@@ -596,6 +702,12 @@
case RG_STATE_DISABLED:
case RG_STATE_UNINITIALIZED:
if (req == RG_ENABLE) {
+ /* Don't actually enable if the RG is locked! */
+ if (rg_locked()) {
+ ret = 3;
+ break;
+ }
+
clulog(LOG_NOTICE,
"Starting disabled service %s\n",
svcName);
@@ -656,6 +768,9 @@
case 2: /* Don't start service, return 0 */
rg_unlock(svcName, lockp);
return 0;
+ case 3:
+ rg_unlock(svcName, lockp);
+ return RG_EAGAIN;
default:
break;
}
@@ -738,14 +853,12 @@
}
rg_unlock(svcName, lockp);
- if (svcStatus.rs_state == RG_STATE_STARTED &&
- svcStatus.rs_owner != my_id())
- /* Don't check status for other resource groups */
+ if (svcStatus.rs_owner != my_id())
+ /* Don't check status for anything not owned */
return SUCCESS;
- if (svcStatus.rs_state != RG_STATE_STARTED &&
- svcStatus.rs_owner == my_id())
- /* Not-running RGs should not be checked yet. */
+ if (svcStatus.rs_state != RG_STATE_STARTED)
+ /* Not-running RGs should not be checked either. */
return SUCCESS;
return group_op(svcName, RG_STATUS);
@@ -798,6 +911,9 @@
case 3:
rg_unlock(svcName, lockp);
return RG_EFORWARD;
+ case 4:
+ rg_unlock(svcName, lockp);
+ return RG_EAGAIN;
default:
break;
}
@@ -1077,7 +1193,7 @@
handle_relocate_req(char *svcName, int request, uint64_t preferred_target,
uint64_t *new_owner)
{
- cluster_member_list_t *allowed_nodes;
+ cluster_member_list_t *allowed_nodes, *backup = NULL;
uint64_t target = preferred_target, me = my_id();
int ret, x;
@@ -1102,19 +1218,23 @@
If we can't start it on the preferred target, then we'll try
other nodes.
*/
+ //count_resource_groups(allowed_nodes);
+ backup = cml_dup(allowed_nodes);
+
for (x = 0; x < allowed_nodes->cml_count; x++) {
if (allowed_nodes->cml_members[x].cm_id == me ||
- allowed_nodes->cml_members[x].cm_id == preferred_target)
+ allowed_nodes->cml_members[x].cm_id ==
+ preferred_target)
continue;
allowed_nodes->cml_members[x].cm_state = STATE_DOWN;
}
/*
- * First, see if it's legal to relocate to the target node. Legal
- * means: the node is online and is in the [restricted] failover
- * domain of the service, or the service has no failover domain.
+ * First, see if it's legal to relocate to the target node.
+ * Legal means: the node is online and is in the
+ * [restricted] failover domain of the service, or the
+ * service has no failover domain.
*/
-
target = best_target_node(allowed_nodes, me, svcName, 1);
cml_free(allowed_nodes);
@@ -1155,7 +1275,12 @@
* Ok, so, we failed to send it to the preferred target node.
* Try to start it on all other nodes.
*/
- allowed_nodes = member_list();
+ if (backup) {
+ allowed_nodes = backup;
+ } else {
+ allowed_nodes = member_list();
+ //count_resource_groups(allowed_nodes);
+ }
if (preferred_target != NODE_ID_NONE)
memb_mark_down(allowed_nodes, preferred_target);
@@ -1208,12 +1333,14 @@
* We're done.
*/
exhausted:
- clulog(LOG_WARNING,
- "#70: Attempting to restart service %s locally.\n",
- svcName);
- if (svc_start(svcName, RG_START_RECOVER) == 0) {
- *new_owner = me;
- return FAIL;
+ if (!rg_locked()) {
+ clulog(LOG_WARNING,
+ "#70: Attempting to restart service %s locally.\n",
+ svcName);
+ if (svc_start(svcName, RG_START_RECOVER) == 0) {
+ *new_owner = me;
+ return FAIL;
+ }
}
if (svc_stop(svcName, RG_STOP) != 0) {
@@ -1263,6 +1390,12 @@
*/
ret = svc_start(svcName, req);
+ /*
+ If services are locked, return the error
+ */
+ if (ret == RG_EAGAIN)
+ return RG_EAGAIN;
+
/*
* If we succeeded, then we're done.
*/
--- cluster/rgmanager/src/daemons/rg_thread.c 2006/01/20 16:27:29 1.7.2.3.6.2
+++ cluster/rgmanager/src/daemons/rg_thread.c 2006/06/16 20:07:46 1.7.2.3.6.3
@@ -71,7 +71,7 @@
}
-static void
+static int
wait_initialize(const char *name)
{
resthread_t *t;
@@ -80,15 +80,21 @@
pthread_mutex_lock(&reslist_mutex);
t = find_resthread_byname(name);
- assert(t);
+ if (!t) {
+ pthread_mutex_unlock(&reslist_mutex);
+ return -1;
+ }
+
if (t->rt_status != RG_STATE_UNINITIALIZED) {
pthread_mutex_unlock(&reslist_mutex);
- return;
+ return 0;
}
pthread_mutex_unlock(&reslist_mutex);
usleep(50000);
}
+
+ assert(0);
}
@@ -191,7 +197,6 @@
pthread_cond_wait(&my_queue_cond, &my_queue_mutex);
pthread_mutex_unlock(&my_queue_mutex);
-
while(1) {
pthread_mutex_lock(&reslist_mutex);
pthread_mutex_lock(&my_queue_mutex);
@@ -201,7 +206,6 @@
loop with the lock held. */
break;
}
-
pthread_mutex_unlock(&my_queue_mutex);
pthread_mutex_unlock(&reslist_mutex);
@@ -216,6 +220,8 @@
myself = find_resthread_byname(myname);
assert(myself);
myself->rt_request = req->rr_request;
+ if (req->rr_request == RG_STOP_EXITING)
+ myself->rt_status = RG_STATE_STOPPING;
pthread_mutex_unlock(&reslist_mutex);
switch(req->rr_request) {
@@ -289,6 +295,30 @@
break;
+ case RG_STOP_EXITING:
+ /* We're out of here. Don't allow starts anymore */
+ error = svc_stop(myname, RG_STOP);
+
+ if (error == 0) {
+ ret = RG_SUCCESS;
+
+ } else if (error == RG_EFORWARD) {
+ ret = RG_NONE;
+ break;
+ } else {
+ /*
+ * Bad news.
+ */
+ ret = RG_FAIL;
+ }
+
+ pthread_mutex_lock(&my_queue_mutex);
+ purge_all(&my_queue);
+ pthread_mutex_unlock(&my_queue_mutex);
+
+ break;
+
+
case RG_DISABLE:
/* Disable and user stop requests need to be
forwarded; they're user requests */
@@ -454,6 +484,7 @@
int ret;
resthread_t *resgroup = NULL;
+retry:
pthread_mutex_lock(&reslist_mutex);
while (resgroup == NULL) {
resgroup = find_resthread_byname(resgroupname);
@@ -468,10 +499,14 @@
return ret;
}
+ ret = (resgroup->rt_status == RG_STATE_STOPPING);
+
pthread_mutex_unlock(&reslist_mutex);
- wait_initialize(resgroupname);
+ if (wait_initialize(resgroupname) < 0) {
+ goto retry;
+ }
- return 0;
+ return ret;
}
@@ -521,6 +556,9 @@
resthread_t *resgroup;
if (spawn_if_needed(resgroupname) != 0) {
+ /* Usually, we get here if the thread is killing
+ stuff. This prevents us from queueing START requests
+ while we're exiting */
return -1;
}
/cvs/cluster/cluster/rgmanager/src/resources/svclib_nfslock,v --> standard output
revision 1.2.2.1
--- cluster/rgmanager/src/resources/svclib_nfslock
+++ - 2006-06-16 20:07:49.459256000 +0000
@@ -0,0 +1,251 @@
+#!/bin/bash
+#
+# Do reclaim-broadcasts when we kill lockd during shutdown/startup
+# of a cluster service.
+#
+# Exported functions:
+#
+# notify_list_store
+# notify_list_merge
+# notify_list_broadcast
+#
+
+#
+# Usage:
+# statd_notify <directory> <hostname|ip>
+#
+# Copy out a list from <directory>, merge them with the system nfs lock
+# list, and send them out as <hostname|ip> after generating a random
+# state (needed so clients will reclaim their locks)
+#
+nfslock_statd_notify()
+{
+ declare tmpdir=/tmp/statd-$2.$$
+ declare nl_dir=$1
+ declare nl_ip=$2
+ declare command # Work around bugs in rpc.statd
+ declare pid_xxx # Work around bugs in rpc.statd
+ declare owner
+
+ [ -z "$lockd_pid" ] && return 0
+ if ! [ -d $nl_dir ]; then
+ return 0
+ fi
+
+ if [ -z "`ls $nl_dir/sm/* 2> /dev/null`" ]; then
+ ocf_log debug "No hosts to notify"
+ return 0
+ fi
+
+ # Ok, copy the HA directory to something we can use.
+ rm -rf $tmpdir
+ mkdir -p $tmpdir/sm
+
+ # Copy in our specified entries
+ cp -f $nl_dir/sm/* $tmpdir/sm
+
+ # Copy in our global entries
+ # XXX This might be what we just copied.
+
+ if [ -d "/var/lib/nfs/statd/sm" ]; then
+ owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+ cp -f /var/lib/nfs/statd/sm/* $tmpdir/sm
+ elif [ -d "/var/lib/nfs/sm" ]; then
+ owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+ cp -f /var/lib/nfs/sm/* $tmpdir/sm
+ fi
+
+ #
+ # Generate a random state file. If this ends up being what a client
+ # already has in its list, that's bad, but the chances of this
+ # are small - and relocations should be rare.
+ #
+ dd if=/dev/urandom of=$tmpdir/state bs=1 count=4 &> /dev/null
+
+ #
+ # Make sure we set permissions, or statd will not like it.
+ #
+ chown -R $owner $tmpdir
+
+ #
+ # Tell rpc.statd to notify clients. Don't go into background,
+ # because statd is buggy and won't exit like it's supposed to after
+ # sending the notifications out.
+ #
+ ocf_log info "Sending reclaim notifications via $nl_ip"
+ command="rpc.statd -NFP $tmpdir -n $nl_ip"
+ eval $command 2>&1 &
+ sleep 3 # XXX - the instance of rpc.statd we just spawned is supposed
+ # to exit after it finishes notifying clients.
+ # rpc.statd spawned which is still running handles the actual
+ # new SM_MON requests... we hope 3 seconds is enough time
+ # to get all the SM_NOTIFY messages out. rpc.statd = bugged
+ #
+ # clean up
+ #
+ pid_xxx=`ps auwwx | grep "$command" | grep -v grep | awk '{print $2}'`
+ kill $pid_xxx
+ rm -rf $tmpdir
+
+ return 0
+}
+
+
+#
+# Copy of isSlave from svclib_ip and/or ip.sh
+#
+nfslock_isSlave()
+{
+ declare intf=$1
+ declare line
+
+ if [ -z "$intf" ]; then
+ ocf_log err "usage: isSlave <I/F>"
+ return 1
+ fi
+
+ line=$(/sbin/ip link list dev $intf)
+ if [ $? -ne 0 ]; then
+ ocf_log err "$intf not found"
+ return 1
+ fi
+
+ if [ "$line" = "${line/<*SLAVE*>/}" ]; then
+ return 2
+ fi
+
+ # Yes, it is a slave device. Ignore.
+ return 0
+}
+
+
+#
+# Get all the IPs on the system except loopback IPs
+#
+nfslock_ip_address_list()
+{
+ declare idx dev family ifaddr
+
+ while read idx dev family ifaddr; do
+
+ if [ "$family" != "inet" ] && [ "$family" != "inet6" ]; then
+ continue
+ fi
+
+ if [ "$dev" = "lo" ]; then
+ # Ignore loopback
+ continue
+ fi
+
+ nfslock_isSlave $dev
+ if [ $? -ne 2 ]; then
+ continue
+ fi
+
+ idx=${idx/:/}
+
+ echo $dev $family ${ifaddr/\/*/} ${ifaddr/*\//}
+
+ done < <(/sbin/ip -o addr list | awk '{print $1,$2,$3,$4}')
+
+ return 0
+}
+
+
+#
+# Usage: broadcast_notify <state_directory>
+#
+# Send the contents of <state_directory> out via all IPs on the system.
+#
+notify_list_broadcast()
+{
+ declare dev family addr maskbits ip_name
+ declare lockd_pid=$(pidof lockd)
+ declare nl_dir=$1
+
+ while read dev family addr maskbits; do
+ if [ "$family" != "inet" ]; then
+ continue
+ fi
+
+ ip_name=$(clufindhostname -i $addr)
+ if [ -z "$ip_name" ]; then
+ nfslock_statd_notify $nl_dir $addr
+ else
+ nfslock_statd_notify $nl_dir $ip_name
+ fi
+
+ done < <(nfslock_ip_address_list)
+}
+
+
+#
+# Store the lock monitor list from rpc.statd - do this during a teardown
+# after the IP addresses of a service have been taken offline. Note that
+# this should be done by HA-callout programs, but this feature is not in
+# RHEL3.
+#
+notify_list_store()
+{
+ declare nl_dir=$1
+ declare owner
+
+ mkdir -p $nl_dir/sm
+
+ if [ -d "/var/lib/nfs/statd/sm" ]; then
+ if [ -z "`ls /var/lib/nfs/statd/sm/* 2> /dev/null`" ]; then
+ return 1
+ # nothing to do!
+ fi
+
+ owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+ cp -af /var/lib/nfs/statd/sm/* $nl_dir/sm
+ chown -R $owner $nl_dir
+ return 0
+ elif [ -d "/var/lib/nfs/sm" ]; then
+ if [ -z "`ls /var/lib/nfs/sm/* 2> /dev/null`" ]; then
+ return 1
+ # nothing to do!
+ fi
+
+ owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}')
+ cp -af /var/lib/nfs/sm/* $nl_dir/sm
+ chown -R $owner $nl_dir
+ return 0
+ fi
+
+ return 1
+}
+
+
+#
+# Merge the contents of <nl_dir>/sm with the system-wide list
+# Make sure ownership is right, or statd will hiccup. This should not
+# actually ever be needed because statd will, upon getting a SM_MON
+# request, create all the entries in this list. It's mostly for
+# housekeeping for next time we relocate the service.
+#
+notify_list_merge()
+{
+ declare nl_dir=$1
+ declare owner
+
+ if [ -z "`ls $nl_dir/* 2> /dev/null`" ]; then
+ return 1
+ fi
+
+ if [ -d "/var/lib/nfs/statd/sm" ]; then
+ owner=$(ls -dl /var/lib/nfs/statd/sm | awk '{print $3"."$4}')
+ cp -af $nl_dir/sm/* /var/lib/nfs/statd/sm
+ chown -R $owner $nl_dir
+ return 0
+ elif [ -d "/var/lib/nfs/sm" ]; then
+ owner=$(ls -dl /var/lib/nfs/sm | awk '{print $3"."$4}')
+ cp -af $nl_dir/sm/* /var/lib/nfs/sm
+ chown -R $owner $nl_dir
+ return 0
+ fi
+
+ return 1
+}
+
--- cluster/rgmanager/src/resources/Makefile 2005/12/06 18:37:04 1.4.2.3.6.2
+++ cluster/rgmanager/src/resources/Makefile 2006/06/16 20:07:46 1.4.2.3.6.3
@@ -20,7 +20,7 @@
RESOURCES=fs.sh service.sh ip.sh nfsclient.sh nfsexport.sh \
script.sh netfs.sh clusterfs.sh smb.sh
-TARGETS=${RESOURCES} ocf-shellfuncs
+TARGETS=${RESOURCES} ocf-shellfuncs svclib_nfslock
all:
--- cluster/rgmanager/src/resources/clusterfs.sh 2005/12/07 20:14:29 1.1.2.3.4.4
+++ cluster/rgmanager/src/resources/clusterfs.sh 2006/06/16 20:07:46 1.1.2.3.4.5
@@ -37,7 +37,16 @@
YES=0
NO=1
YES_STR="yes"
-INVALIDATEBUFFERS="/bin/true"
+
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+ . $(dirname $0)/svclib_nfslock
+ NFS_TRICKS=0
+else
+ unset OCF_RESKEY_nfslock
+fi
+
. $(dirname $0)/ocf-shellfuncs
@@ -135,6 +144,18 @@
<content type="string"/>
</parameter>
+ <parameter name="nfslock" inherit="service%nfslock">
+ <longdesc lang="en">
+ If set, the node will try to kill lockd and issue
+ reclaims across all remaining network interface cards.
+ This always happens, regardless of whether unmounting fails.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable NFS lock workarounds
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
</parameters>
<actions>
@@ -774,6 +795,23 @@
esac
fi
+ #
+ # Always do this hackery on clustered file systems.
+ #
+ if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+ [ "$OCF_RESKEY_nfslock" = "1" ]; then
+ ocf_log warning "Dropping node-wide NFS locks"
+ mkdir -p $mp/.clumanager/statd
+ # Copy out the notify list; our
+ # IPs are already torn down
+ if notify_list_store $mp/.clumanager/statd; then
+ notify_list_broadcast $mp/.clumanager/statd
+ fi
+ fi
+
+ # Always invalidate buffers on clusterfs resources
+ clubufflush -f $dev
+
if [ -z "$force_umount" ]; then
ocf_log debug "Not umounting $dev (clustered file system)"
return $SUCCESS
@@ -782,7 +820,6 @@
#
# Unmount the device.
#
-
while [ ! "$done" ]; do
isMounted $dev $mp
case $? in
--- cluster/rgmanager/src/resources/fs.sh 2005/12/07 20:14:29 1.4.2.6.4.4
+++ cluster/rgmanager/src/resources/fs.sh 2006/06/16 20:07:46 1.4.2.6.4.5
@@ -39,6 +39,13 @@
YES_STR="yes"
INVALIDATEBUFFERS="/bin/true"
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+ . $(dirname $0)/svclib_nfslock
+ NFS_TRICKS=0
+fi
+
. $(dirname $0)/ocf-shellfuncs
meta_data()
@@ -126,7 +133,6 @@
</parameter>
-->
-
<parameter name="self_fence">
<longdesc lang="en">
If set and unmounting the file system fails, the node will
@@ -139,6 +145,18 @@
<content type="boolean"/>
</parameter>
+ <parameter name="nfslock" inherit="service%nfslock">
+ <longdesc lang="en">
+ If set and unmounting the file system fails, the node will
+ try to kill lockd and issue reclaims across all remaining
+ network interface cards.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable NFS lock workarounds
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
<parameter name="fsid">
<longdesc lang="en">
File system ID for NFS exports. This can be overridden
@@ -316,6 +334,7 @@
verify_options()
{
declare -i ret=$OCF_SUCCESS
+ declare o
#
# From mount(8)
@@ -762,6 +781,63 @@
}
+#
+# Enable quotas on the mount point if the user requested them
+#
+enable_fs_quotas()
+{
+ declare -i need_check=0
+ declare quotaopts=""
+ declare mopt
+ declare opts=$1
+ declare mp=$2
+
+ if [ -z "`which quotaon`" ]; then
+ ocf_log err "quotaon not found in $PATH"
+ return 1
+ fi
+
+ for mopt in `echo $opts | sed -e s/,/\ /g`; do
+ case $mopt in
+ usrquota)
+ quotaopts="u$quotaopts"
+ continue
+ ;;
+ grpquota)
+ quotaopts="g$quotaopts"
+ continue
+ ;;
+ noquota)
+ quotaopts=""
+ return 0
+ ;;
+ esac
+ done
+
+ [ -z "$quotaopts" ] && return 0
+
+ # Ok, create quota files if they don't exist
+ for f in quota.user aquota.user quota.group aquota.group; do
+ if ! [ -f "$mp/$f" ]; then
+ ocf_log info "$mp/$f was missing - creating"
+ touch "$mp/$f"
+ chmod 600 "$mp/$f"
+ need_check=1
+ fi
+ done
+
+ if [ $need_check -eq 1 ]; then
+ ocf_log info "Checking quota info in $mp"
+ quotacheck -$quotaopts $mp
+ fi
+
+ ocf_log info "Enabling Quotas on $mp"
+ ocf_log debug "quotaon -$quotaopts $mp"
+ quotaon -$quotaopts $mp
+
+ return $?
+}
+
#
# startFilesystem
@@ -958,6 +1034,18 @@
return $FAIL
fi
+ #
+ # Create this for the NFS NLM broadcast bit
+ #
+ if [ $NFS_TRICKS -eq 0 ]; then
+ if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+ [ "$OCF_RESKEY_nfslock" = "1" ]; then
+ mkdir -p $mp/.clumanager/statd
+ notify_list_merge $mp/.clumanager/statd
+ fi
+ fi
+
+ enable_fs_quotas $opts $mp
activeMonitor start || return $OCF_ERR_GENERIC
return $SUCCESS
@@ -1048,6 +1136,7 @@
activeMonitor stop || return $OCF_ERR_GENERIC
+ quotaoff -gu $mp &> /dev/null
umount $mp
if [ $? -eq 0 ]; then
umount_failed=
@@ -1059,6 +1148,22 @@
if [ "$force_umount" ]; then
killMountProcesses $mp
+ if [ $try -eq 1 ]; then
+ if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+ [ "$OCF_RESKEY_nfslock" = "1" ]; then
+ ocf_log warning \
+ "Dropping node-wide NFS locks"
+ pkill -KILL -x lockd
+ mkdir -p $mp/.clumanager/statd
+ # Copy out the notify list; our
+ # IPs are already torn down
+ if notify_list_store $mp/.clumanager/statd
+ then
+ notify_list_broadcast \
+ $mp/.clumanager/statd
+ fi
+ fi
+ fi
fi
if [ $try -ge $max_tries ]; then
--- cluster/rgmanager/src/resources/ip.sh 2005/12/07 20:14:29 1.5.2.4.4.9
+++ cluster/rgmanager/src/resources/ip.sh 2006/06/16 20:07:46 1.5.2.4.4.10
@@ -30,6 +30,13 @@
PATH=/bin:/sbin:/usr/bin:/usr/sbin
export LC_ALL LANG PATH
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+ . $(dirname $0)/svclib_nfslock
+ NFS_TRICKS=0
+fi
+
. $(dirname $0)/ocf-shellfuncs
@@ -90,6 +97,19 @@
</shortdesc>
<content type="boolean" default="1"/>
</parameter>
+
+ <parameter name="nfslock" inherit="service%nfslock">
+ <longdesc lang="en">
+ If set, the node will broadcast NFS lock reclaim
+ notifications across its remaining network interface
+ cards when this IP address is brought up.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable NFS lock workarounds
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
</parameters>
<actions>
@@ -865,6 +885,13 @@
fi
ip_op ${OCF_RESKEY_family} add ${OCF_RESKEY_address}
+ if [ $NFS_TRICKS -eq 0 ]; then
+ if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+ [ "$OCF_RESKEY_nfslock" = "1" ]; then
+ notify_list_broadcast /var/lib/nfs/statd
+ fi
+ fi
+
exit $?
;;
stop)
--- cluster/rgmanager/src/resources/nfsclient.sh 2006/01/27 21:06:57 1.3.2.2.6.4
+++ cluster/rgmanager/src/resources/nfsclient.sh 2006/06/16 20:07:46 1.3.2.2.6.5
@@ -95,6 +95,18 @@
<content type="string"/>
</parameter>
+ <parameter name="nfslock" inherit="nfsexport%nfslock">
+ <longdesc lang="en">
+ This tells us whether the service in question has the
+ NFS lock workarounds enabled. If so, we always unexport
+ * rather than the specified client.
+ </longdesc>
+ <shortdesc lang="en">
+ NFS Lock workaround flag
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
<parameter name="options">
<longdesc lang="en">Defines a list of options for this
particular client. See 'man 5 exports' for a list
@@ -106,6 +118,19 @@
<content type="string"/>
</parameter>
+ <parameter name="allow_recover">
+ <longdesc lang="en">
+ Allows recovery of this NFS client (default = 1) if it
+ disappears from the export list. If set to 0, the service
+ will be restarted. This is useful to help preserve export
+ ordering.
+ </longdesc>
+ <shortdesc lang="en">
+ Allow recovery
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
</parameters>
<actions>
@@ -282,6 +307,14 @@
stop)
verify_all || exit $OCF_ERR_ARGS
+ if [ "$OCF_RESKEY_nfslock" = "1" ]; then
+ #
+ # If the NFS lock workarounds were enabled, unexport from
+ # the world
+ #
+ export OCF_RESKEY_target="*"
+ fi
+
ocf_log info "Removing export: ${OCF_RESKEY_target}:${OCF_RESKEY_path}"
exportfs -u "${OCF_RESKEY_target}:${OCF_RESKEY_path}"
rv=$?
@@ -299,9 +332,26 @@
# * Exports longer than 14 chars have line breaks inserted, which
# broke the way the status check worked.
#
- exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \
- "^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target}"
- rv=$?
+ # Status check fix from Craig Lewis:
+ # * Exports with RegExp metacharacters need to be escaped.
+ # These metacharacters are: * ? .
+ #
+ export OCF_RESKEY_target_regexp=$(echo $OCF_RESKEY_target | \
+ sed -e 's/*/[*]/g' -e 's/?/[?]/g' -e 's/\./\\./g')
+ exportfs -v | tr -d "\n" | sed -e 's/([^)]*)/\n/g' | grep -q \
+ "^${OCF_RESKEY_path}[\t ]*.*${OCF_RESKEY_target_regexp}"
+ rv=$?
+ ;;
+
+recover)
+ if [ "$OCF_RESKEY_allow_recover" = "0" ] || \
+ [ "$OCF_RESKEY_allow_recover" = "no" ] || \
+ [ "$OCF_RESKEY_allow_recover" = "false" ]; then
+ exit 1
+ fi
+
+ $0 stop || exit 1
+ $0 start || exit 1
;;
restart)
--- cluster/rgmanager/src/resources/nfsexport.sh 2005/12/07 22:53:28 1.4.2.1.6.3
+++ cluster/rgmanager/src/resources/nfsexport.sh 2006/06/16 20:07:46 1.4.2.1.6.4
@@ -97,6 +97,19 @@
</shortdesc>
<content type="string"/>
</parameter>
+
+ <parameter name="nfslock" inherit="nfslock">
+ <longdesc lang="en">
+ If you can see this, your GUI is broken.
+ This inherits an unspecified nfslock parameter so that
+ it works with fs or clusterfs resources.
+ </longdesc>
+ <shortdesc lang="en">
+ If you can see this, your GUI is broken.
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
</parameters>
<actions>
--- cluster/rgmanager/src/resources/ocf-shellfuncs 2005/10/17 20:53:12 1.2.2.1
+++ cluster/rgmanager/src/resources/ocf-shellfuncs 2006/06/16 20:07:46 1.2.2.2
@@ -1,5 +1,5 @@
#
-# $Id: ocf-shellfuncs,v 1.2.2.1 2005/10/17 20:53:12 lhh Exp $
+# $Id: ocf-shellfuncs,v 1.2.2.2 2006/06/16 20:07:46 lhh Exp $
#
# Common helper functions for the OCF Resource Agents supplied by
# heartbeat.
--- cluster/rgmanager/src/resources/service.sh 2005/12/06 18:37:04 1.1.2.1.6.2
+++ cluster/rgmanager/src/resources/service.sh 2006/06/16 20:07:46 1.1.2.1.6.3
@@ -5,7 +5,12 @@
# resources. ;(
#
-
+# Grab nfs lock tricks if available
+export NFS_TRICKS=1
+if [ -f "$(dirname $0)/svclib_nfslock" ]; then
+ . $(dirname $0)/svclib_nfslock
+ NFS_TRICKS=0
+fi
meta_data()
{
@@ -89,6 +94,22 @@
<content type="boolean"/>
</parameter>
+ <parameter name="nfslock">
+ <longdesc lang="en">
+ Enable NFS lock workarounds. When used with a compatible
+ HA-callout program like clunfslock, this could be used
+ to provide NFS lock failover, but at significant cost to
+ other services on the machine. This requires a compatible
+ version of nfs-utils and manual configuration of rpc.statd;
+ see 'man rpc.statd' to see if your version supports
+ the -H parameter.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable NFS lock workarounds
+ </shortdesc>
+ <content type="boolean"/>
+ </parameter>
+
<parameter name="recovery">
<longdesc lang="en">
This currently has three possible options: "restart" tries
@@ -144,6 +165,17 @@
#
case $1 in
start)
+ #
+ # XXX If this is set, we kill lockd. If there is no
+ # child IP address, then clients will NOT get the reclaim
+ # notification.
+ #
+ if [ $NFS_TRICKS -eq 0 ]; then
+ if [ "$OCF_RESKEY_nfslock" = "yes" ] || \
+ [ "$OCF_RESKEY_nfslock" = "1" ]; then
+ pkill -KILL -x lockd
+ fi
+ fi
exit 0
;;
stop)
--- cluster/rgmanager/src/utils/Makefile 2005/10/17 20:30:45 1.3.2.1.6.2
+++ cluster/rgmanager/src/utils/Makefile 2006/06/16 20:07:47 1.3.2.1.6.3
@@ -23,7 +23,7 @@
CFLAGS+= -L${libdir} -DPACKAGE_VERSION=\"${RELEASE}\"
LDFLAGS+= -lmagmamsg -lmagma -lpthread -ldl -lncurses -L../clulib -lclulib -lccs
-TARGETS=clubufflush clufindhostname clustat clusvcadm clulog
+TARGETS=clubufflush clufindhostname clustat clusvcadm clulog clunfslock
all: ${TARGETS}
@@ -52,6 +52,10 @@
clusvcadm: clusvcadm.o
$(CC) -o $@ $^ $(INLUDE) $(CFLAGS) $(LDFLAGS)
+clunfslock: clunfslock.sh
+ cp clunfslock.sh clunfslock
+ chmod 755 clunfslock
+
clean:
rm -f *.o $(TARGETS)
--- cluster/rgmanager/src/utils/clustat.c 2006/01/20 16:27:30 1.5.2.3.6.7
+++ cluster/rgmanager/src/utils/clustat.c 2006/06/16 20:07:47 1.5.2.3.6.8
@@ -18,6 +18,12 @@
#define FLAG_RGMGR 0x4
#define FLAG_NOCFG 0x8 /* Shouldn't happen */
+#define RG_VERBOSE 0x1
+
+#define QSTAT_ONLY 1
+#define VERSION_ONLY 2
+#define NODEID_ONLY 3
+
int running = 1;
@@ -35,7 +41,7 @@
rg_state_list_t *
-rg_state_list(uint64_t local_node_id)
+rg_state_list(uint64_t local_node_id, int fast)
{
int fd, n, x;
rg_state_list_t *rsl = NULL;
@@ -49,7 +55,7 @@
return NULL;
}
- msg_send_simple(fd, RG_STATUS, 0, 0);
+ msg_send_simple(fd, RG_STATUS, fast, 0);
rsl = malloc(sizeof(rg_state_list_t));
if (!rsl) {
@@ -70,8 +76,10 @@
"from Resource Group Manager\n");
break;
}
+
if (n < 0) {
- if (errno == EINTR)
+ if (errno == EAGAIN ||
+ errno == EINTR)
continue;
fprintf(stderr, "Failed to receive "
"service data: select: %s\n",
@@ -80,8 +88,16 @@
}
n = msg_receive_simple(fd, &msgp, tv.tv_sec);
- if (n < sizeof(generic_msg_hdr))
+ if (n < 0) {
+ if (errno == EAGAIN)
+ continue;
+ perror("msg_receive_simple");
+ break;
+ }
+ if (n < sizeof(generic_msg_hdr)) {
+ printf("Error: Malformed message\n");
break;
+ }
if (!msgp) {
printf("Error: no message?!\n");
@@ -99,6 +115,7 @@
return NULL;
}
+
rsmp = (rg_state_msg_t *)msgp;
swab_rg_state_t(&rsmp->rsm_state);
@@ -119,6 +136,7 @@
msgp = NULL;
}
+ msg_send_simple(fd, RG_SUCCESS, 0, 0);
msg_close(fd);
if (!rsl->rgl_count) {
@@ -260,8 +278,9 @@
return "unknown";
}
+
void
-txt_rg_state(rg_state_t *rs, cluster_member_list_t *members)
+_txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
{
char owner[31];
@@ -286,39 +305,90 @@
void
-xml_rg_state(rg_state_t *rs, cluster_member_list_t *members)
+_txt_rg_state_v(rg_state_t *rs, cluster_member_list_t *members, int flags)
+{
+ printf("Service Name : %s\n", rs->rs_name);
+ printf(" Current State : %s (%d)\n",
+ rg_state_str(rs->rs_state), rs->rs_state);
+ printf(" Owner : %s\n",
+ my_memb_id_to_name(members, rs->rs_owner));
+ printf(" Last Owner : %s\n",
+ my_memb_id_to_name(members, rs->rs_last_owner));
+ printf(" Last Transition : %s\n",
+ ctime((time_t *)(&rs->rs_transition)));
+}
+
+
+void
+txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
{
+ if (flags & RG_VERBOSE)
+ _txt_rg_state_v(rs, members, flags);
+ else
+ _txt_rg_state(rs, members, flags);
+}
+
+
+void
+xml_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
+{
+ char time_str[32];
+ int x;
+
+ /* Chop off newlines */
+ ctime_r((time_t *)&rs->rs_transition, time_str);
+ for (x = 0; time_str[x]; x++) {
+ if (time_str[x] < 32) {
+ time_str[x] = 0;
+ break;
+ }
+ }
+
printf(" <group name=\"%s\" state=\"%d\" state_str=\"%s\" "
- " owner=\"%s\" last_owner=\"%s\" restarts=\"%d\"/>\n",
+ " owner=\"%s\" last_owner=\"%s\" restarts=\"%d\""
+ " last_transition=\"%llu\" last_transition_str=\"%s\"/>\n",
rs->rs_name,
rs->rs_state,
rg_state_str(rs->rs_state),
my_memb_id_to_name(members, rs->rs_owner),
my_memb_id_to_name(members, rs->rs_last_owner),
- rs->rs_restarts);
+ rs->rs_restarts,
+ (long long unsigned)rs->rs_transition,
+ time_str);
}
void
-txt_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members)
+txt_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members,
+ char *svcname, int flags)
{
int x;
if (!rgl || !members)
return;
- printf(" %-20.20s %-30.30s %-14.14s\n",
- "Service Name", "Owner (Last)", "State");
- printf(" %-20.20s %-30.30s %-14.14s\n",
- "------- ----", "----- ------", "-----");
+ if (!(flags & RG_VERBOSE)) {
+ printf(" %-20.20s %-30.30s %-14.14s\n",
+ "Service Name", "Owner (Last)", "State");
+ printf(" %-20.20s %-30.30s %-14.14s\n",
+ "------- ----", "----- ------", "-----");
+ } else {
+ printf("Service Information\n"
+ "------- -----------\n\n");
+ }
- for (x = 0; x < rgl->rgl_count; x++)
- txt_rg_state(&rgl->rgl_states[x], members);
+ for (x = 0; x < rgl->rgl_count; x++) {
+ if (svcname &&
+ strcmp(rgl->rgl_states[x].rs_name, svcname))
+ continue;
+ txt_rg_state(&rgl->rgl_states[x], members, flags);
+ }
}
void
-xml_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members)
+xml_rg_states(rg_state_list_t *rgl, cluster_member_list_t *members,
+ char *svcname)
{
int x;
@@ -327,8 +397,12 @@
printf(" <groups>\n");
- for (x = 0; x < rgl->rgl_count; x++)
- xml_rg_state(&rgl->rgl_states[x], members);
+ for (x = 0; x < rgl->rgl_count; x++) {
+ if (svcname &&
+ strcmp(rgl->rgl_states[x].rs_name, svcname))
+ continue;
+ xml_rg_state(&rgl->rgl_states[x], members, 0);
+ }
printf(" </groups>\n");
}
@@ -408,22 +482,25 @@
void
-txt_member_states(cluster_member_list_t *membership)
+txt_member_states(cluster_member_list_t *membership, char *name)
{
int x;
printf(" %-40.40s %s\n", "Member Name", "Status");
printf(" %-40.40s %s\n", "------ ----", "------");
- for (x = 0; x < membership->cml_count; x++)
+ for (x = 0; x < membership->cml_count; x++) {
+ if (name && strcmp(membership->cml_members[x].cm_name, name))
+ continue;
txt_member_state(&membership->cml_members[x]);
+ }
printf("\n");
}
void
-xml_member_states(cluster_member_list_t *membership)
+xml_member_states(cluster_member_list_t *membership, char *name)
{
int x;
@@ -431,38 +508,50 @@
return;
printf(" <nodes>\n");
- for (x = 0; x < membership->cml_count; x++)
+ for (x = 0; x < membership->cml_count; x++) {
+ if (name && strcmp(membership->cml_members[x].cm_name, name))
+ continue;
xml_member_state(&membership->cml_members[x]);
+ }
printf(" </nodes>\n");
}
void
txt_cluster_status(int qs, cluster_member_list_t *membership,
- rg_state_list_t *rgs)
+ rg_state_list_t *rgs, char *name, char *svcname,
+ int flags)
{
- txt_quorum_state(qs);
-
- if (!membership || !(qs & QF_GROUPMEMBER)) {
- printf("Resource Group Manager not running; no service "
- "information available.\n\n");
+ if (!svcname && !name) {
+ txt_quorum_state(qs);
+ if (!membership || !(qs & QF_GROUPMEMBER)) {
+ printf("Resource Group Manager not running; "
+ "no service information available.\n\n");
+ }
}
- txt_member_states(membership);
- txt_rg_states(rgs, membership);
+ if (!svcname || (name && svcname))
+ txt_member_states(membership, name);
+ if (!name || (name && svcname))
+ txt_rg_states(rgs, membership, svcname, flags);
}
void
xml_cluster_status(int qs, cluster_member_list_t *membership,
- rg_state_list_t *rgs)
+ rg_state_list_t *rgs, char *name, char *svcname,
+ int flags)
{
printf("<?xml version=\"1.0\"?>\n");
- printf("<clustat version=\"4.1\">\n");
- xml_quorum_state(qs);
- xml_member_states(membership);
- if (rgs)
- xml_rg_states(rgs, membership);
+ printf("<clustat version=\"4.1.1\">\n");
+
+ if (!svcname && !name)
+ xml_quorum_state(qs);
+ if (!svcname || (name && svcname))
+ xml_member_states(membership, name);
+ if (rgs &&
+ (!name || (name && svcname)))
+ xml_rg_states(rgs, membership, svcname);
printf("</clustat>\n");
}
@@ -545,9 +634,12 @@
" with -x.\n"
" -I Display local node ID and exit\n"
" -m <member> Display status of <member> and exit\n"
-" -s <service> Display statis of <service> and exit\n"
+" -s <service> Display status of <service> and exit\n"
" -v Display version & cluster plugin and exit\n"
" -x Dump information as XML\n"
+" -Q Return 0 if quorate, 1 if not (no output)\n"
+" -f Enable fast clustat reports\n"
+" -l Use long format for services\n"
"\n", basename(arg0));
}
@@ -559,37 +651,32 @@
cluster_member_list_t *membership;
rg_state_list_t *rgs = NULL;
uint64_t local_node_id;
+ int fast = 0;
+ int runtype = 0;
int refresh_sec = 0, errors = 0;
- int opt, xml = 0;
- char *member_name;
- char *rg_name;
+ int opt, xml = 0, flags = 0;
+ char *member_name = NULL;
+ char *rg_name = NULL;
- /* Connect & grab all our info */
- fd = clu_connect(RG_SERVICE_GROUP, 0);
- if (fd < 0) {
- printf("Could not connect to cluster service\n");
- return 1;
- }
-
- while ((opt = getopt(argc, argv, "Is:m:i:xvQh?")) != EOF) {
+ while ((opt = getopt(argc, argv, "fIls:m:i:xvQh?")) != EOF) {
switch(opt) {
case 'v':
- printf("%s version %s\n", basename(argv[0]),
- PACKAGE_VERSION);
- printf("Connected via: %s\n", clu_plugin_version());
- goto cleanup;
+ runtype = VERSION_ONLY;
+ break;
case 'I':
- printf("0x%08x%08x\n",(uint32_t)(local_node_id>>32),
- (uint32_t)(local_node_id&0xffffffff));
- goto cleanup;
+ runtype = NODEID_ONLY;
+ break;
case 'i':
refresh_sec = atoi(optarg);
if (refresh_sec <= 0)
refresh_sec = 1;
break;
+ case 'l':
+ flags |= RG_VERBOSE;
+ break;
case 'm':
member_name = optarg;
@@ -597,9 +684,8 @@
case 'Q':
/* Return to shell: 0 true, 1 false... */
- ret = !(clu_quorum_status(RG_SERVICE_GROUP) &
- QF_QUORATE);
- goto cleanup;
+ runtype = QSTAT_ONLY;
+ break;
case 's':
rg_name = optarg;
@@ -615,6 +701,9 @@
xml = 1;
break;
+ case 'f':
+ ++fast;
+ break;
case '?':
case 'h':
usage(argv[0]);
@@ -631,6 +720,37 @@
return 1;
}
+ /* Connect & grab all our info */
+ fd = clu_connect(RG_SERVICE_GROUP, 0);
+
+ switch(runtype) {
+ case QSTAT_ONLY:
+ if (fd < 0)
+ break;
+ ret = !(clu_quorum_status(RG_SERVICE_GROUP) &
+ QF_QUORATE);
+ goto cleanup;
+ case VERSION_ONLY:
+ printf("%s version %s\n", basename(argv[0]),
+ PACKAGE_VERSION);
+ if (fd < 0)
+ break;
+ printf("Connected via: %s\n", clu_plugin_version());
+ goto cleanup;
+ case NODEID_ONLY:
+ if (fd < 0)
+ break;
+ clu_local_nodeid(NULL, &local_node_id);
+ printf("0x%08x%08x\n",(uint32_t)(local_node_id>>32),
+ (uint32_t)(local_node_id&0xffffffff));
+ goto cleanup;
+ }
+
+ if (fd < 0) {
+ printf("Could not connect to cluster service\n");
+ return 1;
+ }
+
/* XXX add member/rg single-shot state */
signal(SIGINT, term_handler);
signal(SIGTERM, term_handler);
@@ -639,7 +759,7 @@
qs = clu_quorum_status(RG_SERVICE_GROUP);
membership = build_member_list(&local_node_id);
- rgs = rg_state_list(local_node_id);
+ rgs = rg_state_list(local_node_id, fast);
if (refresh_sec) {
setupterm((char *) 0, STDOUT_FILENO, (int *) 0);
@@ -647,9 +767,11 @@
}
if (xml)
- xml_cluster_status(qs, membership, rgs);
+ xml_cluster_status(qs, membership, rgs, member_name,
+ rg_name,flags);
else
- txt_cluster_status(qs, membership, rgs);
+ txt_cluster_status(qs, membership, rgs, member_name,
+ rg_name,flags);
if (membership)
cml_free(membership);
--- cluster/rgmanager/src/utils/clusvcadm.c 2005/07/28 21:19:51 1.2.2.3.6.3
+++ cluster/rgmanager/src/utils/clusvcadm.c 2006/06/16 20:07:47 1.2.2.3.6.4
@@ -52,11 +52,107 @@
}
+int
+do_lock_req(int req)
+{
+ int cfd = -1;
+ int fd = -1;
+ int ret = RG_FAIL;
+ cluster_member_list_t *membership = NULL;
+ uint64_t me;
+ generic_msg_hdr hdr;
+
+ fd = clu_connect(RG_SERVICE_GROUP, 0);
+ if (fd < 0) {
+ printf("Could not connect to cluster service\n");
+ goto out;
+ }
+
+ membership = clu_member_list(RG_SERVICE_GROUP);
+ msg_update(membership);
+ clu_local_nodeid(RG_SERVICE_GROUP, &me);
+
+ fd = msg_open(me, RG_PORT, 0, 5);
+ if (fd < 0) {
+ printf("Could not connect to resource group manager\n");
+ goto out;
+ }
+
+ if (msg_send_simple(fd, req, 0, 0) < 0) {
+ printf("Communication failed\n");
+ goto out;
+ }
+
+ if (msg_receive_timeout(fd, &hdr, sizeof(hdr), 5) < sizeof(hdr)) {
+ printf("Receive failed\n");
+ goto out;
+ }
+
+ swab_generic_msg_hdr(&hdr);
+ ret = hdr.gh_command;
+
+out:
+ if (membership)
+ cml_free(membership);
+
+ if (fd >= 0)
+ msg_close(fd);
+
+ if (cfd >= 0)
+ clu_disconnect(cfd);
+
+ return ret;
+}
+
+
+int
+do_lock(void)
+{
+ if (do_lock_req(RG_LOCK) != RG_SUCCESS) {
+ printf("Lock operation failed\n");
+ return 1;
+ }
+ printf("Resource groups locked\n");
+ return 0;
+}
+
+
+int
+do_unlock(void)
+{
+ if (do_lock_req(RG_UNLOCK) != RG_SUCCESS) {
+ printf("Unlock operation failed\n");
+ return 1;
+ }
+ printf("Resource groups unlocked\n");
+ return 0;
+}
+
+
+int
+do_query_lock(void)
+{
+ switch(do_lock_req(RG_QUERY_LOCK)) {
+ case RG_LOCK:
+ printf("Resource groups locked\n");
+ break;
+ case RG_UNLOCK:
+ printf("Resource groups unlocked\n");
+ break;
+ default:
+ printf("Query operation failed\n");
+ return 1;
+ }
+ return 0;
+}
+
void
usage(char *name)
{
-printf("usage: %s -d <group> Disable <group>\n", name);
+printf("Resource Group Control Commands:\n");
+printf(" %s -v Display version and exit\n",name);
+printf(" %s -d <group> Disable <group>\n", name);
printf(" %s -e <group> Enable <group>\n",
name);
printf(" %s -e <group> -m <member> Enable <group>"
@@ -67,7 +163,16 @@
printf(" %s -R <group> Restart a group in place.\n",
name);
printf(" %s -s <group> Stop <group>\n", name);
-printf(" %s -v Display version and exit\n",name);
+printf("\n");
+printf("Resource Group Locking (for cluster Shutdown / Debugging):\n");
+printf(" %s -l Lock local resource group manager.\n"
+ " This prevents resource groups from\n"
+ " starting on the local node.\n",
+ name);
+printf(" %s -S Show lock state\n", name);
+printf(" %s -u Unlock local resource group manager.\n"
+ " This allows resource groups to start\n"
+ " on the local node.\n", name);
}
@@ -90,8 +195,17 @@
return 1;
}
- while ((opt = getopt(argc, argv, "e:d:r:n:m:vR:s:S:qh?")) != EOF) {
+ while ((opt = getopt(argc, argv, "lSue:d:r:n:m:vR:s:qh?")) != EOF) {
switch (opt) {
+ case 'l':
+ return do_lock();
+
+ case 'S':
+ return do_query_lock();
+
+ case 'u':
+ return do_unlock();
+
case 'e':
/* ENABLE */
actionstr = "trying to enable";
More information about the Cluster-devel
mailing list