[Cluster-devel] cluster/rgmanager ChangeLog include/resgroup.h ...
lhh at sourceware.org
lhh at sourceware.org
Fri Sep 1 19:02:24 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2006-09-01 19:02:22
Modified files:
rgmanager : ChangeLog
rgmanager/include: resgroup.h vf.h
rgmanager/src/clulib: rg_strings.c vft.c
rgmanager/src/daemons: groups.c main.c
rgmanager/src/utils: clustat.c clusvcadm.c
Log message:
2006-09-01 Lon Hohberger <lhh at redhat.com>
* include/resgroup.h: Add proto for rg_strerror
* include/vf.h: Add proto for vf_invalidate (flushes vf cache)
* src/clulib/rg_strings.c: Add rg_strerror function, define
human-readable strings for rgmanager error values
* src/clulib/vft.c: Add vf_invalidate (separate from vf_shutdown)
* src/daemons/groups.c: Fix obvious logic error [the rs_owner comparison against mp->cn_nodeid changes from == to !=]
* src/daemons/main.c: Fix rg_doall() message during loss of quorum.
Invalidate local VF cache and kill resource configurations on
loss of quorum (#202497). Send RG_EQUORUM back to clustat/clusvcadm
so that they report why they can't get information. Don't queue
status checks if we've lost quorum. Add command line parameter [-w] to
disable internal crash watchdog
* src/utils/clustat.c, clusvcadm.c: Handle SIGPIPE, and produce
useful errors if possible.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/ChangeLog.diff?cvsroot=cluster&r1=1.22&r2=1.23
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/resgroup.h.diff?cvsroot=cluster&r1=1.13&r2=1.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/include/vf.h.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/rg_strings.c.diff?cvsroot=cluster&r1=1.4&r2=1.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/clulib/vft.c.diff?cvsroot=cluster&r1=1.15&r2=1.16
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/groups.c.diff?cvsroot=cluster&r1=1.21&r2=1.22
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/daemons/main.c.diff?cvsroot=cluster&r1=1.30&r2=1.31
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clustat.c.diff?cvsroot=cluster&r1=1.19&r2=1.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/utils/clusvcadm.c.diff?cvsroot=cluster&r1=1.11&r2=1.12
--- cluster/rgmanager/ChangeLog 2006/08/21 15:14:08 1.22
+++ cluster/rgmanager/ChangeLog 2006/09/01 19:02:20 1.23
@@ -1,3 +1,25 @@
+2006-09-01 Lon Hohberger <lhh at redhat.com>
+ * include/resgroup.h: Add proto for rg_strerror
+ * include/vf.h: Add proto for vf_invalidate (flushes vf cache)
+ * src/clulib/rg_strings.c: Add rg_strerror function, define
+ human-readable strings for rgmanager error values
+ * src/clulib/vft.c: Add vf_invalidate (separate from vf_shutdown)
+ * src/daemons/groups.c: Fix obvious logic error
+ * src/daemons/main.c: Fix rg_doall() message during loss of quorum.
+ Invalidate local VF cache and kill resource configurations on
+ loss of quorum (#202497). Send RG_EQUORUM back to clustat/clusvcadm
+ so that they report why they can't get information. Don't queue
+ status checks if we've lost quorum. Add command line parameter to
+ disable internal crash watchdog
+ * src/utils/clustat.c, clusvcadm.c: Handle SIGPIPE, and produce
+ useful errors if possible.
+
+2006-08-31 Marek Grác <mgrac at redhat.com>
+ * src/daemons/restree.c: Fix #203720. Do not run backup copies (ends
+ with ~) of resource agents.
+ * src/resources/apache.*, mysql.*: Add Apache & MySQL resource agents
+ * src/resources/utils/*: Add utility scripts for resource agents
+
2006-08-21 Lon Hohberger <lhh at redhat.com>
* src/daemons/main.c: Fix #202500 - simultaneous starts confuse
rgmanager. This happened due to the fact that rgmanager was not
--- cluster/rgmanager/include/resgroup.h 2006/08/18 15:26:22 1.13
+++ cluster/rgmanager/include/resgroup.h 2006/09/01 19:02:21 1.14
@@ -174,6 +174,9 @@
#define RG_YES 1
#define RG_NO 2
+char *rg_strerror(int val);
+
+
/*
* Fail-over domain states
*/
--- cluster/rgmanager/include/vf.h 2006/07/12 14:04:06 1.5
+++ cluster/rgmanager/include/vf.h 2006/09/01 19:02:21 1.6
@@ -170,6 +170,7 @@
* VF Stuff. VF only talks to peers.
*/
int vf_init(int, uint16_t, vf_vote_cb_t, vf_commit_cb_t);
+int vf_invalidate(void);
int vf_shutdown(void);
/*
--- cluster/rgmanager/src/clulib/rg_strings.c 2006/07/11 23:52:41 1.4
+++ cluster/rgmanager/src/clulib/rg_strings.c 2006/09/01 19:02:22 1.5
@@ -16,6 +16,39 @@
Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
MA 02139, USA.
*/
+#include <resgroup.h>
+
+struct { int val; char *str; } rg_error_strings[] = {
+ { RG_EQUORUM, "Operation requires quorum" },
+ { RG_EINVAL, "Invalid operation for resource" },
+ { RG_EDEPEND, "Operation violates dependency rule" },
+ { RG_EAGAIN, "Temporary failure; try again" },
+ { RG_EDEADLCK, "Operation would cause a deadlock" },
+ { RG_ENOSERVICE,"Service does not exist" },
+ { RG_EFORWARD, "Service not mastered locally" },
+ { RG_EABORT, "Aborted; service failed" },
+ { RG_EFAIL, "Failure" },
+ { RG_ESUCCESS, "Success" },
+ { RG_YES, "Yes" },
+ { RG_NO, "No" },
+ { 0, NULL }
+};
+
+
+char *rg_strerror(int err)
+{
+ int x;
+
+ for (x = 0; rg_error_strings[x].str != NULL; x++) {
+ if (rg_error_strings[x].val == err) {
+ return rg_error_strings[x].str;
+ }
+ }
+
+ return "Unknown";
+}
+
+
const char *rg_state_strings[] = {
"stopped",
"starting",
@@ -51,3 +84,4 @@
"user stop",
""
};
+
--- cluster/rgmanager/src/clulib/vft.c 2006/08/07 22:05:01 1.15
+++ cluster/rgmanager/src/clulib/vft.c 2006/09/01 19:02:22 1.16
@@ -935,22 +935,13 @@
}
-/**
- Shut down VF
- */
int
-vf_shutdown(void)
+vf_invalidate(void)
{
key_node_t *c_key;
view_node_t *c_jv;
commit_node_t *c_cn;
- pthread_mutex_lock(&vf_mutex);
- vf_thread_ready = 0;
- pthread_cancel(vf_thread);
- pthread_join(vf_thread, NULL);
- _port = 0;
- _node_id = (int)-1;
pthread_mutex_lock(&key_list_mutex);
while ((c_key = key_list) != NULL) {
@@ -974,6 +965,29 @@
}
pthread_mutex_unlock(&key_list_mutex);
+ return 0;
+}
+
+
+/**
+ Shut down VF
+ */
+int
+vf_shutdown(void)
+{
+ key_node_t *c_key;
+ view_node_t *c_jv;
+ commit_node_t *c_cn;
+
+ pthread_mutex_lock(&vf_mutex);
+ vf_thread_ready = 0;
+ pthread_cancel(vf_thread);
+ pthread_join(vf_thread, NULL);
+ _port = 0;
+ _node_id = (int)-1;
+
+ vf_invalidate();
+
pthread_mutex_unlock(&vf_mutex);
return 0;
--- cluster/rgmanager/src/daemons/groups.c 2006/08/18 15:26:22 1.21
+++ cluster/rgmanager/src/daemons/groups.c 2006/09/01 19:02:22 1.22
@@ -273,7 +273,7 @@
* local start.
*/
if (svcStatus->rs_state == RG_STATE_STARTED &&
- svcStatus->rs_owner == mp->cn_nodeid)
+ svcStatus->rs_owner != mp->cn_nodeid)
return;
if (svcStatus->rs_state == RG_STATE_DISABLED)
--- cluster/rgmanager/src/daemons/main.c 2006/08/21 15:14:09 1.30
+++ cluster/rgmanager/src/daemons/main.c 2006/09/01 19:02:22 1.31
@@ -123,7 +123,13 @@
rg_set_inquorate();
member_list_update(NULL);/* Clear member list */
rg_lockall(L_SYS);
- rg_doall(RG_INIT, 1, "Emergency stop of %s");
+ rg_doall(RG_INIT, 1, "Emergency stop of %s\n");
+#ifndef USE_OPENAIS
+ clulog(LOG_DEBUG, "Invalidating local VF cache\n");
+ vf_invalidate();
+#endif
+ clulog(LOG_DEBUG, "Flushing resource group cache\n");
+ kill_resource_groups();
rg_set_uninitialized();
return -1;
} else if (!rg_quorate()) {
@@ -131,7 +137,7 @@
rg_set_quorate();
rg_unlockall(L_SYS);
rg_unlockall(L_USER);
- clulog(LOG_NOTICE, "Quorum Formed\n");
+ clulog(LOG_NOTICE, "Quorum Regained\n");
}
old_membership = member_list();
@@ -562,7 +568,7 @@
case M_STATECHANGE:
msg_receive(ctx, NULL, 0, 0);
clulog(LOG_DEBUG, "Membership Change Event\n");
- if (rg_quorate() && running) {
+ if (running) {
rg_unlockall(L_SYS);
membership_update();
}
@@ -644,6 +650,7 @@
}
if (!rg_initialized()) {
+ msg_send_simple(newctx, RG_FAIL, RG_EQUORUM, 0);
msg_close(newctx);
msg_free_ctx(newctx);
continue;
@@ -651,6 +658,7 @@
if (!rg_quorate()) {
printf("Dropping connect: NO QUORUM\n");
+ msg_send_simple(newctx, RG_FAIL, RG_EQUORUM, 0);
msg_close(newctx);
msg_free_ctx(newctx);
}
@@ -668,7 +676,7 @@
return 0;
/* No new messages. Drop in the status check requests. */
- if (n == 0) {
+ if (n == 0 && rg_quorate()) {
do_status_checks();
return 0;
}
@@ -805,15 +813,18 @@
main(int argc, char **argv)
{
int rv;
- char foreground = 0;
+ char foreground = 0, wd = 1;
cman_node_t me;
msgctx_t *cluster_ctx;
msgctx_t *local_ctx;
pthread_t th;
cman_handle_t clu = NULL;
- while ((rv = getopt(argc, argv, "fd")) != EOF) {
+ while ((rv = getopt(argc, argv, "wfd")) != EOF) {
switch (rv) {
+ case 'w':
+ wd = 0;
+ break;
case 'd':
debug = 1;
break;
@@ -834,7 +845,7 @@
if (!foreground && (geteuid() == 0)) {
daemon_init(argv[0]);
- if (!debug && !watchdog_init())
+ if (wd && !debug && !watchdog_init())
clulog(LOG_NOTICE, "Failed to start watchdog\n");
}
--- cluster/rgmanager/src/utils/clustat.c 2006/08/07 22:05:01 1.19
+++ cluster/rgmanager/src/utils/clustat.c 2006/09/01 19:02:22 1.20
@@ -10,6 +10,7 @@
#include <termios.h>
#include <ccs.h>
#include <libcman.h>
+#include <signal.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
@@ -46,7 +47,7 @@
rg_state_list(int local_node_id, int fast)
{
msgctx_t ctx;
- int max, n, x;
+ int max = 0, n, x;
rg_state_list_t *rsl = NULL;
generic_msg_hdr *msgp = NULL;
rg_state_msg_t *rsmp = NULL;
@@ -91,6 +92,7 @@
}
n = msg_receive_simple(&ctx, &msgp, tv.tv_sec);
+
if (n < 0) {
if (errno == EAGAIN)
continue;
@@ -109,6 +111,13 @@
swab_generic_msg_hdr(msgp);
+ if (msgp->gh_command == RG_FAIL) {
+ printf("Service states unavailable: %s\n",
+ rg_strerror(msgp->gh_arg1));
+ msg_close(&ctx);
+ return NULL;
+ }
+
if (msgp->gh_command == RG_SUCCESS) {
free(msgp);
break;
@@ -736,6 +745,8 @@
return 1;
}
+ signal(SIGPIPE, SIG_IGN);
+
/* Connect & grab all our info */
ch = cman_init(NULL);
--- cluster/rgmanager/src/utils/clusvcadm.c 2006/08/09 21:48:34 1.11
+++ cluster/rgmanager/src/utils/clusvcadm.c 2006/09/01 19:02:22 1.12
@@ -31,6 +31,7 @@
#include <libcman.h>
#include <resgroup.h>
#include <msgsimple.h>
+#include <signal.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
@@ -187,6 +188,7 @@
msgctx_t ctx;
cman_handle_t ch;
SmMessageSt msg;
+ generic_msg_hdr *h = (generic_msg_hdr *)&msg;
int action = RG_STATUS;
int node_specified = 0;
int me, svctarget = 0;
@@ -274,6 +276,8 @@
svcname = realsvcname;
}
+ signal(SIGPIPE, SIG_IGN);
+
/* No login */
ch = cman_init(NULL);
if (!ch) {
@@ -320,48 +324,23 @@
return 1;
}
- opt = msg_send(&ctx, &msg, sizeof(msg));
-
- if (opt < sizeof(msg)) {
- perror("msg_send");
- fprintf(stderr, "Could not send entire message!\n");
- return 1;
- }
+ msg_send(&ctx, &msg, sizeof(msg));
- if (msg_receive(&ctx, &msg, sizeof(msg), 0) != sizeof(msg)) {
+ /* Reusing opt here */
+ if ((opt = msg_receive(&ctx, &msg, sizeof(msg), 0)) < sizeof(*h)) {
perror("msg_receive");
fprintf(stderr, "Error receiving reply!\n");
return 1;
}
/* Decode */
- swab_SmMessageSt(&msg);
- switch (msg.sm_data.d_ret) {
- case RG_ESUCCESS:
- printf("success\n");
- break;
- case RG_EFAIL:
- printf("failed\n");
- break;
- case RG_EABORT:
- printf("cancelled by resource manager\n");
- break;
- case RG_ENOSERVICE:
- printf("failed: Service does not exist\n");
- break;
- case RG_EDEADLCK:
- printf("failed: Operation would deadlock\n");
- break;
- case RG_EAGAIN:
- printf("failed: Try again (resource groups locked)\n");
- break;
- case RG_EDEPEND:
- printf("failed: Operation would break dependency\n");
- break;
- default:
- printf("failed: unknown reason %d\n", msg.sm_data.d_ret);
- break;
+ if (opt < sizeof(msg)) {
+ swab_generic_msg_hdr(h);
+ printf("%s\n", rg_strerror(h->gh_arg1));
+ return h->gh_arg1;
}
+ swab_SmMessageSt(&msg);
+ printf("%s\n", rg_strerror(msg.sm_data.d_ret));
return msg.sm_data.d_ret;
}
More information about the Cluster-devel mailing list