[Cluster-devel] cluster/group/gfs_controld Makefile cpg.c group.c lock_dlm.h main.c member_cman.c recover.c
teigland at sourceware.org
teigland at sourceware.org
Thu Jun 15 20:41:47 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: teigland at sourceware.org 2006-06-15 20:41:46
Modified files:
group/gfs_controld: Makefile cpg.c group.c lock_dlm.h main.c
member_cman.c recover.c
Log message:
Complete the code to support withdraw; it is not yet tested. This also
switches from using dlm locks for withdraw notifications to simply
using messages. The way the daemon now works allows a much simpler
approach to withdraw than the one we had before, which needed the
dlm locks. Setting up a dlm lockspace for the daemon was also an
annoyingly heavy-weight step, and the dlm kernel state of the daemon
made cleaning up from crashes difficult.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/Makefile.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/cpg.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/group.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/main.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/member_cman.c.diff?cvsroot=cluster&r1=1.1&r2=1.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.2&r2=1.3
--- cluster/group/gfs_controld/Makefile 2006/06/09 20:59:57 1.1
+++ cluster/group/gfs_controld/Makefile 2006/06/15 20:41:46 1.2
@@ -22,8 +22,7 @@
-I../include/ \
-I../lib/ \
-I../../cman/lib/ \
- -I../../cman/daemon/openais/trunk/include/ \
- -I../../dlm/lib/
+ -I../../cman/daemon/openais/trunk/include/
TARGET=gfs_controld
@@ -38,8 +37,6 @@
group.o \
plock.o \
recover.o \
- withdraw.o \
- ../../dlm/lib/libdlm_lt.a \
../../cman/lib/libcman.a \
../../cman/daemon/openais/trunk/lib/libcpg.a \
../lib/libgroup.a
--- cluster/group/gfs_controld/cpg.c 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/cpg.c 2006/06/15 20:41:46 1.3
@@ -24,6 +24,7 @@
void receive_options(struct mountgroup *mg, char *buf, int len, int from);
void receive_remount(struct mountgroup *mg, char *buf, int len, int from);
void receive_plock(struct mountgroup *mg, char *buf, int len, int from);
+void receive_withdraw(struct mountgroup *mg, char *buf, int len, int from);
void receive_recovery_status(struct mountgroup *mg, char *buf, int len,
int from);
void receive_recovery_done(struct mountgroup *mg, char *buf, int len, int from);
@@ -88,6 +89,10 @@
receive_recovery_done(mg, data, len, nodeid);
break;
+ case MSG_WITHDRAW:
+ receive_withdraw(mg, data, len, nodeid);
+ break;
+
default:
log_error("unknown message type %d from %d",
hd->type, hd->nodeid);
--- cluster/group/gfs_controld/group.c 2006/06/09 20:59:57 1.1
+++ cluster/group/gfs_controld/group.c 2006/06/15 20:41:46 1.2
@@ -147,8 +147,6 @@
log_debug("groupd callback: terminate %s", cb_name);
mg->last_callback = DO_TERMINATE;
do_terminate(mg);
- list_del(&mg->list);
- free(mg);
break;
case DO_SETID:
--- cluster/group/gfs_controld/lock_dlm.h 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/lock_dlm.h 2006/06/15 20:41:46 1.3
@@ -36,7 +36,6 @@
#include "list.h"
#include "linux_endian.h"
#include "libgroup.h"
-#include "libdlm.h"
#define MAXARGS 64
#define MAXLINE 256
@@ -184,11 +183,10 @@
int wait_gfs_recover_done;
int gone_event;
int gone_type;
- int mount_finished;
+ int finished;
int local_recovery_status;
int recovery_status;
- int withdraw;
- struct dlm_lksb wd_lksb;
+ int withdrawing;
int needs_journals;
};
@@ -197,6 +195,7 @@
MSG_OPTIONS,
MSG_REMOUNT,
MSG_PLOCK,
+ MSG_WITHDRAW,
MSG_RECOVERY_STATUS,
MSG_RECOVERY_DONE,
};
@@ -223,12 +222,9 @@
int process_cpg(void);
int setup_groupd(void);
int process_groupd(void);
-int setup_libdlm(void);
-int process_libdlm(void);
int setup_plocks(void);
int process_plocks(void);
void exit_cman(void);
-void exit_libdlm(void);
int do_mount(int ci, char *dir, char *type, char *proto, char *table,
char *options);
--- cluster/group/gfs_controld/main.c 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/main.c 2006/06/15 20:41:46 1.3
@@ -29,10 +29,10 @@
static int listen_fd;
static int groupd_fd;
static int uevent_fd;
-static int libdlm_fd;
static int plocks_fd;
extern struct list_head mounts;
+extern struct list_head withdrawn_mounts;
int no_withdraw;
static void make_args(char *buf, int *argc, char **argv, char sep)
@@ -266,14 +266,6 @@
goto out;
client_add(uevent_fd, &maxi);
- if (no_withdraw)
- goto next;
-
- rv = libdlm_fd = setup_libdlm();
- if (rv < 0)
- goto next;
- client_add(libdlm_fd, &maxi);
- next:
rv = plocks_fd = setup_plocks();
if (rv < 0)
goto out;
@@ -309,9 +301,6 @@
process_cpg();
else if (pollfd[i].fd == uevent_fd)
process_uevent();
- else if (!no_withdraw &&
- pollfd[i].fd == libdlm_fd)
- process_libdlm();
else if (pollfd[i].fd == plocks_fd)
process_plocks();
else
@@ -456,6 +445,7 @@
{
prog_name = argv[0];
INIT_LIST_HEAD(&mounts);
+ INIT_LIST_HEAD(&withdrawn_mounts);
client_init();
decode_arguments(argc, argv);
--- cluster/group/gfs_controld/member_cman.c 2006/06/09 20:59:57 1.1
+++ cluster/group/gfs_controld/member_cman.c 2006/06/15 20:41:46 1.2
@@ -43,10 +43,7 @@
void exit_cman(void)
{
- /* do we want to try to forcibly clean some stuff up
- in the kernel here? */
log_error("cluster is down, exiting");
- exit_libdlm();
exit(1);
}
--- cluster/group/gfs_controld/recover.c 2006/06/15 15:27:43 1.2
+++ cluster/group/gfs_controld/recover.c 2006/06/15 20:41:46 1.3
@@ -26,14 +26,12 @@
extern char *clustername;
extern int our_nodeid;
extern group_handle_t gh;
+extern int no_withdraw;
struct list_head mounts;
+struct list_head withdrawn_mounts;
void send_journals(struct mountgroup *mg, int nodeid);
-int hold_withdraw_locks(struct mountgroup *mg);
-void release_withdraw_lock(struct mountgroup *mg, struct mg_member *memb);
-void release_withdraw_locks(struct mountgroup *mg);
-
void start_participant_init_2(struct mountgroup *mg);
void start_spectator_init_2(struct mountgroup *mg);
void start_spectator_2(struct mountgroup *mg);
@@ -146,6 +144,46 @@
mg->remount_client = 0;
}
+void send_withdraw(struct mountgroup *mg)
+{
+ struct gdlm_header *hd;
+ int len;
+ char *buf;
+
+ len = sizeof(struct gdlm_header);
+
+ buf = malloc(len);
+ if (!buf)
+ return;
+ memset(buf, 0, len);
+
+ hd = (struct gdlm_header *)buf;
+ hd->type = MSG_WITHDRAW;
+ hd->nodeid = our_nodeid;
+ hd->to_nodeid = 0;
+
+ log_group(mg, "send_withdraw");
+
+ send_group_message(mg, len, buf);
+
+ free(buf);
+}
+
+void receive_withdraw(struct mountgroup *mg, char *buf, int len, int from)
+{
+ struct mg_member *memb;
+
+ memb = find_memb_nodeid(mg, from);
+ if (!memb) {
+ log_group(mg, "receive_withdraw no member %d", from);
+ return;
+ }
+ memb->withdrawing = 1;
+
+ if (from == our_nodeid)
+ group_leave(gh, mg->name);
+}
+
#define SEND_RS_INTS 3
void send_recovery_status(struct mountgroup *mg)
@@ -267,6 +305,8 @@
return "MSG_RECOVERY_STATUS";
case MSG_RECOVERY_DONE:
return "MSG_RECOVERY_DONE";
+ case MSG_WITHDRAW:
+ return "MSG_WITHDRAW";
}
return "unknown";
}
@@ -911,7 +951,7 @@
- no journal cb if we've already done a journl cb */
if ((memb->gone_type == GROUP_NODE_FAILED ||
- memb->withdraw) &&
+ memb->withdrawing) &&
memb->jid != JID_INIT &&
!memb->spectator &&
!memb->wait_gfs_recover_done) {
@@ -925,7 +965,7 @@
memb->nodeid, memb->tell_gfs_to_recover,
mg->spectator,
mg->start_type,
- memb->withdraw,
+ memb->withdrawing,
memb->jid,
memb->spectator,
memb->wait_gfs_recover_done);
@@ -944,7 +984,7 @@
}
list_for_each_entry(memb, &mg->members, list) {
- if (!memb->mount_finished)
+ if (!memb->finished)
continue;
if (low == -1 || memb->nodeid < low)
low = memb->nodeid;
@@ -1186,7 +1226,12 @@
struct mg_member *memb;
int rv;
- if (mg->spectator || mg->readonly || mg->our_jid == JID_INIT) {
+ /* we can't do journal recovery if: we're a spectator or readonly
+ mount, gfs is currently withdrawing, or we're mounting and haven't
+ received a journals message yet */
+
+ if (mg->spectator || mg->readonly || mg->withdraw ||
+ mg->our_jid == JID_INIT) {
list_for_each_entry(memb, &mg->members_gone, list) {
if (!memb->tell_gfs_to_recover)
continue;
@@ -1406,11 +1451,25 @@
{
struct mountgroup *mg;
+ list_for_each_entry(mg, &withdrawn_mounts, list) {
+ if (!strcmp(mg->dir, dir)) {
+ log_group(mg, "unmount withdrawn fs");
+ list_del(&mg->list);
+ free(mg);
+ return 0;
+ }
+ }
+
mg = find_mg_dir(dir);
if (!mg) {
log_error("do_unmount: unknown mount dir %s", dir);
return -1;
}
+
+ if (mg->withdraw) {
+ log_error("do_unmount: fs on %s is withdrawing", dir);
+ return -1;
+ }
/* Check to see if we're waiting for a kernel recovery_done to do a
start_done(). If so, call the start_done() here because we won't be
@@ -1567,9 +1626,6 @@
from members_gone if their journals have been recovered */
list_for_each_entry_safe(memb, safe, &mg->members_gone, list) {
- if (!memb->withdraw)
- release_withdraw_lock(mg, memb);
-
if (!memb->recovery_status) {
list_del(&memb->list);
free(memb);
@@ -1588,18 +1644,8 @@
}
}
- list_for_each_entry(memb, &mg->members, list) {
- memb->mount_finished = 1;
-
- /* If there are still withdrawing nodes that haven't left
- the group, we need to keep lock requests blocked */
-
- if (memb->withdraw) {
- log_group(mg, "finish: leave locks blocked for "
- "withdrawing node %d", memb->nodeid);
- leave_blocked = 1;
- }
- }
+ list_for_each_entry(memb, &mg->members, list)
+ memb->finished = 1;
if (mg->needs_recovery) {
log_group(mg, "finish: leave locks blocked for needs_recovery");
@@ -1674,7 +1720,6 @@
mg->first_mounter_done = 0;
mg->got_our_options = 1;
mg->got_our_journals = 1;
- hold_withdraw_locks(mg);
}
start_done(mg);
notify_mount_client(mg);
@@ -1688,7 +1733,6 @@
log_group(mg, "start_participant_init");
set_our_memb_options(mg);
send_options(mg);
- hold_withdraw_locks(mg);
start_done(mg);
mg->start2_fn = start_participant_init_2;
}
@@ -1732,8 +1776,6 @@
log_group(mg, "start_participant pos=%d neg=%d", pos, neg);
if (pos) {
- hold_withdraw_locks(mg);
-
/* If we're the first mounter, and we're adding a second
node here, but haven't gotten first_done (others_may_mount)
from gfs yet, then don't do the start_done() to complete
@@ -1765,7 +1807,6 @@
log_group(mg, "start_spectator_init");
set_our_memb_options(mg);
send_options(mg);
- hold_withdraw_locks(mg);
start_done(mg);
mg->start2_fn = start_spectator_init_2;
}
@@ -1795,7 +1836,6 @@
log_group(mg, "start_spectator pos=%d neg=%d", pos, neg);
if (pos) {
- hold_withdraw_locks(mg);
start_done(mg);
process_saved_options(mg);
} else if (neg) {
@@ -1937,12 +1977,57 @@
that needs journal recovery, we have a problem because we wait to
call group_start_done() until gfs in the kernel to signal that
the journal recovery is done. If we've unmounted gfs isn't there
- any more to give us this signal and we'll never call start_done. */
+ any more to give us this signal and we'll never call start_done.
+
+ update: we should be dealing with all these issues correctly now. */
int do_terminate(struct mountgroup *mg)
{
- log_group(mg, "termination of our unmount leave");
- release_withdraw_locks(mg);
+ /* FIXME: all group members aren't guaranteed to be stopped for
+ our leave yet when we get terminate. We need that guarantee
+ before we tell a withdrawing gfs to drop locks. */
+
+ if (mg->withdraw) {
+ log_group(mg, "termination of our withdraw leave");
+ set_sysfs(mg, "withdraw", 1);
+ list_move(&mg->list, &withdrawn_mounts);
+ } else {
+ log_group(mg, "termination of our unmount leave");
+ list_del(&mg->list);
+ free(mg);
+ }
+
+ return 0;
+}
+
+/* The basic rule of withdraw is that we don't want to tell the kernel to drop
+ all locks until we know gfs has been stopped/blocked on all nodes. They'll
+ be stopped for our leave, we just need to know when they've all arrived
+ there.
+
+ A withdrawing node is very much like a readonly node, differences are
+ that others recover its journal when they remove it from the group,
+ and when it's been removed from the group (gets terminate for its leave),
+ it tells the locally withdrawing gfs to clear out locks. */
+
+int do_withdraw(char *table)
+{
+ struct mountgroup *mg;
+ char *name = strstr(table, ":") + 1;
+
+ if (no_withdraw) {
+ log_error("withdraw feature not enabled");
+ return 0;
+ }
+
+ mg = find_mg(name);
+ if (!mg) {
+ log_error("do_withdraw no mountgroup %s", name);
+ return -1;
+ }
+
+ mg->withdraw = 1;
+ send_withdraw(mg);
return 0;
}
More information about the Cluster-devel
mailing list