[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] cluster group/gfs_controld/lock_dlm.h group/gf ...



CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	teigland sourceware org	2006-08-14 18:22:53

Modified files:
	group/gfs_controld: lock_dlm.h main.c recover.c 
	gfs2/mount     : util.c 

Log message:
	There's been a relatively unusual problem explained in the comments that
	I'd been putting off fixing for lack of a nice solution.  It turns out this
	problem could crop up more often than hoped, so I have had to fix it.
	
	1) mount.gfs asks gfs_controld to join mount group
	2) gfs_controld does and notifies mount.gfs to go ahead with mount(2)
	3) gfs_controld gets a stop callback for the group due to another node
	mounting
	4) gfs_controld needs to wait for the kernel mount to complete before it
	can stop/suspend the mount group (through sysfs)
	5) mount(2) fails in the kernel for whatever reason
	6) mount.gfs tells gfs_controld the kernel mount failed
	
	gfs_controld is waiting for the kernel mount to complete outside its
	normal poll loop, though, so it won't ever get the message in step 6, and
	will wait forever for the failed mount to actually complete.
	
	Added a pipe between mount.gfs and gfs_controld that mount.gfs just uses
	to send a failed mount message.  gfs_controld watches the pipe for this
	error message while waiting for the kernel mount.  mount.gfs uses unix
	socket ancillary data to send an fd to gfs_controld.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.13&r2=1.14
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/main.c.diff?cvsroot=cluster&r1=1.8&r2=1.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.11&r2=1.12
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs2/mount/util.c.diff?cvsroot=cluster&r1=1.12&r2=1.13

--- cluster/group/gfs_controld/lock_dlm.h	2006/08/09 19:35:26	1.13
+++ cluster/group/gfs_controld/lock_dlm.h	2006/08/14 17:22:53	1.14
@@ -142,6 +142,7 @@
 	int			mount_client_delay;
 	int			delay_send_journals;
 	int			kernel_mount_error;
+	int			mount_error_fd;
 	int			got_kernel_mount;
 	int			first_mounter;
 	int			first_mounter_done;
@@ -255,6 +256,8 @@
 int process_plocks(void);
 void exit_cman(void);
 
+void setup_mount_error_fd(struct mountgroup *mg);
+
 int do_mount(int ci, char *dir, char *type, char *proto, char *table,
 	     char *options);
 int do_unmount(int ci, char *dir, int mnterr);
--- cluster/group/gfs_controld/main.c	2006/08/09 19:35:26	1.8
+++ cluster/group/gfs_controld/main.c	2006/08/14 17:22:53	1.9
@@ -143,6 +143,56 @@
 	return 0;
 }
 
+/* mount.gfs sends us a special fd (as SCM_RIGHTS data on the client socket)
+   that it will write an error message to if mount(2) fails.  We can monitor
+   this fd for an error message while waiting for the kernel mount outside our
+   main poll loop.  On failure we only log; mg->mount_error_fd is not set. */
+
+void setup_mount_error_fd(struct mountgroup *mg)
+{
+	struct msghdr msg;
+	struct cmsghdr *cmsg;
+	struct iovec vec;
+	char tmp[CMSG_SPACE(sizeof(int))];
+	int fd, socket = client[mg->mount_client].fd;
+	char ch;
+	ssize_t n;
+
+	memset(&msg, 0, sizeof(msg));
+	vec.iov_base = &ch;
+	vec.iov_len = 1;
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	msg.msg_control = tmp;
+	msg.msg_controllen = sizeof(tmp);
+
+	n = recvmsg(socket, &msg, 0);
+	if (n < 0) {
+		log_group(mg, "setup_mount_error_fd recvmsg err %ld errno %d",
+			  (long)n, errno);
+		return;
+	}
+	if (n != 1) {
+		log_group(mg, "setup_mount_error_fd recvmsg got %ld", (long)n);
+		return;
+	}
+
+	/* CMSG_FIRSTHDR is NULL when no control data was received */
+	cmsg = CMSG_FIRSTHDR(&msg);
+	if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
+	    cmsg->cmsg_type != SCM_RIGHTS) {
+		log_group(mg, "setup_mount_error_fd expected type %d got %d",
+			  SCM_RIGHTS, cmsg ? cmsg->cmsg_type : -1);
+		return;
+	}
+	/* copy out instead of casting: CMSG_DATA may not be int-aligned */
+	memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
+	mg->mount_error_fd = fd;
+	/* non-blocking so a read() with no error message pending returns */
+	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
+	log_group(mg, "setup_mount_error_fd got fd %d", fd);
+}
+
 static int process_client(int ci)
 {
 	char *cmd, *dir, *type, *proto, *table, *extra;
--- cluster/group/gfs_controld/recover.c	2006/08/10 19:40:50	1.11
+++ cluster/group/gfs_controld/recover.c	2006/08/14 17:22:53	1.12
@@ -1174,6 +1174,8 @@
 
 	list_add(&mg->list, &mounts);
 
+	setup_mount_error_fd(mg);
+
 	group_join(gh, name);
 	return 0;
 
@@ -1565,7 +1567,29 @@
 		if (!rv)
 			break;
 		usleep(100000);
+
+		memset(buf, 0, sizeof(buf));
+
+		/* attempt to solve the problem described below where we
+		   don't get the kernel_mount_error until after the stop and
+		   this loop... this mount_error_fd was sent from mount.gfs and
+		   mount.gfs will write on this fd if there was a mount(2)
+		   error */
+
+		if (!mg->mount_error_fd)
+			continue;
+
+		rv = read(mg->mount_error_fd, buf, sizeof(buf));
+		if (rv > 0) {
+			log_group(mg, "wait_for_kernel_mount: mount error %s",
+				  buf);
+			mg->kernel_mount_error = 1;
+			break;
+		}
 	}
+
+	close(mg->mount_error_fd);
+	mg->mount_error_fd = 0;
 }
 
 /* The processing of new mounters (send/recv options, send/recv journals,
@@ -1615,7 +1639,8 @@
 			   3) kernel mount fails, 4) mount.gfs sends a leave
 			   with mnterr, 5) we don't recv it and don't set
 			   kernel_mount_error because we're stuck in
-			   wait_for_kernel_mount() from do_stop */
+			   wait_for_kernel_mount() from do_stop.  update:
+			   attempt to fix above using mount_error_fd */
 
 			if (!mg->kernel_mount_error)
 				wait_for_kernel_mount(mg);
--- cluster/gfs2/mount/util.c	2006/07/20 20:19:04	1.12
+++ cluster/gfs2/mount/util.c	2006/08/14 17:22:53	1.13
@@ -11,6 +11,7 @@
 extern char *prog_name;
 extern char *fsname;
 extern int verbose;
+static int mount_error_fd;
 
 #define LOCK_DLM_SOCK_PATH "gfs_controld_sock"	/* FIXME: use a header */
 #define MAXLINE 256			/* size of messages with gfs_controld */
@@ -310,6 +311,57 @@
 	return fd;
 }
 
+/* We create a pipe and pass the receiving end to gfs_controld.  If the
+   mount fails, we write an error message to this pipe.  gfs_controld monitors
+   this fd outside its main poll loop because it may need to detect a mount
+   failure while watching for the kernel mount (while waiting for the kernel
+   mount, gfs_controld is _not_ in its main poll loop which is why the normal
+   leave message w/ mnterr we send isn't sufficient.) */
+
+void setup_mount_error_fd(int socket)
+{
+	struct msghdr msg;
+	struct cmsghdr *cmsg;
+	struct iovec vec;
+	char tmp[CMSG_SPACE(sizeof(int))];
+	char ch = '\0';
+	ssize_t n;
+	int rv, fds[2];
+
+	rv = pipe(fds);
+	if (rv < 0) {
+		log_debug("setup_mount_error_fd pipe error %d %d", rv, errno);
+		return;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	/* a single SCM_RIGHTS control message carries the pipe's read end */
+	msg.msg_control = (caddr_t)tmp;
+	msg.msg_controllen = CMSG_LEN(sizeof(int));
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	memcpy(CMSG_DATA(cmsg), &fds[0], sizeof(int));
+	/* the control message is sent along with one dummy data byte */
+	vec.iov_base = &ch;
+	vec.iov_len = 1;
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+
+	n = sendmsg(socket, &msg, 0);
+	if (n < 0) {
+		log_debug("setup_mount_error_fd sendmsg error %ld %d",
+			  (long)n, errno);
+		close(fds[0]);
+		close(fds[1]);
+		return;
+	}
+	/* keep the write end so a failed mount can be reported on it later */
+	mount_error_fd = fds[1];
+	log_debug("setup_mount_error_fd %d %d", fds[0], fds[1]);
+}
+
 int lock_dlm_join(struct mount_options *mo, struct gen_sb *sb)
 {
 	int i, fd, rv;
@@ -363,6 +415,8 @@
 		goto out;
 	}
 
+	setup_mount_error_fd(fd);
+
 	/*
 	 * read response from gfs_controld to our join request:
 	 * it sends back an int as a string, 0 or -EXXX
@@ -481,6 +535,11 @@
 	log_debug("message to gfs_controld: asking to leave mountgroup:");
 	log_debug("lock_dlm_leave: write \"%s\"", buf);
 
+	if (mnterr && mount_error_fd) {
+		rv = write(mount_error_fd, buf, sizeof(buf));
+		log_debug("lock_dlm_leave: write to mount_error_fd %d", rv);
+	}
+
 	rv = write(fd, buf, sizeof(buf));
 	if (rv < 0) {
 		warn("lock_dlm_leave: gfs_controld write error: %d", rv);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]