[Cluster-devel] cluster/group/gfs_controld lock_dlm.h recover.c

teigland at sourceware.org teigland at sourceware.org
Mon Oct 16 14:44:03 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	teigland at sourceware.org	2006-10-16 14:44:02

Modified files:
	group/gfs_controld: lock_dlm.h recover.c 

Log message:
	A node that was just added would incorrectly conclude that the node
	after it needed to do first mounter recovery.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.19&r2=1.20
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.19&r2=1.20

--- cluster/group/gfs_controld/lock_dlm.h	2006/10/13 20:00:02	1.19
+++ cluster/group/gfs_controld/lock_dlm.h	2006/10/16 14:44:02	1.20
@@ -149,6 +149,7 @@
 	int			first_mount_pending_stop;
 	int			first_mounter;
 	int			first_mounter_done;
+	int			global_first_recover_done;
 	int			emulate_first_mounter;
 	int			wait_first_done;
 	int			low_nodeid;
--- cluster/group/gfs_controld/recover.c	2006/10/13 20:00:02	1.19
+++ cluster/group/gfs_controld/recover.c	2006/10/16 14:44:02	1.20
@@ -822,9 +822,22 @@
 		goto out;
 	}
 
+	/* when we received our journals, no one was flagged with OPT_RECOVER,
+	   which means no first mounter recovery is needed or in progress */
+
+	if (mg->global_first_recover_done) {
+		log_group(mg, "assign_journal: global_firsts_recover_done");
+		goto out;
+	}
+
 	/* no one has done kernel mount successfully and no one is doing first
 	   mounter recovery, the new node gets to try first mounter recovery */
 
+	log_group(mg, "kernel_mount_done %d kernel_mount_error %d "
+		      "first_mounter %d first_mounter_done %d",
+		      mg->kernel_mount_done, mg->kernel_mount_error,
+		      mg->first_mounter, mg->first_mounter_done);
+
 	log_group(mg, "assign_journal: memb %d gets OPT_RECOVER", new->nodeid);
 	new->opts |= MEMB_OPT_RECOVER;
 
@@ -1007,6 +1020,7 @@
 	struct mg_member *memb, *memb2;
 	struct gdlm_header *hd;
 	int *ids, count, i, nodeid, jid, opts;
+	int current_first_recover = 0;
 
 	hd = (struct gdlm_header *)buf;
 
@@ -1048,8 +1062,16 @@
 			else if (opts & MEMB_OPT_SPECT)
 				memb->spectator = 1;
 		}
+
+		if (opts & MEMB_OPT_RECOVER)
+			current_first_recover = 1;
 	}
 
+	/* FIXME: use global_first_recover_done more widely instead of
+	   as a single special case */
+	if (!current_first_recover)
+		mg->global_first_recover_done = 1;
+
 	process_saved_mount_status(mg);
 
 	/* we delay processing any options messages from new mounters




More information about the Cluster-devel mailing list