[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] cluster/group/gfs_controld lock_dlm.h plock.c ...



CVSROOT:	/cvs/cluster
Module name:	cluster
Changes by:	teigland sourceware org	2006-08-18 16:33:08

Modified files:
	group/gfs_controld: lock_dlm.h plock.c recover.c 

Log message:
	When the low nodeid fails, the checkpoint needs to be unlinked;
	otherwise, creating the checkpoint (ckpt) will fail down the road
	when another node mounts.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/lock_dlm.h.diff?cvsroot=cluster&r1=1.14&r2=1.15
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/plock.c.diff?cvsroot=cluster&r1=1.17&r2=1.18
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/group/gfs_controld/recover.c.diff?cvsroot=cluster&r1=1.13&r2=1.14

--- cluster/group/gfs_controld/lock_dlm.h	2006/08/14 17:22:53	1.14
+++ cluster/group/gfs_controld/lock_dlm.h	2006/08/18 16:33:08	1.15
@@ -276,5 +276,6 @@
 int dump_plocks(char *name, int fd);
 void process_saved_plocks(struct mountgroup *mg);
 void purge_plocks(struct mountgroup *mg, int nodeid, int unmount);
+int unlink_checkpoint(struct mountgroup *mg);
 
 #endif
--- cluster/group/gfs_controld/plock.c	2006/08/17 19:39:17	1.17
+++ cluster/group/gfs_controld/plock.c	2006/08/18 16:33:08	1.18
@@ -1034,7 +1034,7 @@
 	return 0;
 }
 
-int unlink_checkpoint(struct mountgroup *mg, SaNameT *name)
+int _unlink_checkpoint(struct mountgroup *mg, SaNameT *name)
 {
 	SaCkptCheckpointHandleT h;
 	SaCkptCheckpointDescriptorT s;
@@ -1097,6 +1097,16 @@
 	return ret;
 }
 
+int unlink_checkpoint(struct mountgroup *mg)
+{
+	SaNameT name;
+	int len;
+
+	len = snprintf(name.value, SA_MAX_NAME_LENGTH, "gfsplock.%s", mg->name);
+	name.length = len;
+	return _unlink_checkpoint(mg, &name);
+}
+
 /* Copy all plock state into a checkpoint so new node can retrieve it.  The
    node creating the ckpt for the mounter needs to be the same node that's
    sending the mounter its journals message (i.e. the low nodeid).  The new
@@ -1139,7 +1149,7 @@
 
 	/* unlink an old checkpoint before we create a new one */
 	if (mg->cp_handle) {
-		if (unlink_checkpoint(mg, &name))
+		if (_unlink_checkpoint(mg, &name))
 			return;
 	}
 
@@ -1231,7 +1241,7 @@
 			/* this shouldn't happen in general */
 			log_group(mg, "store_plocks: clearing old ckpt");
 			saCkptCheckpointClose(h);
-			unlink_checkpoint(mg, &name);
+			_unlink_checkpoint(mg, &name);
 			goto open_retry;
 		}
 		if (rv != SA_AIS_OK) {
@@ -1318,6 +1328,9 @@
 			goto out_it;
 		}
 
+		if (!desc.sectionSize)
+			continue;
+
 		iov.sectionId = desc.sectionId;
 		iov.dataBuffer = &section_buf;
 		iov.dataSize = desc.sectionSize;
@@ -1362,7 +1375,7 @@
  out:
 	if (mg->low_nodeid == our_nodeid) {
 		log_group(mg, "retrieve_plocks: unlink ckpt from old low node");
-		unlink_checkpoint(mg, &name);
+		_unlink_checkpoint(mg, &name);
 	} else
 		saCkptCheckpointClose(h);
 }
@@ -1372,8 +1385,7 @@
 	struct posix_lock *po, *po2;
 	struct lock_waiter *w, *w2;
 	struct resource *r, *r2;
-	int len, purged = 0;
-	SaNameT name;
+	int purged = 0;
 
 	list_for_each_entry_safe(r, r2, &mg->resources, list) {
 		list_for_each_entry_safe(po, po2, &r->locks, list) {
@@ -1408,12 +1420,8 @@
 	   we need to unlink it so another node can create a new ckpt for
 	   the next mounter after we leave */
 
-	if (unmount && mg->cp_handle) {
-		len = snprintf(name.value, SA_MAX_NAME_LENGTH,
-			       "gfsplock.%s", mg->name);
-		name.length = len;
-		unlink_checkpoint(mg, &name);
-	}
+	if (unmount && mg->cp_handle)
+		unlink_checkpoint(mg);
 }
 
 int dump_plocks(char *name, int fd)
--- cluster/group/gfs_controld/recover.c	2006/08/15 21:38:00	1.13
+++ cluster/group/gfs_controld/recover.c	2006/08/18 16:33:08	1.14
@@ -917,7 +917,7 @@
  		     int *nodeids, int *pos_out, int *neg_out)
 {
 	struct mg_member *memb, *safe;
-	int i, found, id, pos = 0, neg = 0, low = -1;
+	int i, found, id, pos = 0, neg = 0, low = -1, old_low_finished_nodeid;
 
 	/* move departed nodes from members list to members_gone */
 
@@ -990,6 +990,7 @@
 		if (low == -1 || memb->nodeid < low)
 			low = memb->nodeid;
 	}
+	old_low_finished_nodeid = mg->low_finished_nodeid;
 	mg->low_finished_nodeid = low;
 
 	*pos_out = pos;
@@ -997,6 +998,15 @@
 
 	log_group(mg, "total members %d low_finished_nodeid %d",
 		  mg->memb_count, low);
+
+	/* the low nodeid failed and we're the new low nodeid, we need
+	   to unlink the ckpt that the failed node had open so new ckpts
+	   can be created down the road */
+	if ((old_low_finished_nodeid != low) && (our_nodeid == low)) {
+		log_group(mg, "unlink ckpt for failed low node %d",
+			  old_low_finished_nodeid);
+		unlink_checkpoint(mg);
+	}
 }
 
 struct mountgroup *create_mg(char *name)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]