[Cluster-devel] Cluster Project branch, RHEL4, updated. gfs-kernel_2_6_9_76-87-g3ab0042

jbrassow at sourceware.org jbrassow at sourceware.org
Fri Jun 27 14:36:18 UTC 2008


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Cluster Project".

http://sources.redhat.com/git/gitweb.cgi?p=cluster.git;a=commitdiff;h=3ab00427e3eaf45e99d5f40fed6f3b459faccb14

The branch, RHEL4 has been updated
       via  3ab00427e3eaf45e99d5f40fed6f3b459faccb14 (commit)
      from  a0e6a6d02a4a55b98078dc874204c5555dbf74a4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 3ab00427e3eaf45e99d5f40fed6f3b459faccb14
Author: Jonathan Brassow <jbrassow at redhat.com>
Date:   Fri Jun 27 09:36:00 2008 -0500

    dm-cmirror.ko:  Fix for bug 450939, and other minor cleanups
    
    - If a write-recovery conflict is detected, halt recovery
      rather than calling BUG() (the fix for bug 450939)
    - Minor code style cleanups

-----------------------------------------------------------------------

Summary of changes:
 cmirror-kernel/src/dm-cmirror-client.c |   81 ++++++++++++---------------
 cmirror-kernel/src/dm-cmirror-server.c |   95 ++++++++++++++++++--------------
 2 files changed, 90 insertions(+), 86 deletions(-)

diff --git a/cmirror-kernel/src/dm-cmirror-client.c b/cmirror-kernel/src/dm-cmirror-client.c
index 79794c2..0ca1741 100644
--- a/cmirror-kernel/src/dm-cmirror-client.c
+++ b/cmirror-kernel/src/dm-cmirror-client.c
@@ -311,11 +311,10 @@ static int _consult_server(struct log_c *lc, region_t region,
 	request_count++;
 
 	lr = kmalloc(sizeof(struct log_request), GFP_NOFS);
-	if(!lr){
+	if (!lr) {
 		BUG();
 		error = -ENOMEM;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
 	memset(lr, 0, sizeof(struct log_request));
@@ -344,10 +343,9 @@ static int _consult_server(struct log_c *lc, region_t region,
   
 	saddr_in.sin_family = AF_INET;
 	saddr_in.sin_port = CLUSTER_LOG_PORT;
-	if(!(saddr_in.sin_addr.s_addr = nodeid_to_ipaddr(lc->server_id))){
+	if (!(saddr_in.sin_addr.s_addr = nodeid_to_ipaddr(lc->server_id))) {
 		error = -ENXIO;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 	msg.msg_name = &saddr_in;
 	msg.msg_namelen = sizeof(saddr_in);
@@ -362,7 +360,7 @@ static int _consult_server(struct log_c *lc, region_t region,
 
 	set_fs(fs);
 
-	if(len < sizeof(struct log_request)){
+	if (len < sizeof(struct log_request)) {
 		DMWARN("unable to send log request to server");
 		error = -EBADE;
 		goto fail;
@@ -379,14 +377,13 @@ rerecv:
 			 sizeof(struct log_request), 0, 15);
 	set_fs(fs);
 
-	if(len <= 0){
+	if (len <= 0) {
 		/* ATTENTION -- what do we do with this ? */
 		DMWARN("Error listening for server(%u) response for %s: %d",
 		       lc->server_id, lc->uuid + (strlen(lc->uuid) - 8), len);
 		error = len;
-		*retry = 1;
 		seq++;
-		goto fail;
+		goto retry;
 	}
     
 	if (seq != lr->lr_seq) {
@@ -400,9 +397,8 @@ rerecv:
 		}
 		DMERR(" Seq# mismatch: Must try to resend request, %s", RQ_STRING(type));
 		error = -EBADE;
-		*retry = 1;
 		seq++;
-		goto fail;
+		goto retry;
 	}
 	seq++;
 
@@ -410,8 +406,7 @@ rerecv:
 		DMERR("Got incorrect message type back: %s/%s",
 		      RQ_STRING(type), RQ_STRING(lr->lr_type));
 		error = -EBADE;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
 	if (memcmp(lc->uuid, lr->lr_uuid, MAX_NAME_LEN)) {
@@ -419,54 +414,52 @@ rerecv:
 		DMERR(" Expected UUID: %s", lc->uuid);
 		DMERR(" Recieved UUID: %s", lr->lr_uuid);
 		error = -EBADE;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
-	if(lr->u.lr_int_rtn == -EAGAIN){
-		DMWARN("Server (%u), request type %d, -EAGAIN."
-		       "  Mirror suspended?",
+	if (lr->u.lr_int_rtn == -EAGAIN) {
+		DMWARN("Server (%u), request type %d, -EAGAIN.",
 		       lc->server_id, lr->lr_type);
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
 	if (lr->u.lr_int_rtn == -ENXIO) {
 		DMDEBUG("Server (%u) says it no longer controls this log (%s)",
 			lc->server_id, lc->uuid + (strlen(lc->uuid) - 8));
 		lc->server_id = 0xDEAD;
-		*retry = 1;
-		goto fail;
+		goto retry;
 	}
 
-	if(result)
+	if (result)
 		*result = lr->u.lr_region_rtn;
 
 	error = lr->u.lr_int_rtn;
 	kfree(lr);
 	return error;
- fail:
-	if(*retry){
-		request_retry_count++;
-		if(!(request_retry_count & 0x1F)){
-			DMINFO("Clustered mirror retried requests :: %u of %u (%u%%)",
-			       request_retry_count,
-			       request_count,
-			       dm_div_up(request_retry_count*100, request_count));
-			DMDEBUG("Last request:");
-			DMDEBUG(" - my_id   :: %u", my_id);
-			DMDEBUG(" - server  :: %u", lc->server_id);
-			DMDEBUG(" - log uuid:: %s (%s)",
-			       lc->uuid + (strlen(lc->uuid) - 8),
-			       atomic_read(&lc->suspended) ? "suspended" : "active");
-			DMDEBUG(" - request :: %s", RQ_STRING(type));
-			DMDEBUG(" - error   :: %d", error);
-			DMINFO("Too many retries, attempting to re-establish server connection.");
-			lc->server_id = 0xDEAD;
-		}
+
+ retry:
+	*retry = 1;
+	request_retry_count++;
+	if (!(request_retry_count & 0x1F)) {
+		DMINFO("Clustered mirror retried requests :: %u of %u (%u%%)",
+		       request_retry_count,
+		       request_count,
+		       dm_div_up(request_retry_count*100, request_count));
+		DMDEBUG("Last request:");
+		DMDEBUG(" - my_id   :: %u", my_id);
+		DMDEBUG(" - server  :: %u", lc->server_id);
+		DMDEBUG(" - log uuid:: %s (%s)",
+			lc->uuid + (strlen(lc->uuid) - 8),
+			atomic_read(&lc->suspended) ? "suspended" : "active");
+		DMDEBUG(" - request :: %s", RQ_STRING(type));
+		DMDEBUG(" - error   :: %d", error);
+		DMINFO("Too many retries, attempting to re-establish server connection.");
+		lc->server_id = 0xDEAD;
 	}
 
-	if(lr) kfree(lr);
+fail:
+	if (lr)
+		kfree(lr);
 	return error;
 }
 
diff --git a/cmirror-kernel/src/dm-cmirror-server.c b/cmirror-kernel/src/dm-cmirror-server.c
index a2857f4..25a82b5 100644
--- a/cmirror-kernel/src/dm-cmirror-server.c
+++ b/cmirror-kernel/src/dm-cmirror-server.c
@@ -339,14 +339,17 @@ static int disk_resume(struct log_c *lc)
 	}
 
 	/* set or clear any new bits -- device has grown */
-	if (lc->sync == NOSYNC)
+	if (lc->sync == NOSYNC) {
+		DMDEBUG("  NOSYNC            :: set");
 		for (i = lc->header.nr_regions; i < lc->region_count; i++)
 			/* FIXME: amazingly inefficient */
 			log_set_bit(lc, lc->clean_bits, i);
-	else
+	} else {
+		DMDEBUG("  NOSYNC            :: unset");
 		for (i = lc->header.nr_regions; i < lc->region_count; i++)
 			/* FIXME: amazingly inefficient */
 			log_clear_bit(lc, lc->clean_bits, i);
+	}
 
 	/* clear any old/unused bits -- device has shrunk */
 	for(i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
@@ -358,10 +361,16 @@ static int disk_resume(struct log_c *lc)
 	/* must go through the list twice.  The dead node could have been using **
 	** the same region as other nodes and we want any region that was in    **
 	** use by the dead node to be marked _not_ in-sync..................... */
+	lc->recovering_region = (uint64_t)-1;
 	list_for_each_entry(ru, &lc->region_users, ru_list){
-		if(live_nodes[ru->ru_nodeid/8] & 1 << (ru->ru_nodeid%8)){
+		if (live_nodes[ru->ru_nodeid/8] & 1 << (ru->ru_nodeid%8)) {
 			good_count++;
-			log_set_bit(lc, lc->sync_bits, ru->ru_region);
+			if (ru->ru_rw == RU_WRITE) {
+				log_set_bit(lc, lc->sync_bits, ru->ru_region);
+			} else if (ru->ru_rw == RU_RECOVER) {
+				log_clear_bit(lc, lc->sync_bits, ru->ru_region);
+				lc->recovering_region = ru->ru_region;
+			}
 		}
 	}
 
@@ -387,11 +396,12 @@ static int disk_resume(struct log_c *lc)
 	lc->sync_count = count_bits32(lc->sync_bits, lc->bitset_uint32_count);
 	lc->sync_search = 0;
 
+	DMDEBUG("  in_sync           :: %d", atomic_read(&lc->in_sync));
 	DMDEBUG("  Sync count        :: %Lu", lc->sync_count);
 	DMDEBUG("  Disk Region count :: %Lu", lc->header.nr_regions);
 	DMDEBUG("  Region count      :: %Lu", lc->region_count);
 
-	if(lc->header.nr_regions != lc->region_count){
+	if (lc->header.nr_regions != lc->region_count) {
 		DMDEBUG("  NOTE:  Mapping has changed.");
 	}
 /* Take this out for now.
@@ -566,9 +576,11 @@ static int server_mark_region(struct log_c *lc, struct log_request *lr, uint32_t
 		DMERR("  lc->in_sync = %d", atomic_read(&lc->in_sync));
 		DMERR("  lc->sync_pass = %d", lc->sync_pass);
 		DMERR("  lc->sync_search = %d", lc->sync_search);
-		DMERR("  lc->recovery_halted = %d", lc->recovery_halted);
+		DMERR("  lc->recovery_halted = %d -> 1", lc->recovery_halted);
 
-		BUG();
+		lc->recovery_halted = 1;
+		mempool_free(new, region_user_pool);
+		return -EAGAIN;
 	} else {
 		list_add(&new->ru_list, &ru->ru_list);
 	}
@@ -641,8 +653,9 @@ static int server_flush(struct log_c *lc, uint32_t who)
 
 static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint32_t who)
 {
-	struct region_user *new;
+	struct region_user *new, *test;
 	region_t *region = &(lr->u.lr_region_rtn);
+	region_t sync_search = lc->sync_search;
 
 	lr->u.lr_int_rtn = 0; /* Default to no work */
 
@@ -673,50 +686,48 @@ static int server_get_resync_work(struct log_c *lc, struct log_request *lr, uint
 
 	if ((lc->recovering_next != (uint64_t)-1) &&
 	    (!log_test_bit(lc->sync_bits, lc->recovering_next))) {
-		new = mempool_alloc(region_user_pool, GFP_NOFS);
-		if (!new)
-			return -ENOMEM;
-		*region = lc->recovering_region = lc->recovering_next;
+		*region = lc->recovering_next;
 		DMDEBUG("Preempting normal recovery work for preferred region...");
 	} else {
 		*region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
 						  lc->region_count,
 						  lc->sync_search);
-		if ((new = find_ru_by_region(lc, *region))) {
-			/*
-			 * We disallow writes to regions that have not yet been
-			 * recovered via is_remote_recovering(), so this should
-			 * not happen.
-			 */
-			DMERR("Recovery blocked by outstanding write on region %Lu/%s",
-			      *region, lc->uuid + (strlen(lc->uuid) - 8));
-			DMERR("  region_user { %s, %u, %Lu }",
-			      (new->ru_rw == RU_WRITE) ? "RU_WRITE":
-			      (new->ru_rw == RU_RECOVER) ? "RU_RECOVER":
-			      (new->ru_rw == RU_READ) ? "RU_READ" : "UNKOWN",
-			      new->ru_nodeid, new->ru_region);
-			DMERR("  lc->recovering_region = %Lu", lc->recovering_region);
-			DMERR("  lc->sync_count = %Lu", lc->sync_count);
-			DMERR("  lc->in_sync = %d", atomic_read(&lc->in_sync));
-			DMERR("  lc->sync_pass = %d", lc->sync_pass);
-			DMERR("  lc->sync_search = %d", lc->sync_search);
-			DMERR("  lc->recovery_halted = %d", lc->recovery_halted);
-			BUG();
-			return 0;
-		}
+		sync_search = *region + 1;
+	}
 
-		if (*region >= lc->region_count)
-			return 0;
+	if ((test = find_ru_by_region(lc, *region))) {
+		/*
+		 * We disallow writes to regions that have not yet been
+		 * recovered via is_remote_recovering(), so this should
+		 * not happen.
+		 */
+		DMERR("Recovery blocked by outstanding write on region %Lu/%s",
+		      *region, lc->uuid + (strlen(lc->uuid) - 8));
+		DMERR("  region_user { %s, %u, %Lu }",
+		      (test->ru_rw == RU_WRITE) ? "RU_WRITE":
+		      (test->ru_rw == RU_RECOVER) ? "RU_RECOVER":
+		      (test->ru_rw == RU_READ) ? "RU_READ" : "UNKOWN",
+		      test->ru_nodeid, test->ru_region);
+		DMERR("  lc->recovering_region = %Lu", lc->recovering_region);
+		DMERR("  lc->sync_count = %Lu", lc->sync_count);
+		DMERR("  lc->in_sync = %d", atomic_read(&lc->in_sync));
+		DMERR("  lc->sync_pass = %d", lc->sync_pass);
+		DMERR("  lc->sync_search = %d", lc->sync_search);
+		DMERR("  lc->recovery_halted = %d -> 1", lc->recovery_halted);
 
-		new = mempool_alloc(region_user_pool, GFP_NOFS);
-		if (!new)
-			return -ENOMEM;
+		lc->recovery_halted = 1;
+		return 0;
+	}
 
-		lc->sync_search = *region + 1;
+	if (*region >= lc->region_count)
+		return 0;
 
-		lc->recovering_region = *region;
-	}
+	new = mempool_alloc(region_user_pool, GFP_NOFS);
+	if (!new)
+		return -ENOMEM;
 
+	lc->sync_search = sync_search;
+	lc->recovering_region = *region;
 	lc->recovering_next = (uint64_t)-1;
 	lr->u.lr_int_rtn = 1; /* Assigning work */
 	new->ru_nodeid = who;


hooks/post-receive
--
Cluster Project




More information about the Cluster-devel mailing list