[Cluster-devel] cluster/cmirror-kernel/src dm-cmirror-client.c ...
jbrassow at sourceware.org
jbrassow at sourceware.org
Wed Jul 11 16:18:04 UTC 2007
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: jbrassow at sourceware.org 2007-07-11 16:18:03
Modified files:
cmirror-kernel/src: dm-cmirror-client.c dm-cmirror-server.c
dm-cmirror-xfr.h
Log message:
Bug 238629: dm-cmirror: Remote recovery conflict...
The kernel changes are now in place (marking/clearing the log
during writes to nosync regions) to allow nominal I/O to
regions that have yet to be recovered.
Also moved around some debugging messages and removed
'is_remote_recovering()'. (is_remote_recovering is obviated by
the new mechanism for handling recovery/write ordering.)
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-client.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.48&r2=1.1.2.49
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-server.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.35&r2=1.1.2.36
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/cmirror-kernel/src/dm-cmirror-xfr.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.5&r2=1.1.2.6
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/05/09 21:44:34 1.1.2.48
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-client.c 2007/07/11 16:18:03 1.1.2.49
@@ -286,8 +286,8 @@
lc->server_id = lr.u.lr_coordinator;
} else {
/* ATTENTION -- what do we do with this ? */
- DMWARN("Failed to receive election results from server: (%s,%d)",
- lc->uuid + (strlen(lc->uuid) - 8), len);
+ DMWARN("Failed to receive election results from server: (%s/%d,%d)",
+ lc->uuid + (strlen(lc->uuid) - 8), lc->uuid_ref, len);
error = len;
}
@@ -601,6 +601,7 @@
INIT_LIST_HEAD(&lc->mark_logged);
spin_lock_init(&lc->state_lock);
+ atomic_set(&lc->suspended, 1);
lc->server_valid = 0;
lc->server_id = 0xDEAD;
@@ -853,15 +854,7 @@
static int cluster_is_remote_recovering(struct dirty_log *log, region_t region)
{
- int rtn;
- struct log_c *lc = (struct log_c *) log->context;
-
- if(atomic_read(&lc->in_sync) == 1){
- return 0;
- }
-
- rtn = consult_server(lc, region, LRT_IS_REMOTE_RECOVERING, NULL);
- return rtn;
+ return 0;
}
static int cluster_in_sync(struct dirty_log *log, region_t region, int block)
@@ -977,57 +970,43 @@
spin_lock(&lc->state_lock);
-
/*
- * It is possible for the following in the mirror code:
- * 0) Mark is already logged for a region
- * 1) rh_dec, sets region state to RH_CLEAN (asynchronous)
- * 2) rh_update_states (DOESN'T FLUSH!!!, bug #235040)
- * 3) do_writes, trys to mark region
- *
- * The following shouldn't have to be handled b/c of the flush
- * 0) Region finishes recovery
- * 1) rh_update_states clears region (DOES FLUSH)
- * 2) do_writes, trys to mark region
- *
- * This can lead to this next case being valid.
+ * An item on the clear_waiting list should have been flushed
+ * before getting this mark_region call.
*/
- list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list) {
+ list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list)
if (region == rs->rs_region) {
- if (!rs->rs_mark_logged) {
- DMERR("Moving region(%Lu/%s) from clear_waiting -> mark_waiting",
- region, lc->uuid + (strlen(lc->uuid) - 8));
- }
- list_del_init(&rs->rs_list);
- list_add(&rs->rs_list,
- (rs->rs_mark_logged) ?
- &lc->mark_logged : &lc->mark_waiting);
- goto out;
+ DMERR("Region being marked found on clear_waiting list (%Lu/%s)",
+ region, lc->uuid + (strlen(lc->uuid) - 8));
+ BUG();
}
- }
+
+ /*
+ * We should never get two marks before a flush, unless the
+ * region is not in-sync. One valid scenario would be:
+ * 0) region not in-sync
+ * 1) rh_inc (mark region)
+ * 2) rh_update_states
+ * 3) rh_dec (dec pending and put on clean_region list)
+ * 4) do_writes -> rh_inc (second mark)
+ */
+ list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list)
+ if (region == rs->rs_region)
+ goto out;
/*
* It is possible for the following in the mirror code:
* 0) Mark is already logged for a region
- * 1) rh_update_states
+ * 1) rh_update_states (were the clear_region happens)
* 2) rh_dec, sets region state to RH_CLEAN (asynchronous)
* 3) do_writes, trys to mark region
*
* This can lead to this next case being valid.
*/
- list_for_each_entry_safe(rs, tmp_rs, &lc->mark_logged, rs_list){
- if (region == rs->rs_region) {
+ list_for_each_entry_safe(rs, tmp_rs, &lc->mark_logged, rs_list)
+ if (region == rs->rs_region)
goto out;
- }
- }
- list_for_each_entry_safe(rs, tmp_rs, &lc->mark_waiting, rs_list){
- if (region == rs->rs_region) {
- DMERR("Mark already waiting (%Lu/%s)",
- region, lc->uuid + (strlen(lc->uuid) - 8));
- BUG();
- }
- }
spin_unlock(&lc->state_lock);
rs_new = mempool_alloc(region_state_pool, GFP_NOFS);
@@ -1074,14 +1053,13 @@
* 6) we recover the region
* 7) clearing the region after recovery causes us to get here
*
- * Once 235040 is cleared, any entries found in this list should
- * cause a bug.
+ * Bug 235040 cleared. This should no longer happen.
*/
list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list){
if(region == rs->rs_region){
- DMERR("%d) Double clear on region ("
- SECTOR_FORMAT ")", __LINE__, region);
- goto out;
+ DMERR("Double clear on region (%Lu/%s)",
+ region, lc->uuid + (strlen(lc->uuid) - 8));
+ BUG();
}
}
@@ -1140,7 +1118,7 @@
list_for_each_entry_safe(rs, tmp_rs, &lc->clear_waiting, rs_list) {
if (*region == rs->rs_region) {
DMERR("WARNING: Bug 235039/235040 detected!");
- DMERR("Work-around in place.");
+ BUG();
}
}
}
@@ -1204,16 +1182,6 @@
switch(status){
case STATUSTYPE_INFO:
- DMDEBUG("LOG INFO:");
- DMDEBUG(" uuid: %s", lc->uuid);
- DMDEBUG(" uuid_ref : %d", lc->uuid_ref);
- DMDEBUG(" ?region_count: %Lu", lc->region_count);
- DMDEBUG(" ?sync_count : %Lu", lc->sync_count);
- DMDEBUG(" ?sync_search : %d", lc->sync_search);
- DMDEBUG(" in_sync : %s", (atomic_read(&lc->in_sync)) ? "YES" : "NO");
- DMDEBUG(" server_id : %u", lc->server_id);
- DMDEBUG(" server_valid: %s",
- ((lc->server_id != 0xDEAD) && lc->server_valid) ? "YES" : "NO");
if(lc->sync != DEFAULTSYNC)
arg_count++;
@@ -1228,6 +1196,42 @@
break;
case STATUSTYPE_TABLE:
+ DMDEBUG("LOG INFO:");
+ DMDEBUG(" uuid: %s", lc->uuid);
+ DMDEBUG(" uuid_ref : %d", lc->uuid_ref);
+ DMDEBUG(" ?region_count: %Lu", lc->region_count);
+ DMDEBUG(" ?sync_count : %Lu", lc->sync_count);
+ DMDEBUG(" ?sync_search : %d", lc->sync_search);
+ DMDEBUG(" in_sync : %s", (atomic_read(&lc->in_sync)) ? "YES" : "NO");
+ DMDEBUG(" suspended : %s", (atomic_read(&lc->suspended)) ? "YES" : "NO");
+ DMDEBUG(" server_id : %u", lc->server_id);
+ DMDEBUG(" server_valid: %s",
+ ((lc->server_id != 0xDEAD) && lc->server_valid) ? "YES" : "NO");
+ {
+ struct log_c *tmp_lc;
+
+ down(&log_list_lock);
+ list_for_each_entry(tmp_lc, &log_list_head, log_list){
+ if(!strncmp(tmp_lc->uuid, lc->uuid, MAX_NAME_LEN) &&
+ (tmp_lc->uuid_ref != lc->uuid_ref)){
+ DMDEBUG("LOG INFO [COPY FOUND]:");
+ DMDEBUG(" uuid: %s", tmp_lc->uuid);
+ DMDEBUG(" uuid_ref : %d", tmp_lc->uuid_ref);
+ DMDEBUG(" ?region_count: %Lu", tmp_lc->region_count);
+ DMDEBUG(" ?sync_count : %Lu", tmp_lc->sync_count);
+ DMDEBUG(" ?sync_search : %d", tmp_lc->sync_search);
+ DMDEBUG(" in_sync : %s",
+ (atomic_read(&tmp_lc->in_sync)) ? "YES" : "NO");
+ DMDEBUG(" suspended : %s",
+ (atomic_read(&tmp_lc->suspended)) ? "YES" : "NO");
+ DMDEBUG(" server_id : %u", tmp_lc->server_id);
+ DMDEBUG(" server_valid: %s",
+ ((tmp_lc->server_id != 0xDEAD) &&
+ tmp_lc->server_valid) ? "YES" : "NO");
+ }
+ }
+ up(&log_list_lock);
+ }
if(lc->sync != DEFAULTSYNC)
arg_count++;
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/04/26 16:54:49 1.1.2.35
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-server.c 2007/07/11 16:18:03 1.1.2.36
@@ -221,54 +221,6 @@
return count;
}
-struct region_user *find_ru_by_region(struct log_c *lc, region_t region);
-static int _core_get_resync_work(struct log_c *lc, region_t *region)
-{
- int sync_search, conflict = 0;
-
- if (lc->recovering_region != (uint64_t)-1) {
- DMDEBUG("Someone is already recovering region %Lu/%s",
- lc->recovering_region, lc->uuid + (strlen(lc->uuid) - 8));
- return 0;
- }
-
- if (lc->sync_search >= lc->region_count) {
- /*
- * FIXME: pvmove is not supported yet, but when it is,
- * an audit of sync_count changes will need to be made
- */
- if ((lc->sync_count < lc->region_count) && !lc->sync_pass) {
- lc->sync_search = 0;
- lc->sync_pass++;
- } else {
- return 0;
- }
- }
- for (sync_search = lc->sync_search;
- sync_search < lc->region_count;
- sync_search = (*region + 1)) {
- *region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
- lc->region_count,
- sync_search);
- if (find_ru_by_region(lc, *region)) {
- conflict = 1;
- DMDEBUG("Recovery blocked by outstanding write on region %Lu/%s",
- *region, lc->uuid + (strlen(lc->uuid) - 8));
- } else {
- break;
- }
- }
- if (!conflict)
- lc->sync_search = *region + 1;
-
- if (*region >= lc->region_count)
- return 0;
-
- lc->recovering_region = *region;
- return 1;
-}
-
-
static int print_zero_bits(unsigned char *str, int offset, int bit_count){
int i,j;
int count=0;
@@ -492,39 +444,6 @@
return 0;
}
-static int server_is_remote_recovering(struct log_c *lc, struct log_request *lr)
-{
- region_t region;
- struct region_user *ru;
-
- /*
- * This gets a bit complicated. I wish we didn't have to use this
- * function, but because the mirror code doesn't mark regions which
- * it writes to that are out-of-sync, we need this function.
- *
- * Problem is, we don't know how long the user is going to take to
- * write to the region after they have called this function. So,
- * we are forced at this point to deny any writes to regions we
- * are recovering or might recover in the future.
- *
- * We can count on the client side to not send us one of these
- * requests if the mirror is known to be in-sync.
- *
- * And yes, it sucks to take this much time off the I/O.
- */
- region = ext2_find_next_zero_bit((unsigned long *) lc->sync_bits,
- lc->region_count, 0);
-
- if (lr->u.lr_region >= region) {
- DMDEBUG("Remote recovery conflict: (%Lu >= %Lu)/%s",
- lr->u.lr_region, region, lc->uuid + (strlen(lc->uuid) - 8));
- lr->u.lr_int_rtn = 1;
- } else
- lr->u.lr_int_rtn = 0;
-
- return 0;
-}
-
static int server_in_sync(struct log_c *lc, struct log_request *lr)
{
if (lr->u.lr_region > lc->region_count) {
@@ -555,7 +474,13 @@
new->ru_region = lr->u.lr_region;
new->ru_rw = RU_WRITE;
- if (!(ru = find_ru_by_region(lc, lr->u.lr_region))) {
+ if (find_ru(lc, who, lr->u.lr_region)) {
+ DMWARN("Attempt to mark a already marked region (%u,"
+ SECTOR_FORMAT
+ "/%s)",
+ who, lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
+ mempool_free(new, region_user_pool);
+ } else if (!(ru = find_ru_by_region(lc, lr->u.lr_region))) {
log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
list_add(&new->ru_list, &lc->region_users);
} else if (ru->ru_rw == RU_RECOVER) {
@@ -572,6 +497,22 @@
DMDEBUG("Mark requester : %u", who);
log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
list_add_tail(&new->ru_list, &lc->region_users);
+ } else {
+ list_add(&new->ru_list, &ru->ru_list);
+ }
+
+ /*
+ if (!(ru = find_ru_by_region(lc, lr->u.lr_region))) {
+ log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
+ list_add(&new->ru_list, &lc->region_users);
+ } else if (ru->ru_rw == RU_RECOVER) {
+ DMDEBUG("Attempt to mark a region " SECTOR_FORMAT
+ "/%s which is being recovered.",
+ lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
+ DMDEBUG("Current recoverer: %u", ru->ru_nodeid);
+ DMDEBUG("Mark requester : %u", who);
+ log_clear_bit(lc, lc->clean_bits, lr->u.lr_region);
+ list_add_tail(&new->ru_list, &lc->region_users);
} else if (!find_ru(lc, who, lr->u.lr_region)) {
list_add(&new->ru_list, &ru->ru_list);
} else {
@@ -581,6 +522,7 @@
who, lr->u.lr_region, lc->uuid + (strlen(lc->uuid) - 8));
mempool_free(new, region_user_pool);
}
+ */
return 0;
}
@@ -611,35 +553,28 @@
{
int r = 0;
int count = 0;
- struct region_user *ru, *ru2;
+ struct region_user *ru, *marker = NULL, *recoverer = NULL;
if (lc->recovering_region != (uint64_t)-1) {
list_for_each_entry(ru, &lc->region_users, ru_list)
- if (ru->ru_region == lc->recovering_region)
- count++;
-
- if (count > 1) {
- list_for_each_entry(ru, &lc->region_users, ru_list)
+ if (ru->ru_region == lc->recovering_region) {
if (ru->ru_rw == RU_RECOVER)
- break;
+ recoverer = ru;
+ else
+ marker = ru;
+ count++;
+ }
- DMDEBUG("Flush includes region which is being recovered (%u/%Lu). Delaying...",
- ru->ru_nodeid, ru->ru_region);
- DMDEBUG("Recovering region: %Lu", lc->recovering_region);
+ if (marker && recoverer) {
+ DMDEBUG("Flush/recovery collision (%Lu/%s): count = %d, marker = %u, recoverer = %u",
+ marker->ru_region, lc->uuid + (strlen(lc->uuid) - 8),
+ count, marker->ru_nodeid, recoverer->ru_nodeid);
+ /*
DMDEBUG(" sync_bit: %s, clean_bit: %s",
log_test_bit(lc->sync_bits, lc->recovering_region) ? "set" : "unset",
log_test_bit(lc->clean_bits, lc->recovering_region) ? "set" : "unset");
+ */
- list_for_each_entry(ru2, &lc->region_users, ru_list)
- if (ru->ru_region == ru2->ru_region)
- DMDEBUG(" %s", (ru2->ru_rw == RU_RECOVER) ? "recover" :
- (ru2->ru_rw == RU_WRITE) ? "writer" : "unknown");
-
- /* FIXME: work-around for bug 235040 */
- DMDEBUG("Revoking resync work");
- lc->recovering_region = (uint64_t)-1;
- list_del(&ru->ru_list);
- mempool_free(ru, region_user_pool);
return -EBUSY;
}
}
@@ -1121,9 +1056,6 @@
case LRT_IS_CLEAN:
error = server_is_clean(lc, lr);
break;
- case LRT_IS_REMOTE_RECOVERING:
- error = server_is_remote_recovering(lc, lr);
- break;
case LRT_IN_SYNC:
error = server_in_sync(lc, lr);
break;
--- cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h 2007/04/24 20:08:57 1.1.2.5
+++ cluster/cmirror-kernel/src/Attic/dm-cmirror-xfr.h 2007/07/11 16:18:03 1.1.2.6
@@ -10,19 +10,18 @@
#define MAX_NAME_LEN 128
#define LRT_IS_CLEAN 1
-#define LRT_IS_REMOTE_RECOVERING 2
-#define LRT_IN_SYNC 3
-#define LRT_MARK_REGION 4
-#define LRT_CLEAR_REGION 5
-#define LRT_FLUSH 6
-#define LRT_GET_RESYNC_WORK 7
-#define LRT_COMPLETE_RESYNC_WORK 8
-#define LRT_GET_SYNC_COUNT 9
-
-#define LRT_ELECTION 10
-#define LRT_SELECTION 11
-#define LRT_MASTER_ASSIGN 12
-#define LRT_MASTER_LEAVING 13
+#define LRT_IN_SYNC 2
+#define LRT_MARK_REGION 3
+#define LRT_CLEAR_REGION 4
+#define LRT_FLUSH 5
+#define LRT_GET_RESYNC_WORK 6
+#define LRT_COMPLETE_RESYNC_WORK 7
+#define LRT_GET_SYNC_COUNT 8
+
+#define LRT_ELECTION 9
+#define LRT_SELECTION 10
+#define LRT_MASTER_ASSIGN 11
+#define LRT_MASTER_LEAVING 12
#define CLUSTER_LOG_PORT 51005
More information about the Cluster-devel
mailing list