[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[lvm-devel] [PATCH] Improve handling of inconsistent precommit metadata



Hi,

working on RHBZ 651590 (failure to lock LV results in failure to repair
mirror after transient error), I have tracked the problem down to the
following sequence of events:

1) devices fail IO, triggering repair
2) dmeventd starts fixing up the mirror
3) during the downconversion, a new metadata version is written

--> the devices come back online here

4) the mirror device suspend/resume is called to update DM tables
5) during the suspend/resume cycle, *pre*-commit metadata is read;
   however, since the failed devices are now back online, we get back
   inconsistent set of precommit metadata and the whole operation fails

I am attaching a patch that relaxes the check that fails in step 5
above, namely by ignoring inconsistencies coming from PVs that are
marked MISSING.

The patch may fail to apply completely cleanly to current CVS, but it
should be good enough for review. I will check it in myself when
reviewed.

Yours,
   Petr

PS: There are couple more patches from me in the review backlog (most
importantly the vgreduce --removemissing --force refactor). If you have
a while, please do have a look.

Index: lib/format_text/format-text.c
===================================================================
RCS file: /cvs/lvm2/LVM2/lib/format_text/format-text.c,v
retrieving revision 1.178
diff -u -p -r1.178 format-text.c
--- lib/format_text/format-text.c	11 Mar 2011 15:10:17 -0000	1.178
+++ lib/format_text/format-text.c	10 Apr 2011 21:04:23 -0000
@@ -131,6 +131,12 @@ static unsigned _mda_locns_match_raw(str
 	return 0;
 }
 
+static struct device *_mda_get_device_raw(struct metadata_area *mda)
+{
+	struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
+	return mdac->area.dev;
+}
+
 /*
  * For circular region between region_start and region_start + region_size,
  * back up one SECTOR_SIZE from 'region_ptr' and return the value.
@@ -1635,7 +1641,8 @@ static struct metadata_area_ops _metadat
 	.mda_total_sectors = _mda_total_sectors_raw,
 	.mda_in_vg = _mda_in_vg_raw,
 	.pv_analyze_mda = _pv_analyze_mda_raw,
-	.mda_locns_match = _mda_locns_match_raw
+	.mda_locns_match = _mda_locns_match_raw,
+	.mda_get_device = _mda_get_device_raw
 };
 
 static int _text_pv_setup(const struct format_type *fmt,
Index: lib/metadata/metadata.c
===================================================================
RCS file: /cvs/lvm2/LVM2/lib/metadata/metadata.c,v
retrieving revision 1.454
diff -u -p -r1.454 metadata.c
--- lib/metadata/metadata.c	8 Apr 2011 14:40:20 -0000	1.454
+++ lib/metadata/metadata.c	10 Apr 2011 21:04:25 -0000
@@ -2789,6 +2794,7 @@ static struct volume_group *_vg_read(str
 	int inconsistent_pvs = 0;
 	int inconsistent_seqno = 0;
 	int inconsistent_mdas = 0;
+	int inconsistent_mda_count = 0;
 	unsigned use_precommitted = precommitted;
 	unsigned saved_handles_missing_pvs = cmd->handles_missing_pvs;
 	struct dm_list *pvids;
@@ -2865,6 +2871,7 @@ static struct volume_group *_vg_read(str
 		return_NULL;
 
 	/* Ensure contents of all metadata areas match - else do recovery */
+	inconsistent_mda_count=0;
 	dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
 		if ((use_precommitted &&
 		     !(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
@@ -2874,6 +2881,7 @@ static struct volume_group *_vg_read(str
 			free_vg(vg);
 			continue;
 		}
+
 		if (!correct_vg) {
 			correct_vg = vg;
 			continue;
@@ -2892,7 +2900,10 @@ static struct volume_group *_vg_read(str
 			if (vg->seqno > correct_vg->seqno) {
 				free_vg(correct_vg);
 				correct_vg = vg;
-			}
+			} else {
+				mda->status |= MDA_INCONSISTENT;
+				++inconsistent_mda_count;
+			}
 		}
 
 		if (vg != correct_vg)
@@ -3019,6 +3030,7 @@ static struct volume_group *_vg_read(str
 		}
 
 		/* Ensure contents of all metadata areas match - else recover */
+		inconsistent_mda_count=0;
 		dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
 			if ((use_precommitted &&
 			     !(vg = mda->ops->vg_read_precommit(fid, vgname,
@@ -3063,6 +3075,9 @@ static struct volume_group *_vg_read(str
 				if (vg->seqno > correct_vg->seqno) {
 					free_vg(correct_vg);
 					correct_vg = vg;
+				} else {
+					mda->status |= MDA_INCONSISTENT;
+					++inconsistent_mda_count;
 				}
 			}
 
@@ -3089,13 +3104,26 @@ static struct volume_group *_vg_read(str
 		if (use_precommitted) {
 			log_error("Inconsistent pre-commit metadata copies "
 				  "for volume group %s", vgname);
-			/* FIXME: during repair, there is inconsistent flag set because some metadata areas
-			 * are missing (on missing PVs). Code should create list of missing PVs, compare it
-			 * with PV marked missing in metadata and if equals, use it as consistent vg.
-			 * For now, return precommited metadata if remainng seq match here to allow
-			 * preloading table in suspend call.
+
+			/*
+			 * Check whether all of the inconsistent MDAs were on
+			 * MISSING PVs -- in that case, we should be safe.
 			 */
-			if (!inconsistent_seqno) {
+			dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
+				if (mda->status & MDA_INCONSISTENT) {
+					log_error("Checking inconsistent MDA: %s", dev_name(mda_get_device(mda)));
+					dm_list_iterate_items(pvl, &correct_vg->pvs) {
+						if (mda_get_device(mda) == pvl->pv->dev &&
+						    (pvl->pv->status & MISSING_PV))
+							--inconsistent_mda_count;
+					}
+				}
+			}
+
+			if (inconsistent_mda_count < 0)
+				log_error(INTERNAL_ERROR "Too many inconsistent MDAs.");
+
+			if (!inconsistent_mda_count) {
 				*consistent = 0;
 				_free_pv_list(&all_pvs);
 				return correct_vg;
@@ -4238,6 +4266,13 @@ unsigned mda_locns_match(struct metadata
 	return mda1->ops->mda_locns_match(mda1, mda2);
 }
 
+struct device *mda_get_device(struct metadata_area *mda)
+{
+	if (!mda->ops->mda_get_device)
+		return NULL;
+	return mda->ops->mda_get_device(mda);
+}
+
 unsigned mda_is_ignored(struct metadata_area *mda)
 {
 	return (mda->status & MDA_IGNORED);
Index: lib/metadata/metadata.h
===================================================================
RCS file: /cvs/lvm2/LVM2/lib/metadata/metadata.h,v
retrieving revision 1.242
diff -u -p -r1.242 metadata.h
--- lib/metadata/metadata.h	11 Mar 2011 14:50:15 -0000	1.242
+++ lib/metadata/metadata.h	10 Apr 2011 21:04:25 -0000
@@ -175,9 +175,12 @@ struct metadata_area_ops {
 	 */
 	unsigned (*mda_locns_match)(struct metadata_area *mda1,
 				    struct metadata_area *mda2);
+
+	struct device *(*mda_get_device)(struct metadata_area *mda);
 };
 
-#define MDA_IGNORED 0x00000001
+#define MDA_IGNORED      0x00000001
+#define MDA_INCONSISTENT 0x00000002
 
 struct metadata_area {
 	struct dm_list list;
@@ -191,6 +194,7 @@ struct metadata_area *mda_copy(struct dm
 unsigned mda_is_ignored(struct metadata_area *mda);
 void mda_set_ignored(struct metadata_area *mda, unsigned ignored);
 unsigned mda_locns_match(struct metadata_area *mda1, struct metadata_area *mda2);
+struct device *mda_get_device(struct metadata_area *mda);
 
 struct format_instance_ctx {
 	uint32_t type;
Index: test/t-lvconvert-repair-transient-dmeventd.sh
===================================================================
RCS file: /cvs/lvm2/LVM2/test/t-lvconvert-repair-transient-dmeventd.sh,v
retrieving revision 1.1
diff -u -p -r1.1 t-lvconvert-repair-transient-dmeventd.sh
--- test/t-lvconvert-repair-transient-dmeventd.sh	7 Jan 2011 14:56:10 -0000	1.1
+++ test/t-lvconvert-repair-transient-dmeventd.sh	10 Apr 2011 21:04:25 -0000
@@ -19,9 +19,9 @@ lvchange --monitor y $vg/4way
 aux disable_dev $dev2 $dev4
 mkfs.ext3 $DM_DEV_DIR/$vg/4way
 aux enable_dev $dev2 $dev4
-sleep 10
+sleep 3
 lvs -a -o +devices | not grep unknown
 check mirror $vg 4way
 check mirror_legs $vg 4way 2
-lvs -a -o +devices | should not grep mimage_1
-lvs -a -o +devices | should not grep mimage_3
+lvs -a -o +devices | not grep mimage_1
+lvs -a -o +devices | not grep mimage_3
-- 
id' Ash = Ash; id' Dust = Dust; id' _ = undefined

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]