[Cluster-devel] [PATCH] RHEL fix for bz428751

Benjamin Marzinski bmarzins at redhat.com
Wed Mar 5 06:49:46 UTC 2008


Here is my proposed fix for bz428751. It needs some heavy testing, for
both correctness and performance, before it gets pushed.

-Ben
-------------- next part --------------
diff -urpN gfs2/glock.c gfs2-patched/glock.c
--- gfs2/glock.c	2008-03-04 10:54:02.000000000 -0600
+++ gfs2-patched/glock.c	2008-03-04 10:53:13.000000000 -0600
@@ -42,6 +42,7 @@
 #include "quota.h"
 #include "super.h"
 #include "util.h"
+#include "lm_deadlk.h"
 
 struct gfs2_gl_hash_bucket {
         struct hlist_head hb_list;
@@ -785,7 +786,8 @@ static void xmote_bh(struct gfs2_glock *
 
 	state_change(gl, ret & LM_OUT_ST_MASK);
 
-	if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
+	if ((prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) ||
+	    ret & LM_OUT_DEADLK) {
 		if (glops->go_inval)
 			glops->go_inval(gl, DIO_METADATA);
 	} else if (gl->gl_state == LM_ST_DEFERRED) {
@@ -815,6 +817,15 @@ static void xmote_bh(struct gfs2_glock *
 		}
 	} else {
 		spin_lock(&gl->gl_spin);
+		if (ret & LM_OUT_DEADLK) {
+			gh->gh_error = 0;
+			gl->gl_req_bh = NULL;
+			set_bit(GLF_DEADLK, &gl->gl_flags);
+			spin_unlock(&gl->gl_spin);
+			gfs2_glock_drop_th(gl);
+			gfs2_glock_put(gl);
+			return;
+		}
 		list_del_init(&gh->gh_list);
 		gh->gh_error = -EIO;
 		if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 
@@ -920,6 +931,16 @@ static void drop_bh(struct gfs2_glock *g
 
 	state_change(gl, LM_ST_UNLOCKED);
 
+	if (test_and_clear_bit(GLF_DEADLK, &gl->gl_flags)) {
+		spin_lock(&gl->gl_spin);
+		gh->gh_error = 0;
+		gl->gl_req_bh = NULL;
+		spin_unlock(&gl->gl_spin);
+		gfs2_glock_xmote_th(gl, gl->gl_req_gh);
+		gfs2_glock_put(gl);
+		return;
+	}
+
 	if (glops->go_inval)
 		glops->go_inval(gl, DIO_METADATA);
 
diff -urpN gfs2/incore.h gfs2-patched/incore.h
--- gfs2/incore.h	2008-03-04 10:54:02.000000000 -0600
+++ gfs2-patched/incore.h	2008-03-04 10:53:13.000000000 -0600
@@ -172,6 +172,7 @@ enum {
 	GLF_PENDING_DEMOTE	= 4,
 	GLF_DIRTY		= 5,
 	GLF_DEMOTE_IN_PROGRESS	= 6,
+	GLF_DEADLK		= 7,
 };
 
 struct gfs2_glock {
diff -urpN gfs2/lm.c gfs2-patched/lm.c
--- gfs2/lm.c	2008-03-04 10:54:02.000000000 -0600
+++ gfs2-patched/lm.c	2008-03-04 10:54:28.000000000 -0600
@@ -21,6 +21,7 @@
 #include "lm.h"
 #include "super.h"
 #include "util.h"
+#include "lm_deadlk.h"
 
 /**
  * gfs2_lm_mount - mount a locking protocol
@@ -35,7 +36,7 @@ int gfs2_lm_mount(struct gfs2_sbd *sdp, 
 {
 	char *proto = sdp->sd_proto_name;
 	char *table = sdp->sd_table_name;
-	int flags = 0;
+	int flags = LM_MFLAG_ALLOW_DEADLK;
 	int error;
 
 	if (sdp->sd_args.ar_spectator)
diff -urpN gfs2/lm_deadlk.h gfs2-patched/lm_deadlk.h
--- gfs2/lm_deadlk.h	1969-12-31 18:00:00.000000000 -0600
+++ gfs2-patched/lm_deadlk.h	2008-03-04 10:53:13.000000000 -0600
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#ifndef __LM_DEADLK_DOT_H__
+#define __LM_DEADLK_DOT_H__
+
+/* This is a hack. These flags really belong in lm_interface.h */
+#define LM_OUT_DEADLK    0x00000200
+#define LM_MFLAG_ALLOW_DEADLK 0x00000002
+
+#endif /* __LM_DEADLK_DOT_H__ */
diff -urpN gfs2/locking/dlm/lock.c gfs2-patched/locking/dlm/lock.c
--- gfs2/locking/dlm/lock.c	2008-03-04 10:54:02.000000000 -0600
+++ gfs2-patched/locking/dlm/lock.c	2008-03-04 10:53:13.000000000 -0600
@@ -8,6 +8,7 @@
  */
 
 #include "lock_dlm.h"
+#include "../../lm_deadlk.h"
 
 static char junk_lvb[GDLM_LVB_SIZE];
 
@@ -137,7 +138,8 @@ static inline unsigned int make_flags(st
 
 		/* Conversion deadlock avoidance by DLM */
 
-		if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
+		if (!(lp->ls->fsflags & LM_MFLAG_ALLOW_DEADLK) &&
+		    !test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
 		    !(lkf & DLM_LKF_NOQUEUE) &&
 		    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
 			lkf |= DLM_LKF_CONVDEADLK;
diff -urpN gfs2/locking/dlm/thread.c gfs2-patched/locking/dlm/thread.c
--- gfs2/locking/dlm/thread.c	2008-03-04 10:54:02.000000000 -0600
+++ gfs2-patched/locking/dlm/thread.c	2008-03-04 10:53:13.000000000 -0600
@@ -8,6 +8,7 @@
  */
 
 #include "lock_dlm.h"
+#include "../../lm_deadlk.h"
 
 /* A lock placed on this queue is re-submitted to DLM as soon as the lock_dlm
    thread gets to it. */
@@ -135,7 +136,15 @@ static void process_complete(struct gdlm
 			 lp->lksb.sb_status, lp->lockname.ln_type,
 			 (unsigned long long)lp->lockname.ln_number,
 			 lp->flags);
-		return;
+		if (lp->lksb.sb_status == -EDEADLOCK &&
+		    lp->ls->fsflags & LM_MFLAG_ALLOW_DEADLK) {
+			lp->req = lp->cur;
+			acb.lc_ret |= LM_OUT_DEADLK;
+			if (lp->cur == DLM_LOCK_IV)
+				lp->lksb.sb_lkid = 0;
+			goto out;
+		} else
+			return;
 	}
 
 	/*


More information about the Cluster-devel mailing list