[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] Re: [NFS] [PATCH 2/4 Revised] NLM failover - nlm_set_igrace



Revised patch based on 2.6.21-rc4.

This change enables a per-NFS-export lockd grace period. A linked list, fo_fsid_list, holds the fsid entries. The list is expected to be short, and entries expire within a maximum of 50 seconds. The code follows the existing NLM grace period handling logic and is triggered by echoing the NFS export filesystem id (fsid) into the nfsd procfs entry, for example:

shell> echo 1234 > /proc/fs/nfsd/nlm_set_grace_for_fsid

-- Wendy

 Signed-off-by: S. Wendy Cheng <wcheng redhat com>
 Signed-off-by: Lon Hohberger  <lhh redhat com>

 fs/lockd/svc.c              |    8 +-
 fs/lockd/svc4proc.c         |   15 ++-
 fs/lockd/svcproc.c          |   12 +--
 fs/lockd/svcsubs.c          |  169 ++++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfsctl.c            |   27 +++++++
 include/linux/lockd/bind.h  |    2
 include/linux/lockd/lockd.h |   30 +++++++
 7 files changed, 248 insertions(+), 15 deletions(-)

--- linux-nlm-1/include/linux/lockd/lockd.h	2007-03-26 10:29:44.000000000 -0400
+++ linux/include/linux/lockd/lockd.h	2007-03-26 17:37:39.000000000 -0400
@@ -114,6 +114,16 @@ struct nlm_file {
 	struct mutex		f_mutex;	/* avoid concurrent access */
 };
 
+#define NLM_FO_MAX_FSID_GP	127	/* threshold for refusing new entries */
+
+/* One per-fsid grace period entry on the NLM lock failover list */
+struct fo_fsid {
+	struct list_head	g_list;		/* link in fo_fsid_list */
+	unsigned long		g_expire;	/* jiffies value at which this
+						 * grace period expires */
+	int			g_fsid;		/* exported fsid */
+};
+
 /*
  * This is a server block (i.e. a lock requested by some client which
  * couldn't be granted because of a conflicting lock).
@@ -193,6 +203,8 @@ void		  nlmsvc_traverse_blocks(struct nl
 					nlm_host_match_fn_t match);
 void		  nlmsvc_grant_reply(struct nlm_cookie *, __be32);
 
+unsigned long set_grace_period(void); /* required by svcsubs.c and svc.c
+					 * to support nlm failover */
 /*
  * File handling for the server personality
  */
@@ -204,6 +216,7 @@ void		  nlmsvc_free_host_resources(struc
 void		  nlmsvc_invalidate_all(void);
 int		  nlmsvc_same_fsid(struct nlm_host *, struct nlm_host *);
 int		  nlmsvc_fo_unlock(int *fsid);
+int		  nlmsvc_fo_check(struct nfs_fh *fh);
 
 static __inline__ struct inode *
 nlmsvc_file_inode(struct nlm_file *file)
@@ -234,6 +247,23 @@ nlm_compare_locks(const struct file_lock
 	     &&(fl1->fl_type  == fl2->fl_type || fl2->fl_type == F_UNLCK);
 }
 
+extern struct list_head fo_fsid_list;
+
+/* Is this request inside a grace period?  Returns 1 (yes) or 0 (no). */
+static inline int
+nlmsvc_check_grace_period(struct nlm_args *argp)
+{
+	/* the system wide grace period covers every export */
+	if (nlmsvc_grace_period)
+		return 1;
+
+	/* common case: no per exported fsid grace period is pending */
+	if (likely(list_empty(&fo_fsid_list)))
+		return 0;
+
+	return nlmsvc_fo_check(&argp->lock.fh);
+}
+
 extern struct lock_manager_operations nlmsvc_lock_operations;
 
 #endif /* __KERNEL__ */
--- linux-nlm-1/include/linux/lockd/bind.h	2007-03-26 10:29:44.000000000 -0400
+++ linux/include/linux/lockd/bind.h	2007-03-26 11:11:14.000000000 -0400
@@ -38,5 +38,7 @@ extern int	nlmclnt_proc(struct inode *, 
 extern int	lockd_up(int proto);
 extern void	lockd_down(void);
 extern int	nlmsvc_fo_unlock(int *fsid);
+extern int	nlmsvc_fo_setgrace(int fsid);
+extern void	nlmsvc_fo_reset_servs(void);
 
 #endif /* LINUX_LOCKD_BIND_H */
--- linux-nlm-1/fs/nfsd/nfsctl.c	2007-03-26 10:23:36.000000000 -0400
+++ linux/fs/nfsd/nfsctl.c	2007-03-26 15:40:12.000000000 -0400
@@ -55,6 +55,7 @@ enum {
 	NFSD_List,
 	NFSD_Fh,
 	NFSD_NlmUnlock,
+	NFSD_NlmGrace,
 	NFSD_Threads,
 	NFSD_Pool_Threads,
 	NFSD_Versions,
@@ -91,6 +92,8 @@ static ssize_t write_maxblksize(struct f
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
 #endif
+static ssize_t write_fo_unlock(struct file *file, char *buf, size_t size);
+static ssize_t write_fo_grace(struct file *file, char *buf, size_t size);
 
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
 	[NFSD_Svc] = write_svc,
@@ -102,6 +105,7 @@ static ssize_t (*write_op[])(struct file
 	[NFSD_Getfs] = write_getfs,
 	[NFSD_Fh] = write_filehandle,
 	[NFSD_NlmUnlock] = write_fo_unlock,
+	[NFSD_NlmGrace] = write_fo_grace,
 	[NFSD_Threads] = write_threads,
 	[NFSD_Pool_Threads] = write_pool_threads,
 	[NFSD_Versions] = write_versions,
@@ -372,6 +376,28 @@ static ssize_t write_fo_unlock(struct fi
 	return strlen(buf);
 }
 
+static ssize_t write_fo_grace(struct file *file, char *buf, size_t size)
+{
+	char *cursor = buf;
+	int fsid;
+	int err;
+
+	/* size is unsigned, so an empty write is the only bad length */
+	if (size == 0)
+		return -EINVAL;
+
+	/* convert the string into an fsid, then call nlm to set the
+	 * grace period for it */
+	err = get_int(&cursor, &fsid);
+	if (!err)
+		err = nlmsvc_fo_setgrace(fsid);
+	if (err)
+		return err;
+
+	/* done: report what was set and return the message length */
+	return sprintf(buf, "nlm set per fsid=%d grace period\n", fsid);
+}
+
 extern int nfsd_nrthreads(void);
 
 static ssize_t write_threads(struct file *file, char *buf, size_t size)
@@ -676,6 +702,7 @@ static int nfsd_fill_super(struct super_
 		[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
 		[NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_NlmUnlock] = {"nlm_unlock", &transaction_ops, S_IWUSR|S_IRUSR},
+		[NFSD_NlmGrace] = {"nlm_set_grace_for_fsid", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
 		[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
--- linux-nlm-1/fs/lockd/svcsubs.c	2007-03-26 10:23:22.000000000 -0400
+++ linux/fs/lockd/svcsubs.c	2007-03-26 16:01:54.000000000 -0400
@@ -31,6 +31,13 @@
 static struct hlist_head	nlm_files[FILE_NRHASH];
 static DEFINE_MUTEX(nlm_file_mutex);
 
+/*
+ * Lock failover state: per-fsid grace period entries and their count.
+ */
+static DEFINE_SPINLOCK(nlm_fo_lock);	/* guards fo_fsid_list updates */
+static int fo_fsid_cnt;			/* entries on fo_fsid_list */
+LIST_HEAD(fo_fsid_list);
+
 #ifdef NFSD_DEBUG
 static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 {
@@ -430,3 +437,165 @@ nlmsvc_fo_unlock(int *fsid)
 }
 
 
+EXPORT_SYMBOL(nlmsvc_fo_setgrace);
+
+/*
+ * Start (or restart) a grace period for one exported fsid.
+ *
+ * Called from nfsd when an fsid is written to nlm_set_grace_for_fsid;
+ * may sleep (GFP_KERNEL allocation).
+ *
+ * The new entry is linked at the head of the global fo_fsid_list.
+ * While the list is walked under nlm_fo_lock, expired entries are
+ * purged and any older entry for the same fsid is replaced, so a given
+ * fsid is on the list at most once.  If the old entry would have
+ * expired later than the new one (e.g. the grace period setting was
+ * lowered in between), the later expiry is kept.
+ *
+ * As an admin interface, the list is expected to be short and
+ * entries are purged (expired) quickly.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, or
+ * -ENOSPC if the list already holds NLM_FO_MAX_FSID_GP live entries.
+ */
+int
+nlmsvc_fo_setgrace(int fsid)
+{
+	struct fo_fsid *per_fsid, *entry, *next;
+	unsigned long expire;
+	int rc = 0;
+
+	/* allocate before taking the spinlock: GFP_KERNEL may sleep */
+	per_fsid = kmalloc(sizeof(*per_fsid), GFP_KERNEL);
+	if (per_fsid == NULL) {
+		printk("lockd: nlmsvc_fo_setgrace kmalloc fails\n");
+		return -ENOMEM;
+	}
+
+	/* debug printk */
+	dprintk("lockd: nlmsvc_fo_setgrace fsid=%d jiffies=%lu\n",
+		fsid, jiffies);
+
+	/* fill in info */
+	per_fsid->g_expire = set_grace_period();
+	per_fsid->g_fsid   = fsid;
+
+	spin_lock(&nlm_fo_lock);
+
+	/* one pass: drop expired entries and any previous entry for fsid */
+	list_for_each_entry_safe(entry, next, &fo_fsid_list, g_list) {
+		if (entry->g_fsid == fsid) {
+			/* replace the old entry; never shorten its grace */
+			if (time_after(entry->g_expire, per_fsid->g_expire))
+				per_fsid->g_expire = entry->g_expire;
+		} else if (time_before(entry->g_expire, jiffies)) {
+			/* garbage collection */
+			dprintk("nlmsvc fo_fsid = %d expires\n", entry->g_fsid);
+		} else {
+			continue;
+		}
+		list_del(&entry->g_list);
+		fo_fsid_cnt--;
+		kfree(entry);
+	}
+
+	/* refuse only after stale entries and duplicates are gone */
+	if (fo_fsid_cnt >= NLM_FO_MAX_FSID_GP) {
+		rc = -ENOSPC;
+	} else {
+		list_add(&per_fsid->g_list, &fo_fsid_list);
+		fo_fsid_cnt++;
+	}
+
+	/* once unlocked, a listed entry may be freed by others: snapshot */
+	expire = per_fsid->g_expire;
+
+	spin_unlock(&nlm_fo_lock);
+
+	/* debug */
+	if (rc) {
+		kfree(per_fsid);
+		printk("lockd: fo_setgrace max cnt reached fsid=%d not added\n",
+			fsid);
+		dprintk("nlmsvc_fo_setgrace: adding fsid=%d fails\n", fsid);
+	} else {
+		dprintk("nlmsvc fo setgrace: fsid=%d, jiffies=%lu, expire=%lu\n",
+			fsid, jiffies, expire);
+	}
+
+	return rc;
+}
+
+/*
+ * Reset global fo_fsid_list list: drop every per-fsid grace period
+ * entry and zero the entry count.
+ * lockd() calls this whenever it (re)starts the system-wide grace
+ * period and on its shutdown path.
+ */
+void
+nlmsvc_fo_reset_servs(void)
+{
+	struct fo_fsid *entry, *next;
+
+	spin_lock(&nlm_fo_lock);
+
+	/* nothing to do */
+	if (list_empty(&fo_fsid_list))
+		goto out;
+
+	dprintk("lockd: nlmsvc_fo_reset fo_fsid_list\n");
+
+	/* purge the entries */
+	list_for_each_entry_safe(entry, next, &fo_fsid_list, g_list) {
+		list_del(&entry->g_list);
+		kfree(entry);
+	}
+	fo_fsid_cnt = 0;
+
+out:
+	spin_unlock(&nlm_fo_lock);
+}
+
+/*
+ * Check whether the fsid of this file handle is in the failover
+ * list, fo_fsid_list, i.e. still inside its own grace period.
+ * Entries found to have expired are purged on the way.
+ *
+ * Called from nlmsvc_check_grace_period() for incoming NLM requests.
+ *	return TRUE (1) if fsid is in grace, FALSE (0) otherwise.
+ */
+int
+nlmsvc_fo_check(struct nfs_fh *fh)
+{
+	struct fo_fsid *entry, *next;
+	int in_grace = 0, this_fsid;
+
+	/* see if this fh has fsid */
+	if (!get_fsid(fh, &this_fsid))
+		return 0;
+
+	spin_lock(&nlm_fo_lock);
+
+	/*
+	 * Walk the whole list even after a match so that every expired
+	 * entry is reclaimed.  Messages use dprintk: this runs for each
+	 * NLM request while the list is non-empty, so printk would flood.
+	 */
+	list_for_each_entry_safe(entry, next, &fo_fsid_list, g_list) {
+		if (time_before(entry->g_expire, jiffies)) {
+			dprintk("lockd: fsid=%d grace period expires\n",
+				entry->g_fsid);
+			list_del(&entry->g_list);
+			fo_fsid_cnt--;
+			kfree(entry);
+		} else if (entry->g_fsid == this_fsid) {
+			dprintk("lockd: fsid=%d in grace period\n",
+				entry->g_fsid);
+			in_grace = 1;
+		}
+	}
+
+	spin_unlock(&nlm_fo_lock);
+	return in_grace;
+}
+
--- linux-nlm-1/fs/lockd/svc4proc.c	2007-03-26 10:23:22.000000000 -0400
+++ linux/fs/lockd/svc4proc.c	2007-03-26 17:34:27.000000000 -0400
@@ -18,9 +18,10 @@
 #include <linux/lockd/share.h>
 #include <linux/lockd/sm_inter.h>
 
-
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
+extern struct list_head fo_fsid_list;
+
 /*
  * Obtain client and file from arguments
  */
@@ -89,7 +90,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp
 	resp->cookie = argp->cookie;
 
 	/* Don't accept test requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -119,7 +120,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if (nlmsvc_check_grace_period(argp) && !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -162,7 +163,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqs
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -195,7 +196,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqs
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -332,7 +333,7 @@ nlm4svc_proc_share(struct svc_rqst *rqst
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if (nlmsvc_check_grace_period(argp) && !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -365,7 +366,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rq
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
--- linux-nlm-1/fs/lockd/svcproc.c	2007-03-26 10:23:23.000000000 -0400
+++ linux/fs/lockd/svcproc.c	2007-03-26 17:37:30.000000000 -0400
@@ -117,7 +117,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp,
 	resp->cookie = argp->cookie;
 
 	/* Don't accept test requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -148,7 +148,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp,
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if (nlmsvc_check_grace_period(argp) && !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -191,7 +191,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqst
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -224,7 +224,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqst
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -363,7 +363,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp
 	resp->cookie = argp->cookie;
 
 	/* Don't accept new lock requests during grace period */
-	if (nlmsvc_grace_period && !argp->reclaim) {
+	if (nlmsvc_check_grace_period(argp) && !argp->reclaim) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
@@ -396,7 +396,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqs
 	resp->cookie = argp->cookie;
 
 	/* Don't accept requests during grace period */
-	if (nlmsvc_grace_period) {
+	if (nlmsvc_check_grace_period(argp)) {
 		resp->status = nlm_lck_denied_grace_period;
 		return rpc_success;
 	}
--- linux-nlm-1/fs/lockd/svc.c	2007-03-26 10:23:22.000000000 -0400
+++ linux/fs/lockd/svc.c	2007-03-26 11:16:27.000000000 -0400
@@ -75,7 +75,7 @@ static const int		nlm_port_min = 0, nlm_
 
 static struct ctl_table_header * nlm_sysctl_table;
 
-static unsigned long set_grace_period(void)
+unsigned long set_grace_period(void)
 {
 	unsigned long grace_period;
 
@@ -85,7 +85,6 @@ static unsigned long set_grace_period(vo
 				/ nlm_timeout) * nlm_timeout * HZ;
 	else
 		grace_period = nlm_timeout * 5 * HZ;
-	nlmsvc_grace_period = 1;
 	return grace_period + jiffies;
 }
 
@@ -133,6 +132,8 @@ lockd(struct svc_rqst *rqstp)
 	nlmsvc_timeout = nlm_timeout * HZ;
 
 	grace_period_expire = set_grace_period();
+	nlmsvc_grace_period = 1;
+	(void) nlmsvc_fo_reset_servs();
 
 	/*
 	 * The main request loop. We don't terminate until the last
@@ -148,6 +149,8 @@ lockd(struct svc_rqst *rqstp)
 			if (nlmsvc_ops) {
 				nlmsvc_invalidate_all();
 				grace_period_expire = set_grace_period();
+				nlmsvc_grace_period = 1;
+				(void) nlmsvc_fo_reset_servs();
 			}
 		}
 
@@ -194,6 +197,7 @@ lockd(struct svc_rqst *rqstp)
 		nlm_shutdown_hosts();
 		nlmsvc_pid = 0;
 		nlmsvc_serv = NULL;
+		(void) nlmsvc_fo_reset_servs();
 	} else
 		printk(KERN_DEBUG
 			"lockd: new process, skipping host shutdown\n");

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]