[Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2 - Try #2

Wed Mar 9 11:52:11 UTC 2011

Hi,

On Wed, 2011-01-26 at 21:25 -0500, Bob Peterson wrote:
> ----- Original Message -----
> | Hi,
> | 
> | You shouldn't need to do the dual workqueue trick upstream since the
> | workqueues will already start as many threads as required (so even
> | better than just using the two here). If that isn't happening we
> | should ask Tejun about it.
> | 
> | So I think we only need the min hold time bit here,
> | 
> | Steve.
> Hi,
> 
> Based on your feedback, I reworked this patch a bit.  This is take two.
> Rather than using a "qwork" function to police the delay based on
> glock type, I went back to the original queue_delayed_work and
> adjusted the delay based on glock type.  So this version has fewer lines
> changed, but still accomplishes the same thing.
> 
> Note that I had to tweak function glock_work_func in order to
> preserve the rather odd logic that decides whether or not to
> actually queue the work.
> 
> This patch is a performance improvement for GFS2 in a clustered
> environment. It makes the glock hold time self-adjusting.
> 
> Regards,
> 
> Bob Peterson
> Red Hat File Systems
> 
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> 

I know this patch has been pending for a long time while I've been
pondering what to do about it. Bearing in mind that we are not too far
away from a merge window, I think the best approach is probably to hold
off until after the merge window. Then this can be the first patch in
the next -nmw tree after that. That way we'll give it the maximum
exposure before it is merged.

Does that sound like a reasonable plan?

Steve.

> Bouncing locks in a cluster is slow in GFS2
> --
> [Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2
> 
>  fs/gfs2/glock.c  |   39 +++++++++++++++++++++++++++++----------
>  fs/gfs2/glock.h  |    6 ++++++
>  fs/gfs2/glops.c  |    2 --
>  fs/gfs2/incore.h |    2 +-
>  4 files changed, 36 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index c75d499..0523b20 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -407,6 +407,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
>  	if (held1 && held2 && list_empty(&gl->gl_holders))
>  		clear_bit(GLF_QUEUED, &gl->gl_flags);
>  
> +	if (new_state != gl->gl_target)
> +		/* shorten our minimum hold time */
> +		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
> +				       GL_GLOCK_MIN_HOLD);
>  	gl->gl_state = new_state;
>  	gl->gl_tchange = jiffies;
>  }
> @@ -670,16 +674,21 @@ static void glock_work_func(struct work_struct *work)
>  	    gl->gl_state != LM_ST_UNLOCKED &&
>  	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
>  		unsigned long holdtime, now = jiffies;
> -		holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
> +		holdtime = gl->gl_tchange + gl->gl_hold_time;
>  		if (time_before(now, holdtime))
>  			delay = holdtime - now;
>  		set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
>  	}
>  	run_queue(gl, 0);
>  	spin_unlock(&gl->gl_spin);
> -	if (!delay ||
> -	    queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> +	if (!delay)
>  		gfs2_glock_put(gl);
> +	else {
> +		if (gl->gl_name.ln_type != LM_TYPE_INODE)
> +			delay = 0;
> +		if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> +			gfs2_glock_put(gl);
> +	}
>  	if (drop_ref)
>  		gfs2_glock_put(gl);
>  }
> @@ -741,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>  	gl->gl_tchange = jiffies;
>  	gl->gl_object = NULL;
>  	gl->gl_sbd = sdp;
> +	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
>  	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
>  	INIT_WORK(&gl->gl_delete, delete_work_func);
>  
> @@ -852,8 +862,15 @@ static int gfs2_glock_demote_wait(void *word)
>  
>  static void wait_on_holder(struct gfs2_holder *gh)
>  {
> +	unsigned long time1 = jiffies;
> +
>  	might_sleep();
>  	wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
> +	if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
> +		/* Lengthen the minimum hold time. */
> +		gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
> +					      GL_GLOCK_HOLD_INCR,
> +					      GL_GLOCK_MAX_HOLD);
>  }
>  
>  static void wait_on_demote(struct gfs2_glock *gl)
> @@ -1086,8 +1103,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
>  
>  	gfs2_glock_hold(gl);
>  	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
> -	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
> -		delay = gl->gl_ops->go_min_hold_time;
> +	    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
> +	    gl->gl_name.ln_type == LM_TYPE_INODE)
> +		delay = gl->gl_hold_time;
>  	if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
>  		gfs2_glock_put(gl);
>  }
> @@ -1270,12 +1288,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
>  	unsigned long now = jiffies;
>  
>  	gfs2_glock_hold(gl);
> -	holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
> -	if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
> +	holdtime = gl->gl_tchange + gl->gl_hold_time;
> +	if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
> +	    gl->gl_name.ln_type == LM_TYPE_INODE) {
>  		if (time_before(now, holdtime))
>  			delay = holdtime - now;
>  		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
> -			delay = gl->gl_ops->go_min_hold_time;
> +			delay = gl->gl_hold_time;
>  	}
>  
>  	spin_lock(&gl->gl_spin);
> @@ -1658,7 +1677,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
>  	dtime *= 1000000/HZ; /* demote time in uSec */
>  	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
>  		dtime = 0;
> -	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
> +	gfs2_print_dbg(seq, "G:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d m:%ld\n",
>  		  state2str(gl->gl_state),
>  		  gl->gl_name.ln_type,
>  		  (unsigned long long)gl->gl_name.ln_number,
> @@ -1666,7 +1685,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
>  		  state2str(gl->gl_target),
>  		  state2str(gl->gl_demote_state), dtime,
>  		  atomic_read(&gl->gl_ail_count),
> -		  atomic_read(&gl->gl_ref));
> +		  atomic_read(&gl->gl_ref), gl->gl_hold_time);
>  
>  	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
>  		error = dump_holder(seq, gh);
> diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> index afa8bfe..3233add 100644
> --- a/fs/gfs2/glock.h
> +++ b/fs/gfs2/glock.h
> @@ -113,6 +113,12 @@ enum {
>  
>  #define GLR_TRYFAILED		13
>  
> +#define GL_GLOCK_MAX_HOLD        (long)(HZ / 5)
> +#define GL_GLOCK_DFT_HOLD        (long)(HZ / 5)
> +#define GL_GLOCK_MIN_HOLD        (long)(0)
> +#define GL_GLOCK_HOLD_INCR       (long)(HZ / 20)
> +#define GL_GLOCK_HOLD_DECR       (long)(HZ / 40)
> +
>  struct lm_lockops {
>  	const char *lm_proto_name;
>  	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> index ac5fac9..bba125e 100644
> --- a/fs/gfs2/glops.c
> +++ b/fs/gfs2/glops.c
> @@ -399,7 +399,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
>  	.go_lock = inode_go_lock,
>  	.go_dump = inode_go_dump,
>  	.go_type = LM_TYPE_INODE,
> -	.go_min_hold_time = HZ / 5,
>  	.go_flags = GLOF_ASPACE,
>  };
>  
> @@ -410,7 +409,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
>  	.go_unlock = rgrp_go_unlock,
>  	.go_dump = gfs2_rgrp_dump,
>  	.go_type = LM_TYPE_RGRP,
> -	.go_min_hold_time = HZ / 5,
>  	.go_flags = GLOF_ASPACE,
>  };
>  
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 720c1e6..f21f075 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -163,7 +163,6 @@ struct gfs2_glock_operations {
>  	int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
>  	void (*go_callback) (struct gfs2_glock *gl);
>  	const int go_type;
> -	const unsigned long go_min_hold_time;
>  	const unsigned long go_flags;
>  #define GLOF_ASPACE 1
>  };
> @@ -237,6 +236,7 @@ struct gfs2_glock {
>  	struct delayed_work gl_work;
>  	struct work_struct gl_delete;
>  	struct rcu_head gl_rcu;
> +	long gl_hold_time;
>  };
>  
>  #define GFS2_MIN_LVB_SIZE 32	/* Min size of LVB that gfs2 supports */