[Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2 - Try #2
Steven Whitehouse
swhiteho at redhat.com
Wed Mar 9 11:52:11 UTC 2011
Hi,
On Wed, 2011-01-26 at 21:25 -0500, Bob Peterson wrote:
> ----- Original Message -----
> | Hi,
> |
> | You shouldn't need to do the dual workqueue trick upstream since the
> | workqueues will already start as many threads as required (so even
> | better than just using the two here). If that isn't happening, we
> | should ask Tejun about it.
> |
> | So I think we only need the min hold time bit here,
> |
> | Steve.
> Hi,
>
> Based on your feedback, I reworked this patch a bit. This is take two.
> Rather than using a "qwork" function to police the delay based on
> glock type, I went back to the original queue_delayed_work and
> adjusted delay based on glock type. So this version has fewer lines
> changed, but still accomplishes the same thing.
>
> Note that I had to tweak function glock_work_func in order to
> preserve the rather odd logic that decides whether or not to
> actually queue the work.
>
> This patch is a performance improvement for GFS2 in a clustered
> environment. It makes the glock hold time self-adjusting.
>
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
>
I know this patch has been pending for a long time while I've been pondering
what to do about it. I think the best approach, bearing in mind
that we are not too far away from a merge window, is probably to hold off
until after the merge window. Then this can be the first patch in the next
-nmw tree after that. That way we'll give it the maximum exposure before
it is merged.
Does that sound like a reasonable plan?
Steve.
> Bouncing locks in a cluster is slow in GFS2
> --
> [Cluster-devel] [PATCH][GFS2] Bouncing locks in a cluster is slow in GFS2
>
> fs/gfs2/glock.c | 39 +++++++++++++++++++++++++++++----------
> fs/gfs2/glock.h | 6 ++++++
> fs/gfs2/glops.c | 2 --
> fs/gfs2/incore.h | 2 +-
> 4 files changed, 36 insertions(+), 13 deletions(-)
>
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index c75d499..0523b20 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -407,6 +407,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
> if (held1 && held2 && list_empty(&gl->gl_holders))
> clear_bit(GLF_QUEUED, &gl->gl_flags);
>
> + if (new_state != gl->gl_target)
> + /* shorten our minimum hold time */
> + gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
> + GL_GLOCK_MIN_HOLD);
> gl->gl_state = new_state;
> gl->gl_tchange = jiffies;
> }
> @@ -670,16 +674,21 @@ static void glock_work_func(struct work_struct *work)
> gl->gl_state != LM_ST_UNLOCKED &&
> gl->gl_demote_state != LM_ST_EXCLUSIVE) {
> unsigned long holdtime, now = jiffies;
> - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
> + holdtime = gl->gl_tchange + gl->gl_hold_time;
> if (time_before(now, holdtime))
> delay = holdtime - now;
> set_bit(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, &gl->gl_flags);
> }
> run_queue(gl, 0);
> spin_unlock(&gl->gl_spin);
> - if (!delay ||
> - queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> + if (!delay)
> gfs2_glock_put(gl);
> + else {
> + if (gl->gl_name.ln_type != LM_TYPE_INODE)
> + delay = 0;
> + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> + gfs2_glock_put(gl);
> + }
> if (drop_ref)
> gfs2_glock_put(gl);
> }
> @@ -741,6 +750,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
> gl->gl_tchange = jiffies;
> gl->gl_object = NULL;
> gl->gl_sbd = sdp;
> + gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
> INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
> INIT_WORK(&gl->gl_delete, delete_work_func);
>
> @@ -852,8 +862,15 @@ static int gfs2_glock_demote_wait(void *word)
>
> static void wait_on_holder(struct gfs2_holder *gh)
> {
> + unsigned long time1 = jiffies;
> +
> might_sleep();
> wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
> + if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
> + /* Lengthen the minimum hold time. */
> + gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
> + GL_GLOCK_HOLD_INCR,
> + GL_GLOCK_MAX_HOLD);
> }
>
> static void wait_on_demote(struct gfs2_glock *gl)
> @@ -1086,8 +1103,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
>
> gfs2_glock_hold(gl);
> if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
> - !test_bit(GLF_DEMOTE, &gl->gl_flags))
> - delay = gl->gl_ops->go_min_hold_time;
> + !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
> + gl->gl_name.ln_type == LM_TYPE_INODE)
> + delay = gl->gl_hold_time;
> if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
> gfs2_glock_put(gl);
> }
> @@ -1270,12 +1288,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
> unsigned long now = jiffies;
>
> gfs2_glock_hold(gl);
> - holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
> - if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
> + holdtime = gl->gl_tchange + gl->gl_hold_time;
> + if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
> + gl->gl_name.ln_type == LM_TYPE_INODE) {
> if (time_before(now, holdtime))
> delay = holdtime - now;
> if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
> - delay = gl->gl_ops->go_min_hold_time;
> + delay = gl->gl_hold_time;
> }
>
> spin_lock(&gl->gl_spin);
> @@ -1658,7 +1677,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
> dtime *= 1000000/HZ; /* demote time in uSec */
> if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
> dtime = 0;
> - gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n",
> + gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d m:%ld\n",
> state2str(gl->gl_state),
> gl->gl_name.ln_type,
> (unsigned long long)gl->gl_name.ln_number,
> @@ -1666,7 +1685,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
> state2str(gl->gl_target),
> state2str(gl->gl_demote_state), dtime,
> atomic_read(&gl->gl_ail_count),
> - atomic_read(&gl->gl_ref));
> + atomic_read(&gl->gl_ref), gl->gl_hold_time);
>
> list_for_each_entry(gh, &gl->gl_holders, gh_list) {
> error = dump_holder(seq, gh);
> diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> index afa8bfe..3233add 100644
> --- a/fs/gfs2/glock.h
> +++ b/fs/gfs2/glock.h
> @@ -113,6 +113,12 @@ enum {
>
> #define GLR_TRYFAILED 13
>
> +#define GL_GLOCK_MAX_HOLD (long)(HZ / 5)
> +#define GL_GLOCK_DFT_HOLD (long)(HZ / 5)
> +#define GL_GLOCK_MIN_HOLD (long)(0)
> +#define GL_GLOCK_HOLD_INCR (long)(HZ / 20)
> +#define GL_GLOCK_HOLD_DECR (long)(HZ / 40)
> +
> struct lm_lockops {
> const char *lm_proto_name;
> int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> index ac5fac9..bba125e 100644
> --- a/fs/gfs2/glops.c
> +++ b/fs/gfs2/glops.c
> @@ -399,7 +399,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
> .go_lock = inode_go_lock,
> .go_dump = inode_go_dump,
> .go_type = LM_TYPE_INODE,
> - .go_min_hold_time = HZ / 5,
> .go_flags = GLOF_ASPACE,
> };
>
> @@ -410,7 +409,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
> .go_unlock = rgrp_go_unlock,
> .go_dump = gfs2_rgrp_dump,
> .go_type = LM_TYPE_RGRP,
> - .go_min_hold_time = HZ / 5,
> .go_flags = GLOF_ASPACE,
> };
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 720c1e6..f21f075 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -163,7 +163,6 @@ struct gfs2_glock_operations {
> int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
> void (*go_callback) (struct gfs2_glock *gl);
> const int go_type;
> - const unsigned long go_min_hold_time;
> const unsigned long go_flags;
> #define GLOF_ASPACE 1
> };
> @@ -237,6 +236,7 @@ struct gfs2_glock {
> struct delayed_work gl_work;
> struct work_struct gl_delete;
> struct rcu_head gl_rcu;
> + long gl_hold_time;
> };
>
> #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
More information about the Cluster-devel mailing list