[Cluster-devel] [GFS2 PATCH 06/15] GFS2: Prevent gl_delete work for re-used inodes

Bob Peterson rpeterso at redhat.com
Mon Oct 5 15:36:28 UTC 2015


This patch adds a new glock flag GLF_INODE_DELETING which signifies
when a glock is being used to change an inode from unlinked to
deleted. The flag is used in a few places:
1. If an iopen callback is received, it checks the flag. If the bit
   is set, someone else has already started deleting the inode.
   In that case, the delete_func may already be running, so we don't
   want to queue it to run another time. Doing so only gets us into
   trouble.
2. When a dinode is in the process of being created, we've been
   assigned that block by the allocator, so it must have been free.
   At that point, we check if there is pending delete work pending,
   and if so, we cancel it to prevent the block from being deleted
   while we're creating it. This is necessary because there could
   be pending delete work that was queued up a while ago, but the
   delete work might have been done on another node, which is how
   the block became freed. However, we keep the GLF_INODE_DELETING
   set to prevent new delete work from being queued. After we're
   done creating, we clear the bit, otherwise the file may not be
   deleted ever again, even in legitimate cases in the future.
3. In function try_rgrp_unlink, we also make sure the bit isn't
   already set before we try to reclaim an unlinked block.

Signed-off-by: Bob Peterson <rpeterso at redhat.com>
---
 fs/gfs2/glops.c  |  3 ++-
 fs/gfs2/incore.h |  1 +
 fs/gfs2/inode.c  | 21 ++++++++++++++++++++-
 fs/gfs2/rgrp.c   |  6 ++++++
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 1f6c9c3..b604343 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -553,7 +553,8 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
 		return;
 
 	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
-	    gl->gl_state == LM_ST_SHARED && ip) {
+	    gl->gl_state == LM_ST_SHARED && ip &&
+	    !test_and_set_bit(GLF_INODE_DELETING, &gl->gl_flags)) {
 		gl->gl_lockref.count++;
 		if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
 			gl->gl_lockref.count--;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 121ed08..5065e0c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -326,6 +326,7 @@ enum {
 	GLF_LRU				= 13,
 	GLF_OBJECT			= 14, /* Used only for tracing */
 	GLF_BLOCKING			= 15,
+	GLF_INODE_DELETING		= 16, /* Was unlinked, being deleted */
 };
 
 struct gfs2_glock {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index ce4b793..833f8fa 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -597,6 +597,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	int error, free_vfs_inode = 0;
 	u32 aflags = 0;
 	unsigned blocks = 1;
+	int delete_prevented = 0;
 	struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
 
 	if (!name->len || name->len > GFS2_FNAMESIZE)
@@ -705,6 +706,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_free_inode;
 
+	/*
+	 * Cancel any pending delete work for this glock. If there's pending
+	 * delete work, we'd otherwise try to delete the dinode, but since we
+	 * were assigned this address by alloc_dinode, the block is already
+	 * free, so there's no need to attempt to change it from unlinked to
+	 * free. We'd just get into trouble trying to do so. The biggest
+	 * problem is having gfs2_delete_inode called while there pages
+	 * still in existence due to a race between create and delete.
+	 */
+	if (cancel_work_sync(&ip->i_gl->gl_delete))
+		delete_prevented = 1;
 	ip->i_gl->gl_object = ip;
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
 	if (error)
@@ -762,6 +774,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		*opened |= FILE_CREATED;
 		error = finish_open(file, dentry, gfs2_open_common, opened);
 	}
+	if (delete_prevented) {
+		gfs2_glock_put(ip->i_gl); /* fix the gl reference count */
+		clear_bit(GLF_INODE_DELETING, &ip->i_gl->gl_flags);
+	}
 	gfs2_glock_dq_uninit(ghs);
 	gfs2_glock_dq_uninit(ghs + 1);
 	return error;
@@ -772,8 +788,11 @@ fail_gunlock3:
 fail_gunlock2:
 	gfs2_glock_dq_uninit(ghs + 1);
 fail_free_inode:
-	if (ip->i_gl)
+	if (ip->i_gl) {
+		if (delete_prevented)
+			clear_bit(GLF_INODE_DELETING, &ip->i_gl->gl_flags);
 		gfs2_glock_put(ip->i_gl);
+	}
 	gfs2_rs_delete(ip, NULL);
 fail_free_acls:
 	if (default_acl)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 475985d..b936ee1 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1807,6 +1807,12 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 		if (error)
 			continue;
 
+		/* Make sure we're not queuing a delete if someone else has */
+		if (test_and_set_bit(GLF_INODE_DELETING, &gl->gl_flags)) {
+			gfs2_glock_put(gl);
+			continue;
+		}
+
 		/* If the inode is already in cache, we can ignore it here
 		 * because the existing inode disposal code will deal with
 		 * it when all refs have gone away. Accessing gl_object like
-- 
2.4.3




More information about the Cluster-devel mailing list