[Cluster-devel] [GFS2 Patch] GFS2: Add readahead to sequential directory traversal
Steven Whitehouse
swhiteho at redhat.com
Sat Oct 8 11:13:00 UTC 2011
Hi,
On Fri, 2011-10-07 at 12:01 -0400, Bob Peterson wrote:
> Hi,
>
> Thanks for the comments, Steve. Here is another version
> that shows the same performance benefit (better, actually).
> It's much simpler than the previous one. Instead of keeping
> a bitmap, it simply uses a u32 in the gfs2_inode to keep
> track of where it's last read-ahead. That avoids a lot of
> the issues you wrote about.
>
> I couldn't use the file struct because of the way function
> gfs2_dir_read is called from NFS.
>
Thanks for fixing that up... it looks much simpler now. It's a pity about
the NFS issue. We still need to figure out how to reset the
readahead index correctly though, but I think we can leave that for a
future patch. One possible solution would be to reset it on
lseek(SEEK_SET, 0) for example, or if the readahead index is miles away
from the actual index.
Steve.
> Regards,
>
> Bob Peterson
> Red Hat File Systems
>
> Signed-off-by: Bob Peterson <rpeterso at redhat.com>
> --
> fs/gfs2/dir.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/gfs2/incore.h | 1 +
> 2 files changed, 51 insertions(+), 0 deletions(-)
>
> diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
> index 2045d70..31888bd 100644
> --- a/fs/gfs2/dir.c
> +++ b/fs/gfs2/dir.c
> @@ -76,6 +76,8 @@
> #define IS_LEAF 1 /* Hashed (leaf) directory */
> #define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
>
> +#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
> +
> #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
> #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
>
> @@ -345,6 +347,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
> if (hc)
> return hc;
>
> + ip->i_ra_index = 0;
> hsize = 1 << ip->i_depth;
> hsize *= sizeof(__be64);
> if (hsize != i_size_read(&ip->i_inode)) {
> @@ -382,6 +385,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
> void gfs2_dir_hash_inval(struct gfs2_inode *ip)
> {
> __be64 *hc = ip->i_hash_cache;
> + ip->i_ra_index = 0;
> ip->i_hash_cache = NULL;
> kfree(hc);
> }
> @@ -1377,6 +1381,50 @@ out:
> }
>
>
> +/* gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
> + *
> + * Note: we can't calculate each index like dir_e_read can because we don't
> + * have the leaf, and therefore we don't have the depth, and therefore we
> + * don't have the length. So we have to just read enough ahead to make up
> + * for the loss of information. */
> +static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index)
> +{
> + struct gfs2_inode *ip = GFS2_I(inode);
> + struct gfs2_glock *gl = ip->i_gl;
> + struct buffer_head *bh;
> + u64 blocknr = 0, last;
> + unsigned count;
> +
> + /* First check if we've already read-ahead for the whole range. */
> + if (index + MAX_RA_BLOCKS < ip->i_ra_index)
> + return;
> +
> + ip->i_ra_index = max(index, ip->i_ra_index);
> + for (count = 0; count < MAX_RA_BLOCKS; count++) {
> + if (ip->i_ra_index >= hsize) /* if exceeded the hash table */
> + break;
> +
> + last = blocknr;
> + blocknr = be64_to_cpu(ip->i_hash_cache[ip->i_ra_index]);
> + ip->i_ra_index++;
> + if (blocknr == last)
> + continue;
> +
> + bh = gfs2_getbuf(gl, blocknr, 1);
> + if (trylock_buffer(bh)) {
> + if (buffer_uptodate(bh)) {
> + unlock_buffer(bh);
> + brelse(bh);
> + continue;
> + }
> + bh->b_end_io = end_buffer_read_sync;
> + submit_bh(READA | REQ_META, bh);
> + continue;
> + }
> + brelse(bh);
> + }
> +}
> +
> /**
> * dir_e_read - Reads the entries from a directory into a filldir buffer
> * @dip: dinode pointer
> @@ -1406,6 +1454,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
> if (IS_ERR(lp))
> return PTR_ERR(lp);
>
> + gfs2_dir_readahead(inode, hsize, index);
> +
> while (index < hsize) {
> error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
> &copied, &depth,
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 892ac37..50c3bcb 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -286,6 +286,7 @@ struct gfs2_inode {
> struct rw_semaphore i_rw_mutex;
> struct list_head i_trunc_list;
> __be64 *i_hash_cache;
> + u32 i_ra_index; /* read-ahead index */
> u32 i_entries;
> u32 i_diskflags;
> u8 i_height;
More information about the Cluster-devel
mailing list