[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: tytso's readdir speedup patch - adoptable?



Theodore Tso wrote:
> 
> I ported my readdir speedup patch to ext3 while I was flying back to
> Boston yesterday.  As I feared, I essentially ended up having to
> rewrite the ext3_find_entry() from scratch, since the original version
> was too fragile to modify safely.  But it wasn't all bad.  This new
> version is substantially easier to understand, fixes a bug and a race
> condition were in the original version, is ~110 bytes smaller (not all
> things get bigger with successive versions :-), and has the readdir
> speedup enhancement.
> 
> Porting this patch to 2.2 ext2 code base should be pretty trivial, if
> anyone cares to do it.
> 
> Because this is a complete rewrite of ext3_find_entry(), I don't
> recommend that folks try using it in production systems at this time;
> wait until other people have looked it over and it gets folded into
> the official ext3 releases.  But I would appreciate people willing to
> try out the patch and letting me know how well it works for them.
> I've desk checked it carefully, and believe it to be Bugfree(tm), but
> some additional eyes to check it would be good....
> 
> (If you do want to audit the changes, I advise that you apply the
> patch to a scratch tree and then look at the search_dirblock and
> ext3_find_entry functions; so much as changed that simply examining
> the diff will likely not be the most efficient way to go about things.)
> 
>                                                 - Ted
> 
> ===== fs/ext3/namei.c 1.2 vs edited =====
> --- 1.2/fs/ext3/namei.c Tue Aug 21 07:08:28 2001
> +++ edited/fs/ext3/namei.c      Wed Aug 22 10:53:11 2001
> @@ -55,6 +55,46 @@
>  }
> 
>  /*
> + * Returns 0 if not found, -1 on failure, and 1 on success
> + */
> +static int inline search_dirblock(struct buffer_head * bh,
> +                                 struct inode *dir,
> +                                 struct dentry *dentry,
> +                                 unsigned long offset,
> +                                 struct ext3_dir_entry_2 ** res_dir)
> +{
> +       struct ext3_dir_entry_2 * de;
> +       char * dlimit;
> +       int de_len;
> +       const char *name = dentry->d_name.name;
> +       int namelen = dentry->d_name.len;
> +
> +       de = (struct ext3_dir_entry_2 *) bh->b_data;
> +       dlimit = bh->b_data + dir->i_sb->s_blocksize;
> +       while ((char *) de < dlimit) {
> +               /* this code is executed quadratically often */
> +               /* do minimal checking `by hand' */
> +
> +               if ((char *) de + namelen <= dlimit &&
> +                   ext3_match (namelen, name, de)) {
> +                       /* found a match - just to be sure, do a full check */
> +                       if (!ext3_check_dir_entry("ext3_find_entry",
> +                                                 dir, de, bh, offset))
> +                               return -1;
> +                       *res_dir = de;
> +                       return 1;
> +               }
> +               /* prevent looping on a bad block */
> +               de_len = le16_to_cpu(de->rec_len);
> +               if (de_len <= 0)
> +                       return -1;
> +               offset += de_len;
> +               de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
> +       }
> +       return 0;
> +}
> +
> +/*
>   *     ext3_find_entry()
>   *
>   * finds an entry in the specified directory with the wanted name. It
> @@ -70,105 +110,79 @@
>  {
>         struct super_block * sb;
>         struct buffer_head * bh_use[NAMEI_RA_SIZE];
> -       struct buffer_head * bh_read[NAMEI_RA_SIZE];
> -       unsigned long offset;
> -       int block, toread, i, err;
> +       struct buffer_head * bh, *ret = NULL;
> +       unsigned long start, block, b;
> +       int ra_ptr = 0, ra_max = 0, num = 0;
> +       int nblocks, i, err;
>         struct inode *dir = dentry->d_parent->d_inode;
> -       const char *name = dentry->d_name.name;
> -       int namelen = dentry->d_name.len;
> 
>         *res_dir = NULL;
>         sb = dir->i_sb;
> 
> -       if (namelen > EXT3_NAME_LEN)
> -               return NULL;
> -
> -       memset (bh_use, 0, sizeof (bh_use));
> -       toread = 0;
> -       for (block = 0; block < NAMEI_RA_SIZE; ++block) {
> -               struct buffer_head * bh;
> -
> -               if ((block << EXT3_BLOCK_SIZE_BITS (sb)) >= dir->i_size)
> -                       break;
> -               bh = ext3_getblk (NULL, dir, block, 0, &err);
> -               bh_use[block] = bh;
> -               if (bh && !buffer_uptodate(bh))
> -                       bh_read[toread++] = bh;
> -       }
> -
> -       for (block = 0, offset = 0; offset < dir->i_size; block++) {
> -               struct buffer_head * bh;
> -               struct ext3_dir_entry_2 * de;
> -               char * dlimit;
> -
> -               if ((block % NAMEI_RA_BLOCKS) == 0 && toread) {
> -                       ll_rw_block (READ, toread, bh_read);
> -                       toread = 0;
> -               }
> -               bh = bh_use[block % NAMEI_RA_SIZE];
> -               if (!bh) {
> -#if 0
> -                       ext3_error (sb, "ext3_find_entry",
> -                               "directory #%lu contains a hole at offset %lu",
> -                                   dir->i_ino, offset);
> -#endif
> -                       offset += sb->s_blocksize;
> -                       continue;
> +       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
> +       start = dir->u.ext3_i.i_dir_start_lookup;
> +       if (start >= nblocks)
> +               start = 0;
> +       block = start;
> +restart:
> +       do {
> +               /*
> +                * We deal with the read-ahead logic here.
> +                */
> +               if (ra_ptr >= ra_max) {
> +                       ra_ptr = 0;
> +                       b = block;
> +                       for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
> +                               if (b >= nblocks ||
> +                                   (num && block == start))
> +                                       break;
> +                               num++;
> +                               bh = ext3_getblk(NULL, dir, b++, 0, &err);
> +                               bh_use[ra_max] = bh;
> +                               if (bh)
> +                                       ll_rw_block(READ, 1, &bh);
> +                       }
>                 }
> -               wait_on_buffer (bh);
> +               if ((bh = bh_use[ra_ptr++]) == NULL)
> +                       goto next;
> +               wait_on_buffer(bh);
>                 if (!buffer_uptodate(bh)) {
> -                       /*
> -                        * read error: all bets are off
> -                        */
> -                       break;
> +                       /* read error, skip block & hope for the best */
> +                       brelse(bh);
> +                       goto next;
>                 }
> -
> -               de = (struct ext3_dir_entry_2 *) bh->b_data;
> -               dlimit = bh->b_data + sb->s_blocksize;
> -               while ((char *) de < dlimit) {
> -                       /* this code is executed quadratically often */
> -                       /* do minimal checking `by hand' */
> -                       int de_len;
> -
> -                       if ((char *) de + namelen <= dlimit &&
> -                           ext3_match (namelen, name, de)) {
> -                               /* found a match -
> -                                  just to be sure, do a full check */
> -                               if (!ext3_check_dir_entry("ext3_find_entry",
> -                                                         dir, de, bh, offset))
> -                                       goto failure;
> -                               for (i = 0; i < NAMEI_RA_SIZE; ++i) {
> -                                       if (bh_use[i] != bh)
> -                                               brelse (bh_use[i]);
> -                               }
> -                               *res_dir = de;
> -                               return bh;
> -                       }
> -                       /* prevent looping on a bad block */
> -                       de_len = le16_to_cpu(de->rec_len);
> -                       if (de_len <= 0)
> -                               goto failure;
> -                       offset += de_len;
> -                       de = (struct ext3_dir_entry_2 *)
> -                               ((char *) de + de_len);
> +               i = search_dirblock(bh, dir, dentry,
> +                           block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
> +               if (i == 1) {
> +                       dir->u.ext3_i.i_dir_start_lookup = block;
> +                       ret = bh;
> +                       goto cleanup_and_exit;
> +               } else {
> +                       brelse(bh);
> +                       if (i < 0)
> +                               goto cleanup_and_exit;
>                 }
> -
> -               brelse (bh);
> -               if (((block + NAMEI_RA_SIZE) << EXT3_BLOCK_SIZE_BITS (sb)) >=
> -                   dir->i_size)
> -                       bh = NULL;
> -               else
> -                       bh = ext3_getblk (NULL, dir,
> -                                       block + NAMEI_RA_SIZE, 0, &err);
> -               bh_use[block % NAMEI_RA_SIZE] = bh;
> -               if (bh && !buffer_uptodate(bh))
> -                       bh_read[toread++] = bh;
> +       next:
> +               if (++block >= nblocks)
> +                       block = 0;
> +       } while (block != start);
> +
> +       /*
> +        * If the directory has grown while we were searching, then
> +        * search the last part of the directory before giving up.
> +        */
> +       block = nblocks;
> +       nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
> +       if (block < nblocks) {
> +               start = 0;
> +               goto restart;
>         }
> -
> -failure:
> -       for (i = 0; i < NAMEI_RA_SIZE; ++i)
> -               brelse (bh_use[i]);
> -       return NULL;
> +
> +cleanup_and_exit:
> +       /* Clean up the read-ahead blocks */
> +       for (; ra_ptr < ra_max; ra_ptr++)
> +               brelse (bh_use[ra_ptr]);
> +       return ret;
>  }
> 
>  static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
> ===== include/linux/ext3_fs_i.h 1.1 vs edited =====
> --- 1.1/include/linux/ext3_fs_i.h       Tue Aug 21 07:03:44 2001
> +++ edited/include/linux/ext3_fs_i.h    Tue Aug 21 12:50:36 2001
> @@ -41,6 +41,7 @@
>         __u32   i_prealloc_block;
>         __u32   i_prealloc_count;
>  #endif
> +       __u32   i_dir_start_lookup;
> 
>         struct list_head i_orphan;      /* unlinked but open inodes */
> 
> 
> _______________________________________________
> Ext3-users mailing list
> Ext3-users redhat com
> https://listman.redhat.com/mailman/listinfo/ext3-users

Hey Ted,
Applied both ext2 and ext3 patch on 2.4.9.

Beautiful, over 20% speed increase whenever making new
kernel. Now this is something to toot horn about.
Thanks,
Albert
-- 
Albert Cranford Deerfield Beach FL USA
ac9410 bellsouth net





[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]