[Linux-cachefs] [PATCH 2/2] Enable fscache as an optional feature of ceph.
Sage Weil
sage at inktank.com
Tue May 28 17:11:26 UTC 2013
Hi Milosz,
Just a heads up that I hope to take a closer look at the patch this
afternoon or tomorrow. Just catching up after the long weekend.
Thanks!
sage
On Thu, 23 May 2013, Milosz Tanski wrote:
> Enable fscache as an optional feature of ceph.
>
> Adding support for fscache to the Ceph filesystem. This would bring it on
> par with some of the other network filesystems in Linux (like NFS, AFS, etc...)
>
> This exploits the existing Ceph cache & lazyio capabilities.
>
> Signed-off-by: Milosz Tanski <milosz at adfin.com>
> ---
> fs/ceph/Kconfig | 9 ++++++
> fs/ceph/Makefile | 2 ++
> fs/ceph/addr.c | 85 ++++++++++++++++++++++++++++++++++++++++--------------
> fs/ceph/caps.c | 21 +++++++++++++-
> fs/ceph/file.c | 9 ++++++
> fs/ceph/inode.c | 25 ++++++++++++++--
> fs/ceph/super.c | 25 ++++++++++++++--
> fs/ceph/super.h | 12 ++++++++
> 8 files changed, 162 insertions(+), 26 deletions(-)
>
> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
> index 49bc782..ac9a2ef 100644
> --- a/fs/ceph/Kconfig
> +++ b/fs/ceph/Kconfig
> @@ -16,3 +16,12 @@ config CEPH_FS
>
> If unsure, say N.
>
> +if CEPH_FS
> +config CEPH_FSCACHE
> + bool "Enable Ceph client caching support"
> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
> + help
> + Choose Y here to enable persistent, read-only local
> + caching support for Ceph clients using FS-Cache
> +
> +endif
> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
> index bd35212..0af0678 100644
> --- a/fs/ceph/Makefile
> +++ b/fs/ceph/Makefile
> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
> mds_client.o mdsmap.o strings.o ceph_frag.o \
> debugfs.o
>
> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
> +
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 3e68ac1..fd3a1cc 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -11,6 +11,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
> #include <linux/ceph/osd_client.h>
>
> /*
> @@ -149,11 +150,26 @@ static void ceph_invalidatepage(struct page
> *page, unsigned long offset)
> struct ceph_inode_info *ci;
> struct ceph_snap_context *snapc = page_snap_context(page);
>
> - BUG_ON(!PageLocked(page));
> - BUG_ON(!PagePrivate(page));
> BUG_ON(!page->mapping);
>
> inode = page->mapping->host;
> + ci = ceph_inode(inode);
> +
> + if (offset != 0) {
> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
> + inode, page, page->index);
> + return;
> + }
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + if (PageFsCache(page))
> + ceph_invalidate_fscache_page(inode, page);
> +#endif
> +
> + if (!PagePrivate(page))
> + return;
> +
> + BUG_ON(!PageLocked(page));
>
> /*
> * We can get non-dirty pages here due to races between
> @@ -163,31 +179,32 @@ static void ceph_invalidatepage(struct page
> *page, unsigned long offset)
> if (!PageDirty(page))
> pr_err("%p invalidatepage %p page not dirty\n", inode, page);
>
> - if (offset == 0)
> - ClearPageChecked(page);
> + ClearPageChecked(page);
>
> - ci = ceph_inode(inode);
> - if (offset == 0) {
> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> - inode, page, page->index, offset);
> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> - ceph_put_snap_context(snapc);
> - page->private = 0;
> - ClearPagePrivate(page);
> - } else {
> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
> - inode, page, page->index);
> - }
> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> + inode, page, page->index, offset);
> +
> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> + ceph_put_snap_context(snapc);
> + page->private = 0;
> + ClearPagePrivate(page);
> }
>
> -/* just a sanity check */
> static int ceph_releasepage(struct page *page, gfp_t g)
> {
> struct inode *inode = page->mapping ? page->mapping->host : NULL;
> dout("%p releasepage %p idx %lu\n", inode, page, page->index);
> WARN_ON(PageDirty(page));
> - WARN_ON(PagePrivate(page));
> - return 0;
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Can we release the page from the cache? */
> + if (PageFsCache(page) && ceph_release_fscache_page(page, g) == 0)
> + return 0;
> +#endif
> + if (PagePrivate(page))
> + return 0;
> +
> + return 1;
> }
>
> /*
> @@ -197,11 +214,18 @@ static int readpage_nounlock(struct file *filp,
> struct page *page)
> {
> struct inode *inode = file_inode(filp);
> struct ceph_inode_info *ci = ceph_inode(inode);
> - struct ceph_osd_client *osdc =
> + struct ceph_osd_client *osdc =
> &ceph_inode_to_client(inode)->client->osdc;
> int err = 0;
> u64 len = PAGE_CACHE_SIZE;
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + err = ceph_readpage_from_fscache(inode, page);
> +
> + if (err == 0)
> + goto out;
> +#endif
> +
> dout("readpage inode %p file %p page %p index %lu\n",
> inode, filp, page, page->index);
> err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
> @@ -219,6 +243,10 @@ static int readpage_nounlock(struct file *filp,
> struct page *page)
> }
> SetPageUptodate(page);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_readpage_to_fscache(inode, page);
> +#endif
> +
> out:
> return err < 0 ? err : 0;
> }
> @@ -262,6 +290,9 @@ static void finish_read(struct ceph_osd_request
> *req, struct ceph_msg *msg)
> flush_dcache_page(page);
> SetPageUptodate(page);
> unlock_page(page);
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_readpage_to_fscache(inode, page);
> +#endif
> page_cache_release(page);
> bytes -= PAGE_CACHE_SIZE;
> }
> @@ -330,7 +361,7 @@ static int start_read(struct inode *inode, struct
> list_head *page_list, int max)
> page = list_entry(page_list->prev, struct page, lru);
> BUG_ON(PageLocked(page));
> list_del(&page->lru);
> -
> +
> dout("start_read %p adding %p idx %lu\n", inode, page,
> page->index);
> if (add_to_page_cache_lru(page, &inode->i_data, page->index,
> @@ -377,6 +408,14 @@ static int ceph_readpages(struct file *file,
> struct address_space *mapping,
> int rc = 0;
> int max = 0;
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
> + &nr_pages);
> +
> + if (rc == 0)
> + goto out;
> +#endif
> +
> if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
> max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
> >> PAGE_SHIFT;
> @@ -490,6 +529,10 @@ static int writepage_nounlock(struct page *page,
> struct writeback_control *wbc)
> CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
> set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_readpage_to_fscache(inode, page);
> +#endif
> +
> set_page_writeback(page);
> err = ceph_osdc_writepages(osdc, ceph_vino(inode),
> &ci->i_layout, snapc,
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index da0f9b8..7e8d8d3 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -10,6 +10,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
> #include <linux/ceph/decode.h>
> #include <linux/ceph/messenger.h>
>
> @@ -486,8 +487,14 @@ static void __check_cap_issue(struct
> ceph_inode_info *ci, struct ceph_cap *cap,
> * i_rdcache_gen.
> */
> if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
> - (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
> + (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
> ci->i_rdcache_gen++;
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Invalidate the cache for the whole file. */
> + dout("Invalidating inode data cache: %p", &ci->vfs_inode);
> + fscache_invalidate(ci->fscache);
> +#endif
> + }
>
> /*
> * if we are newly issued FILE_SHARED, mark dir not complete; we
> @@ -2356,6 +2363,12 @@ static void handle_cap_grant(struct inode
> *inode, struct ceph_mds_caps *grant,
> if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
> (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
> !ci->i_wrbuffer_ref) {
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Close the fscache on inode */
> + ceph_fscache_unregister_inode_cookie(ci);
> +#endif
> +
> if (try_nonblocking_invalidate(inode) == 0) {
> revoked_rdcache = 1;
> } else {
> @@ -2425,6 +2438,12 @@ static void handle_cap_grant(struct inode
> *inode, struct ceph_mds_caps *grant,
> wake = 1;
> }
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Register cache (if needed); perform this after any size change. */
> + if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
> + ceph_fscache_register_inode_cookie(session->s_mdsc->fsc, ci);
> +#endif
> +
> /* check cap bits */
> wanted = __ceph_caps_wanted(ci);
> used = __ceph_caps_used(ci);
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 656e169..e7ecc04 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -11,6 +11,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
>
> /*
> * Ceph file operations
> @@ -67,10 +68,17 @@ out:
> static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> {
> struct ceph_file_info *cf;
> + struct ceph_inode_info *ci = ceph_inode(inode);
> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
> int ret = 0;
>
> switch (inode->i_mode & S_IFMT) {
> case S_IFREG:
> +#ifdef CONFIG_CEPH_FSCACHE
> + spin_lock(&ci->i_ceph_lock);
> + ceph_fscache_register_inode_cookie(fsc, ci);
> + spin_lock(&ci->i_ceph_lock);
> +#endif
> case S_IFDIR:
> dout("init_file %p %p 0%o (regular)\n", inode, file,
> inode->i_mode);
> @@ -181,6 +189,7 @@ int ceph_open(struct inode *inode, struct file *file)
> spin_unlock(&ci->i_ceph_lock);
> return ceph_init_file(inode, file, fmode);
> }
> +
> spin_unlock(&ci->i_ceph_lock);
>
> dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index be0f7e2..620b84c 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -12,6 +12,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
> #include <linux/ceph/decode.h>
>
> /*
> @@ -377,6 +378,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>
> INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + ci->fscache = NULL;
> +#endif
> +
> return &ci->vfs_inode;
> }
>
> @@ -396,6 +401,10 @@ void ceph_destroy_inode(struct inode *inode)
>
> dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_fscache_unregister_inode_cookie(ci);
> +#endif
> +
> ceph_queue_caps_release(inode);
>
> /*
> @@ -430,7 +439,6 @@ void ceph_destroy_inode(struct inode *inode)
> call_rcu(&inode->i_rcu, ceph_i_callback);
> }
>
> -
> /*
> * Helpers to fill in size, ctime, mtime, and atime. We have to be
> * careful because either the client or MDS may have more up to date
> @@ -633,6 +641,14 @@ static int fill_inode(struct inode *inode,
> le32_to_cpu(info->time_warp_seq),
> &ctime, &mtime, &atime);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* Notify the cache that size has changed */
> + if (queue_trunc && ci->fscache) {
> + pr_info("size changed inode: %p cap flags\n", &ci->vfs_inode);
> + fscache_attr_changed(ci->fscache);
> + }
> +#endif
> +
> /* only update max_size on auth cap */
> if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
> ci->i_max_size != le64_to_cpu(info->max_size)) {
> @@ -1066,7 +1082,7 @@ int ceph_fill_trace(struct super_block *sb,
> struct ceph_mds_request *req,
> * complete.
> */
> ceph_set_dentry_offset(req->r_old_dentry);
> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
> ceph_dentry(req->r_old_dentry)->offset);
>
> dn = req->r_old_dentry; /* use old_dentry */
> @@ -1430,6 +1446,11 @@ static void ceph_invalidate_work(struct
> work_struct *work)
> orig_gen = ci->i_rdcache_gen;
> spin_unlock(&ci->i_ceph_lock);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + pr_info("cache invalidating inode: %p cap flags\n", &ci->vfs_inode);
> + fscache_invalidate(ci->fscache);
> +#endif
> +
> truncate_inode_pages(&inode->i_data, 0);
>
> spin_lock(&ci->i_ceph_lock);
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 7d377c9..7847ef7 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -17,6 +17,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
>
> #include <linux/ceph/ceph_features.h>
> #include <linux/ceph/decode.h>
> @@ -530,6 +531,11 @@ static struct ceph_fs_client
> *create_fs_client(struct ceph_mount_options *fsopt,
> if (!fsc->wb_pagevec_pool)
> goto fail_trunc_wq;
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + /* fscache */
> + ceph_fscache_register_fsid_cookie(fsc);
> +#endif
> +
> /* caps */
> fsc->min_caps = fsopt->max_readdir;
>
> @@ -554,6 +560,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
> {
> dout("destroy_fs_client %p\n", fsc);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_fscache_unregister_fsid_cookie(fsc);
> +#endif
> +
> destroy_workqueue(fsc->wb_wq);
> destroy_workqueue(fsc->pg_inv_wq);
> destroy_workqueue(fsc->trunc_wq);
> @@ -588,6 +598,8 @@ static void ceph_inode_init_once(void *foo)
>
> static int __init init_caches(void)
> {
> + int error = -ENOMEM;
> +
> ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
> sizeof(struct ceph_inode_info),
> __alignof__(struct ceph_inode_info),
> @@ -611,15 +623,19 @@ static int __init init_caches(void)
> if (ceph_file_cachep == NULL)
> goto bad_file;
>
> - return 0;
> +#ifdef CONFIG_CEPH_FSCACHE
> + if ((error = fscache_register_netfs(&ceph_cache_netfs)))
> + goto bad_file;
> +#endif
>
> + return 0;
> bad_file:
> kmem_cache_destroy(ceph_dentry_cachep);
> bad_dentry:
> kmem_cache_destroy(ceph_cap_cachep);
> bad_cap:
> kmem_cache_destroy(ceph_inode_cachep);
> - return -ENOMEM;
> + return error;
> }
>
> static void destroy_caches(void)
> @@ -629,10 +645,15 @@ static void destroy_caches(void)
> * destroy cache.
> */
> rcu_barrier();
> +
> kmem_cache_destroy(ceph_inode_cachep);
> kmem_cache_destroy(ceph_cap_cachep);
> kmem_cache_destroy(ceph_dentry_cachep);
> kmem_cache_destroy(ceph_file_cachep);
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + fscache_unregister_netfs(&ceph_cache_netfs);
> +#endif
> }
>
>
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 8696be2..2980337 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -16,6 +16,10 @@
>
> #include <linux/ceph/libceph.h>
>
> +#ifdef CONFIG_CEPH_FSCACHE
> +#include <linux/fscache.h>
> +#endif
> +
> /* f_type in struct statfs */
> #define CEPH_SUPER_MAGIC 0x00c36400
>
> @@ -90,6 +94,10 @@ struct ceph_fs_client {
> struct dentry *debugfs_bdi;
> struct dentry *debugfs_mdsc, *debugfs_mdsmap;
> #endif
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + struct fscache_cookie *fscache;
> +#endif
> };
>
>
> @@ -319,6 +327,10 @@ struct ceph_inode_info {
>
> struct work_struct i_vmtruncate_work;
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + struct fscache_cookie *fscache;
> +#endif
> +
> struct inode vfs_inode; /* at end */
> };
>
> --
> 1.7.9.5
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
More information about the Linux-cachefs
mailing list