[Linux-cachefs] [PATCH 3/3] ceph: use fscache as a local persistent cache
Sage Weil
sage at inktank.com
Fri Aug 9 04:16:44 UTC 2013
Hi Milosz!
I have a few comments below on invalidate_page:
On Wed, 7 Aug 2013, Milosz Tanski wrote:
> Adding support for fscache to the Ceph filesystem. This would bring it on
> par with some of the other network filesystems in Linux (like NFS, AFS, etc...)
>
> In order to mount the filesystem with fscache the 'fsc' mount option must be
> passed.
>
> Signed-off-by: Milosz Tanski <milosz at adfin.com>
> ---
> fs/ceph/Kconfig | 9 ++
> fs/ceph/Makefile | 2 +
> fs/ceph/addr.c | 70 +++++++++----
> fs/ceph/cache.c | 306 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/ceph/cache.h | 117 +++++++++++++++++++++
> fs/ceph/caps.c | 19 +++-
> fs/ceph/file.c | 17 ++++
> fs/ceph/inode.c | 69 ++++++++++++-
> fs/ceph/super.c | 48 ++++++++-
> fs/ceph/super.h | 17 ++++
> 10 files changed, 646 insertions(+), 28 deletions(-)
> create mode 100644 fs/ceph/cache.c
> create mode 100644 fs/ceph/cache.h
>
> diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
> index 49bc782..ac9a2ef 100644
> --- a/fs/ceph/Kconfig
> +++ b/fs/ceph/Kconfig
> @@ -16,3 +16,12 @@ config CEPH_FS
>
> If unsure, say N.
>
> +if CEPH_FS
> +config CEPH_FSCACHE
> + bool "Enable Ceph client caching support"
> + depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
> + help
> + Choose Y here to enable persistent, read-only local
> + caching support for Ceph clients using FS-Cache
> +
> +endif
> diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
> index bd35212..0af0678 100644
> --- a/fs/ceph/Makefile
> +++ b/fs/ceph/Makefile
> @@ -9,3 +9,5 @@ ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
> mds_client.o mdsmap.o strings.o ceph_frag.o \
> debugfs.o
>
> +ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
> +
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index afb2fc2..de6de0e 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -11,6 +11,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
> #include <linux/ceph/osd_client.h>
>
> /*
> @@ -149,11 +150,23 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
> struct ceph_inode_info *ci;
> struct ceph_snap_context *snapc = page_snap_context(page);
>
> - BUG_ON(!PageLocked(page));
> - BUG_ON(!PagePrivate(page));
Do these go away because of the fscache change? Or were they incorrect to
begin with?
> BUG_ON(!page->mapping);
>
> inode = page->mapping->host;
> + ci = ceph_inode(inode);
> +
> + if (offset != 0) {
> + dout("%p invalidatepage %p idx %lu partial dirty page\n",
> + inode, page, page->index);
> + return;
> + }
It would be nice to factor out the offset != 0 short circuit into a
separate patch. Under what circumstances does it actually happen?
> +
> + ceph_invalidate_fscache_page(inode, page);
> +
> + if (!PagePrivate(page))
> + return;
> +
> + BUG_ON(!PageLocked(page));
>
> /*
> * We can get non-dirty pages here due to races between
> @@ -163,31 +176,28 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
> if (!PageDirty(page))
> pr_err("%p invalidatepage %p page not dirty\n", inode, page);
>
> - if (offset == 0)
> - ClearPageChecked(page);
> + ClearPageChecked(page);
>
> - ci = ceph_inode(inode);
> - if (offset == 0) {
> - dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> - inode, page, page->index, offset);
> - ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> - ceph_put_snap_context(snapc);
> - page->private = 0;
> - ClearPagePrivate(page);
> - } else {
> - dout("%p invalidatepage %p idx %lu partial dirty page\n",
> - inode, page, page->index);
> - }
Again, having this code movement in a separate patch from fscache will
make it easier to review and test.
Thanks!
sage
> + dout("%p invalidatepage %p idx %lu full dirty page %lu\n",
> + inode, page, page->index, offset);
> +
> + ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
> + ceph_put_snap_context(snapc);
> + page->private = 0;
> + ClearPagePrivate(page);
> }
>
> -/* just a sanity check */
> static int ceph_releasepage(struct page *page, gfp_t g)
> {
> struct inode *inode = page->mapping ? page->mapping->host : NULL;
> dout("%p releasepage %p idx %lu\n", inode, page, page->index);
> WARN_ON(PageDirty(page));
> - WARN_ON(PagePrivate(page));
> - return 0;
> +
> + /* Can we release the page from the cache? */
> + if (!ceph_release_fscache_page(page, g))
> + return 0;
> +
> + return !PagePrivate(page);
> }
>
> /*
> @@ -197,11 +207,16 @@ static int readpage_nounlock(struct file *filp, struct page *page)
> {
> struct inode *inode = file_inode(filp);
> struct ceph_inode_info *ci = ceph_inode(inode);
> - struct ceph_osd_client *osdc =
> + struct ceph_osd_client *osdc =
> &ceph_inode_to_client(inode)->client->osdc;
> int err = 0;
> u64 len = PAGE_CACHE_SIZE;
>
> + err = ceph_readpage_from_fscache(inode, page);
> +
> + if (err == 0)
> + goto out;
> +
> dout("readpage inode %p file %p page %p index %lu\n",
> inode, filp, page, page->index);
> err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
> @@ -219,6 +234,9 @@ static int readpage_nounlock(struct file *filp, struct page *page)
> }
> SetPageUptodate(page);
>
> + if (err == 0)
> + ceph_readpage_to_fscache(inode, page);
> +
> out:
> return err < 0 ? err : 0;
> }
> @@ -261,6 +279,7 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
> page->index);
> flush_dcache_page(page);
> SetPageUptodate(page);
> + ceph_readpage_to_fscache(inode, page);
> unlock_page(page);
> page_cache_release(page);
> bytes -= PAGE_CACHE_SIZE;
> @@ -330,7 +349,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
> page = list_entry(page_list->prev, struct page, lru);
> BUG_ON(PageLocked(page));
> list_del(&page->lru);
> -
> +
> dout("start_read %p adding %p idx %lu\n", inode, page,
> page->index);
> if (add_to_page_cache_lru(page, &inode->i_data, page->index,
> @@ -377,6 +396,12 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
> int rc = 0;
> int max = 0;
>
> + rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
> + &nr_pages);
> +
> + if (rc == 0)
> + goto out;
> +
> if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
> max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
> >> PAGE_SHIFT;
> @@ -496,6 +521,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
> CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
> set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
>
> + ceph_readpage_to_fscache(inode, page);
> +
> set_page_writeback(page);
> err = ceph_osdc_writepages(osdc, ceph_vino(inode),
> &ci->i_layout, snapc,
> @@ -551,7 +578,6 @@ static void ceph_release_pages(struct page **pages, int num)
> pagevec_release(&pvec);
> }
>
> -
> /*
> * async writeback completion handler.
> *
> diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
> new file mode 100644
> index 0000000..fa49bf7
> --- /dev/null
> +++ b/fs/ceph/cache.c
> @@ -0,0 +1,306 @@
> +/*
> + * Ceph cache definitions.
> + *
> + * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
> + * Written by Milosz Tanski (milosz at adfin.com)
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to:
> + * Free Software Foundation
> + * 51 Franklin Street, Fifth Floor
> + * Boston, MA 02111-1301 USA
> + *
> + */
> +
> +#include "super.h"
> +#include "cache.h"
> +
> +struct ceph_aux_inode {
> + struct timespec mtime;
> + loff_t size;
> +};
> +
> +struct fscache_netfs ceph_cache_netfs = {
> + .name = "ceph",
> + .version = 0,
> +};
> +
> +static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
> + void *buffer, uint16_t maxbuf)
> +{
> + const struct ceph_fs_client *fsc = cookie_netfs_data;
> + uint16_t klen;
> +
> + klen = sizeof(fsc->client->fsid);
> + if (klen > maxbuf)
> + return 0;
> +
> + memcpy(buffer, &fsc->client->fsid, klen);
> + return klen;
> +}
> +
> +static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
> + .name = "CEPH.fsid",
> + .type = FSCACHE_COOKIE_TYPE_INDEX,
> + .get_key = ceph_fscache_session_get_key,
> +};
> +
> +void ceph_fscache_register_fsid_cookie(struct ceph_fs_client *fsc)
> +{
> + fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
> + &ceph_fscache_fsid_object_def,
> + fsc);
> +}
> +
> +void ceph_fscache_unregister_fsid_cookie(struct ceph_fs_client *fsc)
> +{
> + fscache_relinquish_cookie(fsc->fscache, 0);
> + fsc->fscache = NULL;
> +}
> +
> +static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
> + void *buffer, uint16_t maxbuf)
> +{
> + const struct ceph_inode_info *ci = cookie_netfs_data;
> + uint16_t klen;
> +
> + /* use ceph virtual inode (id + snapshot) */
> + klen = sizeof(ci->i_vino);
> + if (klen > maxbuf)
> + return 0;
> +
> + memcpy(buffer, &ci->i_vino, klen);
> + return klen;
> +}
> +
> +static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
> + void *buffer, uint16_t bufmax)
> +{
> + struct ceph_aux_inode aux;
> + const struct ceph_inode_info *ci = cookie_netfs_data;
> + const struct inode *inode = &ci->vfs_inode;
> +
> + memset(&aux, 0, sizeof(aux));
> + aux.mtime = inode->i_mtime;
> + aux.size = inode->i_size;
> +
> + memcpy(buffer, &aux, sizeof(aux));
> +
> + return sizeof(aux);
> +}
> +
> +static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
> + uint64_t *size)
> +{
> + const struct ceph_inode_info *ci = cookie_netfs_data;
> + const struct inode *inode = &ci->vfs_inode;
> +
> + *size = inode->i_size;
> +}
> +
> +static enum fscache_checkaux ceph_fscache_inode_check_aux(
> + void *cookie_netfs_data, const void *data, uint16_t dlen)
> +{
> + struct ceph_aux_inode aux;
> + struct ceph_inode_info *ci = cookie_netfs_data;
> + struct inode *inode = &ci->vfs_inode;
> +
> + if (dlen != sizeof(aux))
> + return FSCACHE_CHECKAUX_OBSOLETE;
> +
> + memset(&aux, 0, sizeof(aux));
> + aux.mtime = inode->i_mtime;
> + aux.size = inode->i_size;
> +
> + if (memcmp(data, &aux, sizeof(aux)) != 0)
> + return FSCACHE_CHECKAUX_OBSOLETE;
> +
> + dout("ceph inode 0x%p cached okay", ci);
> + return FSCACHE_CHECKAUX_OKAY;
> +}
> +
> +static void ceph_fscache_inode_now_uncached(void *cookie_netfs_data)
> +{
> + struct ceph_inode_info *ci = cookie_netfs_data;
> + struct pagevec pvec;
> + pgoff_t first;
> + int loop, nr_pages;
> +
> + pagevec_init(&pvec, 0);
> + first = 0;
> +
> + dout("ceph inode 0x%p now uncached", ci);
> +
> + while (1) {
> + nr_pages = pagevec_lookup(&pvec, ci->vfs_inode.i_mapping, first,
> + PAGEVEC_SIZE - pagevec_count(&pvec));
> +
> + if (!nr_pages)
> + break;
> +
> + for (loop = 0; loop < nr_pages; loop++)
> + ClearPageFsCache(pvec.pages[loop]);
> +
> + first = pvec.pages[nr_pages - 1]->index + 1;
> +
> + pvec.nr = nr_pages;
> + pagevec_release(&pvec);
> + cond_resched();
> + }
> +}
> +
> +static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
> + .name = "CEPH.inode",
> + .type = FSCACHE_COOKIE_TYPE_DATAFILE,
> + .get_key = ceph_fscache_inode_get_key,
> + .get_attr = ceph_fscache_inode_get_attr,
> + .get_aux = ceph_fscache_inode_get_aux,
> + .check_aux = ceph_fscache_inode_check_aux,
> + .now_uncached = ceph_fscache_inode_now_uncached,
> +};
> +
> +void ceph_fscache_register_inode_cookie(struct ceph_fs_client *fsc,
> + struct ceph_inode_info *ci)
> +{
> + struct inode *inode = &ci->vfs_inode;
> +
> + /* No caching for filesystem */
> + if (fsc->fscache == NULL)
> + return;
> +
> + /* Only cache for regular files that are read only */
> + if ((ci->vfs_inode.i_mode & S_IFREG) == 0)
> + return;
> +
> + /* Avoid multiple racing open requests */
> + mutex_lock(&inode->i_mutex);
> +
> + if (ci->fscache)
> + goto done;
> +
> + ci->fscache = fscache_acquire_cookie(fsc->fscache,
> + &ceph_fscache_inode_object_def,
> + ci);
> +done:
> + mutex_unlock(&inode->i_mutex);
> +
> +}
> +
> +void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info *ci)
> +{
> + fscache_uncache_all_inode_pages(ci->fscache, &ci->vfs_inode);
> + fscache_relinquish_cookie(ci->fscache, 0);
> + ci->fscache = NULL;
> +}
> +
> +static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
> +{
> + if (!error)
> + SetPageUptodate(page);
> +}
> +
> +static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data,
> + int error)
> +{
> + if (!error)
> + SetPageUptodate(page);
> +
> + unlock_page(page);
> +}
> +
> +static inline int cache_valid(struct ceph_inode_info *ci)
> +{
> + return ((ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE) &&
> + (ci->i_fscache_gen == ci->i_rdcache_gen));
> +}
> +
> +
> +/* Attempt to read from the fscache.
> + *
> + * This function is called from the readpage_nounlock context. DO NOT attempt to
> + * unlock the page here (or in the callback).
> + */
> +int __ceph_readpage_from_fscache(struct inode *inode, struct page *page)
> +{
> + struct ceph_inode_info *ci = ceph_inode(inode);
> + int ret;
> +
> + if (!cache_valid(ci))
> + return -ENOBUFS;
> +
> + ret = fscache_read_or_alloc_page(ci->fscache, page,
> + ceph_vfs_readpage_complete, NULL,
> + GFP_KERNEL);
> +
> + switch (ret) {
> + case 0: /* Page found */
> + dout("page read submitted\n");
> + return 0;
> + case -ENOBUFS: /* Pages were not found, and can't be */
> + case -ENODATA: /* Pages were not found */
> + dout("page/inode not in cache\n");
> + return ret;
> + default:
> + dout("%s: unknown error ret = %i\n", __func__, ret);
> + return ret;
> + }
> +}
> +
> +int __ceph_readpages_from_fscache(struct inode *inode,
> + struct address_space *mapping,
> + struct list_head *pages,
> + unsigned *nr_pages)
> +{
> + struct ceph_inode_info *ci = ceph_inode(inode);
> + int ret;
> +
> + if (!cache_valid(ci))
> + return -ENOBUFS;
> +
> + ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
> + ceph_vfs_readpage_complete_unlock,
> + NULL, mapping_gfp_mask(mapping));
> +
> + switch (ret) {
> + case 0: /* All pages found */
> + dout("all-page read submitted\n");
> + return 0;
> + case -ENOBUFS: /* Some pages were not found, and can't be */
> + case -ENODATA: /* some pages were not found */
> + dout("page/inode not in cache\n");
> + return ret;
> + default:
> + dout("%s: unknown error ret = %i\n", __func__, ret);
> + return ret;
> + }
> +}
> +
> +void __ceph_readpage_to_fscache(struct inode *inode, struct page *page)
> +{
> + struct ceph_inode_info *ci = ceph_inode(inode);
> + int ret;
> +
> + if (!cache_valid(ci))
> + return;
> +
> + ret = fscache_write_page(ci->fscache, page, GFP_KERNEL);
> + if (ret)
> + fscache_uncache_page(ci->fscache, page);
> +}
> +
> +void __ceph_invalidate_fscache_page(struct inode *inode, struct page *page)
> +{
> + struct ceph_inode_info *ci = ceph_inode(inode);
> +
> + fscache_wait_on_page_write(ci->fscache, page);
> + fscache_uncache_page(ci->fscache, page);
> +}
> diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
> new file mode 100644
> index 0000000..23ce336
> --- /dev/null
> +++ b/fs/ceph/cache.h
> @@ -0,0 +1,117 @@
> +/*
> + * Ceph cache definitions.
> + *
> + * Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
> + * Written by Milosz Tanski (milosz at adfin.com)
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to:
> + * Free Software Foundation
> + * 51 Franklin Street, Fifth Floor
> + * Boston, MA 02111-1301 USA
> + *
> + */
> +
> +#ifndef _CEPH_CACHE_H
> +#define _CEPH_CACHE_H
> +
> +#include <linux/fscache.h>
> +
> +
> +extern struct fscache_netfs ceph_cache_netfs;
> +
> +
> +void ceph_fscache_register_fsid_cookie(struct ceph_fs_client *fsc);
> +void ceph_fscache_unregister_fsid_cookie(struct ceph_fs_client *fsc);
> +void ceph_fscache_register_inode_cookie(struct ceph_fs_client *parent_fsc,
> + struct ceph_inode_info *ci);
> +void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info *ci);
> +
> +int __ceph_readpage_from_fscache(struct inode *inode, struct page *page);
> +int __ceph_readpages_from_fscache(struct inode *inode,
> + struct address_space *mapping,
> + struct list_head *pages,
> + unsigned *nr_pages);
> +void __ceph_readpage_to_fscache(struct inode *inode, struct page *page);
> +void __ceph_invalidate_fscache_page(struct inode *inode, struct page *page);
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> +
> +
> +static inline int ceph_readpage_from_fscache(struct inode *inode,
> + struct page *page)
> +{
> + return __ceph_readpage_from_fscache(inode, page);
> +}
> +
> +static inline int ceph_readpages_from_fscache(struct inode *inode,
> + struct address_space *mapping,
> + struct list_head *pages,
> + unsigned *nr_pages)
> +{
> + return __ceph_readpages_from_fscache(inode, mapping, pages,
> + nr_pages);
> +}
> +
> +static inline void ceph_readpage_to_fscache(struct inode *inode,
> + struct page *page)
> +{
> + return __ceph_readpage_to_fscache(inode, page);
> +}
> +
> +static inline void ceph_invalidate_fscache_page(struct inode *inode,
> + struct page *page)
> +{
> + return __ceph_invalidate_fscache_page(inode, page);
> +}
> +
> +static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
> +{
> + struct inode *inode = page->mapping->host;
> + struct ceph_inode_info *ci = ceph_inode(inode);
> + return fscache_maybe_release_page(ci->fscache, page, gfp);
> +}
> +
> +#else
> +
> +static inline int ceph_readpage_from_fscache(struct inode *inode,
> + struct page *page)
> +{
> + return -ENOBUFS;
> +}
> +
> +static inline int ceph_readpages_from_fscache(struct inode *inode,
> + struct address_space *mapping,
> + struct list_head *pages,
> + unsigned *nr_pages)
> +{
> + return -ENOBUFS;
> +}
> +
> +static inline void ceph_readpage_to_fscache(struct inode *inode,
> + struct page *page)
> +{
> +}
> +
> +static inline void ceph_invalidate_fscache_page(struct inode *inode,
> + struct page *page)
> +{
> +}
> +
> +static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
> +{
> + return 1;
> +}
> +
> +#endif
> +
> +#endif
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 430121a..32ce13d 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -10,6 +10,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
> #include <linux/ceph/decode.h>
> #include <linux/ceph/messenger.h>
>
> @@ -479,8 +480,9 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
> * i_rdcache_gen.
> */
> if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
> - (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
> + (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
> ci->i_rdcache_gen++;
> + }
>
> /*
> * if we are newly issued FILE_SHARED, mark dir not complete; we
> @@ -2396,6 +2398,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> int revoked_rdcache = 0;
> int queue_invalidate = 0;
> int deleted_inode = 0;
> + int queue_revalidate = 0;
>
> dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
> inode, cap, mds, seq, ceph_cap_string(newcaps));
> @@ -2420,6 +2423,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> ci->i_rdcache_revoking = ci->i_rdcache_gen;
> }
> }
> +
> + fscache_invalidate(ci->fscache);
> }
>
> /* side effects now are allowed */
> @@ -2461,6 +2466,11 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> }
> }
>
> + /* Do we need to revalidate our fscache cookie? Don't bother on the
> + * first cache cap as we already validate at cookie creation time. */
> + if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
> + queue_revalidate = 1;
> +
> /* size/ctime/mtime/atime? */
> ceph_fill_file_size(inode, issued,
> le32_to_cpu(grant->truncate_seq),
> @@ -2545,6 +2555,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> BUG_ON(cap->issued & ~cap->implemented);
>
> spin_unlock(&ci->i_ceph_lock);
> +
> if (writeback)
> /*
> * queue inode for writeback: we can't actually call
> @@ -2556,6 +2567,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
> ceph_queue_invalidate(inode);
> if (deleted_inode)
> invalidate_aliases(inode);
> + if (queue_revalidate)
> + ceph_queue_revalidate(inode);
> if (wake)
> wake_up_all(&ci->i_cap_wq);
>
> @@ -2712,8 +2725,10 @@ static void handle_cap_trunc(struct inode *inode,
> truncate_seq, truncate_size, size);
> spin_unlock(&ci->i_ceph_lock);
>
> - if (queue_trunc)
> + if (queue_trunc) {
> ceph_queue_vmtruncate(inode);
> + fscache_invalidate(ci->fscache);
> + }
> }
>
> /*
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 63ec830..ff35aa4 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -11,6 +11,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
>
> /*
> * Ceph file operations
> @@ -68,9 +69,23 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
> {
> struct ceph_file_info *cf;
> int ret = 0;
> + struct ceph_inode_info *ci = ceph_inode(inode);
> + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
> + struct ceph_mds_client *mdsc = fsc->mdsc;
>
> switch (inode->i_mode & S_IFMT) {
> case S_IFREG:
> + /* First file open request creates the cookie, we want to keep
> + * this cookie around for the lifetime of the inode so as not to
> + * have to worry about fscache register / revoke / operation
> + * races.
> + *
> + * Also, if we know the operation is going to invalidate data
> + * (non readonly) just nuke the cache right away.
> + */
> + ceph_fscache_register_inode_cookie(mdsc->fsc, ci);
> + if ((fmode & CEPH_FILE_MODE_WR))
> + fscache_invalidate(ci->fscache);
> case S_IFDIR:
> dout("init_file %p %p 0%o (regular)\n", inode, file,
> inode->i_mode);
> @@ -181,6 +196,7 @@ int ceph_open(struct inode *inode, struct file *file)
> spin_unlock(&ci->i_ceph_lock);
> return ceph_init_file(inode, file, fmode);
> }
> +
> spin_unlock(&ci->i_ceph_lock);
>
> dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
> @@ -191,6 +207,7 @@ int ceph_open(struct inode *inode, struct file *file)
> }
> req->r_inode = inode;
> ihold(inode);
> +
> req->r_num_caps = 1;
> if (flags & (O_CREAT|O_TRUNC))
> parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 3b0abed..d85c977 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -12,6 +12,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
> #include <linux/ceph/decode.h>
>
> /*
> @@ -31,6 +32,7 @@ static const struct inode_operations ceph_symlink_iops;
> static void ceph_invalidate_work(struct work_struct *work);
> static void ceph_writeback_work(struct work_struct *work);
> static void ceph_vmtruncate_work(struct work_struct *work);
> +static void ceph_revalidate_work(struct work_struct *work);
>
> /*
> * find or create an inode, given the ceph ino number
> @@ -385,6 +387,13 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>
> INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + ci->fscache = NULL;
> + /* The first load is verified at cookie open time */
> + ci->i_fscache_gen = 1;
> + INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work);
> +#endif
> +
> return &ci->vfs_inode;
> }
>
> @@ -404,6 +413,8 @@ void ceph_destroy_inode(struct inode *inode)
>
> dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
>
> + ceph_fscache_unregister_inode_cookie(ci);
> +
> ceph_queue_caps_release(inode);
>
> /*
> @@ -438,7 +449,6 @@ void ceph_destroy_inode(struct inode *inode)
> call_rcu(&inode->i_rcu, ceph_i_callback);
> }
>
> -
> /*
> * Helpers to fill in size, ctime, mtime, and atime. We have to be
> * careful because either the client or MDS may have more up to date
> @@ -486,6 +496,10 @@ int ceph_fill_file_size(struct inode *inode, int issued,
> truncate_size);
> ci->i_truncate_size = truncate_size;
> }
> +
> + if (queue_trunc)
> + fscache_invalidate(ci->fscache);
> +
> return queue_trunc;
> }
>
> @@ -1074,7 +1088,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
> * complete.
> */
> ceph_set_dentry_offset(req->r_old_dentry);
> - dout("dn %p gets new offset %lld\n", req->r_old_dentry,
> + dout("dn %p gets new offset %lld\n", req->r_old_dentry,
> ceph_dentry(req->r_old_dentry)->offset);
>
> dn = req->r_old_dentry; /* use old_dentry */
> @@ -1495,6 +1509,7 @@ void ceph_queue_vmtruncate(struct inode *inode)
> struct ceph_inode_info *ci = ceph_inode(inode);
>
> ihold(inode);
> +
> if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
> &ci->i_vmtruncate_work)) {
> dout("ceph_queue_vmtruncate %p\n", inode);
> @@ -1559,6 +1574,56 @@ retry:
> wake_up_all(&ci->i_cap_wq);
> }
>
> +static void ceph_revalidate_work(struct work_struct *work)
> +{
> + int issued;
> + u32 orig_gen;
> + struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
> + i_revalidate_work);
> + struct inode *inode = &ci->vfs_inode;
> +
> + spin_lock(&ci->i_ceph_lock);
> + issued = __ceph_caps_issued(ci, NULL);
> + orig_gen = ci->i_rdcache_gen;
> + spin_unlock(&ci->i_ceph_lock);
> +
> + if (!(issued & CEPH_CAP_FILE_CACHE)) {
> + dout("revalidate_work lost cache before validation %p\n",
> + inode);
> + goto out;
> + }
> +
> + if (!fscache_check_consistency(ci->fscache))
> + fscache_invalidate(ci->fscache);
> +
> + spin_lock(&ci->i_ceph_lock);
> + /* Update the new valid generation (backwards sanity check too) */
> + if (orig_gen > ci->i_fscache_gen) {
> + ci->i_fscache_gen = orig_gen;
> + } else {
> + pr_warn("revalidate_work raced cache validation %p\n",
> + inode);
> + }
> + spin_unlock(&ci->i_ceph_lock);
> +
> +out:
> + iput(&ci->vfs_inode);
> +}
> +
> +void ceph_queue_revalidate(struct inode *inode)
> +{
> + struct ceph_inode_info *ci = ceph_inode(inode);
> +
> + ihold(inode);
> +
> + if (queue_work(ceph_sb_to_client(inode->i_sb)->revalidate_wq,
> + &ci->i_revalidate_work)) {
> + dout("ceph_queue_revalidate %p\n", inode);
> + } else {
> + dout("ceph_queue_revalidate %p failed\n)", inode);
> + iput(inode);
> + }
> +}
>
> /*
> * symlinks
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 6627b26..d88808c 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -17,6 +17,7 @@
>
> #include "super.h"
> #include "mds_client.h"
> +#include "cache.h"
>
> #include <linux/ceph/ceph_features.h>
> #include <linux/ceph/decode.h>
> @@ -142,6 +143,8 @@ enum {
> Opt_nodcache,
> Opt_ino32,
> Opt_noino32,
> + Opt_fscache,
> + Opt_nofscache
> };
>
> static match_table_t fsopt_tokens = {
> @@ -167,6 +170,8 @@ static match_table_t fsopt_tokens = {
> {Opt_nodcache, "nodcache"},
> {Opt_ino32, "ino32"},
> {Opt_noino32, "noino32"},
> + {Opt_fscache, "fsc"},
> + {Opt_nofscache, "nofsc"},
> {-1, NULL}
> };
>
> @@ -260,6 +265,12 @@ static int parse_fsopt_token(char *c, void *private)
> case Opt_noino32:
> fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
> break;
> + case Opt_fscache:
> + fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
> + break;
> + case Opt_nofscache:
> + fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
> + break;
> default:
> BUG_ON(token);
> }
> @@ -422,6 +433,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
> seq_puts(m, ",dcache");
> else
> seq_puts(m, ",nodcache");
> + if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
> + seq_puts(m, ",fsc");
> + else
> + seq_puts(m, ",nofsc");
>
> if (fsopt->wsize)
> seq_printf(m, ",wsize=%d", fsopt->wsize);
> @@ -530,11 +545,24 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
> if (!fsc->wb_pagevec_pool)
> goto fail_trunc_wq;
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE))
> + ceph_fscache_register_fsid_cookie(fsc);
> +
> + fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1);
> + if (fsc->revalidate_wq == NULL)
> + goto fail_fscache;
> +#endif
> +
> /* caps */
> fsc->min_caps = fsopt->max_readdir;
>
> return fsc;
>
> +#ifdef CONFIG_CEPH_FSCACHE
> +fail_fscache:
> + ceph_fscache_unregister_fsid_cookie(fsc);
> +#endif
> fail_trunc_wq:
> destroy_workqueue(fsc->trunc_wq);
> fail_pg_inv_wq:
> @@ -554,6 +582,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
> {
> dout("destroy_fs_client %p\n", fsc);
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + ceph_fscache_unregister_fsid_cookie(fsc);
> +#endif
> +
> destroy_workqueue(fsc->wb_wq);
> destroy_workqueue(fsc->pg_inv_wq);
> destroy_workqueue(fsc->trunc_wq);
> @@ -588,6 +620,8 @@ static void ceph_inode_init_once(void *foo)
>
> static int __init init_caches(void)
> {
> + int error = -ENOMEM;
> +
> ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
> sizeof(struct ceph_inode_info),
> __alignof__(struct ceph_inode_info),
> @@ -611,15 +645,20 @@ static int __init init_caches(void)
> if (ceph_file_cachep == NULL)
> goto bad_file;
>
> - return 0;
> +#ifdef CONFIG_CEPH_FSCACHE
> + error = fscache_register_netfs(&ceph_cache_netfs);
> + if (error)
> + goto bad_file;
> +#endif
>
> + return 0;
> bad_file:
> kmem_cache_destroy(ceph_dentry_cachep);
> bad_dentry:
> kmem_cache_destroy(ceph_cap_cachep);
> bad_cap:
> kmem_cache_destroy(ceph_inode_cachep);
> - return -ENOMEM;
> + return error;
> }
>
> static void destroy_caches(void)
> @@ -629,10 +668,15 @@ static void destroy_caches(void)
> * destroy cache.
> */
> rcu_barrier();
> +
> kmem_cache_destroy(ceph_inode_cachep);
> kmem_cache_destroy(ceph_cap_cachep);
> kmem_cache_destroy(ceph_dentry_cachep);
> kmem_cache_destroy(ceph_file_cachep);
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + fscache_unregister_netfs(&ceph_cache_netfs);
> +#endif
> }
>
>
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index afcd62a..8bcac51 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -16,6 +16,10 @@
>
> #include <linux/ceph/libceph.h>
>
> +#ifdef CONFIG_CEPH_FSCACHE
> +#include <linux/fscache.h>
> +#endif
> +
> /* f_type in struct statfs */
> #define CEPH_SUPER_MAGIC 0x00c36400
>
> @@ -29,6 +33,7 @@
> #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
> #define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
> #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
> +#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
>
> #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
>
> @@ -90,6 +95,11 @@ struct ceph_fs_client {
> struct dentry *debugfs_bdi;
> struct dentry *debugfs_mdsc, *debugfs_mdsmap;
> #endif
> +
> +#ifdef CONFIG_CEPH_FSCACHE
> + struct fscache_cookie *fscache;
> + struct workqueue_struct *revalidate_wq;
> +#endif
> };
>
>
> @@ -319,6 +329,12 @@ struct ceph_inode_info {
>
> struct work_struct i_vmtruncate_work;
>
> +#ifdef CONFIG_CEPH_FSCACHE
> + struct fscache_cookie *fscache;
> + u32 i_fscache_gen; /* sequence, for delayed fscache validate */
> + struct work_struct i_revalidate_work;
> +#endif
> +
> struct inode vfs_inode; /* at end */
> };
>
> @@ -699,6 +715,7 @@ extern void ceph_queue_vmtruncate(struct inode *inode);
>
> extern void ceph_queue_invalidate(struct inode *inode);
> extern void ceph_queue_writeback(struct inode *inode);
> +extern void ceph_queue_revalidate(struct inode *inode);
>
> extern int ceph_do_getattr(struct inode *inode, int mask);
> extern int ceph_permission(struct inode *inode, int mask);
> --
> 1.8.1.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
More information about the Linux-cachefs
mailing list