From c4d6d8dbf335c7fa47341654a37c53a512b519bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:32 +0000 Subject: CacheFiles: Fix the marking of cached pages Under some circumstances CacheFiles defers the marking of pages with PG_fscache so that it can take advantage of pagevecs to reduce the number of calls to fscache_mark_pages_cached() and the netfs's hook to keep track of this. There are, however, two problems with this: (1) It can lead to the PG_fscache mark being applied _after_ the page is set PG_uptodate and unlocked (by the call to fscache_end_io()). (2) CacheFiles's ref on the page is dropped immediately following fscache_end_io() - and so may not still be held when the mark is applied. This can lead to the page being passed back to the allocator before the mark is applied. Fix this by, where appropriate, marking the page before calling fscache_end_io() and releasing the page. This means that we can't take advantage of pagevecs and have to make a separate call for each page to the marking routines. The symptoms of this are Bad Page state errors cropping up under memory pressure, for example: BUG: Bad page state in process tar pfn:002da page:ffffea0000009fb0 count:0 mapcount:0 mapping: (null) index:0x1447 page flags: 0x1000(private_2) Pid: 4574, comm: tar Tainted: G W 3.1.0-rc4-fsdevel+ #1064 Call Trace: [] ? dump_page+0xb9/0xbe [] bad_page+0xd5/0xea [] get_page_from_freelist+0x35b/0x46a [] __alloc_pages_nodemask+0x362/0x662 [] __do_page_cache_readahead+0x13a/0x267 [] ? __do_page_cache_readahead+0xa2/0x267 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x28b/0x29a [] ? ondemand_readahead+0x163/0x29a [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x2ab/0x67e [] nfs_file_read+0xa4/0xc9 [nfs] [] do_sync_read+0xba/0xfa [] ? security_file_permission+0x7b/0x84 [] ? rw_verify_area+0xab/0xc8 [] vfs_read+0xaa/0x13a [] sys_read+0x45/0x6c [] system_call_fastpath+0x16/0x1b As can be seen, PG_private_2 (== PG_fscache) is set in the page flags. Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was set appropriately showed that sometimes it wasn't. This led to the discovery that sometimes the page has apparently been reclaimed by the time the marker got to see it. Reported-by: M. 
Stevens Signed-off-by: David Howells Reviewed-by: Jeff Layton --- fs/cachefiles/rdwr.c | 34 ++++++++++-------------------- fs/fscache/page.c | 59 ++++++++++++++++++++++++++++++++-------------------- 2 files changed, 47 insertions(+), 46 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index c994691d9445..3367abdcdac4 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -176,9 +176,8 @@ static void cachefiles_read_copier(struct fscache_operation *_op) recheck: if (PageUptodate(monitor->back_page)) { copy_highpage(monitor->netfs_page, monitor->back_page); - - pagevec_add(&pagevec, monitor->netfs_page); - fscache_mark_pages_cached(monitor->op, &pagevec); + fscache_mark_page_cached(monitor->op, + monitor->netfs_page); error = 0; } else if (!PageError(monitor->back_page)) { /* the page has probably been truncated */ @@ -335,8 +334,7 @@ backing_page_already_present: backing_page_already_uptodate: _debug("- uptodate"); - pagevec_add(pagevec, netpage); - fscache_mark_pages_cached(op, pagevec); + fscache_mark_page_cached(op, netpage); copy_highpage(netpage, backpage); fscache_end_io(op, netpage, 0); @@ -448,8 +446,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, &pagevec); } else if (cachefiles_has_space(cache, 0, 1) == 0) { /* there's space in the cache we can use */ - pagevec_add(&pagevec, page); - fscache_mark_pages_cached(op, &pagevec); + fscache_mark_page_cached(op, page); ret = -ENODATA; } else { ret = -ENOBUFS; @@ -465,8 +462,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, */ static int cachefiles_read_backing_file(struct cachefiles_object *object, struct fscache_retrieval *op, - struct list_head *list, - struct pagevec *mark_pvec) + struct list_head *list) { struct cachefiles_one_read *monitor = NULL; struct address_space *bmapping = object->backer->d_inode->i_mapping; @@ -626,13 +622,13 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, page_cache_release(backpage); backpage = NULL; - if (!pagevec_add(mark_pvec, netpage)) - fscache_mark_pages_cached(op, mark_pvec); + fscache_mark_page_cached(op, netpage); page_cache_get(netpage); if (!pagevec_add(&lru_pvec, netpage)) __pagevec_lru_add_file(&lru_pvec); + /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); page_cache_release(netpage); netpage = NULL; @@ -775,15 +771,11 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, /* submit the apparently valid pages to the backing fs to be read from * disk */ if (nrbackpages > 0) { - ret2 = cachefiles_read_backing_file(object, op, &backpages, - &pagevec); + ret2 = cachefiles_read_backing_file(object, op, &backpages); if (ret2 == -ENOMEM || ret2 == -EINTR) ret = ret2; } - if (pagevec_count(&pagevec) > 0) - fscache_mark_pages_cached(op, &pagevec); - _leave(" = %d [nr=%u%s]", ret, *nr_pages, list_empty(pages) ? 
" empty" : ""); return ret; @@ -806,7 +798,6 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, { struct cachefiles_object *object; struct cachefiles_cache *cache; - struct pagevec pagevec; int ret; object = container_of(op->op.object, @@ -817,13 +808,10 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, _enter("%p,{%lx},", object, page->index); ret = cachefiles_has_space(cache, 0, 1); - if (ret == 0) { - pagevec_init(&pagevec, 0); - pagevec_add(&pagevec, page); - fscache_mark_pages_cached(op, &pagevec); - } else { + if (ret == 0) + fscache_mark_page_cached(op, page); + else ret = -ENOBUFS; - } _leave(" = %d", ret); return ret; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 3f7a59bfa7ad..d7c663cfc923 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -914,6 +914,40 @@ done: } EXPORT_SYMBOL(__fscache_uncache_page); +/** + * fscache_mark_page_cached - Mark a page as being cached + * @op: The retrieval op pages are being marked for + * @page: The page to be marked + * + * Mark a netfs page as being cached. After this is called, the netfs + * must call fscache_uncache_page() to remove the mark. + */ +void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) +{ + struct fscache_cookie *cookie = op->op.object->cookie; + +#ifdef CONFIG_FSCACHE_STATS + atomic_inc(&fscache_n_marks); +#endif + + _debug("- mark %p{%lx}", page, page->index); + if (TestSetPageFsCache(page)) { + static bool once_only; + if (!once_only) { + once_only = true; + printk(KERN_WARNING "FS-Cache:" + " Cookie type %s marked page %lx" + " multiple times\n", + cookie->def->name, page->index); + } + } + + if (cookie->def->mark_page_cached) + cookie->def->mark_page_cached(cookie->netfs_data, + op->mapping, page); +} +EXPORT_SYMBOL(fscache_mark_page_cached); + /** * fscache_mark_pages_cached - Mark pages as being cached * @op: The retrieval op pages are being marked for @@ -925,32 +959,11 @@ EXPORT_SYMBOL(__fscache_uncache_page); void fscache_mark_pages_cached(struct fscache_retrieval *op, struct pagevec *pagevec) { - struct fscache_cookie *cookie = op->op.object->cookie; unsigned long loop; -#ifdef CONFIG_FSCACHE_STATS - atomic_add(pagevec->nr, &fscache_n_marks); -#endif - - for (loop = 0; loop < pagevec->nr; loop++) { - struct page *page = pagevec->pages[loop]; - - _debug("- mark %p{%lx}", page, page->index); - if (TestSetPageFsCache(page)) { - static bool once_only; - if (!once_only) { - once_only = true; - printk(KERN_WARNING "FS-Cache:" - " Cookie type %s marked page %lx" - " multiple times\n", - cookie->def->name, page->index); - } - } - } + for (loop = 0; loop < pagevec->nr; loop++) + fscache_mark_page_cached(op, pagevec->pages[loop]); - if (cookie->def->mark_pages_cached) - cookie->def->mark_pages_cached(cookie->netfs_data, - op->mapping, pagevec); pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); -- cgit v1.2.3 From 5f4f9f4af185d5e76c966d2d3420a61870c856e7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:33 +0000 Subject: CacheFiles: Downgrade the requirements passed to the allocator Downgrade the requirements passed to the allocator in the gfp flags parameter. FS-Cache/CacheFiles can handle OOM conditions simply by aborting the attempt to store an object or a page in the cache. 
Signed-off-by: David Howells --- fs/cachefiles/interface.c | 8 ++++---- fs/cachefiles/internal.h | 2 ++ fs/cachefiles/key.c | 2 +- fs/cachefiles/rdwr.c | 18 ++++++++++-------- fs/cachefiles/xattr.c | 2 +- fs/fscache/page.c | 2 +- 6 files changed, 19 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 67bef6d01484..9bff0f878cfd 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -41,12 +41,12 @@ static struct fscache_object *cachefiles_alloc_object( _enter("{%s},%p,", cache->cache.identifier, cookie); - lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL); + lookup_data = kmalloc(sizeof(*lookup_data), cachefiles_gfp); if (!lookup_data) goto nomem_lookup_data; /* create a new object record and a temporary leaf image */ - object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); + object = kmem_cache_alloc(cachefiles_object_jar, cachefiles_gfp); if (!object) goto nomem_object; @@ -63,7 +63,7 @@ static struct fscache_object *cachefiles_alloc_object( * - stick the length on the front and leave space on the back for the * encoder */ - buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); + buffer = kmalloc((2 + 512) + 3, cachefiles_gfp); if (!buffer) goto nomem_buffer; @@ -219,7 +219,7 @@ static void cachefiles_update_object(struct fscache_object *_object) return; } - auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL); + auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); if (!auxdata) { _leave(" [nomem]"); return; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index bd6bc1bde2d7..49382519907a 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -23,6 +23,8 @@ extern unsigned cachefiles_debug; #define CACHEFILES_DEBUG_KLEAVE 2 #define CACHEFILES_DEBUG_KDEBUG 4 +#define cachefiles_gfp (__GFP_WAIT | __GFP_NORETRY | __GFP_NOMEMALLOC) + /* * node records */ diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c index 81b8b2b3a674..33b58c60f2d1 100644 --- a/fs/cachefiles/key.c +++ b/fs/cachefiles/key.c @@ -78,7 +78,7 @@ char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) _debug("max: %d", max); - key = kmalloc(max, GFP_KERNEL); + key = kmalloc(max, cachefiles_gfp); if (!key) return NULL; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 3367abdcdac4..9108b8ea505a 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -238,7 +238,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, _debug("read back %p{%lu,%d}", netpage, netpage->index, page_count(netpage)); - monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); if (!monitor) goto nomem; @@ -257,13 +257,14 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, goto backing_page_already_present; if (!newpage) { - newpage = page_cache_alloc_cold(bmapping); + newpage = __page_cache_alloc(cachefiles_gfp | + __GFP_COLD); if (!newpage) goto nomem_monitor; } ret = add_to_page_cache(newpage, bmapping, - netpage->index, GFP_KERNEL); + netpage->index, cachefiles_gfp); if (ret == 0) goto installed_new_backing_page; if (ret != -EEXIST) @@ -481,7 +482,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, netpage, netpage->index, page_count(netpage)); if (!monitor) { - monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); + monitor = kzalloc(sizeof(*monitor), cachefiles_gfp); if (!monitor) goto nomem; @@ -496,13 +497,14 @@ static int cachefiles_read_backing_file(struct cachefiles_object 
*object, goto backing_page_already_present; if (!newpage) { - newpage = page_cache_alloc_cold(bmapping); + newpage = __page_cache_alloc(cachefiles_gfp | + __GFP_COLD); if (!newpage) goto nomem; } ret = add_to_page_cache(newpage, bmapping, - netpage->index, GFP_KERNEL); + netpage->index, cachefiles_gfp); if (ret == 0) goto installed_new_backing_page; if (ret != -EEXIST) @@ -532,7 +534,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, _debug("- monitor add"); ret = add_to_page_cache(netpage, op->mapping, netpage->index, - GFP_KERNEL); + cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { page_cache_release(netpage); @@ -608,7 +610,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, _debug("- uptodate"); ret = add_to_page_cache(netpage, op->mapping, netpage->index, - GFP_KERNEL); + cachefiles_gfp); if (ret < 0) { if (ret == -EEXIST) { page_cache_release(netpage); diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index e18b183b47e1..73b46288b54b 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c @@ -174,7 +174,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object, ASSERT(dentry); ASSERT(dentry->d_inode); - auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); + auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp); if (!auxbuf) { _leave(" = -ENOMEM"); return -ENOMEM; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index d7c663cfc923..248a12e22532 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -759,7 +759,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_stores); - op = kzalloc(sizeof(*op), GFP_NOIO); + op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY); if (!op) goto nomem; -- cgit v1.2.3 From 0f972b5696c0a0677a9b3a18fee45cc0e8de4184 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:33 +0000 Subject: FS-Cache: Check that there are no read ops when cookie relinquished Check that the netfs isn't trying to relinquish a cookie that still has read operations in progress upon it. If there are, then give log a warning and BUG. Signed-off-by: David Howells --- fs/fscache/cookie.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 990535071a8a..0666996adf80 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -452,6 +452,14 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) _debug("RELEASE OBJ%x", object->debug_id); + if (atomic_read(&object->n_reads)) { + spin_unlock(&cookie->lock); + printk(KERN_ERR "FS-Cache:" + " Cookie '%s' still has %d outstanding reads\n", + cookie->def->name, atomic_read(&object->n_reads)); + BUG(); + } + /* detach each cache object from the object cookie */ spin_lock(&object->lock); hlist_del_init(&object->cookie_link); -- cgit v1.2.3 From 37491a1339df26259b06dfa33f30e574e9e52034 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:34 +0000 Subject: CacheFiles: Make some debugging statements conditional Downgrade some debugging statements to not unconditionally print stuff, but rather be conditional on the appropriate module parameter setting. 
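For context (editor's sketch, not part of the patch): kenter()/kleave()/kdebug() print whenever the kernel is built with debugging, whereas the CacheFiles _enter()/_leave()/_debug() wrappers also consult the cachefiles_debug module parameter at run time, which is what makes this downgrade meaningful. The real macro bodies live in fs/cachefiles/internal.h; the following is only an approximation of that gating, assuming a CACHEFILES_DEBUG_KENTER bit alongside the KLEAVE/KDEBUG bits shown earlier:

    /* Approximate sketch of the runtime gating -- see fs/cachefiles/internal.h
     * for the real definitions, which may differ in detail.
     */
    #define CACHEFILES_DEBUG_KENTER 1

    #define _enter(FMT, ...)                                                \
    do {                                                                    \
            if (cachefiles_debug & CACHEFILES_DEBUG_KENTER)                 \
                    kenter(FMT, ##__VA_ARGS__);                             \
    } while (0)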
Signed-off-by: David Howells --- fs/cachefiles/rdwr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 9108b8ea505a..bf123d9c3206 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -77,25 +77,25 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, struct page *backpage = monitor->back_page, *backpage2; int ret; - kenter("{ino=%lx},{%lx,%lx}", + _enter("{ino=%lx},{%lx,%lx}", object->backer->d_inode->i_ino, backpage->index, backpage->flags); /* skip if the page was truncated away completely */ if (backpage->mapping != bmapping) { - kleave(" = -ENODATA [mapping]"); + _leave(" = -ENODATA [mapping]"); return -ENODATA; } backpage2 = find_get_page(bmapping, backpage->index); if (!backpage2) { - kleave(" = -ENODATA [gone]"); + _leave(" = -ENODATA [gone]"); return -ENODATA; } if (backpage != backpage2) { put_page(backpage2); - kleave(" = -ENODATA [different]"); + _leave(" = -ENODATA [different]"); return -ENODATA; } @@ -114,7 +114,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, if (PageUptodate(backpage)) goto unlock_discard; - kdebug("reissue read"); + _debug("reissue read"); ret = bmapping->a_ops->readpage(NULL, backpage); if (ret < 0) goto unlock_discard; @@ -129,7 +129,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object, } /* it'll reappear on the todo list */ - kleave(" = -EINPROGRESS"); + _leave(" = -EINPROGRESS"); return -EINPROGRESS; unlock_discard: @@ -137,7 +137,7 @@ unlock_discard: spin_lock_irq(&object->work_lock); list_del(&monitor->op_link); spin_unlock_irq(&object->work_lock); - kleave(" = %d", ret); + _leave(" = %d", ret); return ret; } -- cgit v1.2.3 From ef46ed888efb1e8da33be5d33c9b54476289a43b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Make cookie relinquishment wait for outstanding reads Make fscache_relinquish_cookie() log a warning and wait if there are any outstanding reads left on the cookie it was given. Signed-off-by: David Howells --- fs/fscache/cookie.c | 18 ++++++++++++++---- fs/fscache/operation.c | 10 ++++++++-- include/linux/fscache-cache.h | 1 + 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 0666996adf80..66be9eccede0 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -442,22 +442,32 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) event = retire ? 
FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; +try_again: spin_lock(&cookie->lock); /* break links with all the active objects */ while (!hlist_empty(&cookie->backing_objects)) { + int n_reads; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); _debug("RELEASE OBJ%x", object->debug_id); - if (atomic_read(&object->n_reads)) { + set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags); + n_reads = atomic_read(&object->n_reads); + if (n_reads) { + int n_ops = object->n_ops; + int n_in_progress = object->n_in_progress; spin_unlock(&cookie->lock); printk(KERN_ERR "FS-Cache:" - " Cookie '%s' still has %d outstanding reads\n", - cookie->def->name, atomic_read(&object->n_reads)); - BUG(); + " Cookie '%s' still has %d outstanding reads (%d,%d)\n", + cookie->def->name, + n_reads, n_ops, n_in_progress); + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + printk("Wait finished\n"); + goto try_again; } /* detach each cache object from the object cookie */ diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 30afdfa7aec7..c857ab824d6e 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -340,8 +340,14 @@ void fscache_put_operation(struct fscache_operation *op) object = op->object; - if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) - atomic_dec(&object->n_reads); + if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) { + if (atomic_dec_and_test(&object->n_reads)) { + clear_bit(FSCACHE_COOKIE_WAITING_ON_READS, + &object->cookie->flags); + wake_up_bit(&object->cookie->flags, + FSCACHE_COOKIE_WAITING_ON_READS); + } + } /* now... we may get called with the object spinlock held, so we * complete the cleanup here only if we can immediately acquire the diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 9879183b55d8..e3d6d939d959 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -301,6 +301,7 @@ struct fscache_cookie { #define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */ #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ +#define FSCACHE_COOKIE_WAITING_ON_READS 6 /* T if cookie is waiting on reads */ }; extern struct fscache_cookie fscache_fsdef_index; -- cgit v1.2.3 From 9f10523f891928330b7529da54c1a3cc65180b1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:35 +0000 Subject: FS-Cache: Fix operation state management and accounting Fix the state management of internal fscache operations and the accounting of what operations are in what states. This is done by: (1) Give struct fscache_operation a enum variable that directly represents the state it's currently in, rather than spreading this knowledge over a bunch of flags, who's processing the operation at the moment and whether it is queued or not. This makes it easier to write assertions to check the state at various points and to prevent invalid state transitions. (2) Add an 'operation complete' state and supply a function to indicate the completion of an operation (fscache_op_complete()) and make things call it. The final call to fscache_put_operation() can then check that an op in the appropriate state (complete or cancelled). 
(3) Adjust the use of object->n_ops, ->n_in_progress, ->n_exclusive to better govern the state of an object: (a) The ->n_ops is now the number of extant operations on the object and is now decremented by fscache_put_operation() only. (b) The ->n_in_progress is simply the number of objects that have been taken off of the object's pending queue for the purposes of being run. This is decremented by fscache_op_complete() only. (c) The ->n_exclusive is the number of exclusive ops that have been submitted and queued or are in progress. It is decremented by fscache_op_complete() and by fscache_cancel_op(). fscache_put_operation() and fscache_operation_gc() now no longer try to clean up ->n_exclusive and ->n_in_progress. That was leading to double decrements against fscache_cancel_op(). fscache_cancel_op() now no longer decrements ->n_ops. That was leading to double decrements against fscache_put_operation(). fscache_submit_exclusive_op() now decides whether it has to queue an op based on ->n_in_progress being > 0 rather than ->n_ops > 0 as the latter will persist in being true even after all preceding operations have been cancelled or completed. Furthermore, if an object is active and there are runnable ops against it, there must be at least one op running. (4) Add a remaining-pages counter (n_pages) to struct fscache_retrieval and provide a function to record completion of the pages as they complete. When n_pages reaches 0, the operation is deemed to be complete and fscache_op_complete() is called. Add calls to fscache_retrieval_complete() anywhere we've finished with a page we've been given to read or allocate for. This includes places where we just return pages to the netfs for reading from the server and where accessing the cache fails and we discard the proposed netfs page. The bugs in the unfixed state management manifest themselves as oopses like the following where the operation completion gets out of sync with return of the cookie by the netfs. This is possible because the cache unlocks and returns all the netfs pages before recording its completion - which means that there's nothing to stop the netfs discarding them and returning the cookie. FS-Cache: Cookie 'NFS.fh' still has outstanding reads ------------[ cut here ]------------ kernel BUG at fs/fscache/cookie.c:519! 
invalid opcode: 0000 [#1] SMP CPU 1 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 400, comm: kswapd0 Not tainted 3.1.0-rc7-fsdevel+ #1090 /DG965RY RIP: 0010:[] [] __fscache_relinquish_cookie+0x170/0x343 [fscache] RSP: 0018:ffff8800368cfb00 EFLAGS: 00010282 RAX: 000000000000003c RBX: ffff880023cc8790 RCX: 0000000000000000 RDX: 0000000000002f2e RSI: 0000000000000001 RDI: ffffffff813ab86c RBP: ffff8800368cfb50 R08: 0000000000000002 R09: 0000000000000000 R10: ffff88003a1b7890 R11: ffff88001df6e488 R12: ffff880023d8ed98 R13: ffff880023cc8798 R14: 0000000000000004 R15: ffff88003b8bf370 FS: 0000000000000000(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000008ba008 CR3: 0000000023d93000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kswapd0 (pid: 400, threadinfo ffff8800368ce000, task ffff88003b8bf040) Stack: ffff88003b8bf040 ffff88001df6e528 ffff88001df6e528 ffffffffa00b46b0 ffff88003b8bf040 ffff88001df6e488 ffff88001df6e620 ffffffffa00b46b0 ffff88001ebd04c8 0000000000000004 ffff8800368cfb70 ffffffffa00b2c91 Call Trace: [] nfs_fscache_release_inode_cookie+0x3b/0x47 [nfs] [] nfs_clear_inode+0x3c/0x41 [nfs] [] nfs4_evict_inode+0x2f/0x33 [nfs] [] evict+0xa1/0x15c [] dispose_list+0x2c/0x38 [] prune_icache_sb+0x28c/0x29b [] prune_super+0xd5/0x140 [] shrink_slab+0x102/0x1ab [] balance_pgdat+0x2f2/0x595 [] ? process_timeout+0xb/0xb [] kswapd+0x270/0x289 [] ? __init_waitqueue_head+0x46/0x46 [] ? balance_pgdat+0x595/0x595 [] kthread+0x7f/0x87 [] kernel_thread_helper+0x4/0x10 [] ? finish_task_switch+0x45/0xc0 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x53/0x53 [] ? gs_change+0xb/0xb Signed-off-by: David Howells --- Documentation/filesystems/caching/backend-api.txt | 26 ++++++- Documentation/filesystems/caching/operations.txt | 2 +- fs/cachefiles/rdwr.c | 31 ++++++-- fs/fscache/object.c | 2 - fs/fscache/operation.c | 91 +++++++++++++++-------- fs/fscache/page.c | 25 ++++++- include/linux/fscache-cache.h | 37 +++++++-- 7 files changed, 164 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index 382d52cdaf2d..f4769b9399df 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -419,7 +419,10 @@ performed on the denizens of the cache. These are held in a structure of type: If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS returned if possible or fscache_end_io() called with a suitable error - code.. + code. + + fscache_put_retrieval() should be called after a page or pages are dealt + with. This will complete the operation when all pages are dealt with. (*) Request pages be read from cache [mandatory]: @@ -526,6 +529,27 @@ FS-Cache provides some utilities that a cache backend may make use of: error value should be 0 if successful and an error otherwise. + (*) Record that one or more pages being retrieved or allocated have been dealt + with: + + void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages); + + This is called to record the fact that one or more pages have been dealt + with and are no longer the concern of this operation. When the number of + pages remaining in the operation reaches 0, the operation will be + completed. 
+ + + (*) Record operation completion: + + void fscache_op_complete(struct fscache_operation *op); + + This is called to record the completion of an operation. This deducts + this operation from the parent object's run state, potentially permitting + one or more pending operations to start running. + + (*) Set highest store limit: void fscache_set_store_limit(struct fscache_object *object, diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt index b6b070c57cbf..bee2a5f93d60 100644 --- a/Documentation/filesystems/caching/operations.txt +++ b/Documentation/filesystems/caching/operations.txt @@ -174,7 +174,7 @@ Operations are used through the following procedure: necessary (the object might have died whilst the thread was waiting). When it has finished doing its processing, it should call - fscache_put_operation() on it. + fscache_op_complete() and fscache_put_operation() on it. (4) The operation holds an effective lock upon the object, preventing other exclusive ops conflicting until it is released. The operation can be diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index bf123d9c3206..93a0815e0498 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -197,6 +197,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op) fscache_end_io(op, monitor->netfs_page, error); page_cache_release(monitor->netfs_page); + fscache_retrieval_complete(op, 1); fscache_put_retrieval(op); kfree(monitor); @@ -339,6 +340,7 @@ backing_page_already_uptodate: copy_highpage(netpage, backpage); fscache_end_io(op, netpage, 0); + fscache_retrieval_complete(op, 1); success: _debug("success"); @@ -360,6 +362,7 @@ read_error: goto out; io_error: cachefiles_io_error_obj(object, "Page read error on backing file"); + fscache_retrieval_complete(op, 1); ret = -ENOBUFS; goto out; @@ -369,6 +372,7 @@ nomem_monitor: fscache_put_retrieval(monitor->op); kfree(monitor); nomem: + fscache_retrieval_complete(op, 1); _leave(" = -ENOMEM"); return -ENOMEM; } @@ -407,7 +411,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, _enter("{%p},{%lx},,,", object, page->index); if (!object->backer) - return -ENOBUFS; + goto enobufs; inode = object->backer->d_inode; ASSERT(S_ISREG(inode->i_mode)); @@ -416,7 +420,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, /* calculate the shift required to use bmap */ if (inode->i_sb->s_blocksize > PAGE_SIZE) - return -ENOBUFS; + goto enobufs; shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -448,13 +452,19 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, } else if (cachefiles_has_space(cache, 0, 1) == 0) { /* there's space in the cache we can use */ fscache_mark_page_cached(op, page); + fscache_retrieval_complete(op, 1); ret = -ENODATA; } else { - ret = -ENOBUFS; + goto enobufs; } _leave(" = %d", ret); return ret; + +enobufs: + fscache_retrieval_complete(op, 1); + _leave(" = -ENOBUFS"); + return -ENOBUFS; } /* @@ -632,6 +642,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); + fscache_retrieval_complete(op, 1); page_cache_release(netpage); netpage = NULL; continue; @@ -659,6 +670,7 @@ out: list_for_each_entry_safe(netpage, _n, list, lru) { list_del(&netpage->lru); page_cache_release(netpage); + fscache_retrieval_complete(op, 1); } _leave(" = %d", ret); @@ -707,7 +719,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, 
*nr_pages); if (!object->backer) - return -ENOBUFS; + goto all_enobufs; space = 1; if (cachefiles_has_space(cache, 0, *nr_pages) < 0) @@ -720,7 +732,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, /* calculate the shift required to use bmap */ if (inode->i_sb->s_blocksize > PAGE_SIZE) - return -ENOBUFS; + goto all_enobufs; shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -760,7 +772,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, nrbackpages++; } else if (space && pagevec_add(&pagevec, page) == 0) { fscache_mark_pages_cached(op, &pagevec); + fscache_retrieval_complete(op, 1); ret = -ENODATA; + } else { + fscache_retrieval_complete(op, 1); } } @@ -781,6 +796,10 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, _leave(" = %d [nr=%u%s]", ret, *nr_pages, list_empty(pages) ? " empty" : ""); return ret; + +all_enobufs: + fscache_retrieval_complete(op, *nr_pages); + return -ENOBUFS; } /* @@ -815,6 +834,7 @@ int cachefiles_allocate_page(struct fscache_retrieval *op, else ret = -ENOBUFS; + fscache_retrieval_complete(op, 1); _leave(" = %d", ret); return ret; } @@ -864,6 +884,7 @@ int cachefiles_allocate_pages(struct fscache_retrieval *op, ret = -ENOBUFS; } + fscache_retrieval_complete(op, *nr_pages); _leave(" = %d", ret); return ret; } diff --git a/fs/fscache/object.c b/fs/fscache/object.c index b6b897c550ac..773bc798a416 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -587,8 +587,6 @@ static void fscache_object_available(struct fscache_object *object) if (object->n_in_progress == 0) { if (object->n_ops > 0) { ASSERTCMP(object->n_ops, >=, object->n_obj_ops); - ASSERTIF(object->n_ops > object->n_obj_ops, - !list_empty(&object->pending_ops)); fscache_start_operations(object); } else { ASSERT(list_empty(&object->pending_ops)); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index c857ab824d6e..748f9553c2cb 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -37,6 +37,7 @@ void fscache_enqueue_operation(struct fscache_operation *op) ASSERT(op->processor != NULL); ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); ASSERTCMP(atomic_read(&op->usage), >, 0); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); fscache_stat(&fscache_n_op_enqueue); switch (op->flags & FSCACHE_OP_TYPE) { @@ -64,6 +65,9 @@ EXPORT_SYMBOL(fscache_enqueue_operation); static void fscache_run_op(struct fscache_object *object, struct fscache_operation *op) { + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + + op->state = FSCACHE_OP_ST_IN_PROGRESS; object->n_in_progress++; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) wake_up_bit(&op->flags, FSCACHE_OP_WAITING); @@ -80,22 +84,23 @@ static void fscache_run_op(struct fscache_object *object, int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { - int ret; - _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + spin_lock(&object->lock); ASSERTCMP(object->n_ops, >=, object->n_in_progress); ASSERTCMP(object->n_ops, >=, object->n_exclusive); ASSERT(list_empty(&op->pend_link)); - ret = -ENOBUFS; + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; object->n_exclusive++; /* reads and writes must wait */ - if (object->n_ops > 1) { + if (object->n_in_progress > 0) { atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); 
fscache_stat(&fscache_n_op_pend); @@ -111,7 +116,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, /* need to issue a new write op after this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); - ret = 0; } else if (object->state == FSCACHE_OBJECT_CREATING) { op->object = object; object->n_ops++; @@ -119,14 +123,13 @@ int fscache_submit_exclusive_op(struct fscache_object *object, atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); - ret = 0; } else { /* not allowed to submit ops in any other state */ BUG(); } spin_unlock(&object->lock); - return ret; + return 0; } /* @@ -186,6 +189,7 @@ int fscache_submit_op(struct fscache_object *object, _enter("{OBJ%x OP%x},{%u}", object->debug_id, op->debug_id, atomic_read(&op->usage)); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); ASSERTCMP(atomic_read(&op->usage), >, 0); spin_lock(&object->lock); @@ -196,6 +200,7 @@ int fscache_submit_op(struct fscache_object *object, ostate = object->state; smp_rmb(); + op->state = FSCACHE_OP_ST_PENDING; if (fscache_object_is_active(object)) { op->object = object; object->n_ops++; @@ -225,12 +230,15 @@ int fscache_submit_op(struct fscache_object *object, object->state == FSCACHE_OBJECT_LC_DYING || object->state == FSCACHE_OBJECT_WITHDRAWING) { fscache_stat(&fscache_n_op_rejected); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { fscache_report_unexpected_submission(object, op, ostate); ASSERT(!fscache_object_is_active(object)); + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } else { + op->state = FSCACHE_OP_ST_CANCELLED; ret = -ENOBUFS; } @@ -290,13 +298,18 @@ int fscache_cancel_op(struct fscache_operation *op) _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id); + ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING); + ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED); + ASSERTCMP(atomic_read(&op->usage), >, 0); + spin_lock(&object->lock); ret = -EBUSY; - if (!list_empty(&op->pend_link)) { + if (op->state == FSCACHE_OP_ST_PENDING) { + ASSERT(!list_empty(&op->pend_link)); fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); - object->n_ops--; + op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) @@ -310,6 +323,37 @@ int fscache_cancel_op(struct fscache_operation *op) return ret; } +/* + * Record the completion of an in-progress operation. 
+ */ +void fscache_op_complete(struct fscache_operation *op) +{ + struct fscache_object *object = op->object; + + _enter("OBJ%x", object->debug_id); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); + ASSERTCMP(object->n_in_progress, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_exclusive, >, 0); + ASSERTIFCMP(test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags), + object->n_in_progress, ==, 1); + + spin_lock(&object->lock); + + op->state = FSCACHE_OP_ST_COMPLETE; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + object->n_in_progress--; + if (object->n_in_progress == 0) + fscache_start_operations(object); + + spin_unlock(&object->lock); + _leave(""); +} +EXPORT_SYMBOL(fscache_op_complete); + /* * release an operation * - queues pending ops if this is the last in-progress op @@ -328,8 +372,9 @@ void fscache_put_operation(struct fscache_operation *op) return; _debug("PUT OP"); - if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) - BUG(); + ASSERTIFCMP(op->state != FSCACHE_OP_ST_COMPLETE, + op->state, ==, FSCACHE_OP_ST_CANCELLED); + op->state = FSCACHE_OP_ST_DEAD; fscache_stat(&fscache_n_op_release); @@ -365,16 +410,6 @@ void fscache_put_operation(struct fscache_operation *op) return; } - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); - ASSERTCMP(object->n_ops, >, 0); object->n_ops--; if (object->n_ops == 0) @@ -413,23 +448,14 @@ void fscache_operation_gc(struct work_struct *work) spin_unlock(&cache->op_gc_list_lock); object = op->object; + spin_lock(&object->lock); _debug("GC DEFERRED REL OBJ%x OP%x", object->debug_id, op->debug_id); fscache_stat(&fscache_n_op_gc); ASSERTCMP(atomic_read(&op->usage), ==, 0); - - spin_lock(&object->lock); - if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { - ASSERTCMP(object->n_exclusive, >, 0); - object->n_exclusive--; - } - - ASSERTCMP(object->n_in_progress, >, 0); - object->n_in_progress--; - if (object->n_in_progress == 0) - fscache_start_operations(object); + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_DEAD); ASSERTCMP(object->n_ops, >, 0); object->n_ops--; @@ -437,6 +463,7 @@ void fscache_operation_gc(struct work_struct *work) fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); spin_unlock(&object->lock); + kfree(op); } while (count++ < 20); diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 248a12e22532..b38b13d2a555 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -162,6 +162,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_abort_object(object); } + fscache_op_complete(op); _leave(""); } @@ -223,6 +224,8 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) _enter("{OP%x}", op->op.debug_id); + ASSERTCMP(op->n_pages, ==, 0); + fscache_hist(fscache_retrieval_histogram, op->start_time); if (op->context) fscache_put_context(op->op.object->cookie, op->context); @@ -320,6 +323,11 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, _debug("<<< GO"); check_if_dead: + if (op->op.state == FSCACHE_OP_ST_CANCELLED) { + fscache_stat(stat_object_dead); + _leave(" = -ENOBUFS [cancelled]"); + return -ENOBUFS; + } if (unlikely(fscache_object_is_dead(object))) { fscache_stat(stat_object_dead); return -ENOBUFS; @@ -364,6 +372,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, _leave(" = 
-ENOMEM"); return -ENOMEM; } + op->n_pages = 1; spin_lock(&cookie->lock); @@ -375,10 +384,10 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -425,6 +434,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -482,6 +493,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(mapping, end_io_func, context); if (!op) return -ENOMEM; + op->n_pages = *nr_pages; spin_lock(&cookie->lock); @@ -491,10 +503,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, struct fscache_object, cookie_link); atomic_inc(&object->n_reads); - set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); + __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_retrieval_ops); @@ -541,6 +553,8 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + atomic_dec(&object->n_reads); nobufs_unlock: spin_unlock(&cookie->lock); kfree(op); @@ -583,6 +597,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, op = fscache_alloc_retrieval(page->mapping, NULL, NULL); if (!op) return -ENOMEM; + op->n_pages = 1; spin_lock(&cookie->lock); @@ -696,6 +711,7 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); + fscache_op_complete(&op->op); } else { fscache_enqueue_operation(&op->op); } @@ -710,6 +726,7 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); + fscache_op_complete(&op->op); _leave(""); } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index e3d6d939d959..f5facd1d333f 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -75,6 +75,16 @@ extern wait_queue_head_t fscache_cache_cleared_wq; typedef void (*fscache_operation_release_t)(struct fscache_operation *op); typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); +enum fscache_operation_state { + FSCACHE_OP_ST_BLANK, /* Op is not yet submitted */ + FSCACHE_OP_ST_INITIALISED, /* Op is initialised */ + FSCACHE_OP_ST_PENDING, /* Op is blocked from running */ + FSCACHE_OP_ST_IN_PROGRESS, /* Op is in progress */ + FSCACHE_OP_ST_COMPLETE, /* Op is complete */ + FSCACHE_OP_ST_CANCELLED, /* Op has been cancelled */ + FSCACHE_OP_ST_DEAD /* Op is now dead */ +}; + struct fscache_operation { struct work_struct work; /* record for async ops */ struct list_head pend_link; /* link in object->pending_ops */ @@ -86,10 +96,10 @@ struct fscache_operation { #define FSCACHE_OP_MYTHREAD 0x0002 /* - processing is done be issuing thread, not pool */ #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ -#define FSCACHE_OP_DEAD 6 /* op is now dead */ -#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */ -#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */ 
+#define FSCACHE_OP_DEC_READ_CNT 6 /* decrement object->n_reads on destruction */ +#define FSCACHE_OP_KEEP_FLAGS 0x0070 /* flags to keep when repurposing an op */ + enum fscache_operation_state state; atomic_t usage; unsigned debug_id; /* debugging ID */ @@ -106,6 +116,7 @@ extern atomic_t fscache_op_debug_id; extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); +extern void fscache_op_complete(struct fscache_operation *); extern void fscache_put_operation(struct fscache_operation *); /** @@ -122,6 +133,7 @@ static inline void fscache_operation_init(struct fscache_operation *op, { INIT_WORK(&op->work, fscache_op_work_func); atomic_set(&op->usage, 1); + op->state = FSCACHE_OP_ST_INITIALISED; op->debug_id = atomic_inc_return(&fscache_op_debug_id); op->processor = processor; op->release = release; @@ -138,6 +150,7 @@ struct fscache_retrieval { void *context; /* netfs read context (pinned) */ struct list_head to_do; /* list of things to be done by the backend */ unsigned long start_time; /* time at which retrieval started */ + unsigned n_pages; /* number of pages to be retrieved */ }; typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, @@ -173,9 +186,23 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) fscache_enqueue_operation(&op->op); } +/** + * fscache_retrieval_complete - Record (partial) completion of a retrieval + * @op: The retrieval operation affected + * @n_pages: The number of pages to account for + */ +static inline void fscache_retrieval_complete(struct fscache_retrieval *op, + int n_pages) +{ + op->n_pages -= n_pages; + if (op->n_pages <= 0) + fscache_op_complete(&op->op); +} + /** * fscache_put_retrieval - Drop a reference to a retrieval operation * @op: The retrieval operation affected + * @n_pages: The number of pages to account for * * Drop a reference to a retrieval operation. */ @@ -333,10 +360,10 @@ struct fscache_object { int debug_id; /* debugging ID */ int n_children; /* number of child objects */ - int n_ops; /* number of ops outstanding on object */ + int n_ops; /* number of extant ops on object */ int n_obj_ops; /* number of object ops outstanding on object */ int n_in_progress; /* number of ops in progress */ - int n_exclusive; /* number of exclusive ops queued */ + int n_exclusive; /* number of exclusive ops queued or in progress */ atomic_t n_reads; /* number of read ops in progress */ spinlock_t lock; /* state and operations lock */ -- cgit v1.2.3 From ef778e7ae67cd426c30cad43378b908f5eb0bad5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: FS-Cache: Provide proper invalidation Provide a proper invalidation method rather than relying on the netfs retiring the cookie it has and getting a new one. The problem with this is that isn't easy for the netfs to make sure that it has completed/cancelled all its outstanding storage and retrieval operations on the cookie it is retiring. Instead, have the cache provide an invalidation method that will cancel or wait for all currently outstanding operations before invalidating the cache, and will cause new operations to queue up behind that. Whilst invalidation is in progress, some requests will be rejected until the cache can stack a barrier on the operation queue to cause new operations to be deferred behind it. 
Signed-off-by: David Howells --- Documentation/filesystems/caching/backend-api.txt | 12 ++++ Documentation/filesystems/caching/netfs-api.txt | 46 ++++++++++++--- Documentation/filesystems/caching/object.txt | 23 +++++--- fs/fscache/cookie.c | 60 +++++++++++++++++++ fs/fscache/internal.h | 10 ++++ fs/fscache/object.c | 72 +++++++++++++++++++++++ fs/fscache/operation.c | 32 ++++++++++ fs/fscache/page.c | 51 ++++++++++++++++ fs/fscache/stats.c | 11 +++- include/linux/fscache-cache.h | 8 ++- include/linux/fscache.h | 38 ++++++++++++ 11 files changed, 345 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt index f4769b9399df..d78bab9622c6 100644 --- a/Documentation/filesystems/caching/backend-api.txt +++ b/Documentation/filesystems/caching/backend-api.txt @@ -308,6 +308,18 @@ performed on the denizens of the cache. These are held in a structure of type: obtained by calling object->cookie->def->get_aux()/get_attr(). + (*) Invalidate data object [mandatory]: + + int (*invalidate_object)(struct fscache_operation *op) + + This is called to invalidate a data object (as pointed to by op->object). + All the data stored for this object should be discarded and an + attr_changed operation should be performed. The caller will follow up + with an object update operation. + + fscache_op_complete() must be called on op before returning. + + (*) Discard object [mandatory]: void (*drop_object)(struct fscache_object *object) diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt index 7cc6bf2871eb..97e6c0ecc5ef 100644 --- a/Documentation/filesystems/caching/netfs-api.txt +++ b/Documentation/filesystems/caching/netfs-api.txt @@ -35,8 +35,9 @@ This document contains the following sections: (12) Index and data file update (13) Miscellaneous cookie operations (14) Cookie unregistration - (15) Index and data file invalidation - (16) FS-Cache specific page flags. + (15) Index invalidation + (16) Data file invalidation + (17) FS-Cache specific page flags. ============================= @@ -767,13 +768,42 @@ the cookies for "child" indices, objects and pages have been relinquished first. -================================ -INDEX AND DATA FILE INVALIDATION -================================ +================== +INDEX INVALIDATION +================== + +There is no direct way to invalidate an index subtree. To do this, the caller +should relinquish and retire the cookie they have, and then acquire a new one. + + +====================== +DATA FILE INVALIDATION +====================== + +Sometimes it will be necessary to invalidate an object that contains data. +Typically this will be necessary when the server tells the netfs of a foreign +change - at which point the netfs has to throw away all the state it had for an +inode and reload from the server. + +To indicate that a cache object should be invalidated, the following function +can be called: + + void fscache_invalidate(struct fscache_cookie *cookie); + +This can be called with spinlocks held as it defers the work to a thread pool. +All extant storage, retrieval and attribute change ops at this point are +cancelled and discarded. Some future operations will be rejected until the +cache has had a chance to insert a barrier in the operations queue. After +that, operations will be queued again behind the invalidation operation. 
+ +The invalidation operation will perform an attribute change operation and an +auxiliary data update operation as it is very likely these will have changed. + +Using the following function, the netfs can wait for the invalidation operation +to have reached a point at which it can start submitting ordinary operations +once again: -There is no direct way to invalidate an index subtree or a data file. To do -this, the caller should relinquish and retire the cookie they have, and then -acquire a new one. + void fscache_wait_on_invalidate(struct fscache_cookie *cookie); =========================== diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt index 58313348da87..100ff41127e4 100644 --- a/Documentation/filesystems/caching/object.txt +++ b/Documentation/filesystems/caching/object.txt @@ -216,7 +216,14 @@ servicing netfs requests: The normal running state. In this state, requests the netfs makes will be passed on to the cache. - (6) State FSCACHE_OBJECT_UPDATING. + (6) State FSCACHE_OBJECT_INVALIDATING. + + The object is undergoing invalidation. When the state comes here, it + discards all pending read, write and attribute change operations as it is + going to clear out the cache entirely and reinitialise it. It will then + continue to the FSCACHE_OBJECT_UPDATING state. + + (7) State FSCACHE_OBJECT_UPDATING. The state machine comes here to update the object in the cache from the netfs's records. This involves updating the auxiliary data that is used @@ -225,13 +232,13 @@ servicing netfs requests: And there are terminal states in which an object cleans itself up, deallocates memory and potentially deletes stuff from disk: - (7) State FSCACHE_OBJECT_LC_DYING. + (8) State FSCACHE_OBJECT_LC_DYING. The object comes here if it is dying because of a lookup or creation error. This would be due to a disk error or system error of some sort. Temporary data is cleaned up, and the parent is released. - (8) State FSCACHE_OBJECT_DYING. + (9) State FSCACHE_OBJECT_DYING. The object comes here if it is dying due to an error, because its parent cookie has been relinquished by the netfs or because the cache is being @@ -241,27 +248,27 @@ memory and potentially deletes stuff from disk: can destroy themselves. This object waits for all its children to go away before advancing to the next state. - (9) State FSCACHE_OBJECT_ABORT_INIT. +(10) State FSCACHE_OBJECT_ABORT_INIT. The object comes to this state if it was waiting on its parent in FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself so that the parent may proceed from the FSCACHE_OBJECT_DYING state. -(10) State FSCACHE_OBJECT_RELEASING. -(11) State FSCACHE_OBJECT_RECYCLING. +(11) State FSCACHE_OBJECT_RELEASING. +(12) State FSCACHE_OBJECT_RECYCLING. The object comes to one of these two states when dying once it is rid of all its children, if it is dying because the netfs relinquished its cookie. In the first state, the cached data is expected to persist, and in the second it will be deleted. -(12) State FSCACHE_OBJECT_WITHDRAWING. +(13) State FSCACHE_OBJECT_WITHDRAWING. The object transits to this state if the cache decides it wants to withdraw the object from service, perhaps to make space, but also due to error or just because the whole cache is being withdrawn. -(13) State FSCACHE_OBJECT_DEAD. +(14) State FSCACHE_OBJECT_DEAD. The object transits to this state when the in-memory object record is ready to be deleted. 
The object processor shouldn't ever see an object in diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 66be9eccede0..8dcb114758e3 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -369,6 +369,66 @@ cant_attach_object: return ret; } +/* + * Invalidate an object. Callable with spinlocks held. + */ +void __fscache_invalidate(struct fscache_cookie *cookie) +{ + struct fscache_object *object; + + _enter("{%s}", cookie->def->name); + + fscache_stat(&fscache_n_invalidates); + + /* Only permit invalidation of data files. Invalidating an index will + * require the caller to release all its attachments to the tree rooted + * there, and if it's doing that, it may as well just retire the + * cookie. + */ + ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE); + + /* We will be updating the cookie too. */ + BUG_ON(!cookie->def->get_aux); + + /* If there's an object, we tell the object state machine to handle the + * invalidation on our behalf, otherwise there's nothing to do. + */ + if (!hlist_empty(&cookie->backing_objects)) { + spin_lock(&cookie->lock); + + if (!hlist_empty(&cookie->backing_objects) && + !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING, + &cookie->flags)) { + object = hlist_entry(cookie->backing_objects.first, + struct fscache_object, + cookie_link); + if (object->state < FSCACHE_OBJECT_DYING) + fscache_raise_event( + object, FSCACHE_OBJECT_EV_INVALIDATE); + } + + spin_unlock(&cookie->lock); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_invalidate); + +/* + * Wait for object invalidation to complete. + */ +void __fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + _enter("%p", cookie); + + wait_on_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING, + fscache_wait_bit_interruptible, + TASK_UNINTERRUPTIBLE); + + _leave(""); +} +EXPORT_SYMBOL(__fscache_wait_on_invalidate); + /* * update the index entries backing a cookie */ diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index f6aad48d38a8..c81179303930 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -122,10 +122,16 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); extern int fscache_cancel_op(struct fscache_operation *); +extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); extern void fscache_operation_gc(struct work_struct *); +/* + * page.c + */ +extern void fscache_invalidate_writes(struct fscache_cookie *); + /* * proc.c */ @@ -205,6 +211,9 @@ extern atomic_t fscache_n_acquires_ok; extern atomic_t fscache_n_acquires_nobufs; extern atomic_t fscache_n_acquires_oom; +extern atomic_t fscache_n_invalidates; +extern atomic_t fscache_n_invalidates_run; + extern atomic_t fscache_n_updates; extern atomic_t fscache_n_updates_null; extern atomic_t fscache_n_updates_run; @@ -237,6 +246,7 @@ extern atomic_t fscache_n_cop_alloc_object; extern atomic_t fscache_n_cop_lookup_object; extern atomic_t fscache_n_cop_lookup_complete; extern atomic_t fscache_n_cop_grab_object; +extern atomic_t fscache_n_cop_invalidate_object; extern atomic_t fscache_n_cop_update_object; extern atomic_t fscache_n_cop_drop_object; extern atomic_t fscache_n_cop_put_object; diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 773bc798a416..80b549141ea6 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -14,6 +14,7 @@ #define FSCACHE_DEBUG_LEVEL COOKIE #include 
+#include #include "internal.h" const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { @@ -22,6 +23,7 @@ const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", + [FSCACHE_OBJECT_INVALIDATING] = "OBJECT_INVALIDATING", [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", @@ -39,6 +41,7 @@ const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { [FSCACHE_OBJECT_CREATING] = "CRTN", [FSCACHE_OBJECT_AVAILABLE] = "AVBL", [FSCACHE_OBJECT_ACTIVE] = "ACTV", + [FSCACHE_OBJECT_INVALIDATING] = "INVL", [FSCACHE_OBJECT_UPDATING] = "UPDT", [FSCACHE_OBJECT_DYING] = "DYNG", [FSCACHE_OBJECT_LC_DYING] = "LCDY", @@ -54,6 +57,7 @@ static void fscache_put_object(struct fscache_object *); static void fscache_initialise_object(struct fscache_object *); static void fscache_lookup_object(struct fscache_object *); static void fscache_object_available(struct fscache_object *); +static void fscache_invalidate_object(struct fscache_object *); static void fscache_release_object(struct fscache_object *); static void fscache_withdraw_object(struct fscache_object *); static void fscache_enqueue_dependents(struct fscache_object *); @@ -78,6 +82,15 @@ static inline void fscache_done_parent_op(struct fscache_object *object) spin_unlock(&parent->lock); } +/* + * Notify netfs of invalidation completion. + */ +static inline void fscache_invalidation_complete(struct fscache_cookie *cookie) +{ + if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); +} + /* * process events that have been sent to an object's state machine * - initiates parent lookup @@ -125,6 +138,16 @@ static void fscache_object_state_machine(struct fscache_object *object) case FSCACHE_OBJECT_ACTIVE: goto active_transit; + /* Invalidate an object on disk */ + case FSCACHE_OBJECT_INVALIDATING: + clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events); + fscache_stat(&fscache_n_invalidates_run); + fscache_stat(&fscache_n_cop_invalidate_object); + fscache_invalidate_object(object); + fscache_stat_d(&fscache_n_cop_invalidate_object); + fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); + goto active_transit; + /* update the object metadata on disk */ case FSCACHE_OBJECT_UPDATING: clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); @@ -275,6 +298,9 @@ active_transit: case FSCACHE_OBJECT_EV_ERROR: new_state = FSCACHE_OBJECT_DYING; goto change_state; + case FSCACHE_OBJECT_EV_INVALIDATE: + new_state = FSCACHE_OBJECT_INVALIDATING; + goto change_state; case FSCACHE_OBJECT_EV_UPDATE: new_state = FSCACHE_OBJECT_UPDATING; goto change_state; @@ -679,6 +705,7 @@ static void fscache_withdraw_object(struct fscache_object *object) if (object->cookie == cookie) { hlist_del_init(&object->cookie_link); object->cookie = NULL; + fscache_invalidation_complete(cookie); detached = true; } spin_unlock(&cookie->lock); @@ -888,3 +915,48 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object, return result; } EXPORT_SYMBOL(fscache_check_aux); + +/* + * Asynchronously invalidate an object. 
+ */ +static void fscache_invalidate_object(struct fscache_object *object) +{ + struct fscache_operation *op; + struct fscache_cookie *cookie = object->cookie; + + _enter("{OBJ%x}", object->debug_id); + + /* Reject any new read/write ops and abort any that are pending. */ + fscache_invalidate_writes(cookie); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + + /* Now we have to wait for in-progress reads and writes */ + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); + _leave(" [ENOMEM]"); + return; + } + + fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); + op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); + + spin_lock(&cookie->lock); + if (fscache_submit_exclusive_op(object, op) < 0) + BUG(); + spin_unlock(&cookie->lock); + fscache_put_operation(op); + + /* Once we've completed the invalidation, we know there will be no data + * stored in the cache and thus we can reinstate the data-check-skip + * optimisation. + */ + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + + /* We can allow read and write requests to come in once again. They'll + * queue up behind our exclusive invalidation operation. + */ + fscache_invalidation_complete(cookie); + _leave(""); +} diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 748f9553c2cb..c58dbe613266 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -323,6 +323,38 @@ int fscache_cancel_op(struct fscache_operation *op) return ret; } +/* + * Cancel all pending operations on an object + */ +void fscache_cancel_all_ops(struct fscache_object *object) +{ + struct fscache_operation *op; + + _enter("OBJ%x", object->debug_id); + + spin_lock(&object->lock); + + while (!list_empty(&object->pending_ops)) { + op = list_entry(object->pending_ops.next, + struct fscache_operation, pend_link); + fscache_stat(&fscache_n_op_cancelled); + list_del_init(&op->pend_link); + + ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING); + op->state = FSCACHE_OP_ST_CANCELLED; + + if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) + object->n_exclusive--; + if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) + wake_up_bit(&op->flags, FSCACHE_OP_WAITING); + fscache_put_operation(op); + cond_resched_lock(&object->lock); + } + + spin_unlock(&object->lock); + _leave(""); +} + /* * Record the completion of an in-progress operation. 
*/ diff --git a/fs/fscache/page.c b/fs/fscache/page.c index b38b13d2a555..7bf9d2557052 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -361,6 +361,11 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); @@ -483,6 +488,11 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, if (hlist_empty(&cookie->backing_objects)) goto nobufs; + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(*nr_pages, >, 0); ASSERT(!list_empty(pages)); @@ -591,6 +601,11 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); ASSERTCMP(page, !=, NULL); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + if (fscache_wait_for_deferred_lookup(cookie) < 0) return -ERESTARTSYS; @@ -730,6 +745,37 @@ superseded: _leave(""); } +/* + * Clear the pages pending writing for invalidation + */ +void fscache_invalidate_writes(struct fscache_cookie *cookie) +{ + struct page *page; + void *results[16]; + int n, i; + + _enter(""); + + while (spin_lock(&cookie->stores_lock), + n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, + ARRAY_SIZE(results), + FSCACHE_COOKIE_PENDING_TAG), + n > 0) { + for (i = n - 1; i >= 0; i--) { + page = results[i]; + radix_tree_delete(&cookie->stores, page->index); + } + + spin_unlock(&cookie->stores_lock); + + for (i = n - 1; i >= 0; i--) + page_cache_release(results[i]); + } + + spin_unlock(&cookie->stores_lock); + _leave(""); +} + /* * request a page be stored in the cache * - returns: @@ -776,6 +822,11 @@ int __fscache_write_page(struct fscache_cookie *cookie, fscache_stat(&fscache_n_stores); + if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { + _leave(" = -ENOBUFS [invalidating]"); + return -ENOBUFS; + } + op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY); if (!op) goto nomem; diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190d537f..51cdaee14109 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -80,6 +80,9 @@ atomic_t fscache_n_acquires_ok; atomic_t fscache_n_acquires_nobufs; atomic_t fscache_n_acquires_oom; +atomic_t fscache_n_invalidates; +atomic_t fscache_n_invalidates_run; + atomic_t fscache_n_updates; atomic_t fscache_n_updates_null; atomic_t fscache_n_updates_run; @@ -112,6 +115,7 @@ atomic_t fscache_n_cop_alloc_object; atomic_t fscache_n_cop_lookup_object; atomic_t fscache_n_cop_lookup_complete; atomic_t fscache_n_cop_grab_object; +atomic_t fscache_n_cop_invalidate_object; atomic_t fscache_n_cop_update_object; atomic_t fscache_n_cop_drop_object; atomic_t fscache_n_cop_put_object; @@ -168,6 +172,10 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_object_created), atomic_read(&fscache_n_object_lookups_timed_out)); + seq_printf(m, "Invals : n=%u run=%u\n", + atomic_read(&fscache_n_invalidates), + atomic_read(&fscache_n_invalidates_run)); + seq_printf(m, "Updates: n=%u nul=%u run=%u\n", atomic_read(&fscache_n_updates), atomic_read(&fscache_n_updates_null), @@ -246,7 +254,8 @@ static int fscache_stats_show(struct 
seq_file *m, void *v) atomic_read(&fscache_n_cop_lookup_object), atomic_read(&fscache_n_cop_lookup_complete), atomic_read(&fscache_n_cop_grab_object)); - seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n", + seq_printf(m, "CacheOp: inv=%d upo=%d dro=%d pto=%d atc=%d syn=%d\n", + atomic_read(&fscache_n_cop_invalidate_object), atomic_read(&fscache_n_cop_update_object), atomic_read(&fscache_n_cop_drop_object), atomic_read(&fscache_n_cop_put_object), diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index f5facd1d333f..1e454ad7a832 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -254,6 +254,9 @@ struct fscache_cache_ops { /* store the updated auxiliary data on an object */ void (*update_object)(struct fscache_object *object); + /* Invalidate an object */ + void (*invalidate_object)(struct fscache_operation *op); + /* discard the resources pinned by an object and effect retirement if * necessary */ void (*drop_object)(struct fscache_object *object); @@ -329,6 +332,7 @@ struct fscache_cookie { #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ #define FSCACHE_COOKIE_WAITING_ON_READS 6 /* T if cookie is waiting on reads */ +#define FSCACHE_COOKIE_INVALIDATING 7 /* T if cookie is being invalidated */ }; extern struct fscache_cookie fscache_fsdef_index; @@ -345,6 +349,7 @@ struct fscache_object { /* active states */ FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */ FSCACHE_OBJECT_ACTIVE, /* object is usable */ + FSCACHE_OBJECT_INVALIDATING, /* object is invalidating */ FSCACHE_OBJECT_UPDATING, /* object is updating */ /* terminal states */ @@ -378,7 +383,8 @@ struct fscache_object { #define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ #define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ #define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ -#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/ +#define FSCACHE_OBJECT_EV_INVALIDATE 7 /* T if cache requested object invalidation */ +#define FSCACHE_OBJECT_EVENTS_MASK 0xff /* mask of all events*/ unsigned long flags; #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4b6353543bf..7a086235da4b 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -185,6 +185,8 @@ extern struct fscache_cookie *__fscache_acquire_cookie( extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); extern void __fscache_update_cookie(struct fscache_cookie *); extern int __fscache_attr_changed(struct fscache_cookie *); +extern void __fscache_invalidate(struct fscache_cookie *); +extern void __fscache_wait_on_invalidate(struct fscache_cookie *); extern int __fscache_read_or_alloc_page(struct fscache_cookie *, struct page *, fscache_rw_complete_t, @@ -389,6 +391,42 @@ int fscache_attr_changed(struct fscache_cookie *cookie) return -ENOBUFS; } +/** + * fscache_invalidate - Notify cache that an object needs invalidation + * @cookie: The cookie representing the cache object + * + * Notify the cache that an object needs to be invalidated and that it + * should abort any retrievals or stores it is doing on the cache. The object + * is then marked non-caching until such time as the invalidation is complete. + * + * This can be called with spinlocks held. 
+ * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_invalidate(cookie); +} + +/** + * fscache_wait_on_invalidate - Wait for invalidation to complete + * @cookie: The cookie representing the cache object + * + * Wait for the invalidation of an object to complete. + * + * See Documentation/filesystems/caching/netfs-api.txt for a complete + * description. + */ +static inline +void fscache_wait_on_invalidate(struct fscache_cookie *cookie) +{ + if (fscache_cookie_valid(cookie)) + __fscache_wait_on_invalidate(cookie); +} + /** * fscache_reserve_space - Reserve data space for a cached object * @cookie: The cookie representing the cache object -- cgit v1.2.3 From a02de9608595c8ef649ef03ae735b0b45e3d4396 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: VFS: Make more complete truncate operation available to CacheFiles Make a more complete truncate operation available to CacheFiles (including security checks and suchlike) so that it can use this to clear invalidated cache files. Signed-off-by: David Howells Acked-by: Al Viro --- fs/open.c | 50 +++++++++++++++++++++++++++----------------------- include/linux/fs.h | 1 + 2 files changed, 28 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index 182d8667b7bd..c819bbdab47f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -61,33 +61,22 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, return ret; } -static long do_sys_truncate(const char __user *pathname, loff_t length) +long vfs_truncate(struct path *path, loff_t length) { - struct path path; struct inode *inode; - int error; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; + long error; - error = user_path(pathname, &path); - if (error) - goto out; - inode = path.dentry->d_inode; + inode = path->dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ - error = -EISDIR; if (S_ISDIR(inode->i_mode)) - goto dput_and_out; - - error = -EINVAL; + return -EISDIR; if (!S_ISREG(inode->i_mode)) - goto dput_and_out; + return -EINVAL; - error = mnt_want_write(path.mnt); + error = mnt_want_write(path->mnt); if (error) - goto dput_and_out; + goto out; error = inode_permission(inode, MAY_WRITE); if (error) @@ -111,19 +100,34 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) - error = security_path_truncate(&path); + error = security_path_truncate(path); if (!error) - error = do_truncate(path.dentry, length, 0, NULL); + error = do_truncate(path->dentry, length, 0, NULL); put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: - mnt_drop_write(path.mnt); -dput_and_out: - path_put(&path); + mnt_drop_write(path->mnt); out: return error; } +EXPORT_SYMBOL_GPL(vfs_truncate); + +static long do_sys_truncate(const char __user *pathname, loff_t length) +{ + struct path path; + int error; + + if (length < 0) /* sorry, but loff_t says... 
*/ + return -EINVAL; + + error = user_path(pathname, &path); + if (!error) { + error = vfs_truncate(&path, length); + path_put(&path); + } + return error; +} SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) { diff --git a/include/linux/fs.h b/include/linux/fs.h index a823d4be38e7..017a15b707e2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1999,6 +1999,7 @@ struct filename { bool separate; /* should "name" be freed? */ }; +extern long vfs_truncate(struct path *, loff_t); extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int do_fallocate(struct file *file, int mode, loff_t offset, -- cgit v1.2.3 From 9dc8d9bfe4415efb61a5e9390706b8a3bffef329 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:36 +0000 Subject: CacheFiles: Implement invalidation Implement invalidation for CacheFiles. This is in two parts: (1) Provide an invalidation method (which just truncates the backing file). (2) Abort attempts to copy anything read from the backing file whilst invalidation is in progress. Question: CacheFiles uses truncation in a couple of places. It has been using notify_change() rather than sys_truncate() or something similar. This means it bypasses a bunch of checks and suchlike that it possibly should be making (security, file locking, lease breaking, vfsmount write). Should it be using vfs_truncate() as added by a preceding patch or should it use notify_write() and assume that anyone poking around in the cache files on disk gets everything they deserve? Signed-off-by: David Howells --- fs/cachefiles/interface.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ fs/cachefiles/rdwr.c | 5 ++++- 2 files changed, 53 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 9bff0f878cfd..7a9d574b961c 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -440,6 +440,54 @@ truncate_failed: return ret; } +/* + * Invalidate an object + */ +static void cachefiles_invalidate_object(struct fscache_operation *op) +{ + struct cachefiles_object *object; + struct cachefiles_cache *cache; + const struct cred *saved_cred; + struct path path; + uint64_t ni_size; + int ret; + + object = container_of(op->object, struct cachefiles_object, fscache); + cache = container_of(object->fscache.cache, + struct cachefiles_cache, cache); + + op->object->cookie->def->get_attr(op->object->cookie->netfs_data, + &ni_size); + + _enter("{OBJ%x},[%llu]", + op->object->debug_id, (unsigned long long)ni_size); + + if (object->backer) { + ASSERT(S_ISREG(object->backer->d_inode->i_mode)); + + fscache_set_store_limit(&object->fscache, ni_size); + + path.dentry = object->backer; + path.mnt = cache->mnt; + + cachefiles_begin_secure(cache, &saved_cred); + ret = vfs_truncate(&path, 0); + if (ret == 0) + ret = vfs_truncate(&path, ni_size); + cachefiles_end_secure(cache, saved_cred); + + if (ret != 0) { + fscache_set_store_limit(&object->fscache, 0); + if (ret == -EIO) + cachefiles_io_error_obj(object, + "Invalidate failed"); + } + } + + fscache_op_complete(op); + _leave(""); +} + /* * dissociate a cache from all the pages it was backing */ @@ -455,6 +503,7 @@ const struct fscache_cache_ops cachefiles_cache_ops = { .lookup_complete = cachefiles_lookup_complete, .grab_object = cachefiles_grab_object, .update_object = cachefiles_update_object, + .invalidate_object = cachefiles_invalidate_object, .drop_object = cachefiles_drop_object, .put_object = 
cachefiles_put_object, .sync_cache = cachefiles_sync_cache, diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 93a0815e0498..2c994885520a 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -174,7 +174,10 @@ static void cachefiles_read_copier(struct fscache_operation *_op) _debug("- copy {%lu}", monitor->back_page->index); recheck: - if (PageUptodate(monitor->back_page)) { + if (test_bit(FSCACHE_COOKIE_INVALIDATING, + &object->fscache.cookie->flags)) { + error = -ESTALE; + } else if (PageUptodate(monitor->back_page)) { copy_highpage(monitor->netfs_page, monitor->back_page); fscache_mark_page_cached(monitor->op, monitor->netfs_page); -- cgit v1.2.3 From de242c0b8b365a9e348bf53143e18e9d8c9cfae8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Dec 2012 21:52:38 +0000 Subject: NFS: Use FS-Cache invalidation Use the new FS-Cache invalidation facility from NFS to deal with foreign changes being detected on the server rather than attempting to retire the old cookie and get a new one. The problem with the old method was that NFS did not wait for all outstanding storage and retrieval ops on the cache to complete. There was no automatic wait between the calls to ->readpages() and calls to invalidate_inode_pages2() as the latter can only wait on locked pages that have been added to the pagecache (which they haven't yet on entry to ->readpages()). This was leading to oopses like the one below when an outstanding read got cut off from its cookie by a premature release. BUG: unable to handle kernel NULL pointer dereference at 00000000000000a8 IP: [] __fscache_read_or_alloc_pages+0x1dd/0x315 [fscache] PGD 15889067 PUD 15890067 PMD 0 Oops: 0000 [#1] SMP CPU 0 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 4544, comm: tar Not tainted 3.1.0-rc4-fsdevel+ #1064 /DG965RY RIP: 0010:[] [] __fscache_read_or_alloc_pages+0x1dd/0x315 [fscache] RSP: 0018:ffff8800158799e8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8800070d41e0 RCX: ffff8800083dc1b0 RDX: 0000000000000000 RSI: ffff880015879960 RDI: ffff88003e627b90 RBP: ffff880015879a28 R08: 0000000000000002 R09: 0000000000000002 R10: 0000000000000001 R11: ffff880015879950 R12: ffff880015879aa4 R13: 0000000000000000 R14: ffff8800083dc158 R15: ffff880015879be8 FS: 00007f671e9d87c0(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000000a8 CR3: 000000001587f000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process tar (pid: 4544, threadinfo ffff880015878000, task ffff880015875040) Stack: ffffffffa00b1759 ffff8800070dc158 ffff8800000213da ffff88002a286508 ffff880015879aa4 ffff880015879be8 0000000000000001 ffff88002a2866e8 ffff880015879a88 ffffffffa00b20be 00000000000200da ffff880015875040 Call Trace: [] ? nfs_fscache_wait_bit+0xd/0xd [nfs] [] __nfs_readpages_from_fscache+0x7e/0x13f [nfs] [] ? __alloc_pages_nodemask+0x156/0x662 [] nfs_readpages+0xee/0x187 [nfs] [] __do_page_cache_readahead+0x1be/0x267 [] ? __do_page_cache_readahead+0xa2/0x267 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x28b/0x29a [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x2ab/0x67e [] nfs_file_read+0xa4/0xc9 [nfs] [] do_sync_read+0xba/0xfa [] ? might_fault+0x4e/0x9e [] ? security_file_permission+0x7b/0x84 [] ? 
rw_verify_area+0xab/0xc8 [] vfs_read+0xaa/0x13a [] sys_read+0x45/0x6c [] system_call_fastpath+0x16/0x1b Reported-by: Mark Moseley Signed-off-by: David Howells --- fs/nfs/fscache.h | 20 +++++++++++++++++++- fs/nfs/inode.c | 20 ++++++++++++++++---- fs/nfs/nfs4proc.c | 3 ++- 3 files changed, 37 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index c5b11b53ff33..277b02782897 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -152,6 +152,22 @@ static inline void nfs_readpage_to_fscache(struct inode *inode, __nfs_readpage_to_fscache(inode, page, sync); } +/* + * Invalidate the contents of fscache for this inode. This will not sleep. + */ +static inline void nfs_fscache_invalidate(struct inode *inode) +{ + fscache_invalidate(NFS_I(inode)->fscache); +} + +/* + * Wait for an object to finish being invalidated. + */ +static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) +{ + fscache_wait_on_invalidate(NFS_I(inode)->fscache); +} + /* * indicate the client caching state as readable text */ @@ -162,7 +178,6 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server) return "no "; } - #else /* CONFIG_NFS_FSCACHE */ static inline int nfs_fscache_register(void) { return 0; } static inline void nfs_fscache_unregister(void) {} @@ -205,6 +220,9 @@ static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, static inline void nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) {} + +static inline void nfs_fscache_invalidate(struct inode *inode) {} + static inline const char *nfs_server_fscache_state(struct nfs_server *server) { return "no "; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2faae14d89f4..ebeb94ce1b0b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -161,10 +161,12 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo_timestamp = jiffies; memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); - if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; - else + nfs_fscache_invalidate(inode); + } else { nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + } } void nfs_zap_caches(struct inode *inode) @@ -179,6 +181,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) if (mapping->nrpages != 0) { spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); spin_unlock(&inode->i_lock); } } @@ -881,7 +884,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); - nfs_fscache_reset_inode_cookie(inode); + nfs_fscache_wait_on_invalidate(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); return 0; @@ -957,6 +960,10 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } + + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return ret; } @@ -1205,8 +1212,10 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr struct 
nfs_inode *nfsi = NFS_I(inode); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfs_fscache_invalidate(inode); + } if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; return nfs_refresh_inode_locked(inode, fattr); @@ -1494,6 +1503,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; + if (invalid & NFS_INO_INVALID_DATA) + nfs_fscache_invalidate(inode); + return 0; out_err: /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 493f0f41c554..5d864fb36578 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -64,7 +64,7 @@ #include "pnfs.h" #include "netns.h" #include "nfs4session.h" - +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -734,6 +734,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; + nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); } -- cgit v1.2.3 From b4cf1e08c8ac95eff65faa53904f7f13ac78194b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:45 +0000 Subject: CacheFiles: Add missing retrieval completions CacheFiles is missing some calls to fscache_retrieval_complete() in the error handling/collision paths of its reader functions. This can be seen by the following assertion tripping in fscache_put_operation() whereby the operation being destroyed is still in the in-progress state and has not been cancelled or completed: FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:408! invalid opcode: 0000 [#1] SMP CPU 2 Modules linked in: xfs ioatdma dca loop joydev evdev psmouse dcdbas pcspkr serio_raw i5000_edac edac_core i5k_amb shpchp pci_hotplug sg sr_mod] Pid: 8062, comm: httpd Not tainted 3.1.0-rc8 #1 Dell Inc. PowerEdge 1950/0DT097 RIP: 0010:[] [] fscache_put_operation+0x304/0x330 RSP: 0018:ffff880062f739d8 EFLAGS: 00010296 RAX: 0000000000000025 RBX: ffff8800c5122e84 RCX: ffffffff81ddf040 RDX: 00000000ffffffff RSI: 0000000000000082 RDI: ffffffff81ddef30 RBP: ffff880062f739f8 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000003 R12: ffff8800c5122e40 R13: ffff880037a2cd20 R14: ffff880087c7a058 R15: ffff880087c7a000 FS: 00007f63dcf636e0(0000) GS:ffff88022fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f0c0a91f000 CR3: 0000000062ec2000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process httpd (pid: 8062, threadinfo ffff880062f72000, task ffff880087e58000) Stack: ffff880062f73bf8 0000000000000000 ffff880062f73bf8 ffff880037a2cd20 ffff880062f73a68 ffffffff8119aa7e ffff88006540e000 ffff880062f73ad4 ffff88008e9a4308 ffff880037a2cd20 ffff880062f73a48 ffff8800c5122e40 Call Trace: [] __fscache_read_or_alloc_pages+0x1fe/0x530 [] __nfs_readpages_from_fscache+0x70/0x1c0 [] nfs_readpages+0xca/0x1e0 [] ? rpc_do_put_task+0x36/0x50 [] ? alloc_nfs_open_context+0x4b/0x110 [] ? 
rpc_call_sync+0x5a/0x70 [] __do_page_cache_readahead+0x1ca/0x270 [] ra_submit+0x21/0x30 [] ondemand_readahead+0x11d/0x250 [] page_cache_sync_readahead+0x36/0x60 [] generic_file_aio_read+0x454/0x770 [] nfs_file_read+0xe1/0x130 [] do_sync_read+0xd9/0x120 [] ? mntput+0x1f/0x40 [] ? fput+0x1cb/0x260 [] vfs_read+0xc8/0x180 [] sys_read+0x55/0x90 Reported-by: Mark Moseley Signed-off-by: David Howells --- fs/cachefiles/rdwr.c | 14 ++++++++++---- fs/fscache/page.c | 2 ++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 2c994885520a..480992259707 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -361,8 +361,10 @@ out: read_error: _debug("read error %d", ret); - if (ret == -ENOMEM) + if (ret == -ENOMEM) { + fscache_retrieval_complete(op, 1); goto out; + } io_error: cachefiles_io_error_obj(object, "Page read error on backing file"); fscache_retrieval_complete(op, 1); @@ -551,6 +553,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, if (ret < 0) { if (ret == -EEXIST) { page_cache_release(netpage); + fscache_retrieval_complete(op, 1); continue; } goto nomem; @@ -627,6 +630,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, if (ret < 0) { if (ret == -EEXIST) { page_cache_release(netpage); + fscache_retrieval_complete(op, 1); continue; } goto nomem; @@ -645,9 +649,9 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, /* the netpage is unlocked and marked up to date here */ fscache_end_io(op, netpage, 0); - fscache_retrieval_complete(op, 1); page_cache_release(netpage); netpage = NULL; + fscache_retrieval_complete(op, 1); continue; } @@ -682,15 +686,17 @@ out: nomem: _debug("nomem"); ret = -ENOMEM; - goto out; + goto record_page_complete; read_error: _debug("read error %d", ret); if (ret == -ENOMEM) - goto out; + goto record_page_complete; io_error: cachefiles_io_error_obj(object, "Page read error on backing file"); ret = -ENOBUFS; +record_page_complete: + fscache_retrieval_complete(op, 1); goto out; } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 7bf9d2557052..4dbbca162620 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -329,6 +329,8 @@ check_if_dead: return -ENOBUFS; } if (unlikely(fscache_object_is_dead(object))) { + pr_err("%s() = -ENOBUFS [obj dead %d]", __func__, op->op.state); + fscache_cancel_op(&op->op); fscache_stat(stat_object_dead); return -ENOBUFS; } -- cgit v1.2.3 From 03acc4be5e479eebc95338cd1d72a9954c128e2b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:46 +0000 Subject: FS-Cache: Initialise the object event mask with the calculated mask Initialise the object event mask with the calculated mask rather than unmasking undefined events also. 
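As a rough illustration of the difference (a sketch under the assumption that the defined events occupy bits 0-7, as they do in this series once EV_INVALIDATE is added):

	unsigned long defined  = FSCACHE_OBJECT_EVENTS_MASK;	/* 0xff */
	unsigned long old_mask = ULONG_MAX & ~(1UL << FSCACHE_OBJECT_EV_CLEARED);
	unsigned long new_mask = defined & ~(1UL << FSCACHE_OBJECT_EV_CLEARED);

	/* old_mask also has bits 8..BITS_PER_LONG-1 set, so raising an event
	 * number with no defined meaning would still pass the event_mask test
	 * in fscache_raise_event() and enqueue the object; new_mask only
	 * covers real events. */

Limiting the initial mask to the calculated set means an undefined event bit can no longer slip through and reach the state machine unhandled.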
Signed-off-by: David Howells --- fs/fscache/object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 80b549141ea6..2ef8a082a272 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -114,7 +114,8 @@ static void fscache_object_state_machine(struct fscache_object *object) /* wait for the parent object to become ready */ case FSCACHE_OBJECT_INIT: object->event_mask = - ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); + FSCACHE_OBJECT_EVENTS_MASK & + ~(1 << FSCACHE_OBJECT_EV_CLEARED); fscache_initialise_object(object); goto done; -- cgit v1.2.3 From c2d35bfe4b508451b75b5b6bc60a08dbdc44f952 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:47 +0000 Subject: FS-Cache: Don't mask off the object event mask when printing it Don't mask off the object event mask when printing it. That way it can be seen if there are bits set that shouldn't be. Signed-off-by: David Howells --- fs/cachefiles/namei.c | 3 +-- fs/fscache/object-list.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index b0b5f7cdfffa..8c01c5fcdf75 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -40,8 +40,7 @@ void __cachefiles_printk_object(struct cachefiles_object *object, printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", prefix, fscache_object_states[object->fscache.state], object->fscache.flags, work_busy(&object->fscache.work), - object->fscache.events, - object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK); + object->fscache.events, object->fscache.event_mask); printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", prefix, object->fscache.n_ops, object->fscache.n_in_progress, object->fscache.n_exclusive); diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index ebe29c581380..f27c89d17885 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c @@ -245,7 +245,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) obj->n_in_progress, obj->n_exclusive, atomic_read(&obj->n_reads), - obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK, + obj->event_mask, obj->events, obj->flags, work_busy(&obj->work)); -- cgit v1.2.3 From 75bc411388f4aeb9fb0381bd56eb5d67193ed9a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:48 +0000 Subject: FS-Cache: Limit the number of I/O error reports for a cache Limit the number of I/O error reports for a cache to 1 to prevent massive amounts of noise. After the first I/O error the cache is taken off line automatically, so must be restarted to resume caching. 
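For orientation, a cache backend such as CacheFiles reports a fatal backing-store failure by calling fscache_io_error(); the sketch below (hypothetical function names, not taken from the patch) shows the reporting side and the flag that other code can test to see whether the cache has gone offline:

	/* Hypothetical backend error path: with this patch only the first
	 * call prints anything; FSCACHE_IOERROR stays set and the cache must
	 * be restarted before it will serve requests again. */
	static void example_backend_fatal_error(struct fscache_cache *cache)
	{
		fscache_io_error(cache);
	}

	/* Hypothetical check used before attempting further cache work. */
	static bool example_cache_offline(struct fscache_cache *cache)
	{
		return test_bit(FSCACHE_IOERROR, &cache->flags);
	}

A later patch in this series makes the same flag test when deciding that a refused exclusive-op submission should return -EIO rather than hitting a BUG().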
Signed-off-by: David Howells --- fs/fscache/cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index 6a3c48abd677..b52aed1dca97 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c @@ -314,10 +314,10 @@ EXPORT_SYMBOL(fscache_add_cache); */ void fscache_io_error(struct fscache_cache *cache) { - set_bit(FSCACHE_IOERROR, &cache->flags); - - printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", - cache->ops->name); + if (!test_and_set_bit(FSCACHE_IOERROR, &cache->flags)) + printk(KERN_ERR "FS-Cache:" + " Cache '%s' stopped due to I/O error\n", + cache->ops->name); } EXPORT_SYMBOL(fscache_io_error); -- cgit v1.2.3 From 8d76349d359064859217dc292dc8733e209705af Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:48 +0000 Subject: FS-Cache: Exclusive op submission can BUG if there's been an I/O error The function to submit an exclusive op (fscache_submit_exclusive_op()) can BUG if there's been an I/O error because it may see the parent cache object in an unexpected state. It should only BUG if there hasn't been an I/O error. In this case the problem was produced by remounting the cache partition to be R/O. The EROFS state was detected and the cache was aborted, but not everything handled the aborting correctly. SysRq : Emergency Remount R/O EXT4-fs (sda6): re-mounted. Opts: (null) Emergency Remount complete CacheFiles: I/O Error: Failed to update xattr with error -30 FS-Cache: Cache cachefiles stopped due to I/O error ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:128! invalid opcode: 0000 [#1] SMP CPU 0 Modules linked in: cachefiles nfs fscache auth_rpcgss nfs_acl lockd sunrpc Pid: 6612, comm: kworker/u:2 Not tainted 3.1.0-rc8-fsdevel+ #1093 /DG965RY RIP: 0010:[] [] fscache_submit_exclusive_op+0x2ad/0x2c2 [fscache] RSP: 0018:ffff880000853d40 EFLAGS: 00010206 RAX: ffff880038ac72a8 RBX: ffff8800181f2260 RCX: ffffffff81f2b2b0 RDX: 0000000000000001 RSI: ffffffff8179a478 RDI: ffff8800181f2280 RBP: ffff880000853d60 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff880038ac7268 R13: ffff8800181f2280 R14: ffff88003a359190 R15: 000000010122b162 FS: 0000000000000000(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000034cc4a77f0 CR3: 0000000010e96000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kworker/u:2 (pid: 6612, threadinfo ffff880000852000, task ffff880014c3c040) Stack: ffff8800181f2260 ffff8800181f2310 ffff880038ac7268 ffff8800181f2260 ffff880000853dc0 ffffffffa0072375 ffff880037ecfe00 ffff88003a359198 ffff880000853dc0 0000000000000246 0000000000000000 ffff88000a91d308 Call Trace: [] fscache_object_work_func+0x792/0xe65 [fscache] [] process_one_work+0x1eb/0x37f [] ? process_one_work+0x18d/0x37f [] ? fscache_enqueue_dependents+0xd8/0xd8 [fscache] [] worker_thread+0x15a/0x21a [] ? rescuer_thread+0x188/0x188 [] kthread+0x7f/0x87 [] kernel_thread_helper+0x4/0x10 [] ? finish_task_switch+0x45/0xc0 [] ? retint_restore_args+0xe/0xe [] ? __init_kthread_worker+0x53/0x53 [] ? 
gs_change+0xb/0xb Signed-off-by: David Howells --- fs/fscache/internal.h | 1 + fs/fscache/object.c | 23 +++++++++++++++++------ fs/fscache/operation.c | 13 ++++++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index c81179303930..dcb3e1d5dbf6 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -288,6 +288,7 @@ extern const struct file_operations fscache_stats_fops; static inline void fscache_raise_event(struct fscache_object *object, unsigned event) { + BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS); if (!test_and_set_bit(event, &object->events) && test_bit(event, &object->event_mask)) fscache_enqueue_object(object); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 2ef8a082a272..2c512cbac380 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -103,6 +103,7 @@ static void fscache_object_state_machine(struct fscache_object *object) { enum fscache_object_state new_state; struct fscache_cookie *cookie; + int event; ASSERT(object != NULL); @@ -275,7 +276,8 @@ static void fscache_object_state_machine(struct fscache_object *object) /* determine the transition from a lookup state */ lookup_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: case FSCACHE_OBJECT_EV_RETIRE: case FSCACHE_OBJECT_EV_RELEASE: @@ -292,7 +294,8 @@ lookup_transit: /* determine the transition from an active state */ active_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: case FSCACHE_OBJECT_EV_RETIRE: case FSCACHE_OBJECT_EV_RELEASE: @@ -314,7 +317,8 @@ active_transit: /* determine the transition from a terminal state */ terminal_transit: - switch (fls(object->events & object->event_mask) - 1) { + event = fls(object->events & object->event_mask) - 1; + switch (event) { case FSCACHE_OBJECT_EV_WITHDRAW: new_state = FSCACHE_OBJECT_WITHDRAWING; goto change_state; @@ -347,8 +351,8 @@ done: unsupported_event: printk(KERN_ERR "FS-Cache:" - " Unsupported event %lx [mask %lx] in state %s\n", - object->events, object->event_mask, + " Unsupported event %d [%lx/%lx] in state %s\n", + event, object->events, object->event_mask, fscache_object_states[object->state]); BUG(); } @@ -945,7 +949,7 @@ static void fscache_invalidate_object(struct fscache_object *object) spin_lock(&cookie->lock); if (fscache_submit_exclusive_op(object, op) < 0) - BUG(); + goto submit_op_failed; spin_unlock(&cookie->lock); fscache_put_operation(op); @@ -960,4 +964,11 @@ static void fscache_invalidate_object(struct fscache_object *object) */ fscache_invalidation_complete(cookie); _leave(""); + return; + +submit_op_failed: + spin_unlock(&cookie->lock); + kfree(op); + fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); + _leave(" [EIO]"); } diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index c58dbe613266..9e6b7d232bb1 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -84,6 +84,8 @@ static void fscache_run_op(struct fscache_object *object, int fscache_submit_exclusive_op(struct fscache_object *object, struct fscache_operation *op) { + int ret; + _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED); @@ -116,6 +118,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, /* need to issue a new write op after 
this */ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + ret = 0; } else if (object->state == FSCACHE_OBJECT_CREATING) { op->object = object; object->n_ops++; @@ -123,13 +126,17 @@ int fscache_submit_exclusive_op(struct fscache_object *object, atomic_inc(&op->usage); list_add_tail(&op->pend_link, &object->pending_ops); fscache_stat(&fscache_n_op_pend); + ret = 0; } else { - /* not allowed to submit ops in any other state */ - BUG(); + /* If we're in any other state, there must have been an I/O + * error of some nature. + */ + ASSERT(test_bit(FSCACHE_IOERROR, &object->cache->flags)); + ret = -EIO; } spin_unlock(&object->lock); - return 0; + return ret; } /* -- cgit v1.2.3 From 8c209ce721444a61b61d9e772746c721e4d8d1e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 13:34:49 +0000 Subject: NFS: nfs_migrate_page() does not wait for FS-Cache to finish with a page nfs_migrate_page() does not wait for FS-Cache to finish with a page, probably leading to the following bad-page-state: BUG: Bad page state in process python-bin pfn:17d39b page:ffffea00053649e8 flags:004000000000100c count:0 mapcount:0 mapping:(null) index:38686 (Tainted: G B ---------------- ) Pid: 31053, comm: python-bin Tainted: G B ---------------- 2.6.32-71.24.1.el6.x86_64 #1 Call Trace: [] bad_page+0x107/0x160 [] free_hot_cold_page+0x1c9/0x220 [] __pagevec_free+0x59/0xb0 [] ? flush_tlb_others_ipi+0x128/0x130 [] release_pages+0x21c/0x250 [] ? remove_migration_pte+0x28a/0x2b0 [] ? mem_cgroup_get_reclaim_stat_from_page+0x18/0x70 [] ____pagevec_lru_add+0x167/0x180 [] __lru_cache_add+0x58/0x70 [] lru_cache_add_lru+0x21/0x40 [] putback_lru_page+0x69/0x100 [] migrate_pages+0x13d/0x5d0 [] ? ____pagevec_lru_add+0x167/0x180 [] ? compaction_alloc+0x0/0x370 [] compact_zone+0x4cc/0x600 [] ? get_page_from_freelist+0x15c/0x820 [] ? check_preempt_wakeup+0x1c4/0x3c0 [] compact_zone_order+0x7e/0xb0 [] try_to_compact_pages+0x109/0x170 [] __alloc_pages_nodemask+0x5ed/0x850 [] ? thread_return+0x4e/0x778 [] alloc_pages_vma+0x93/0x150 [] do_huge_pmd_anonymous_page+0x135/0x340 [] ? rwsem_down_read_failed+0x26/0x30 [] handle_mm_fault+0x245/0x2b0 [] do_page_fault+0x123/0x3a0 [] page_fault+0x25/0x30 nfs_migrate_page() calls nfs_fscache_release_page() which doesn't actually wait - even if __GFP_WAIT is set. The reason that doesn't wait is that fscache_maybe_release_page() might deadlock the allocator as the work threads writing to the cache may all end up sleeping on memory allocation. However, I wonder if that is actually a problem. There are a number of things I can do to deal with this: (1) Make nfs_migrate_page() wait. (2) Make fscache_maybe_release_page() honour the __GFP_WAIT flag. (3) Set a timeout around the wait. (4) Make nfs_migrate_page() return an error if the page is still busy. For the moment, I'll select (2) and (4). 
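Taken together, options (2) and (4) give a netfs page-migration hook roughly the following shape (a sketch only: the example_* names are assumptions, while fscache_maybe_release_page() is the usual inline wrapper around the __fscache_maybe_release_page() function changed below):

	/* Hypothetical migratepage-style hook in a netfs. */
	static int example_migrate_page(struct address_space *mapping,
					struct page *newpage, struct page *page,
					enum migrate_mode mode)
	{
		if (PagePrivate(page))
			return -EBUSY;

		/* Option (2): with __GFP_WAIT set, the cache may now sleep
		 * until its write to the page completes.  Option (4): if the
		 * page is still busy, report that rather than migrating it
		 * anyway. */
		if (!fscache_maybe_release_page(example_cookie(page), page,
						GFP_KERNEL))
			return -EBUSY;

		return migrate_page(mapping, newpage, page, mode);
	}

In the NFS case this is what the nfs_migrate_page() hunk below does via nfs_fscache_release_page().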
Signed-off-by: David Howells Acked-by: Jeff Layton --- fs/fscache/internal.h | 1 + fs/fscache/page.c | 19 ++++++++++++++----- fs/fscache/stats.c | 6 ++++-- fs/nfs/write.c | 3 ++- 4 files changed, 21 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index dcb3e1d5dbf6..88a48ccb7d9e 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -200,6 +200,7 @@ extern atomic_t fscache_n_store_vmscan_not_storing; extern atomic_t fscache_n_store_vmscan_gone; extern atomic_t fscache_n_store_vmscan_busy; extern atomic_t fscache_n_store_vmscan_cancelled; +extern atomic_t fscache_n_store_vmscan_wait; extern atomic_t fscache_n_marks; extern atomic_t fscache_n_uncaches; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 4dbbca162620..f9b2fb3ae492 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -56,6 +56,7 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, _enter("%p,%p,%x", cookie, page, gfp); +try_again: rcu_read_lock(); val = radix_tree_lookup(&cookie->stores, page->index); if (!val) { @@ -104,11 +105,19 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie, return true; page_busy: - /* we might want to wait here, but that could deadlock the allocator as - * the work threads writing to the cache may all end up sleeping - * on memory allocation */ - fscache_stat(&fscache_n_store_vmscan_busy); - return false; + /* We will wait here if we're allowed to, but that could deadlock the + * allocator as the work threads writing to the cache may all end up + * sleeping on memory allocation, so we may need to impose a timeout + * too. */ + if (!(gfp & __GFP_WAIT)) { + fscache_stat(&fscache_n_store_vmscan_busy); + return false; + } + + fscache_stat(&fscache_n_store_vmscan_wait); + __fscache_wait_on_page_write(cookie, page); + gfp &= ~__GFP_WAIT; + goto try_again; } EXPORT_SYMBOL(__fscache_maybe_release_page); diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 51cdaee14109..8179e8bc4a3d 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -69,6 +69,7 @@ atomic_t fscache_n_store_vmscan_not_storing; atomic_t fscache_n_store_vmscan_gone; atomic_t fscache_n_store_vmscan_busy; atomic_t fscache_n_store_vmscan_cancelled; +atomic_t fscache_n_store_vmscan_wait; atomic_t fscache_n_marks; atomic_t fscache_n_uncaches; @@ -232,11 +233,12 @@ static int fscache_stats_show(struct seq_file *m, void *v) atomic_read(&fscache_n_store_radix_deletes), atomic_read(&fscache_n_store_pages_over_limit)); - seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n", + seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u wt=%u\n", atomic_read(&fscache_n_store_vmscan_not_storing), atomic_read(&fscache_n_store_vmscan_gone), atomic_read(&fscache_n_store_vmscan_busy), - atomic_read(&fscache_n_store_vmscan_cancelled)); + atomic_read(&fscache_n_store_vmscan_cancelled), + atomic_read(&fscache_n_store_vmscan_wait)); seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n", atomic_read(&fscache_n_op_pend), diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5209916e1222..b673be31590e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1794,7 +1794,8 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, if (PagePrivate(page)) return -EBUSY; - nfs_fscache_release_page(page, GFP_KERNEL); + if (!nfs_fscache_release_page(page, GFP_KERNEL)) + return -EBUSY; return migrate_page(mapping, newpage, page, mode); } -- cgit v1.2.3 From 969695215f9a865cbf64c4ce3742ac9fc57fffed Mon Sep 17 00:00:00 2001 From: David Howells 
Date: Wed, 5 Dec 2012 13:34:49 +0000 Subject: FS-Cache: Add transition to handle invalidate immediately after lookup Add a missing transition to the FS-Cache object state machine to handle an invalidation event occurring between the back end completing the object lookup by calling fscache_obtained_object() (which moves to state OBJECT_AVAILABLE) and the backend returning to fscache_lookup_object() and thence to fscache_object_state_machine() which then does a goto lookup_transit to handle the transition - but lookup_transit doesn't handle EV_INVALIDATE. Without this, the following BUG can be logged: FS-Cache: Unsupported event 2 [5/f7] in state OBJECT_AVAILABLE ------------[ cut here ]------------ kernel BUG at fs/fscache/object.c:357! Where event 2 is EV_INVALIDATE. Signed-off-by: David Howells --- fs/fscache/object.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 2c512cbac380..50d41c180211 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -284,6 +284,9 @@ lookup_transit: case FSCACHE_OBJECT_EV_ERROR: new_state = FSCACHE_OBJECT_LC_DYING; goto change_state; + case FSCACHE_OBJECT_EV_INVALIDATE: + new_state = FSCACHE_OBJECT_INVALIDATING; + goto change_state; case FSCACHE_OBJECT_EV_REQUEUE: goto done; case -1: -- cgit v1.2.3 From a4ff146881c2764d7c3e4ef710e7c27d521ddd51 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Dec 2012 16:31:49 +0000 Subject: NFS4: Open files for fscaching nfs4_file_open() should open files for fscaching. Signed-off-by: David Howells --- fs/nfs/fscache.c | 1 + fs/nfs/nfs4file.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index c817787fbdb4..24d1d1c5fcaf 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -307,6 +307,7 @@ void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) nfs_fscache_inode_unlock(inode); } } +EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie); /* * Replace a per-inode cookie due to revalidation detecting a file having diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index e7699308364a..08ddcccb8887 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -5,6 +5,7 @@ */ #include #include "internal.h" +#include "fscache.h" #include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -74,6 +75,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_file_set_open_context(filp, ctx); + nfs_fscache_set_inode_cookie(inode, filp); err = 0; out_put_ctx: -- cgit v1.2.3 From 9c04caa81b876faee5f1cc6eaad76dd7021ab8ff Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Dec 2012 18:08:02 +0000 Subject: FS-Cache: Fix signal handling during waits wait_on_bit() with TASK_INTERRUPTIBLE returns 1 rather than a negative error code, so change what we check for. This means that the signal handling in fscache_wait_for_retrieval_activation() should now work properly. Without this, the following bug can be seen if CTRL-C is pressed during an fscache read operation: FS-Cache: Assertion failed 2 == 3 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/page.c:347! 
invalid opcode: 0000 [#1] SMP Modules linked in: cachefiles(F) nfsv4(F) nfsv3(F) nfsv2(F) nfs(F) fscache(F) auth_rpcgss(F) nfs_acl(F) lockd(F) sunrpc(F) CPU 1 Pid: 15006, comm: slurp-q Tainted: GF 3.7.0-rc8-fsdevel+ #411 /DG965RY RIP: 0010:[] [] fscache_wait_for_retrieval_activation+0x167/0x177 [fscache] RSP: 0018:ffff88002a4c39a8 EFLAGS: 00010292 RAX: 000000000000001a RBX: ffff88002d3dc158 RCX: 0000000000008685 RDX: ffffffff8102ccd6 RSI: 0000000000000001 RDI: ffffffff8102d1d6 RBP: ffff88002a4c39c8 R08: 0000000000000002 R09: 0000000000000000 R10: ffffffff8163afa0 R11: ffff88003bd11900 R12: ffffffffa00868c8 R13: ffff880028306458 R14: ffff88002d3dc1b0 R15: ffff88001372e538 FS: 00007f17426a0700(0000) GS:ffff88003bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f1742494a44 CR3: 0000000031bd7000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process slurp-q (pid: 15006, threadinfo ffff88002a4c2000, task ffff880023de3040) Stack: ffff88002d3dc158 ffff88001372e538 ffff88002a4c3ab4 ffff8800283064e0 ffff88002a4c3a38 ffffffffa0080f6d 0000000000000000 ffff880023de3040 ffff88002a4c3ac8 ffffffff810ac8ae ffff880028306458 ffff88002a4c3bc8 Call Trace: [] __fscache_read_or_alloc_pages+0x24f/0x4bc [fscache] [] ? __alloc_pages_nodemask+0x195/0x75c [] __nfs_readpages_from_fscache+0x86/0x13d [nfs] [] nfs_readpages+0x186/0x1bd [nfs] [] ? alloc_pages_current+0xc7/0xe4 [] ? __page_cache_alloc+0x84/0x91 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] __do_page_cache_readahead+0x237/0x2e0 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x359/0x382 [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x26b/0x637 [] ? nfs_mark_delegation_referenced+0xb/0xb [nfsv4] [] nfs_file_read+0xaa/0xcf [nfs] [] do_sync_read+0x91/0xd1 [] vfs_read+0x9b/0x144 [] sys_read+0x44/0x75 [] system_call_fastpath+0x16/0x1b Signed-off-by: David Howells --- fs/fscache/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index f9b2fb3ae492..5b5d9081c8b2 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -319,7 +319,7 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, fscache_stat(stat_op_waits); if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, fscache_wait_bit_interruptible, - TASK_INTERRUPTIBLE) < 0) { + TASK_INTERRUPTIBLE) != 0) { ret = fscache_cancel_op(&op->op); if (ret == 0) return -ERESTARTSYS; -- cgit v1.2.3 From 7ef001e937e8b9cbedb2fc1c31dd681ac3b31927 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 7 Dec 2012 10:41:26 +0000 Subject: FS-Cache: One of the write operation paths doesn't set the object state In fscache_write_op(), if the object is determined to have become inactive or to have lost its cookie, we don't move the operation state from in-progress, and so an assertion in fscache_put_operation() fails with an assertion (see below). Instrumenting fscache_op_work_func() indicates that it called fscache_write_op() before calling fscache_put_operation() - where the assertion failed. The assertion at line 433 indicates that the operation state is IN_PROGRESS rather than being COMPLETE or CANCELLED. Instrumenting fscache_write_op() showed that it was being called on an object that had had its cookie removed and that this was due to relinquishment of the cookie by the netfs. 
At this point fscache no longer has access to the pages of netfs data that were requested to be written, and so simply cancelling the operation is the thing to do. FS-Cache: Assertion failed 3 == 5 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/operation.c:433! invalid opcode: 0000 [#1] SMP Modules linked in: cachefiles(F) nfsv4(F) nfsv3(F) nfsv2(F) nfs(F) fscache(F) auth_rpcgss(F) nfs_acl(F) lockd(F) sunrpc(F) CPU 0 Pid: 1035, comm: kworker/u:3 Tainted: GF 3.7.0-rc8-fsdevel+ #411 /DG965RY RIP: 0010:[] [] fscache_put_operation+0x11a/0x2ed [fscache] RSP: 0018:ffff88003e32bcf8 EFLAGS: 00010296 RAX: 000000000000000f RBX: ffff88001818eb78 RCX: ffffffff6c102000 RDX: ffffffff8102d1ad RSI: ffffffff6c102000 RDI: ffffffff8102d1d6 RBP: ffff88003e32bd18 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffa00811da R13: 0000000000000001 R14: 0000000100625d26 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fff7dd31c68 CR3: 000000003d730000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kworker/u:3 (pid: 1035, threadinfo ffff88003e32a000, task ffff88003bb38080) Stack: ffffffff8102d1ad ffff88001818eb78 ffffffffa00811da 0000000000000001 ffff88003e32bd48 ffffffffa007f0ad ffff88001818eb78 ffffffff819583c0 ffff88003df24e00 ffff88003882c3e0 ffff88003e32bde8 ffffffff81042de0 Call Trace: [] ? vprintk_emit+0x3c6/0x41a [] ? __fscache_read_or_alloc_pages+0x4bc/0x4bc [fscache] [] fscache_op_work_func+0xec/0x123 [fscache] [] process_one_work+0x21c/0x3b0 [] ? process_one_work+0x1be/0x3b0 [] ? fscache_operation_gc+0x23e/0x23e [fscache] [] worker_thread+0x202/0x2df [] ? rescuer_thread+0x18e/0x18e [] kthread+0xd0/0xd8 [] ? _raw_spin_unlock_irq+0x29/0x3e [] ? __init_kthread_worker+0x55/0x55 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x55/0x55 Signed-off-by: David Howells --- fs/fscache/page.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 5b5d9081c8b2..ef0218f5080d 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -699,9 +699,27 @@ static void fscache_write_op(struct fscache_operation *_op) spin_lock(&object->lock); cookie = object->cookie; - if (!fscache_object_is_active(object) || !cookie) { + if (!fscache_object_is_active(object)) { + /* If we get here, then the on-disk cache object likely longer + * exists, so we should just cancel this write operation. + */ spin_unlock(&object->lock); - _leave(""); + op->op.state = FSCACHE_OP_ST_CANCELLED; + _leave(" [inactive]"); + return; + } + + if (!cookie) { + /* If we get here, then the cookie belonging to the object was + * detached, probably by the cookie being withdrawn due to + * memory pressure, which means that the pages we might write + * to the cache from no longer exist - therefore, we can just + * cancel this write operation. 
+ */ + spin_unlock(&object->lock); + op->op.state = FSCACHE_OP_ST_CANCELLED; + _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", + _op->flags, _op->state, object->state, object->flags); return; } -- cgit v1.2.3 From 1f372dff1da37e2b36ae9085368fa46896398598 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Dec 2012 20:03:13 +0000 Subject: FS-Cache: Mark cancellation of in-progress operation Mark as cancelled an operation that is in progress rather than pending at the time it is cancelled, and call fscache_complete_op() to cancel an operation so that blocked ops can be started. Signed-off-by: David Howells --- fs/cachefiles/interface.c | 2 +- fs/fscache/operation.c | 7 ++++--- fs/fscache/page.c | 10 +++++----- include/linux/fscache-cache.h | 4 ++-- 4 files changed, 12 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 7a9d574b961c..746ce532e130 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -484,7 +484,7 @@ static void cachefiles_invalidate_object(struct fscache_operation *op) } } - fscache_op_complete(op); + fscache_op_complete(op, true); _leave(""); } diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 9e6b7d232bb1..36c59604130d 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -363,9 +363,9 @@ void fscache_cancel_all_ops(struct fscache_object *object) } /* - * Record the completion of an in-progress operation. + * Record the completion or cancellation of an in-progress operation. */ -void fscache_op_complete(struct fscache_operation *op) +void fscache_op_complete(struct fscache_operation *op, bool cancelled) { struct fscache_object *object = op->object; @@ -380,7 +380,8 @@ void fscache_op_complete(struct fscache_operation *op) spin_lock(&object->lock); - op->state = FSCACHE_OP_ST_COMPLETE; + op->state = cancelled ? + FSCACHE_OP_ST_CANCELLED : FSCACHE_OP_ST_COMPLETE; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index ef0218f5080d..8a92b9fabe83 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -171,7 +171,7 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_abort_object(object); } - fscache_op_complete(op); + fscache_op_complete(op, true); _leave(""); } @@ -704,7 +704,7 @@ static void fscache_write_op(struct fscache_operation *_op) * exists, so we should just cancel this write operation. */ spin_unlock(&object->lock); - op->op.state = FSCACHE_OP_ST_CANCELLED; + fscache_op_complete(&op->op, false); _leave(" [inactive]"); return; } @@ -717,7 +717,7 @@ static void fscache_write_op(struct fscache_operation *_op) * cancel this write operation. 
*/ spin_unlock(&object->lock); - op->op.state = FSCACHE_OP_ST_CANCELLED; + fscache_op_complete(&op->op, false); _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", _op->flags, _op->state, object->state, object->flags); return; @@ -755,7 +755,7 @@ static void fscache_write_op(struct fscache_operation *_op) fscache_end_page_write(object, page); if (ret < 0) { fscache_abort_object(object); - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); } else { fscache_enqueue_operation(&op->op); } @@ -770,7 +770,7 @@ superseded: spin_unlock(&cookie->stores_lock); clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); spin_unlock(&object->lock); - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); _leave(""); } diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 73e68c8d5df4..5dfa0aa216b6 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -116,7 +116,7 @@ extern atomic_t fscache_op_debug_id; extern void fscache_op_work_func(struct work_struct *work); extern void fscache_enqueue_operation(struct fscache_operation *); -extern void fscache_op_complete(struct fscache_operation *); +extern void fscache_op_complete(struct fscache_operation *, bool); extern void fscache_put_operation(struct fscache_operation *); /** @@ -196,7 +196,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op, { op->n_pages -= n_pages; if (op->n_pages <= 0) - fscache_op_complete(&op->op); + fscache_op_complete(&op->op, true); } /** -- cgit v1.2.3 From 91c7fbbf63f33c77d8d28de624834a21888842bb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Dec 2012 11:02:22 +0000 Subject: FS-Cache: Clear remaining page count on retrieval cancellation Provide fscache_cancel_op() with a pointer to a function it should invoke under lock if it cancels an operation. Use this to clear the remaining page count upon cancellation of a pending retrieval operation so that fscache_release_retrieval_op() doesn't get an assertion failure (see below). This can happen when a signal occurs, say from CTRL-C being pressed during data retrieval. FS-Cache: Assertion failed 3 == 0 is false ------------[ cut here ]------------ kernel BUG at fs/fscache/page.c:237! invalid opcode: 0000 [#641] SMP Modules linked in: cachefiles(F) nfsv4(F) nfsv3(F) nfsv2(F) nfs(F) fscache(F) auth_rpcgss(F) nfs_acl(F) lockd(F) sunrpc(F) CPU 0 Pid: 6075, comm: slurp-q Tainted: GF D 3.7.0-rc8-fsdevel+ #411 /DG965RY RIP: 0010:[] [] fscache_release_retrieval_op+0x75/0xff [fscache] RSP: 0000:ffff88001c6d7988 EFLAGS: 00010296 RAX: 000000000000000f RBX: ffff880014cdfe00 RCX: ffffffff6c102000 RDX: ffffffff8102d1ad RSI: ffffffff6c102000 RDI: ffffffff8102d1d6 RBP: ffff88001c6d7998 R08: 0000000000000002 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000fffffe00 R13: ffff88001c6d7ab4 R14: ffff88001a8638a0 R15: ffff88001552b190 FS: 00007f877aaf0700(0000) GS:ffff88003bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fff11378fd2 CR3: 000000001c6c6000 CR4: 00000000000007f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process slurp-q (pid: 6075, threadinfo ffff88001c6d6000, task ffff88001c6c4080) Stack: ffffffffa007ec07 ffff880014cdfe00 ffff88001c6d79c8 ffffffffa007db4d ffffffffa007ec07 ffff880014cdfe00 00000000fffffe00 ffff88001c6d7ab4 ffff88001c6d7a38 ffffffffa008116d 0000000000000000 ffff88001c6c4080 Call Trace: [] ? 
fscache_cancel_op+0x194/0x1cf [fscache] [] fscache_put_operation+0x135/0x2ed [fscache] [] ? fscache_cancel_op+0x194/0x1cf [fscache] [] __fscache_read_or_alloc_pages+0x413/0x4bc [fscache] [] ? __alloc_pages_nodemask+0x195/0x75c [] __nfs_readpages_from_fscache+0x86/0x13d [nfs] [] nfs_readpages+0x186/0x1bd [nfs] [] ? alloc_pages_current+0xc7/0xe4 [] ? __page_cache_alloc+0x84/0x91 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] __do_page_cache_readahead+0x237/0x2e0 [] ? __do_page_cache_readahead+0xa6/0x2e0 [] ra_submit+0x1c/0x20 [] ondemand_readahead+0x359/0x382 [] page_cache_sync_readahead+0x38/0x3a [] generic_file_aio_read+0x26b/0x637 [] ? nfs_mark_delegation_referenced+0xb/0xb [nfsv4] [] nfs_file_read+0xaa/0xcf [nfs] [] do_sync_read+0x91/0xd1 [] vfs_read+0x9b/0x144 [] sys_read+0x44/0x75 [] system_call_fastpath+0x16/0x1b Signed-off-by: David Howells --- fs/fscache/internal.h | 3 ++- fs/fscache/operation.c | 5 ++++- fs/fscache/page.c | 17 ++++++++++++++--- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 88a48ccb7d9e..ee38fef4be51 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -121,7 +121,8 @@ extern int fscache_submit_exclusive_op(struct fscache_object *, struct fscache_operation *); extern int fscache_submit_op(struct fscache_object *, struct fscache_operation *); -extern int fscache_cancel_op(struct fscache_operation *); +extern int fscache_cancel_op(struct fscache_operation *, + void (*)(struct fscache_operation *)); extern void fscache_cancel_all_ops(struct fscache_object *); extern void fscache_abort_object(struct fscache_object *); extern void fscache_start_operations(struct fscache_object *); diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 36c59604130d..762a9ec4ffa4 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c @@ -298,7 +298,8 @@ void fscache_start_operations(struct fscache_object *object) /* * cancel an operation that's pending on an object */ -int fscache_cancel_op(struct fscache_operation *op) +int fscache_cancel_op(struct fscache_operation *op, + void (*do_cancel)(struct fscache_operation *)) { struct fscache_object *object = op->object; int ret; @@ -316,6 +317,8 @@ int fscache_cancel_op(struct fscache_operation *op) ASSERT(!list_empty(&op->pend_link)); fscache_stat(&fscache_n_op_cancelled); list_del_init(&op->pend_link); + if (do_cancel) + do_cancel(op); op->state = FSCACHE_OP_ST_CANCELLED; if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) object->n_exclusive--; diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 8a92b9fabe83..ff000e52072d 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -302,6 +302,17 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) return 0; } +/* + * Handle cancellation of a pending retrieval op + */ +static void fscache_do_cancel_retrieval(struct fscache_operation *_op) +{ + struct fscache_retrieval *op = + container_of(_op, struct fscache_retrieval, op); + + op->n_pages = 0; +} + /* * wait for an object to become active (or dead) */ @@ -320,7 +331,7 @@ static int fscache_wait_for_retrieval_activation(struct fscache_object *object, if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, fscache_wait_bit_interruptible, TASK_INTERRUPTIBLE) != 0) { - ret = fscache_cancel_op(&op->op); + ret = fscache_cancel_op(&op->op, fscache_do_cancel_retrieval); if (ret == 0) return -ERESTARTSYS; @@ -338,8 +349,8 @@ check_if_dead: return -ENOBUFS; } if (unlikely(fscache_object_is_dead(object))) { - 
pr_err("%s() = -ENOBUFS [obj dead %d]", __func__, op->op.state); - fscache_cancel_op(&op->op); + pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->op.state); + fscache_cancel_op(&op->op, fscache_do_cancel_retrieval); fscache_stat(stat_object_dead); return -ENOBUFS; } -- cgit v1.2.3