From e603a4c1b5c2b9d24139eeb1769c5ac600318c07 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Dec 2016 16:55:55 -0500 Subject: NFSv4: Update the attribute cache info in update_changeattr If we successfully updated the change attribute, we should timestamp the cache. While we do know that the other attributes are not completely up to date, we have the NFS_INO_INVALID_ATTR flag that let us know that, so it is valid to say that the cache has not timed out. We can also clear NFS_INO_REVAL_PAGECACHE, since our change attribute is now known to be valid. Conversely, if the change attribute did not match, we should make sure to also revalidate the access and ACL caches. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d33242c8d95d..39dfba9265f9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1089,8 +1089,15 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (!cinfo->atomic || cinfo->before != dir->i_version) + if (cinfo->atomic && cinfo->before == dir->i_version) { + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + nfsi->attrtimeo_timestamp = jiffies; + } else { nfs_force_lookup_revalidate(dir); + if (cinfo->before != dir->i_version) + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL; + } dir->i_version = cinfo->after; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); -- cgit v1.2.3 From 0bc2c9b4dca9668a236fde48ebb15e5f0735cbff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Dec 2016 19:48:09 -0500 Subject: NFSv4: Don't discard the attributes returned by asynchronous DELEGRETURN DELEGRETURN will always carry a reference to the inode except when the latter is being freed, so let's ensure that we always use that inode information to ensure close-to-open cache consistency, even when the DELEGRETURN call is asynchronous. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 39dfba9265f9..0d9fa18aa243 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5699,6 +5699,7 @@ static void nfs4_delegreturn_release(void *calldata) if (data->lr.roc) pnfs_roc_release(&data->lr.arg, &data->lr.res, data->res.lr_ret); + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } kfree(calldata); @@ -5787,10 +5788,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status != 0) goto out; status = data->rpc_status; - if (status == 0) - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); - else - nfs_refresh_inode(inode, &data->fattr); out: rpc_put_task(task); return status; -- cgit v1.2.3 From 58ff41842c7b8b8a79752e3d040188ebddb95194 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Dec 2016 17:39:58 -0500 Subject: NFS: Don't revalidate the file on close if we hold a delegation If we're holding a delegation, we can skip sending the close-to-open GETATTR until we're returning that delegation. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7de345fd8e1e..2fc237cd338e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -795,6 +795,8 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) if (!is_sync) return; inode = d_inode(ctx->dentry); + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return; nfsi = NFS_I(inode); if (inode->i_mapping->nrpages == 0) return; -- cgit v1.2.3 From 61540bf6bb40ddfa3e766de91cedca2e1afd24eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 8 Dec 2016 18:18:38 -0500 Subject: NFS: Clean up cache validity checking Consolidate the open-coded checking of NFS_I(inode)->cache_validity into a couple of helper functions. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 12 +----------- fs/nfs/inode.c | 43 ++++++++++++++++++++++++++++++++----------- fs/nfs/internal.h | 1 + 3 files changed, 34 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 64c11f399b3d..599a602cb2fd 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -101,21 +101,11 @@ EXPORT_SYMBOL_GPL(nfs_file_release); static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_inode *nfsi = NFS_I(inode); - const unsigned long force_reval = NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; - unsigned long cache_validity = nfsi->cache_validity; - - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) && - (cache_validity & force_reval) != force_reval) - goto out_noreval; if (filp->f_flags & O_DIRECT) goto force_reval; - if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - goto force_reval; - if (nfs_attribute_timeout(inode)) + if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE)) goto force_reval; -out_noreval: return 0; force_reval: return __nfs_revalidate_inode(server, inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2fc237cd338e..e3194aa797f7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -160,6 +160,36 @@ int nfs_sync_mapping(struct address_space *mapping) return ret; } +static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + /* Special case for the pagecache or access cache */ + if (flags == NFS_INO_REVAL_PAGECACHE && + !(cache_validity & NFS_INO_REVAL_FORCED)) + return false; + return (cache_validity & flags) != 0; +} + +static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); + + if ((cache_validity & flags) != 0) + return true; + if (nfs_attribute_timeout(inode)) + return true; + return false; +} + +bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) +{ + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + return nfs_check_cache_invalid_delegated(inode, flags); + + return nfs_check_cache_invalid_not_delegated(inode, flags); +} + static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1116,17 +1146,8 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map bool nfs_mapping_need_revalidate_inode(struct inode *inode) { - unsigned long cache_validity = NFS_I(inode)->cache_validity; - - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { - const unsigned long force_reval = - NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; - return (cache_validity & force_reval) == force_reval; - } - - return (cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_timeout(inode) - || NFS_STALE(inode); + return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) || + NFS_STALE(inode); } int nfs_revalidate_mapping_rcu(struct inode *inode) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6b79c2ca9b9a..09ca5095c04e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -381,6 +381,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); +extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); extern int nfs_wait_atomic_killable(atomic_t *p); -- cgit v1.2.3 From 9cdd1d3f1a8cea9cfe7953f45fae9ff51c37afa3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Dec 2016 18:04:47 -0500 Subject: NFS: Only look at the change attribute cache state in nfs_weak_revalidate() Just like in nfs_check_verifier(), we want to use nfs_mapping_need_revalidate_inode() to check our knowledge of the change attribute is up to date. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cb22a9f9ae7e..8f706f3e5c05 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1273,8 +1273,8 @@ out_error: */ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) { - int error; struct inode *inode = d_inode(dentry); + int error = 0; /* * I believe we can only get a negative dentry here in the case of a @@ -1293,7 +1293,8 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - error = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (nfs_mapping_need_revalidate_inode(inode)) + error = __nfs_revalidate_inode(NFS_SERVER(inode), inode); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; -- cgit v1.2.3 From 21c3ba7e5dcdba23094fb50f6d1198faed94dac4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Dec 2016 18:40:03 -0500 Subject: NFS: Fix and clean up the access cache validity checking The access cache needs to check whether or not the mode bits, ownership, or ACL has changed or the cache has timed out. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8f706f3e5c05..fad81041f5ab 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2286,8 +2286,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str if (cache == NULL) goto out; /* Found an entry, is our attribute cache valid? */ - if (!nfs_attribute_cache_expired(inode) && - !(nfsi->cache_validity & NFS_INO_INVALID_ATTR)) + if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; err = -ECHILD; if (!may_block) @@ -2335,12 +2334,12 @@ static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, cache = NULL; if (cache == NULL) goto out; - err = nfs_revalidate_inode_rcu(NFS_SERVER(inode), inode); - if (err) + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; res->jiffies = cache->jiffies; res->cred = cache->cred; res->mask = cache->mask; + err = 0; out: rcu_read_unlock(); return err; @@ -2492,12 +2491,13 @@ EXPORT_SYMBOL_GPL(nfs_may_open); static int nfs_execute_ok(struct inode *inode, int mask) { struct nfs_server *server = NFS_SERVER(inode); - int ret; + int ret = 0; - if (mask & MAY_NOT_BLOCK) - ret = nfs_revalidate_inode_rcu(server, inode); - else - ret = nfs_revalidate_inode(server, inode); + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) { + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + ret = __nfs_revalidate_inode(server, inode); + } if (ret == 0 && !execute_ok(inode)) ret = -EACCES; return ret; -- cgit v1.2.3 From 3f642a13359468181f29db3d8926ba36530be85e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Dec 2016 18:49:38 -0500 Subject: NFS: Remove unused function nfs_revalidate_inode_rcu() Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 9 --------- include/linux/nfs_fs.h | 1 - 2 files changed, 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e3194aa797f7..8224e96b5808 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1105,15 +1105,6 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); -int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) -{ - if (!(NFS_I(inode)->cache_validity & - (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) - && !nfs_attribute_cache_expired(inode)) - return NFS_STALE(inode) ? -ESTALE : 0; - return -ECHILD; -} - static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index cb631973839a..fe2e7810ae80 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -343,7 +343,6 @@ extern int nfs_open(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); -extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern bool nfs_mapping_need_revalidate_inode(struct inode *inode); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); -- cgit v1.2.3 From 187e593d2779fb92ae1de06f873d6e192ba35d88 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 16 Dec 2016 18:51:15 -0500 Subject: NFS: Clean up nfs_attribute_timeout() It can be made static. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 14 +++++++------- include/linux/nfs_fs.h | 1 - 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8224e96b5808..a0cd79646c1b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -160,6 +160,13 @@ int nfs_sync_mapping(struct address_space *mapping) return ret; } +static int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); +} + static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags) { unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity); @@ -1076,13 +1083,6 @@ out: return status; } -int nfs_attribute_timeout(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); -} - int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fe2e7810ae80..f1da8c8dd473 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -340,7 +340,6 @@ extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); extern int nfs_open(struct inode *, struct file *); -extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); -- cgit v1.2.3 From 1c48cee83bc2631ab8533311d594aaafe81d8aa9 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 14 Dec 2016 16:31:55 -0500 Subject: pNFS/flexfiles: delete deviceid, don't mark inactive Instead of marking a device inactive, remove it from the cache entirely. Flexfiles has a way to report errors back to the server, so we don't want to stop devices from being tried again for 120 seconds. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++++-- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9e111d07f667..45962fe5098c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1126,7 +1126,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -EPIPE: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: @@ -1175,7 +1176,8 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, default: dprintk("%s DS connection error %d\n", __func__, task->tk_status); - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, + &devid->deviceid); } /* FIXME: Need to prevent infinite looping here. */ return -NFS4ERR_RESET_TO_PNFS; diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3cc39d1c1206..e5a6f248697b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -177,7 +177,7 @@ out_err: static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, struct nfs4_deviceid_node *devid) { - nfs4_mark_deviceid_unavailable(devid); + nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); if (!ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); -- cgit v1.2.3 From cfd278c280f997cf2fe4662e0acab0fe465f637b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Dec 2016 11:19:31 +1100 Subject: NFSv4.1: nfs4_fl_prepare_ds must be careful about reporting success. Various places assume that if nfs4_fl_prepare_ds() turns a non-NULL 'ds', then ds->ds_clp will also be non-NULL. This is not necessasrily true in the case when the process received a fatal signal while nfs4_pnfs_ds_connect is waiting in nfs4_wait_ds_connect(). In that case ->ds_clp may not be set, and the devid may not recently have been marked unavailable. So add a test for ds_clp == NULL and return NULL in that case. Fixes: c23266d532b4 ("NFS4.1 Fix data server connection race") Signed-off-by: NeilBrown Acked-by: Olga Kornievskaia Acked-by: Adamson, Andy Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayoutdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index a5589b791439..f956ca20a8a3 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -282,7 +282,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) s->nfs_client->cl_minorversion); out_test_devid: - if (filelayout_test_devid_unavailable(devid)) + if (ret->ds_clp == NULL || + filelayout_test_devid_unavailable(devid)) ret = NULL; out: return ret; -- cgit v1.2.3 From 3f8f25489fa62437530f654041504936d377d204 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Dec 2016 11:33:13 +1100 Subject: NFSv4: ensure __nfs4_find_lock_state returns consistent result. If a file has both flock locks and OFD locks, then it is possible that two different nfs4 lock states could apply to file accesses from a single process. It is not possible to know, efficiently, which one is "correct". Presumably the state which represents a lock that covers the region undergoing IO would be the "correct" one to use, but finding that has a non-trivial cost and would provide miniscule value. Currently we just return whichever is first in the list, which could result in inconsistent behaviour if an application ever put it self in this position. As consistent behaviour is preferable (when perfectly correct behaviour is not available), change the search to return a consistent result in this circumstance. Specifically: if there is both a flock and OFD lock state, always return the flock one. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 95baf7d340f0..cf869802ff23 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -797,21 +797,33 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) /* * Search the state->lock_states for an existing lock_owner - * that is compatible with current->files + * that is compatible with either of the given owners. + * If the second is non-zero, then the first refers to a Posix-lock + * owner (current->files) and the second refers to a flock/OFD + * owner (struct file*). In that case, prefer a match for the first + * owner. + * If both sorts of locks are held on the one file we cannot know + * which stateid was intended to be used, so a "correct" choice cannot + * be made. Failing that, a "consistent" choice is preferable. The + * consistent choice we make is to prefer the first owner, that of a + * Posix lock. */ static struct nfs4_lock_state * __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, fl_owner_t fl_owner2) { - struct nfs4_lock_state *pos; + struct nfs4_lock_state *pos, *ret = NULL; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner != fl_owner && - pos->ls_owner != fl_owner2) - continue; - atomic_inc(&pos->ls_count); - return pos; + if (pos->ls_owner == fl_owner) { + ret = pos; + break; + } + if (pos->ls_owner == fl_owner2) + ret = pos; } - return NULL; + if (ret) + atomic_inc(&ret->ls_count); + return ret; } /* -- cgit v1.2.3 From 86cfb0418537460baf0de0b5e9253784be27a6f9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 19 Dec 2016 11:48:23 +1100 Subject: NFS: Don't disconnect open-owner on NFS4ERR_BAD_SEQID When an NFS4ERR_BAD_SEQID is received the open-owner is removed from the ->state_owners rbtree so that it will no longer be used. If any stateids attached to this open-owner are still in use, and if a request using one gets an NFS4ERR_BAD_STATEID reply, this can for bad. The state is marked as needing recovery and the nfs4_state_manager() is scheduled to clean up. nfs4_state_manager() finds states to be recovered by walking the state_owners rbtree. As the open-owner is not in the rbtree, the bad state is not found so nfs4_state_manager() completes having done nothing. The request is then retried, with a predicatable result (indefinite retries). If the stateid is for a delegation, this open_owner will be used to open files when the delegation is returned. For that to work, a new open-owner needs to be presented to the server. This patch changes NFS4ERR_BAD_SEQID handling to leave the open-owner in the rbtree but updates the 'create_time' so it looks like a new open-owner. With this the indefinite retries no longer happen. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cf869802ff23..1d152f4470cd 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -494,21 +494,18 @@ nfs4_alloc_state_owner(struct nfs_server *server, } static void -nfs4_drop_state_owner(struct nfs4_state_owner *sp) -{ - struct rb_node *rb_node = &sp->so_server_node; - - if (!RB_EMPTY_NODE(rb_node)) { - struct nfs_server *server = sp->so_server; - struct nfs_client *clp = server->nfs_client; - - spin_lock(&clp->cl_lock); - if (!RB_EMPTY_NODE(rb_node)) { - rb_erase(rb_node, &server->state_owners); - RB_CLEAR_NODE(rb_node); - } - spin_unlock(&clp->cl_lock); - } +nfs4_reset_state_owner(struct nfs4_state_owner *sp) +{ + /* This state_owner is no longer usable, but must + * remain in place so that state recovery can find it + * and the opens associated with it. + * It may also be used for new 'open' request to + * return a delegation to the server. + * So update the 'create_time' so that it looks like + * a new state_owner. This will cause the server to + * request an OPEN_CONFIRM to start a new sequence. + */ + sp->so_seqid.create_time = ktime_get(); } static void nfs4_free_state_owner(struct nfs4_state_owner *sp) @@ -1113,7 +1110,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid); if (status == -NFS4ERR_BAD_SEQID) - nfs4_drop_state_owner(sp); + nfs4_reset_state_owner(sp); if (!nfs4_has_session(sp->so_server->nfs_client)) nfs_increment_seqid(status, seqid); } -- cgit v1.2.3 From b6808145ad2aa625b962fc55f30484091d5e8fe7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Nov 2016 13:34:16 -0500 Subject: NFSv4: Add encode/decode of the layoutreturn op in OPEN_DOWNGRADE While we do not need to return the RW layout when downgrading from a read/write open state to read-only, we might want to do so in order to reduce the burden on the metadataserver so that it does not need to check for changed data when responding to GETATTR requests. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1af6268a7d8c..8ccb34cf0c19 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -502,11 +502,13 @@ static int nfs4_stat_to_errno(int); (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_layoutreturn_maxsz + \ encode_open_downgrade_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ + decode_layoutreturn_maxsz + \ decode_open_downgrade_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -2356,6 +2358,8 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); encode_open_downgrade(xdr, args, &hdr); encode_nops(&hdr); } @@ -6151,6 +6155,12 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status) goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } status = decode_open_downgrade(xdr, res); out: return status; -- cgit v1.2.3 From e71708d4df1d4b81427badb9ac4bc4a813338b17 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Nov 2016 10:56:38 -0500 Subject: pNFS: Return RW layouts on OPEN_DOWNGRADE If the client holds no more writeable open state, and does not hold a write delegation, then send a layoutreturn as part of the OPEN_DOWNGRADE. We do this only for writes, since some layout drivers may require you to also hold a read layout if you are doing a R/W workload. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 896df7bdf85f..59554f3adf29 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1251,6 +1251,7 @@ bool pnfs_roc(struct inode *ino, nfs4_stateid stateid; enum pnfs_iomode iomode = 0; bool layoutreturn = false, roc = false; + bool skip_read = false; if (!nfs_have_layout(ino)) return false; @@ -1270,18 +1271,27 @@ retry: } /* no roc if we hold a delegation */ - if (nfs4_check_delegation(ino, FMODE_READ)) - goto out_noroc; + if (nfs4_check_delegation(ino, FMODE_READ)) { + if (nfs4_check_delegation(ino, FMODE_WRITE)) + goto out_noroc; + skip_read = true; + } list_for_each_entry(ctx, &nfsi->open_files, list) { state = ctx->state; + if (state == NULL) + continue; /* Don't return layout if there is open file state */ - if (state != NULL && state->state != 0) + if (state->state & FMODE_WRITE) goto out_noroc; + if (state->state & FMODE_READ) + skip_read = true; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { + if (skip_read && lseg->pls_range.iomode == IOMODE_READ) + continue; /* If we are sending layoutreturn, invalidate all valid lsegs */ if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags)) continue; -- cgit v1.2.3 From a5f925bccce7b0dc083f0c5a8652600881cc38ab Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Dec 2016 09:47:32 -0500 Subject: NFS: Don't abuse NFS_INO_REVAL_FORCED in nfs_post_op_update_inode_locked() The NFS_INO_REVAL_FORCED flag now really only has meaning for the case when we've just been handed a delegation for a file that was already cached, and we're unsure about that cache. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a0cd79646c1b..5561d78b7998 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1550,13 +1550,6 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr { unsigned long invalid = NFS_INO_INVALID_ATTR; - /* - * Don't revalidate the pagecache if we hold a delegation, but do - * force an attribute update - */ - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED; - if (S_ISDIR(inode->i_mode)) invalid |= NFS_INO_INVALID_DATA; nfs_set_cache_invalid(inode, invalid); -- cgit v1.2.3 From 9413a1a1bf5d58db610e3d9ba500f4150afa55ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Dec 2016 11:36:41 -0500 Subject: NFSv4: Also ask for attributes when downgrading to a READ-only state If we're downgrading from a READ+WRITE mode to a READ-only mode, then ask for cache consistency attributes so that we avoid the revalidation in nfs_close_context() Fixes: 3947b74d0f9d ("NFSv4: Don't request a GETATTR on open_downgrade.") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0d9fa18aa243..872ff6756723 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3200,9 +3200,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; } - if (calldata->arg.fmode == 0) { + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; + if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { /* Close-to-open cache consistency revalidation */ if (!nfs4_have_delegation(inode, FMODE_READ)) calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; -- cgit v1.2.3 From d8d849835eb2082ea17655538a83fa467633927f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Dec 2016 12:14:44 -0500 Subject: NFSv4: Place the GETATTR operation before the CLOSE In order to benefit from the DENY share lock protection, we should put the GETATTR operation before the CLOSE. Otherwise, we might race with a Windows machine that thinks it is now safe to modify the file. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++-- fs/nfs/nfs4xdr.c | 16 ++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 872ff6756723..c3263114c6b8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3147,7 +3147,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); - nfs_refresh_inode(calldata->inode, calldata->res.fattr); + nfs_refresh_inode(calldata->inode, &calldata->fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); } @@ -3215,7 +3215,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) nfs4_map_atomic_open_share(NFS_SERVER(inode), calldata->arg.fmode, 0); - nfs_fattr_init(calldata->res.fattr); + if (calldata->res.fattr == NULL) + calldata->arg.bitmask = NULL; + else if (calldata->arg.bitmask == NULL) + calldata->res.fattr = NULL; calldata->timestamp = jiffies; if (nfs4_setup_sequence(NFS_SERVER(inode), &calldata->arg.seq_args, @@ -3282,6 +3285,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; + nfs_fattr_init(&calldata->fattr); calldata->arg.fmode = 0; calldata->lr.arg.ld_private = &calldata->lr.ld_private; calldata->res.fattr = &calldata->fattr; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8ccb34cf0c19..6f2365b99fb4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2279,9 +2279,9 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); - encode_close(xdr, args, &hdr); if (args->bitmask != NULL) encode_getfattr(xdr, args->bitmask, &hdr); + encode_close(xdr, args, &hdr); encode_nops(&hdr); } @@ -6494,16 +6494,12 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; } + if (res->fattr != NULL) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_close(xdr, res); - if (status != 0) - goto out; - /* - * Note: Server may do delete on close for this file - * in which case the getattr call will fail with - * an ESTALE error. Shouldn't be a problem, - * though, since fattr->valid will remain unset. - */ - decode_getfattr(xdr, res->fattr, res->server); out: return status; } -- cgit v1.2.3 From f07d4a31ccd7dbe3ca49bedc8298245d6877a43e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Dec 2016 10:34:14 -0500 Subject: NFS: Retry the CLOSE if the embedded GETATTR is rejected with EACCES If our CLOSE RPC call is rejected with an EACCES call, then we should remove the GETATTR call from the compound RPC and retry. This could potentially happen when there is a conflict between an ACL denying attribute reads and our use of SP4_MACH_CRED. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c3263114c6b8..4b66b0c469cd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3122,6 +3122,16 @@ static void nfs4_close_done(struct rpc_task *task, void *data) res_stateid = &calldata->res.stateid; renew_lease(server, calldata->timestamp); break; + case -NFS4ERR_ACCESS: + if (calldata->arg.bitmask != NULL) { + calldata->arg.bitmask = NULL; + calldata->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; + + } + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: -- cgit v1.2.3 From 8ac2b42238f549241a4755de40fd161fba3de438 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Dec 2016 10:23:10 -0500 Subject: NFSv4: Retry the DELEGRETURN if the embedded GETATTR is rejected with EACCES If our DELEGRETURN RPC call is rejected with an EACCES call, then we should remove the GETATTR call from the compound RPC and retry. This could potentially happen when there is a conflict between an ACL denying attribute reads and our use of SP4_MACH_CRED. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++++ fs/nfs/nfs4xdr.c | 11 +++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4b66b0c469cd..6dcbc5defb7a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5695,6 +5695,14 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_STALE_STATEID: task->tk_status = 0; break; + case -NFS4ERR_ACCESS: + if (data->args.bitmask) { + data->args.bitmask = NULL; + data->res.fattr = NULL; + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } default: if (nfs4_async_handle_error(task, data->res.server, NULL, NULL) == -EAGAIN) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6f2365b99fb4..e9255cb453e6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2705,7 +2705,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_putfh(xdr, args->fhandle, &hdr); if (args->lr_args) encode_layoutreturn(xdr, args->lr_args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); encode_nops(&hdr); } @@ -6972,9 +6973,11 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, if (status) goto out; } - status = decode_getfattr(xdr, res->fattr, res->server); - if (status != 0) - goto out; + if (res->fattr) { + status = decode_getfattr(xdr, res->fattr, res->server); + if (status != 0) + goto out; + } status = decode_delegreturn(xdr); out: return status; -- cgit v1.2.3